module DEMO_UTILS
use LSQ
implicit none
private
public  :: SEPARATE_TEXT, PRINTC

contains


subroutine SEPARATE_TEXT(TEXT, NAME, NUMBER)

!     Takes the character string in `text' and separates it into `names'.
!     This version only allows spaces as delimiters.

character (len = *), intent(in out)            :: TEXT
character (len = *), dimension(:), intent(out) :: NAME
integer, intent(out)                           :: NUMBER

!     Local variables

integer  :: LENGTH, LEN_NAME, POS1, POS2, NCHARS

LENGTH = len_trim(TEXT)
NUMBER = 0
if (LENGTH == 0) then
  return
end if

LEN_NAME = len(NAME(1))
if (LEN_NAME < 1) then
  return
end if

POS1 = 1
do
  do                                   ! Remove any leading blanks
    if (TEXT(POS1:POS1) == " ") then
      TEXT(POS1:) = TEXT(POS1+1:)
      LENGTH = LENGTH - 1
      cycle
    else
      exit
    end if
  end do
  POS2 = index(TEXT(POS1:LENGTH), " ") ! Find position of next blank
  if (POS2 > 0) then
    POS2 = POS2 + POS1 - 1
  else                                 ! Last name, no blank found
    POS2 = LENGTH + 1
  end if
  NUMBER = NUMBER + 1
  NCHARS = min(LEN_NAME, POS2-POS1)
  NAME(NUMBER) = TEXT(POS1:POS1+NCHARS-1)
  POS1 = POS2 + 1                      ! Move past the blank
  if (POS1 > LENGTH) then
    return
  end if
end do

return
end subroutine SEPARATE_TEXT



subroutine PRINTC(INVAR, CORMAT, DIMC, YCORR, VNAME, YNAME, IOPT, LOUT, IER)

!     Print (partial) correlations calculated using partial_corr to unit lout.
!     If IOPT = 0, print correlations with the Y-variable only.

integer, intent(in)                          :: INVAR, DIMC, IOPT, LOUT
integer, intent(out)                         :: IER
real (kind=DP), dimension(:), intent(in)     :: CORMAT, YCORR
character (len=*), dimension(0:), intent(in) :: VNAME
character (len=*), intent(in)                :: YNAME

!     Local variables.

integer                   :: NROWS, J1, J2, I1, I2, ROW, UPOS, TPOS, LAST
character (len=74)        :: TEXT
character (len= 9), save  :: CHAR1 = " 1.0     "

!     Check validity of arguments

IER = 0
if (INVAR >= NCOL) then
  IER = 1
end if
if (NCOL <= 1) then
  IER = IER + 2
end if
NROWS = NCOL - INVAR
if (DIMC <= NROWS*(NROWS-1)/2) then
  IER = IER + 4
end if
if (IER /= 0) then
  return
end if

!     If iopt.NE.0 output heading

if (IOPT /= 0) then
  write(unit=LOUT, fmt="(t6, 'Correlation matrix')")
  J1 = INVAR + 1

  do
    J2 = min(J1+6, NCOL)
    I1 = J1 - INVAR
    I2 = J2 - INVAR
    write(unit=LOUT, fmt="(t12, 7(a8, ' '))") VNAME(VORDER(J1:J2))

!     Print correlations for rows 1 to i2, columns i1 to i2.

    do ROW = 1, I2
      TEXT = " " // VNAME(VORDER(ROW+INVAR))
      if (I1 > ROW) then
        UPOS = (ROW-1) * (NROWS+NROWS-ROW) /2 + (I1-ROW)
        LAST = UPOS + I2 - I1
        write(unit=TEXT(12:74), fmt="(7(F8.5, ' '))") CORMAT(UPOS:LAST)
      else
        UPOS = (ROW-1) * (NROWS+NROWS-ROW) /2 + 1
        TPOS = 12 + 9*(ROW-I1)
        TEXT(TPOS:TPOS+8) = CHAR1
        LAST = UPOS + I2 - ROW - 1
        if (ROW < I2) then
          write(unit=TEXT(TPOS+9:74), fmt="(6(F8.5, ' '))") CORMAT(UPOS:LAST)
        end if
      end if
      write(unit=LOUT, fmt="(a)") TEXT
    end do

!     Move onto the next block of columns.

    J1 = J2 + 1
    if (J1 > NCOL) then
      exit
    end if
  end do
end if

!     Correlations with the Y-variable.

write(unit=LOUT, fmt="(/t6, 'Correlations with the dependent variable: ', a)") &
      YNAME
J1 = INVAR + 1
do
  J2 = min(J1+7, NCOL)
  write(unit=LOUT, fmt="(/' ', 8(A8, ' '))") VNAME(VORDER(J1:J2))
  write(unit=LOUT, fmt="(' ', 8(F8.5, ' '))") YCORR(J1:J2)
  J1 = J2 + 1
  if (J1 > NCOL) then
    exit
  end if
end do

return
end subroutine PRINTC

end module DEMO_UTILS



program DEMO

!   Program to demonstrate the use of module LSQ for unconstrained
!   linear least-squares regression.
!   The data on fuel consumption are from:
!   Sanford Weisberg `Applied Linear Regression', 2nd edition, 1985,
!   pages 35-36.   Publisher: Wiley

!   The program is designed so that users can easily edit it for their
!   problems.   If you are likely to have more than 500 cases then increase
!   the value of maxcases below.   Similarly, if you may have more than
!   30 predictor variables, change maxvar below.

!   The subscript 0 is used to relate to the constant in the model.
!   Module LSQ treats the constant in the model, if there is one, just as
!   any other variable.   The numbering of all arrays starts from 1 in LSQ.

! This version is for the F-compiler

use LSQ                      ! Must be the first declaration in the program
use DEMO_UTILS
implicit none

integer, parameter                   :: MAXCASES = 500, MAXVAR = 30,       &
                                        MAX_CDIM = MAXVAR*(MAXVAR+1)/2,    &
                                        LOUT = 6
integer                              :: CASENUM, NVAR, IOSTATUS, NREQ, IFAULT, &
                                        I, J, INVAR, I1, I2
integer, dimension(MAXVAR)           :: LIST
real (kind=DP)                       :: YY, WT, VAR, HII, TEMP
real (kind=DP), parameter            :: ONE = 1.0_DP
real (kind=DP), dimension(0:MAXVAR)  :: XX, BETA, STERR
real (kind=DP), dimension(1:MAXVAR)  :: YCORR
real (kind=DP), dimension(MAX_CDIM)  :: COVMAT, CORMAT
real                                 :: TMIN, R2, STD_ERR_PRED, &
                                        FITTED, STDEV_RES
real, dimension(MAXCASES, MAXVAR)    :: X
real, dimension(MAXCASES)            :: Y, RESID, STD_RESID
real, dimension(0:MAXVAR)            :: T
character (len=80)                       :: HEADING, OUTPUT
character (len= 2), dimension(50)        :: STATE
character (len= 8)                       :: Y_NAME
character (len= 8), dimension(0:MAXVAR)  :: VNAME
logical                                  :: FIT_CONST
logical, dimension(0:MAXVAR)             :: LINDEP


write(unit=*, fmt=*)"The data on fuel consumption are from:"
write(unit=*, fmt=*)"Sanford Weisberg 'Applied Linear Regression', 2nd edition, 1985,"
write(unit=*, fmt=*)"pages 35-36.   Publisher: Wiley   ISBN: 0-471-87957-6"
write(unit=*, fmt=*)

open(unit=8, file="fuelcons.dat", status="old", action="read")

!     The first line of this file contains the names of the variables.

read(unit=8, fmt="(a)") HEADING

VNAME(0) = "Constant"

!     Read the heading to get the variable names & the number of variables

call SEPARATE_TEXT(HEADING, VNAME, NVAR)

NVAR = NVAR - 2              ! nvar is the number of variables.
                             ! 1 is subtracted as the first field in this
                             ! file contains the abbreviated state name,
                             ! & 1 is subtracted for the Y-variable.
VNAME(1:NVAR) = VNAME(2:NVAR+1)
Y_NAME = VNAME(NVAR+2)

write(unit=*, fmt=*)"No. of variables =", NVAR
write(unit=*, fmt=*)"Predictor variables are:"
write(unit=*, fmt="(' ', 9a9)") VNAME(1:NVAR)
write(unit=*, fmt=*)"Dependent variable is: ", Y_NAME

FIT_CONST = .true.           ! Change to .false. if fitting a model without
                             ! a constant.

call STARTUP(NVAR, FIT_CONST)          ! Initializes the QR-factorization

!     Read in the data, one line at a time, and progressively update the
!     QR-factorization.

WT = ONE
CASENUM = 1

do
  read(unit=8, fmt=*, iostat=IOSTATUS) STATE(CASENUM), X(CASENUM, 1:NVAR), &
       Y(CASENUM)
  if (IOSTATUS > 0) then               ! Error in data
    cycle
  end if
  if (IOSTATUS < 0) then               ! End of file
    exit
  end if

  XX(0) = ONE                          ! A one is inserted as the first
                                       ! variable if a constant is being fitted.
  XX(1:NVAR) = X(CASENUM, 1:NVAR)      ! New variables and transformed variables
                                       ! will often be generated here.
  YY = Y(CASENUM)
  call INCLUD(WT, XX, YY)
  CASENUM = CASENUM + 1
end do

write(unit=*, fmt=*)"No. of observations =", NOBS

call SING(LINDEP, IFAULT)              ! Checks for singularities

if (IFAULT == 0) then
  write(unit=*, fmt=*)"QR-factorization is not singular"
else
  do I = 1, NVAR
    if (LINDEP(I)) then
      write(unit=*, fmt=*) VNAME(I), " is exactly linearly related to earlier variables"
    end if
  end do
end if
write(unit=*, fmt=*)

!     Show correlations (INVAR = 1 for `usual' correlations)

INVAR = 1
call PARTIAL_CORR(INVAR, CORMAT, MAX_CDIM, YCORR, IFAULT)
call PRINTC(INVAR, CORMAT, MAX_CDIM, YCORR, VNAME, Y_NAME, 1, LOUT, IFAULT)
write(unit=*, fmt=*)
write(unit=*, fmt=*)"Press ENTER to continue"
read(unit=*, fmt=*)

!     Weisberg only uses variables TAX, INC, ROAD and DLIC.   These are
!     currently in positions 2, 4, 5 & 7.
!     We could have set NVAR = 4 and copied only these variables from X to
!     XX, but we will show how regressions can be performed for a subset
!     of the variables in the QR-factorization.   Routine REORDR will be used
!     to re-order the variables.

LIST(1:4) = (/ 2, 4, 5, 7/)
call REORDR(LIST, 4, 2, IFAULT)        ! Re-order so that the first 4 variables
                                       ! appear in positions 2,3,4 & 5.
                                       ! N.B. Though variables # 2, 4, 5 & 7
                                       !      will occupy positions 2-5, they
                                       !      may be in any order.
                                       ! The constant remains in position 1.
write(unit=*, fmt="(' Current order of variables:'/' ', 8a9/)")  &
      (VNAME(VORDER(1:NCOL)))

!     Calculate regression coefficients of Y against the first variable, which
!     was just a constant = 1.0, and the next 4 predictors.

call TOLSET()                          ! Calculate tolerances before calling
                                       ! subroutine regcf.
NREQ = 5                               ! i.e. Const, TAX, INC, ROAD, DLIC
call REGCF(BETA, NREQ, IFAULT)

call SS()                              ! Calculate residual sums of squares

!     Calculate covariance matrix of the regression coefficients & their
!     standard errors.

VAR = RSS(NREQ) / (NOBS - NREQ)
call COV(NREQ, VAR, COVMAT, MAX_CDIM, STERR, IFAULT)

!     Calculate t-values

T(0:NREQ-1) = BETA(0:NREQ-1) / STERR(0:NREQ-1)

!     Output regression table, residual sums of squares, and R-squared.

write(unit=*, fmt=*)
write(unit=*, fmt=*)"Variable   Regn.coeff.   Std.error  t-value   Res.sum of sq."
do I = 0, NREQ-1
  write(unit=*, fmt="(' ', a8, '  ', g12.4, '  ', g11.4, ' ', f7.2, '  ', g14.6)")  &
        VNAME(VORDER(I+1)), BETA(I), STERR(I), T(I), RSS(I+1)
end do
write(unit=*, fmt=*)

!     Output correlations of the parameter estimates

write(unit=*, fmt=*) "Covariances of parameter estimates"
I2 = NREQ
do I = 1, NREQ-1
  I1 = I2 + 1
  I2 = I2 + NREQ - I
  write(unit=OUTPUT, fmt="(' ', a8)") VNAME(VORDER(I+1))
  write(unit=OUTPUT(10*I:), fmt="(7f10.3)") COVMAT(I1:I2)
  write(unit=*, fmt="(a)") OUTPUT
end do
write(unit=*, fmt=*)

!     Now delete the variable with the smallest t-value by moving it
!     to position 5 and then repeating the calculations for the constant
!     and the next 3 variables.

J = 1
TMIN = abs(T(1))
do I = 2, NREQ-1
  if (abs(T(I)) < TMIN) then
    J = I
    TMIN = abs(T(I))
  end if
end do
J = J + 1                    ! Add 1 as the t-array started at subscript 0

write(unit=*, fmt=*)"Removing variable in position", J
call VMOVE(J, NREQ, IFAULT)
NREQ = NREQ - 1
call REGCF(BETA, NREQ, IFAULT)
call SS()
call COV(NREQ, VAR, COVMAT, MAX_CDIM, STERR, IFAULT)
T(0:NREQ-1) = BETA(0:NREQ-1) / STERR(0:NREQ-1)

!     Output regression table, residual sums of squares, and R-squared.

write(unit=*, fmt=*)
write(unit=*, fmt=*)"Variable   Regn.coeff.   Std.error  t-value   Res.sum of sq."
do I = 0, NREQ-1
  write(unit=*, fmt="(' ', a8, '  ', g12.4, '  ', g11.4, ' ', f7.2, '  ', g14.6))")  &
        VNAME(VORDER(I+1)), BETA(I), STERR(I), T(I), RSS(I+1)
end do
write(unit=*, fmt=*)

VAR = RSS(NREQ)/(NOBS - NREQ)
STDEV_RES = sqrt( VAR )
R2 = ONE - RSS(NREQ) / RSS(1)          ! RSS(1) is the rss if only the constant
                                       ! is fitted; RSS(nreq) is the rss after
                                       ! fitting the requested NREQ variables.
write(unit=*, fmt="(' R^2 =', f8.4, '     Std. devn. of residuals =', g12.4/)")      &
      R2, STDEV_RES
write(unit=*, fmt=*)"N.B. Some statistical packages wrongly call the standard deviation"
write(unit=*, fmt=*)"     of the residuals the standard error of prediction"
write(unit=*, fmt=*)

!     Calculate residuals, hii, standardized residuals & standard errors of
!     prediction.

write(unit=*, fmt=*)"Press ENTER to continue"
read(unit=*, fmt=*)

write(unit=*, fmt=*)"State     Actual    Fitted  Residual  Std.resid.  SE(prediction)"
do I = 1, NOBS
  XX(0) = ONE
  do J = 1, NREQ-1
    XX(J) = X(I, VORDER(J+1))          ! N.B. Regression coefficient j is for
                                       !      the variable vorder(j+1)
  end do
  FITTED = dot_product( BETA(0:NREQ-1), XX(0:NREQ-1) )
  RESID(I) = Y(I) - FITTED
  call HDIAG(XX, NREQ, HII, IFAULT)
  STD_RESID(I) = RESID(I) / sqrt(VAR*(ONE - HII))
!     The sqrt was omitted from the initial version of this demo in line below.
  call VARPRD(XX, NREQ, TEMP)
  STD_ERR_PRED = sqrt(TEMP)
  write(unit=*, fmt="('   ', a2, '  ', 3f10.1, f9.2, '     ', f10.0)")  &
        STATE(I), Y(I), FITTED, RESID(I), STD_RESID(I), STD_ERR_PRED

  if (I == 24) then
    write(unit=*, fmt=*)"Press ENTER to continue"
    read(unit=*, fmt=*)
    write(unit=*, fmt=*)"State     Actual    Fitted  Residual  Std.resid.  SE(prediction)"
  end if
end do

stop
end program DEMO
