MODULE Logistic_Regression

! A package for fitting linear logistic models by iteratively re-weighted
! least squares.

! The model fitted is that the probability of a `success' is:

! p = 1 - 1/[1 + exp(b0 + b1.X1 + b2.X2 + ... + bk.Xk)]

! where X1, X2, ... , Xk are the predictor variables, and the coefficients
! b0, b1, b2, ..., bk are to be determined.

! N.B. The residual variance used here in estimating standard errors is the
!      larger of 1.0 and that from the weighted least squares calculations;
!      it is not the theoretical residual variance (1.0) assuming a binomial
!      distribution about the logistic curve.   If the fit of the logistic
!      is poor, the standard errors of the coefficients will be much larger
!      than those based on the theoretical variance of 1.0.

! The calculation of chi-squared was corrected on 9 August 2003.
! My thanks to Zhang Guan

! By Alan Miller
! amiller @ bigpond.net.au
! users.bigpond.net.au/amiller/

! Latest revision - 9 August 2003

USE lsq
IMPLICIT NONE

PUBLIC  :: logistic

CONTAINS


SUBROUTINE logistic(ngroups, x, k, s, n, chisq, devnce, ndf, beta, se_beta, &
                    ier, cov_beta, fit, stdres)

! Input arguments:

! ngroups      The number of groups of observations.
! x(:,:)       x(i,j) contains the value of the j-th predictor for group i.
!              The X-values are the same for all the n(i) trials in group i.
! k            The number of predictors.   N.B. A constant will be fitted in
!              the model; do not include it in the k predictor variables.
! s(:), n(:)   s(i) is the number of `successes' out of the n(i) `trials' in
!              the i-th group.   In many applications, each n(i) = 1, that is
!              each case will be treated individually, and then s(i) = 0 or 1
!              for each of the two possible outcomes.

INTEGER, INTENT(IN)    :: ngroups, k, s(:), n(:)
REAL (dp), INTENT(IN)  :: x(:,:)

! Output arguments:

! chisq        The value of chi-squared on exit, when a model has been fitted.
! devnce       The deviance on exit, when a model has been fitted.
! ndf          Number of degrees of freedom.
! beta(0:)     The fitted coefficients in the logistic model.
! se_beta(0:)  Approximate standard errors of the beta coefficients.
! ier          Error indicator
!              = 0 successful termination
!              = 1 if ngroups < 2 or ndf < 0
!              = 2 if any n(i) < 0
!              = 3 if any s(i) < 0
!              = 4 if any s(i) > n(i)
!              = 5 if any X-variable is constant
!              = 6 if a singularity is detected
!              = 7 if any beta(i) is tending to +/- infinity
!              = 8 failure to converge in 20 iterations

REAL (dp), INTENT(OUT)  :: chisq, devnce, beta(0:), se_beta(0:)
INTEGER, INTENT(OUT)    :: ndf, ier

! Optional output arguments:

! cov_beta(0:,0:)  Approximate covariance matrix of the fitted coefficients.
! fit(:)           The fitted probabilities of a success for each group.
! stdres(:)        Vector of standardized residuals.

REAL (dp), INTENT(OUT), OPTIONAL  :: cov_beta(0:,0:), fit(:), stdres(:)

! Local variables

INTEGER    :: i, iter, j, ncov, pos
REAL (dp)  :: propn(ngroups), p(ngroups), wt(ngroups), xrow(0:k), db(0:k),  &
              bnew(0:k), dev_new, xb, pnew(ngroups), wnew(ngroups), a, b,   &
              range(k), var, e(ngroups), hii
LOGICAL    :: lindep(0:k)
REAL (dp), ALLOCATABLE  :: covmat(:)

! Initial checks

ier = 0
ndf = ngroups - k - 1
IF (ngroups < 2 .OR. ndf < 0) THEN
  ier = 1
  RETURN
END IF
IF (ANY(n(1:ngroups) < 0)) THEN
  ier = 2
  RETURN
END IF
IF (ANY(s(1:ngroups) < 0)) THEN
  ier = 3
  RETURN
END IF
IF (ANY(s(1:ngroups) > n(1:ngroups))) THEN
  ier = 4
  RETURN
END IF

! Calculate ranges of the X-variables to use in testing whether any beta
! is tending to +/- infinity.  Also test that no variable is constant.
DO i = 1, k
  a = MAXVAL(x(1:ngroups,i))
  b = MINVAL(x(1:ngroups,i))
  range(i) = a - b
  IF (range(i) < EPSILON(0.0_dp) * (ABS(a) + ABS(b))) THEN
    ier = 5
    RETURN
  END IF
END DO

! Start with all beta's = 0 and weights = 1.

beta(0:k) = 0.0_dp
wt(1:ngroups) = 1.0_dp
p(1:ngroups) = 0.5_dp

! propn stores the sample proportions, i.e. s(i) / n(i)

propn(1:ngroups) = REAL(s(1:ngroups), KIND=dp) / n(1:ngroups)

iter = 1

! Start of iterative cycle

DO
  CALL startup(k, .TRUE.)
  DO i = 1, ngroups
    IF (iter == 1) THEN
      xrow(0) = 0.25_dp
      xrow(1:k) = 0.25_dp*x(i, 1:k)
    ELSE
      xrow(0) = p(i)*(1.0_dp - p(i))
      xrow(1:k) = p(i)*(1.0_dp - p(i))*x(i, 1:k)
    END IF
    CALL includ(wt(i), xrow, propn(i)-p(i))
  END DO

! Test for a singularity

  CALL sing(lindep, ier)
  IF (ier /= 0) THEN
    DO i = 1, k
      IF (lindep(i)) THEN
        WRITE(*, '(a, i6, a)') ' Variable number ', i,  &
                               ' is linearly dependent upon earlier variables'
      END IF
    END DO
    ier = 6
    RETURN
  END IF

  CALL regcf(db, k+1, ier)

10 bnew = beta(0:k) + db

! Calculate new p(i)'s, weights & deviance

  dev_new = 0.0_dp
  DO i = 1, ngroups
    xb = DOT_PRODUCT( x(i,1:k), bnew(1:k) ) + bnew(0)
    xb = EXP(xb)
    pnew(i) = xb / (1.0_dp + xb)
    wnew(i) = REAL(n(i), KIND=dp) / (pnew(i)*(1.0_dp - pnew(i)))
    IF (iter == 1) wnew(i) = SQRT(wnew(i))
    IF (s(i) > 0) dev_new = dev_new + s(i)*LOG(propn(i)/pnew(i))
    IF (s(i) < n(i)) dev_new = dev_new +  &
                     (n(i)-s(i))*LOG((1.0_dp-propn(i))/(1.0_dp-pnew(i)))
  END DO
  dev_new = 2 * dev_new

! If deviance has increased, reduce the step size.

  IF (iter > 2) THEN
    IF (dev_new > devnce*1.0001_dp) THEN
      db = 0.5_dp * db
      GO TO 10
    END IF
  END IF

! Replace betas, weights & p's with new values

  beta(0:k) = bnew(0:k)
  wt = wnew
  p(1:ngroups) = pnew

! Test for convergence

  IF (iter > 2 .AND. devnce - dev_new < 0.0001_dp) EXIT
  devnce = dev_new
  iter = iter + 1
  IF (iter > 20) THEN
    ier = 8
    RETURN
  END IF

! Test for a very large beta

  DO i = 1, k
    IF (ABS(beta(i))*range(i) > 30.0_dp) THEN
      WRITE(*, '(a, i4, a)') ' Coefficient for variable no.', i,  &
                             ' tending to infinity'
      ier = 7
      RETURN
    END IF
  END DO
END DO

e = n(1:ngroups)*p(1:ngroups)
chisq = SUM( (s(1:ngroups) - e)**2 / (e * (1.0_dp - p(1:ngroups))) )
devnce = dev_new

! Calculate the approximate covariance matrix for the beta's, if ndf > 0.

IF (ndf > 0) THEN
  ncov = (k+1)*(k+2)/2
  ALLOCATE( covmat(ncov) )
  CALL cov(k+1, var, covmat, ncov, se_beta, ier)
  IF (var < 1.0_dp) THEN
    covmat = covmat / var
    se_beta = se_beta / SQRT(var)
  END IF
  IF(PRESENT(cov_beta)) THEN
    pos = 1
    DO i = 0, k
      cov_beta(i,i) = covmat(pos)
      pos = pos + 1
      DO j = i+1, k
        cov_beta(i,j) = covmat(pos)
        cov_beta(j,i) = covmat(pos)
        pos = pos + 1
      END DO
    END DO
  END IF
END IF

IF(PRESENT(fit)) fit(1:ngroups) = p

IF (PRESENT(stdres)) THEN
  DO i = 1, ngroups
    xrow(0) = p(i)*(1.0_dp - p(i))
    xrow(1:k) = p(i)*(1.0_dp - p(i))*x(i, 1:k)
    CALL hdiag(xrow, k+1, hii, ier)
    stdres(i) = (s(i) - n(i)*p(i)) /  &
                SQRT(n(i)*p(i)*(1.0_dp - p(i))*(1.0_dp - hii))
  END DO
END IF

IF (ALLOCATED(covmat)) DEALLOCATE( covmat )

RETURN
END SUBROUTINE logistic


FUNCTION chi_squared(ndf, chi2) RESULT(prob)

! Calculate the chi-squared distribution function
! ndf  = number of degrees of freedom
! chi2 = chi-squared value
! prob = probability of a chi-squared value <= chi2 (i.e. the left-hand
!        tail area)

INTEGER, INTENT(IN)    :: ndf
REAL (dp), INTENT(IN)  :: chi2
REAL (dp)              :: prob

! Local variables

REAL (dp)  :: half = 0.5_dp, x, p

x = half * chi2
p = half * REAL(ndf)
prob = gammad(x, p)

RETURN
END FUNCTION chi_squared
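
! ---------------------------------------------------------------------------
! Illustrative sketch only (not part of the original interface).  chi_squared
! above returns the LEFT-hand tail area, so the p-value usually quoted for
! the deviance or Pearson chi-squared goodness-of-fit statistic is its
! complement.  A hypothetical convenience wrapper might look like this.

FUNCTION chi2_upper_tail(ndf, chi2) RESULT(pvalue)

! Upper-tail area of the chi-squared distribution with ndf degrees of
! freedom, i.e. Pr(X >= chi2); small values indicate a poor fit.

INTEGER, INTENT(IN)    :: ndf
REAL (dp), INTENT(IN)  :: chi2
REAL (dp)              :: pvalue

pvalue = 1.0_dp - chi_squared(ndf, chi2)

RETURN
END FUNCTION chi2_upper_tail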
FUNCTION gammad(x, p) RESULT(gamma_prob)

! ALGORITHM AS239  APPL. STATIST. (1988) VOL. 37, NO. 3

! Computation of the Incomplete Gamma Integral

! Auxiliary functions required: ALNORM = algorithm AS66 (included) & LNGAMMA

! Converted to be compatible with ELF90 by Alan Miller
! N.B. The return parameter IFAULT has been removed as ELF90 allows only
! one output parameter from functions.  An error message is issued instead.

! This revision - 15 October 1996

REAL (dp), INTENT(IN)  :: x, p
REAL (dp)              :: gamma_prob

! Local variables

REAL (dp)  :: pn1, pn2, pn3, pn4, pn5, pn6, tol = 1.d-14, oflo = 1.d+37,    &
              xbig = 1.d+8, arg, c, rn, a, b, one = 1._dp, zero = 0._dp, an, &
              two = 2._dp, elimit = -88._dp, plimit = 1000._dp, three = 3._dp, &
              nine = 9._dp

gamma_prob = zero

! Check that we have valid values for X and P

IF (p <= zero .OR. x < zero) THEN
  WRITE(*, *)'Error: Function gammad.  1st argument < 0 or 2nd argument <= 0'
  RETURN
END IF
IF (x == zero) RETURN

! Use a normal approximation if P > PLIMIT

IF (p > plimit) THEN
  pn1 = three * SQRT(p) * ((x / p) ** (one / three) + one / (nine * p) - one)
  gamma_prob = alnorm(pn1, .false.)
  RETURN
END IF

! If X is extremely large compared to P then set gamma_prob = 1

IF (x > xbig) THEN
  gamma_prob = one
  RETURN
END IF

IF (x <= one .OR. x < p) THEN

! Use Pearson's series expansion.
! (Note that P is not large enough to force overflow in LNGAMMA)

  arg = p * LOG(x) - x - lngamma(p + one)
  c = one
  gamma_prob = one
  a = p
  DO
    a = a + one
    c = c * x / a
    gamma_prob = gamma_prob + c
    IF (c < tol) EXIT
  END DO
  arg = arg + LOG(gamma_prob)
  gamma_prob = zero
  IF (arg >= elimit) gamma_prob = EXP(arg)

ELSE

! Use a continued fraction expansion

  arg = p * LOG(x) - x - lngamma(p)
  a = one - p
  b = a + x + one
  c = zero
  pn1 = one
  pn2 = x
  pn3 = x + one
  pn4 = x * b
  gamma_prob = pn3 / pn4
  DO
    a = a + one
    b = b + two
    c = c + one
    an = a * c
    pn5 = b * pn3 - an * pn1
    pn6 = b * pn4 - an * pn2
    IF (ABS(pn6) > zero) THEN
      rn = pn5 / pn6
      IF (ABS(gamma_prob - rn) <= MIN(tol, tol * rn)) EXIT
      gamma_prob = rn
    END IF
    pn1 = pn3
    pn2 = pn4
    pn3 = pn5
    pn4 = pn6
    IF (ABS(pn5) >= oflo) THEN

! Re-scale terms in continued fraction if terms are large

      pn1 = pn1 / oflo
      pn2 = pn2 / oflo
      pn3 = pn3 / oflo
      pn4 = pn4 / oflo
    END IF
  END DO
  arg = arg + LOG(gamma_prob)
  gamma_prob = one
  IF (arg >= elimit) gamma_prob = one - EXP(arg)
END IF

RETURN
END FUNCTION gammad


FUNCTION alnorm(x, upper) RESULT(norm_prob)

! Algorithm AS66 Applied Statistics (1973) vol.22, no.3

! Evaluates the tail area of the standardised normal curve
! from x to infinity if upper is .true. or
! from minus infinity to x if upper is .false.

REAL (dp), INTENT(IN)  :: x
LOGICAL, INTENT(IN)    :: upper
REAL (dp)              :: norm_prob

! Local variables

REAL (dp)  :: zero = 0.0_dp, one = 1.0_dp, half = 0.5_dp
REAL (dp)  :: con = 1.28_dp, z, y, ltone = 7.0_dp, utzero = 18.66_dp
REAL (dp)  :: p = 0.398942280444_dp, q = 0.39990348504_dp,   &
              r = 0.398942280385_dp, a1 = 5.75885480458_dp,  &
              a2 = 2.62433121679_dp, a3 = 5.92885724438_dp,  &
              b1 = -29.8213557807_dp, b2 = 48.6959930692_dp, &
              c1 = -3.8052D-8, c2 = 3.98064794D-4,           &
              c3 = -0.151679116635_dp, c4 = 4.8385912808_dp, &
              c5 = 0.742380924027_dp, c6 = 3.99019417011_dp, &
              d1 = 1.00000615302_dp, d2 = 1.98615381364_dp,  &
              d3 = 5.29330324926_dp, d4 = -15.1508972451_dp, &
              d5 = 30.789933034_dp
LOGICAL    :: up

up = upper
z = x
IF(z >= zero) GO TO 10
up = .NOT. up
z = -z
10 IF(z <= ltone .OR. up .AND. z <= utzero) GO TO 20
norm_prob = zero
GO TO 40
20 y = half*z*z
IF(z > con) GO TO 30
norm_prob = half - z*(p-q*y/(y+a1+b1/(y+a2+b2/(y+a3))))
GO TO 40
30 norm_prob = r*EXP(-y)/(z+c1+d1/(z+c2+d2/(z+c3+d3/(z+c4+d4/(z+c5+d5/(z+c6))))))
40 IF(.NOT. up) norm_prob = one - norm_prob

RETURN
END FUNCTION alnorm
FUNCTION lngamma(z) RESULT(lanczos)

!  Uses Lanczos-type approximation to ln(gamma) for z > 0.

!  Reference:
!       Lanczos, C. 'A precision approximation of the gamma
!               function', J. SIAM Numer. Anal., B, 1, 86-96, 1964.

!  Accuracy: About 14 significant digits except for small regions
!            in the vicinity of 1 and 2.

!  Programmer: Alan Miller
!              1 Creswick Street, Brighton, Vic. 3187, Australia

!  Latest revision - 14 October 1996

REAL(dp), INTENT(IN)  :: z
REAL(dp)              :: lanczos

! Local variables

REAL(dp)  :: a(9) = (/ 0.9999999999995183_dp, 676.5203681218835_dp,    &
                       -1259.139216722289_dp, 771.3234287757674_dp,    &
                       -176.6150291498386_dp, 12.50734324009056_dp,    &
                       -0.1385710331296526_dp, 0.9934937113930748D-05, &
                       0.1659470187408462D-06 /), zero = 0._dp,        &
             one = 1._dp, lnsqrt2pi = 0.9189385332046727_dp,           &
             half = 0.5_dp, sixpt5 = 6.5_dp, seven = 7._dp, tmp
INTEGER   :: j

IF (z <= zero) THEN
  WRITE(*, *)'Error: zero or -ve argument for lngamma'
  lanczos = zero      ! Return a defined value on the error path
  RETURN
END IF

lanczos = zero
tmp = z + seven
DO j = 9, 2, -1
  lanczos = lanczos + a(j)/tmp
  tmp = tmp - one
END DO
lanczos = lanczos + a(1)
lanczos = LOG(lanczos) + lnsqrt2pi - (z + sixpt5) + (z - half)*LOG(z + sixpt5)

RETURN
END FUNCTION lngamma

END MODULE Logistic_Regression
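
! ---------------------------------------------------------------------------
! Example driver (a minimal sketch, not part of Alan Miller's original file).
! It assumes the lsq module is compiled and linked, and uses made-up data:
! 10 groups, one predictor, with n(i) trials and s(i) successes per group.
! The deviance is converted to an approximate goodness-of-fit p-value with
! chi_squared.  All data values below are purely illustrative.

PROGRAM logistic_example
USE Logistic_Regression
IMPLICIT NONE

INTEGER, PARAMETER :: ngroups = 10, k = 1
INTEGER    :: s(ngroups), n(ngroups), ndf, ier
REAL (dp)  :: x(ngroups,k), chisq, devnce, beta(0:k), se_beta(0:k), pvalue

! Hypothetical dose-response style data: success counts rise with x.
x(:,1) = (/ 1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp,  &
            6.0_dp, 7.0_dp, 8.0_dp, 9.0_dp, 10.0_dp /)
n = 20
s = (/ 1, 2, 3, 5, 8, 11, 14, 16, 18, 19 /)

CALL logistic(ngroups, x, k, s, n, chisq, devnce, ndf, beta, se_beta, ier)
IF (ier /= 0) THEN
  WRITE(*, '(a, i2)') ' logistic returned error code ', ier
  STOP
END IF

WRITE(*, '(a, 2f10.4)') ' Intercept, slope: ', beta(0), beta(1)
WRITE(*, '(a, 2f10.4)') ' Standard errors:  ', se_beta(0), se_beta(1)
WRITE(*, '(a, f10.4, a, i3, a)') ' Deviance = ', devnce, ' on ', ndf, ' d.f.'
pvalue = 1.0_dp - chi_squared(ndf, devnce)
WRITE(*, '(a, f8.4)') ' Approx. goodness-of-fit p-value ', pvalue

END PROGRAM logistic_example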