!*************************************************************************************
!This FORTRAN program will compute the PATI-LOC based on:
! (a) the overall plant toxicity test distributions from Erickson 2010 
! (b) a user-specified set of experimental ecosystem (cosm) data.
!Calculations differ from Erickson 2010 by being based on CUMULATIVE PATI values
!per the 2011 addendum to this report
!
!Russell Erickson, U.S.EPA Mid-Continent Ecology Division, Duluth, MN
!erickson.russell@epa.gov, 218-529-5157
!July 25, 2011
!
!*************************************************************************************

    MODULE PATIANALYSISVARS
!Cosm Data
    INTEGER ICOSM,NCOSM                                         !Index for and Total Number of Cosm Exposures
    INTEGER COSMDURAT(100)                                      !Duration of Cosm Exposure
    CHARACTER*3 COSMID(100)                                     !Identification Code for Cosm Exposure
    INTEGER COSMEFFECT(100)                                     !Binary Effect Designation for Cosm Exposure
    INTEGER IDAT,NDAT                                           !Index for and Total Number of Concentration Measurements for Cosm Exposure
    INTEGER TIMEDAT(365)                                        !Time (day) for Concentration                                             
    REAL*8 CONCDAT(365)                                         !Measured or Estimated Concentration 
    REAL*8 LOGCONC                                              !Log10 Transform of Concentration
    INTEGER IDAY                                                !Index for Day
    REAL*8 COSMCONC(100,365)                                    !Daily Time-Series for Concentration in Cosm Exposure
!Toxicity Parameters 
    INTEGER ISAMP,NSAMP                                         !Index for and Total Number of Toxicity Relationships
    REAL*8 LOGEC50(1000),STEEP(1000)                            !LogEC50s and Steepnesses for Toxicity Relationships
    REAL*8, PARAMETER :: TAXALOGEC50MEANBEST=2.12               !Mean for LogEC50s
    REAL*8, PARAMETER :: TAXALOGEC50STDVBEST=0.37               !Standard Deviation for LogEC50s 
    REAL*8, PARAMETER :: TAXALOGSTEEPMEANBEST=-0.05             !Mean for LogSteeps
    REAL*8, PARAMETER :: TAXALOGSTEEPSTDVBEST=0.18              !Standard Deviation for LogSteeps
    INTEGER IDISCARD                                            !Index for Discarding First Group of Random Numbers
    REAL*8 XDISCARD                                             !Random Number to Discard
!PATI variables
    REAL*8 PTOX                                                 !Temporary Variable for Fraction Toxicity Effect
    REAL*8 PATI(365)                                            !Daily PATI Values
    INTEGER ASSESSPERIOD                                        !Assessment Period for Cumulative PATI Value
    REAL*8 RUNCUMPATI,MAXCUMPATI                                !Running and Maximum Running Cumulative PATI values                                
    REAL*8 COSMPATI(100)                                        !Final PATI Value for Cosm Exposure
    REAL*8 LOCPATI                                              !PATI-LOC Best Estimate from Regression                                              
    REAL*8 LOGLOCPATI                                           !Log10 PATI-LOC Best Estimate from Regression
!Variables for Logistic Regression
	INTEGER IREG,NREG                                           !Index for and Total Number of Data Points for Regression
	INTEGER REGEFF(100)                                         !Regression Dependent Variable (=COSMEFFECT)                 											
	REAL*8 REGVAR(100)                                          !Regression Independent Variable (=COSMPATI)
	INTEGER IPLOT,NPLOT                                         !Index for and Total Number of Regression Plot Points
	REAL*8 PLOTLOGPATI                                          !Independent Variable for Regression Plot Points
!Optimization Variables for IMSL Routine 
	INTEGER NPARM												!Total Number of Parameters
	REAL*8 PARMGUESS(2)										    !User-Specified Initial Guess for Parameter Values
	REAL*8 PARMSOLUT(2)										    !Final Parameter Values
	REAL*8 FUNCSOLUT											!Final Minimization Function Value
	INTEGER IPARAM(7)                                           !Variable for Calling IMSL Routine UMINF
	REAL*8 XSCALE(2)/2*1.0/,FSCALE/1.0/,RPARAM(7)               !Variables for Calling IMSL Routine UMINF
!End of Variable Module
    END MODULE PATIANALYSISVARS

!******************
!***Main Program***
!******************
    
    PROGRAM BASICPATILOCSPECIFICATION
	include 'link_f90_static.h'                                 !IMSL Library
    USE PATIANALYSISVARS                                        
    IMPLICIT NONE
	EXTERNAL IMSLFUNC                                           !Function called by IMSL for optimization step in regression analysis
    REAL*8 RNNOF                                                !IMSL function for a random normal variate
    INTEGER IS,IGO                                              !Control flags
    CHARACTER*200 COSMFILE,OUTFILE                              !Cosm data file, Output file

!***   
!***User cues for assessment period, input/output files
!***

    TYPE *,'ENTER ASSESSMENT PERIOD FOR CUMULATIVE PATI'
    ACCEPT *,ASSESSPERIOD
    TYPE *,'ENTER FILENAME FOR COSM EFFECTS DATASET'
    ACCEPT *,COSMFILE
    TYPE *,'ENTER FILENAME FOR PROGRAM OUTPUT'
    ACCEPT *,OUTFILE         
    OPEN(UNIT=11,FILE=OUTFILE,STATUS='REPLACE')

!***
!***STEP 1: ESTABLISH EFFECTS STATUS AND EXPOSURE TIME-SERIES FOR EACH EXPERIMENTAL ECOSYSTEM EXPOSURE
!***
!***The experimental ecosystem data are read to determine the number of experimental ecosystem exposures (NCOSM),
!***and, for each exposure, its duration (COSMDURAT), binary effects score (COSMEFFECT), and time-series of 
!***daily concentrations (COSMCONC).
!***
!***For PATI, calculations require a concentration for each incremental whole day of the specified duration.
!***Exposure information varies in that sometimes the measurements start with 0 and sometimes with 1.
!***If the latter is true, it is assumed here that each given concentration corresponds to an incremental
!***whole day needed for PATI (linear interpolation to fill in missing days).
!***If the former is true, it is assumed that time represents elapsed time, in which case each given value
!***applies half to both the preceding and subsequent whole incremental day needed by PATI.
!***

!Open cosm effects file and loop through records 
    OPEN(UNIT=1,FILE=COSMFILE,STATUS='OLD',READONLY)
    NCOSM=0
    DO
!Read next cosm record for cosm exposure ID, duration, and effects designation
!Exit if EOF, otherwise increment number of cosms and proceed to get exposure information  
        READ(1,"(A3,I5,I3)",IOSTAT=IS) COSMID(NCOSM+1),COSMDURAT(NCOSM+1),COSMEFFECT(NCOSM+1)
        IF(IS.NE.0) EXIT
        NCOSM=NCOSM+1
!Open exposure file (in default directory) for cosm (= "Exposure#" concatenated with cosm ID) and loop through data
        OPEN(UNIT=2,FILE='Exposure#'//TRIM(COSMID(NCOSM))//'.txt',STATUS='OLD',READONLY)
        NDAT=0
        DO
!Read next exposure record for time of concentration measurement (TIMEDAT) and concentration value (CONCDAT)
!Exit if EOF, otherwise increment number of data and proceed to next record 
            READ(2,*,IOSTAT=IS) TIMEDAT(NDAT+1),CONCDAT(NDAT+1)
            IF(IS.NE.0) EXIT
            NDAT=NDAT+1
        ENDDO
!If first concentration time point =0, linearly interpolate concentrations and, for each elapsed time, 
!apply half to previous and half to subsequent day.   
        IF(TIMEDAT(1).EQ.0) THEN
            DO IDAT=1,NDAT-1
                DO IDAY=TIMEDAT(IDAT)+1,TIMEDAT(IDAT+1)
                    COSMCONC(NCOSM,IDAY)=(CONCDAT(IDAT)*(TIMEDAT(IDAT+1)-IDAY+0.5)+CONCDAT(IDAT+1)*(IDAY-TIMEDAT(IDAT)-0.5))/(TIMEDAT(IDAT+1)-TIMEDAT(IDAT))
                ENDDO
            ENDDO
!If first concentration time point >=1, first set concentration to that of first datum for days up to first datum 
!Then linearly interpolate days up to last concentration            
        ELSE
            DO IDAY=1,TIMEDAT(1)
                COSMCONC(NCOSM,IDAY)=CONCDAT(1)
            ENDDO
            DO IDAT=1,NDAT-1
                DO IDAY=TIMEDAT(IDAT)+1,TIMEDAT(IDAT+1)
                    COSMCONC(NCOSM,IDAY)=(CONCDAT(IDAT)*(TIMEDAT(IDAT+1)-IDAY)+CONCDAT(IDAT+1)*(IDAY-TIMEDAT(IDAT)))/(TIMEDAT(IDAT+1)-TIMEDAT(IDAT))
                ENDDO
            ENDDO
        ENDIF
!For days after last concentration value and up to study duration, set concentration to that of last value
        DO IDAY=TIMEDAT(NDAT)+1,COSMDURAT(NCOSM)
            COSMCONC(NCOSM,IDAY)=CONCDAT(NDAT)
        ENDDO
    ENDDO
    TYPE *,'COSM EFFECTS AND EXPOSURE-TIME SERIES ESTABLISHED'

!***
!***STEP 2: GENERATE 1000 RANDOM LOGEC50 AND STEEP PAIRS FOR DESIGNATED TOXICITY DISTRIBUTION PARAMETERS
!***
!***The toxicity relationship for an organism is described with a logistic function for fraction reduced growth
!***versus log concentration and is characterized by two parameters - the "logEC50" (the log concentration
!***causing 50% effect) and by the slope of this relationship ("STEEP") at the logEC50.  PATI is calculated based
!***on the distributions of logEC50 and STEEP among the tests conducted by various investigators on various species.  
!***In this step of the program, the best overall estimates of these distributions from my report (logEC50 
!***mean=2.12 and stdv=0.37; logSTEEP mean -0.05 and stdv=0.18) are used, and are applied to the PATI calculations
!***based on randomly sampling 10000 sets of logEC50 and STEEP from these distributions.
!***

    NSAMP=1000
    CALL RNSET(538595)
    DO IDISCARD=1,1000
        XDISCARD=RNNOF()
    ENDDO
    DO ISAMP=1,NSAMP
        STEEP(ISAMP)=10.0**(TAXALOGSTEEPMEANBEST+RNNOF()*TAXALOGSTEEPSTDVBEST)
        LOGEC50(ISAMP)=TAXALOGEC50MEANBEST+RNNOF()*TAXALOGEC50STDVBEST
    ENDDO
    TYPE *,'LOGEC50 AND STEEP SAMPLES ARE ESTABLISHED'

!***
!***STEP 3: DETERMINE PATI VALUE FOR EACH EXPERIMENTAL ECOSYSTEM EXPOSURE
!***
!***In the third step of the program, a maximum cumulative PATI value is computed for each experimental 
!***ecosystem exposure. For each exposure, daily PATI values are computed by determining the average fraction  
!***growth reduction for each day's exposure concentration across the entire sample of 10000 toxicity parameters
!***obtained in STEP 2. The maximum cumulative value of these daily PATI values, subject to the limit of
!***the specified assessment period is then computed.  The final PATI value for the cosm exposure (COSMPATI)
!***is set to this maximum, converted to a percentage basis. 
!*** 

!Loop through cosms to determine average PATI value for each cosm 
    DO ICOSM=1,NCOSM
        RUNCUMPATI=0.0
        MAXCUMPATI=0.0
!Determine daily PATI values
        DO IDAY=1,COSMDURAT(ICOSM)
            LOGCONC=LOG10(MAX(1D-6,COSMCONC(ICOSM,IDAY)))
            PATI(IDAY)=0.0
            DO ISAMP=1,NSAMP
                PTOX=1/(1+EXP(4*STEEP(ISAMP)*(LOGCONC-LOGEC50(ISAMP))))
                PATI(IDAY)=PATI(IDAY)+(1-PTOX)/NSAMP
            ENDDO
        ENDDO
!Determine maximum running average of daily PATI values, and set COSMPATI to the percentage value of this 
        DO IDAY=1,MIN(COSMDURAT(ICOSM),ASSESSPERIOD)
            RUNCUMPATI=RUNCUMPATI+PATI(IDAY)
        ENDDO
        MAXCUMPATI=RUNCUMPATI
        IF(COSMDURAT(ICOSM).GT.ASSESSPERIOD) THEN
            DO IDAY=ASSESSPERIOD+1,COSMDURAT(ICOSM)
                RUNCUMPATI=RUNCUMPATI+(PATI(IDAY)-PATI(IDAY-ASSESSPERIOD))
                IF(RUNCUMPATI.GT.MAXCUMPATI) MAXCUMPATI=RUNCUMPATI
            ENDDO
        ENDIF
        COSMPATI(ICOSM)=100*MAXCUMPATI
    ENDDO

!***
!***STEP 4: CONDUCT BINARY LOGISTIC REGRESSION TO DETERMINE PATI-LOC
!***
!***A binary regression is conducted for the effects in the cosm exposure versus the log PATI values
!***for the cosm exposures.  The PATI-LOC is set to the PATI value corresponding to a 50% probability of effect.
!***

!Set regression data to the cosm effect data and the log of the cosm PATI values 
    NREG=NCOSM
    DO IREG=1,NCOSM
       REGVAR(IREG)=LOG10(COSMPATI(IREG))
       REGEFF(IREG)=COSMEFFECT(IREG)
    ENDDO
!Call optimization routine to estimate regression parameters 
!(Function IMSLFUNC provides regression optimization functions)
	NPARM=2
	PARMGUESS(1)=2.0
	PARMGUESS(2)=2.0
    IPARAM(1)=0
    CALL DUMINF(IMSLFUNC,2,PARMGUESS,XSCALE,FSCALE,IPARAM,RPARAM,PARMSOLUT,FUNCSOLUT)
!Log PATI-LOC is antilog of regression parameter 1  
    LOCPATI=10.0**PARMSOLUT(1)
    
!***
!***Provide Output
!***

    WRITE(11,"(' COSMID ',I4,'dCUMPATI  EFFECT')") ASSESSPERIOD
    DO ICOSM=1,NCOSM 
        WRITE(11,"(I3,2X,A3,F11.1,I6)") ICOSM,COSMID(ICOSM),COSMPATI(ICOSM),COSMEFFECT(ICOSM)
    ENDDO
    WRITE(11,"(/'REGRESSION LOGEC50 AND STEEPNESS =',2F10.4)") PARMSOLUT(1),PARMSOLUT(2)
    WRITE(11,"('REGRESSION CURVE POINTS:')")
    NPLOT=21
    DO IPLOT=1,NPLOT
        PLOTLOGPATI=(IPLOT-1.0)/(NPLOT-1.0)*4.0
        WRITE(11,"(2F10.4)") 10.0**PLOTLOGPATI,1D0/(1D0+EXP(-PARMSOLUT(2)*(PLOTLOGPATI-PARMSOLUT(1))))
    ENDDO
    WRITE(11,"(/'LOC FOR CUMPATI =',F8.1)") LOCPATI
    TYPE "('CUMPATI-LOC VALUE ESTABLISHED =',F7.1)",LOCPATI

!***
!***End of Program
!***

    TYPE *,'ENTER ANY KEY TO TERMINATE PROGRAM'
    ACCEPT *,IGO
    END PROGRAM BASICPATILOCSPECIFICATION

    
!***************************************************************************************************
!***Maximum Log Likelihood Function for Binary Regression, to Be Called By IMSL Optimization Routine
!***************************************************************************************************

	SUBROUTINE IMSLFUNC(NIMSL,XPARMCURR,FUNCCALC)
	USE PATIANALYSISVARS
	INTEGER NIMSL
	REAL*8 XPARMCURR(2),PROBEFF,FUNCCALC
!***Initialize log likelihood function and loop through regression data
	FUNCCALC=0D0
	DO IREG=1,NREG
!***Probability of effect is a logistic relationship with a 5% probability of an effect determination
!***even when there is no effect
		PROBEFF=0.05+0.95/(1D0+EXP(-XPARMCURR(2)*(REGVAR(IREG)-XPARMCURR(1))))
!***Log likelihood function is sum of log probabilities for each datum, this probability 
!***being PROBEFF if there was an effect and 1-PROBEFF if there wasn't an effect
		IF(REGEFF(IREG).EQ.1) THEN
			FUNCCALC=FUNCCALC-LOG(MAX(1D-10,MIN(1-1D-10,PROBEFF)))
		ELSE
			FUNCCALC=FUNCCALC-LOG(MAX(1D-10,MIN(1-1D-10,1-PROBEFF)))
		ENDIF
	ENDDO
!***End of likelihood function routine	
	RETURN
	END SUBROUTINE IMSLFUNC

   
    
