######################################################################### # # # Program to implement the 3-step GLS algorithm with # # cmax total iterations, theta unknown, using the R # # function nls(), estimating theta by PL # # # # Details on the nls() function may be found in Chapter 10 # # of the book "Statistical Models in S" edited by J.M. Chambers # # and T. Hastie (1993, Chapman and Hall) # # # # The program may be used for any problem by changing the # # code defining the mean function and "weights" # # # ######################################################################### ######################################################################### # # # Define the mean function f and the gradient matrix # # of its partial derivatives with respect to beta (n x p) as an # # attribute. The nls() function will know to use analytic derivatives # # when it spots the presence of the attribute "gradient" defined # # along with the function # # # ######################################################################### # 4 parameter logistic model meanfunc <- function(x,b1,b2,b3,b4){ # note that we must be careful at x=0 -- the function approaches # b2 as x goes to 0 xx <-(x>0)*x+(x==0)*0.0001 denom <- 1+exp(b4*(log(xx)-b3)) f1 <- b1 + (b2-b1)/denom fpl <- (x>0)*f1+(x==0)*b2 # compute analytical dervivatives -- create the gradient matrix X # must again take care with x=0 meangrad <- array(0,c(length(x),4),list(NULL,c("b1","b2","b3","b4"))) meangrad[,"b1"] <- (x>0)*(1-1/denom)+(x==0)*0 meangrad[,"b2"] <- (x>0)*(1/denom)+(x==0)*1 meangrad[,"b3"] <- (x>0)*(b4*(b2-b1)*exp(b4*(log(xx)-b3)))/(denom**2) +(x==0)*0 meangrad[,"b4"] <- -(x>0)*(b2-b1)* exp(b4*(log(xx)-b3))*(log(xx)-b3)/(denom**2)+(x==0)*0 attr(fpl,"gradient") <- meangrad fpl } ######################################################################### # # # To implement step (iii), we wish to do weighted least squares # # with known weights. This is accomplished by transforming the # # the response and mean function to a problem with constant variance, # # as shown in the notes on p. 1.6. nls() is then called to # # do OLS on the transformed problem, thereby doing WLS # # # # Because the weights in this case are a function of the mean, # # I also define the mean function with no gradient attribute. # # This is because the transformed mean function will depend # # on the current estimated weights, which are found by evaluating # # the mean function at the current estimate. Because of the nature # # of attributes, if calculated using the function # # "indofunc" above, the weights will carry along the gradient # # attribute, which will confuse things when we wish to calculate # # the gradient of the transformed mean function assuming the # # weights are constant! There are more elegant ways around this, # # but doing it this way here is meant to highlight the issue so # # you will be aware of it. # # # ######################################################################### unweightfunc <- function(x,b1,b2,b3,b4){ xx <-(x>0)*x+(x==0)*0.0001 denom <- 1+exp(b4*(log(xx)-b3)) f1 <- b1 + (b2-b1)/denom fpl <- (x>0)*f1+(x==0)*b2 fpl } # The transformed mean function (multiplied by the square root # of the current estimated weights, which are considered fixed) weightfunc <- function(x,b1,b2,b3,b4,wt){ pred <- unweightfunc(x,b1,b2,b3,b4) w12 <- sqrt(wt) weightf <- pred*w12 xx <-(x>0)*x+(x==0)*0.0001 denom <- 1+exp(b4*(log(xx)-b3)) # compute analytical dervivatives -- create the gradient matrix X weightgrad <- array(0,c(length(x),4),list(NULL,c("b1","b2","b3","b4"))) weightgrad[,"b1"] <- ((x>0)*(1-1/denom)+(x==0)*0)*w12 weightgrad[,"b2"] <- ((x>0)*(1/denom)+(x==0)*1)*w12 weightgrad[,"b3"] <- ((x>0)*(b4*(b2-b1)*exp(b4*(log(xx)-b3)))/(denom**2) +(x==0)*0)*w12 weightgrad[,"b4"] <- -((x>0)*(b2-b1)* exp(b4*(log(xx)-b3))*(log(xx)-b3)/(denom**2)+(x==0)*0)*w12 attr(weightf,"gradient") <- weightgrad weightf } ######################################################################### # # # To estimate theta, we use the "trick" of turning the PL estimation # # problem into a "nonlinear regression" problem. Here, the function # # trkfunc() is the mean function for the "dummy" regression problem # # that is solved to estimated theta. # # # ######################################################################### trkfunc <- function(resid,mudot,mu,theta){ trk <- resid*((mudot/mu)**theta) trkgrad <- array(0,c(length(mu),1),list(NULL,c("theta"))) trkgrad[,"theta"] <- trk*log(mudot/mu) attr(trk,"gradient") <- trkgrad # analytic derivative trk } ######################################################################### # # # Read in the data # # # ######################################################################### thedat <- scan("assay.dat") thedat <- matrix(thedat,ncol=3,byrow=T) y <- thedat[,3] x <- thedat[,1] n <- length(x) p <- 4 ######################################################################### # # # Define list of starting values for beta and theta # # # ######################################################################### bstart <- list(b1= 600, b2=6000, b3=2, b4=1) tstart <- 1 outfile <- "assay_pl.Rout" ######################################################################### # # # Create the data frame for nls() # # # ######################################################################### indat <- data.frame(x,y) ######################################################################### # # # Specify the max number of iterations of GLS (C) # # # ######################################################################### cmax <- 20 ######################################################################### # # # Step (i) -- initial fit by OLS # # # # A call to nls() is pretty self-explanatory. The first argument # # specifies the model: on the LHS of the ~ is the response variable, # # on the RHS is the mean function (may also be just an expression; # # it need not be a function call). The second argument is the name # # of the dataframe where the data reside, and the third is a list # # containing the starting values for the Gauss-Newton algorithm. # # Additional options are described in the Chambers/Hastie book. # # The algorithm employs a form of step-halving; this also described # # in the book. The call creates an object containing the # # parameter estimates and other summary information from the fit. # # # ######################################################################### ols.fit <- nls(y ~ meanfunc(x,b1,b2,b3,b4),indat,bstart) ######################################################################### # # # Extract the estimate from the object olsfit # # # ######################################################################### bols <- coef(ols.fit) mu <- unweightfunc(x,bols[1],bols[2],bols[3],bols[4]) resid <- y-mu sigols <- sqrt(sum(resid*resid)/(n-p)) ######################################################################### # # # Print out the results to a file -- we round the results to 6 # # decimal places to the right of the decimal # # # ######################################################################### cat("FIT OF THE ASSAY DATA BY GLS",file=outfile,"\n", append=F) cat("WITH POWER VARIANCE, THETA UNKNOWN AND ESTIMATED BY PL",file=outfile,"\n","\n","\n",append=T) cat("OLS estimate ",round(bols,6),file=outfile,"\n","\n",append=T) cat("Estimate of assumed constant SD of y ",round(sigols,6),file=outfile, "\n","\n","\n",append=T) ######################################################################### # # # Use the OLS estimator as the preliminary estimator # # # ######################################################################### bgls <- bols ######################################################################### # # # Begin iterating between steps (ii) and (iii) # # # ######################################################################### for (k in 1:cmax){ ######################################################################### # # # Step (ii) -- estimate theta and calculate the weights and the # # transformed response to use in the WLS calculation in (iii) # # # ######################################################################### # compute the geometric mean and predicted values mu <- unweightfunc(x,bgls[1],bgls[2],bgls[3],bgls[4]) mudot <- prod(mu)**(1/n) mudot <- rep(mudot,n) resid <- y-mu dummy <- rep(0,length(x)) pldat <- data.frame(resid,mu,mudot) # Now estimate theta using PL "trick" thetafit <- nls(dummy ~ trkfunc(resid,mudot,mu,theta),pldat, list(theta=tstart)) theta <- coef(thetafit) cat("Iteration ",k,"\n",file=outfile,append=T) cat("Estimate of theta ",round(theta,4),"\n", file=outfile,append=T,"\n") mu <- unweightfunc(x,bgls[1],bgls[2],bgls[3],bgls[4]) wt <- 1/(mu^(2*theta)) ywt <- y*sqrt(wt) ######################################################################### # # # Step (iii) -- update estimation of beta by WLS with the weights # # held fixed. First, create the updated data frame of transformed # # responses and weights for use by nls() # # # ######################################################################### indat2 <- data.frame(x,ywt,wt) gls.fit <- nls(ywt ~ weightfunc(x,b1,b2,b3,b4,wt),indat2,bstart, control=nls.control(maxiter=200)) ######################################################################### # # # Get the updated GLS estimate to use for constructing weights # # on the next iteration # # # ######################################################################### bgls <- coef(gls.fit) ######################################################################### # # # Print results of this iteration to the output file # # # ######################################################################### cat("Iteration ",k,"\n",file=outfile,append=T) cat("GLS estimate of beta ",round(bgls,6),"\n","\n",file=outfile, append=T) } ######################################################################### # # # Finished iteration loop -- now compute the estimate of sigma^2 # # based on the final GLS estimate. Use the "adjusted" version # # # ######################################################################### mu <- unweightfunc(x,bgls[1],bgls[2],bgls[3],bgls[4]) resid <- y-mu g <- mu^theta sigma2 <- sum((resid/g)**2)/(n-p) sigma <- sqrt(sigma2) ######################################################################### # # # Print out the final estimatea of sigma and theta # # # ######################################################################### cat("Final estimate of sigma ",round(sigma,6),"\n","\n", file=outfile,append=T) cat("Final estimate of theta ",round(theta,6),"\n","\n", file=outfile,append=T) sink(outfile,append=T) print(summary(gls.fit)) sink()