######################################################################### # # # Program to implement the 3-step GLS algorithm with # # cmax total iterations, theta known, using the R function nls # # # # Use it here to fit Poisson regression # # # # The program may be used for any problem by changing the # # code defining the mean function and "weights" # # # ######################################################################### ######################################################################### # # # Define the mean function f and the gradient matrix # # of its partial derivatives with respect to beta (n x p) as an # # attribute. The nls() function will know to use analytic derivatives # # when it spots the presence of the attribute "gradient" defined # # along with the function # # # ######################################################################### meanfunc <- function(x,b1,b2,b3,b4,b5,b6){ temp <- (b1+b2*x[,1]+b3*x[,2]+b4*x[,3]+b5*x[,4]+b6*x[,5]) fpl <- exp(temp) # compute analytical dervivatives -- create the gradient matrix X meangrad <- array(0,c(nrow(x),6),list(NULL,c("b1","b2","b3","b4","b5", "b6"))) meangrad[,"b1"] <- fpl meangrad[,"b2"] <- fpl*x[,1] meangrad[,"b3"] <- fpl*x[,2] meangrad[,"b4"] <- fpl*x[,3] meangrad[,"b5"] <- fpl*x[,4] meangrad[,"b6"] <- fpl*x[,5] attr(fpl,"gradient") <- meangrad fpl } ######################################################################### # # # To implement step (iii), we wish to do weighted least squares # # with known weights. This is accomplished by transforming the # # the response and mean function to a problem with constant variance. # # nls() is then called to do OLS on the transformed problem, # # thereby doing WLS. # # # # Because the weights in this case are a function of the mean, # # we also define the mean function with no gradient attribute. # # This is because the transformed mean function will depend # # on the current estimated weights, which are found by evaluating # # the mean function at the current estimate. Because of the nature # # of attributes, if calculated using the function # # "meanfunc" above, the weights will carry along the gradient # # attribute, which will confuse things when we wish to calculate # # the gradient of the transformed mean function assuming the # # weights are constant! There are more elegant ways around this, # # but doing it this way here is meant to highlight the issue so # # you will be aware of it. # # # ######################################################################### unweightfunc <- function(x,b1,b2,b3,b4,b5,b6){ temp <- (b1+b2*x[,1]+b3*x[,2]+b4*x[,3]+b5*x[,4]+b6*x[,5]) fpl <- exp(temp) fpl } # The transformed mean function (multiplied by the square root # of the current estimated weights, which are considered fixed) weightfunc <- function(x,b1,b2,b3,b4,b5,b6,wt){ temp <- (b1+b2*x[,1]+b3*x[,2]+b4*x[,3]+b5*x[,4]+b6*x[,5]) pred1 <- exp(temp) w12 <- sqrt(wt) fpl <- pred1*w12 # compute analytical dervivatives -- create the gradient matrix X weightgrad <- array(0,c(nrow(x),6),list(NULL,c("b1","b2","b3","b4","b5", "b6"))) weightgrad[,"b1"] <- pred1*w12 weightgrad[,"b2"] <- pred1*x[,1]*w12 weightgrad[,"b3"] <- pred1*x[,2]*w12 weightgrad[,"b4"] <- pred1*x[,3]*w12 weightgrad[,"b5"] <- pred1*x[,4]*w12 weightgrad[,"b6"] <- pred1*x[,5]*w12 attr(fpl,"gradient") <- weightgrad fpl } ######################################################################### # # # The data # # # ######################################################################### # data thedata <- scan("skincancer.dat") thedata <- matrix(thedata,ncol=6,byrow=T) # need to create the 2 dummy variables for sunburn y <- thedata[,6] covs <- thedata[,2:5] x1 <- covs[,1]==2 x2 <- covs[,1]==3 n <- length(y) # form the appropriate vector x as in problem x <- cbind(x1,x2,covs[,2:4]) # starting value for beta ybar <- mean(y) xstar <- cbind(rep(1,n),x) ystar <- (y+ybar)/2 wstar <- diag(ystar) eta <- log(ystar) zstar <- eta + (y-ystar)/ystar bs <- solve(t(xstar)%*%wstar%*%xstar)%*%t(xstar)%*%wstar%*%zstar bstart = list(b1=bs[1], b2=bs[2], b3=bs[3], b4=bs[4], b5=bs[5], b6=bs[6]) outfile <- "skincancer.gls" ######################################################################### # # # Create the data frame for nls() # # # ######################################################################### thedat <- data.frame(x,y) ######################################################################### # # # Specify the max number of iterations of GLS (C) # # # # Alternatively, we could check for convergence after step (iii) # # # ######################################################################### cmax <- 50 ######################################################################### # # # Step (i) -- initial fit by OLS # # # ######################################################################### # Note that we have used the nls.control option to change the # convergence criterion to something more stringent than the # default of 1e-5 olsfit <- nls(y ~ meanfunc(x,b1,b2,b3,b4,b5,b6),thedat,bstart, nls.control(tol=1e-8)) ######################################################################### # # # Extract the estimate from the object olsfit # # # ######################################################################### bols <- coef(olsfit) ######################################################################### # # # Print out the results to a file -- we round the results to 6 # # decimal places to the right of the decimal # # # ######################################################################### cat("FIT OF THE SKIN CANCER DATA BY GLS",file=outfile,"\n","\n","\n", append=F) cat("OLS estimate ",round(bols,6),file=outfile,"\n","\n","\n",append=T) ######################################################################### # # # Use the OLS estimator as the preliminary estimator # # # ######################################################################### bgls <- bols ######################################################################### # # # Begin iterating between steps (ii) and (iii) # # # ######################################################################### for (k in 1:cmax){ # alternatively can use the most recent iterate as the starting value # bstart <- list(b1=bgls[1], b2=bgls[2], b3=bgls[3], b4=bgls[4], # b5=bgls[5], b6=bgls[6]) ######################################################################### # # # Step (ii) -- calculate the weights and the transformed response # # to use in the WLS calculation in (iii) # # # ######################################################################### mu <- unweightfunc(x,bgls[1],bgls[2],bgls[3],bgls[4],bgls[5],bgls[6]) wt <- 1/mu ywt <- y*sqrt(wt) ######################################################################### # # # Step (iii) -- update estimation of beta by WLS with the weights # # held fixed. First, create the updated data frame of transformed # # responses and weights for use by nls() # # # ######################################################################### thedat2 <- data.frame(x,ywt,wt) glsfit <- nls(ywt ~ weightfunc(x,b1,b2,b3,b4,b5,b6,wt),thedat2, bstart,nls.control(tol=1e-8)) ######################################################################### # # # Get the updated GLS estimate to use for constructing weights # # on the next iteration # # # ######################################################################### bgls <- coef(glsfit) ######################################################################### # # # Print results of this iteration to the output file # # # ######################################################################### cat("Iteration ",k,"\n",file=outfile,append=T) cat("GLS estimate of beta ",round(bgls,6),"\n","\n",file=outfile, append=T) } ######################################################################### # # # Finished iteration loop -- now compute the estimate of sigma^2 # # based on the final GLS estimate. Use the "adjusted" version # # # ######################################################################### mu <- unweightfunc(x,bgls[1],bgls[2],bgls[3],bgls[4],bgls[5],bgls[6]) resid <- y-mu ######################################################################### # # # Print out summary provided by the nls() function for the final # # iteration # # # ######################################################################### sink(outfile,append=T) print(summary(glsfit)) sink()