############################################################### # # # ST 732 - Sample code to make "sphagetti plots" in R # # # # Note: This code is not the most efficient way to exploit # # the capabilities of R to produce these plots. The code # # has been written so that even if you do not know R, you # # may be able to follow it! # # # # We demonstrate with the dental study data # # # ############################################################### # Read in the data from the file # This code simply reads the data from a file into a matrix # that has 5 columns and then names the columns thedat <- matrix(scan("dental.dat"),ncol=5,byrow=T) # the columns are put into vectors with these names with this code: ids <- thedat[,2] age <- thedat[,3] distance <- thedat[,4] gender <- thedat[,5] # alternatively, we could read the data into a "data frame" # as follows and then reference the columns as, for example # thedat$distance. So in the code below, you'd have to change, # for instance, distance to thedat$distance or else define # distance <- thedat$distance # # thedat <- read.table("dental.dat", # col.names=c("obsno", "id", "age", "distance", "gender")) # We are going to plot the mean profiles, so we need to get the # sample means of distance at each age for girls and boys separately # all girls and boys have the same ages, so we get the unique values # of these using the R function "unique()", which finds the vector # consisting of the unique values of age. This code will only work # for balanced data. uage <- unique(age) # find the vector of means at each age for girls by looping through # the unique ages -- get the mean for each age and add it on to the # vector gemans that will include all age means at the end of the loop. gmeans <- NULL for (a in uage) { thismean <- mean(distance[age==a & gender==0]) gmeans <- c(gmeans,thismean) } # and do the same for boys bmeans <- NULL for (a in uage) { thismean <- mean(distance[age==a & gender==1]) bmeans <- c(bmeans,thismean) } # tell R to create a postscript file named "dentalplots.ps" # type -- this opens the file for graphics output # postscript("dentalplots.ps",width=8) # you could then convert this to a pdf file if you like # you could also create a pdf file (which we do here) by instead using pdf("dentalplots.pdf",width=8) # type "help(postscript)" and "help(pdf)" into R to learn more # you can experiment with different widths, heights, and other # parameters; see the help() files # Now make a 2-panel plot, one for each gender -- type "help(par)" # into R to find out what all these arcane things mean! cex controls # the size of symbols and text par(mfrow=c(1,2),cex=0.75,oma=c(2,2,2,0),mar=c(4,4,3,2)) # find the min and max values of distance so that we can ask # R to scale the vertical axis of the plots accordingly ymin <- min(distance) ymax <- max(distance) # gender = 0 (females) # get the data for girls and the unique girl ids y <- distance[gender==0] t <- age[gender==0] girlids <- ids[gender==0] ugirls <- unique(girlids) # call the plot() function to set up the plot (axes, labels, etc) # but do not plot anything yet (this is accomplished with type="n" plot(t,y,type="n",xlab="age (years)", ylab="distance (mm)", ylim=c(ymin,ymax),cex=0.7) # now loop through the girls and get their hunks of data one at a time # and plot the spaghetti for (j in ugirls){ thist <- t[girlids==j] thisy <- y[girlids==j] # use the lines() function to plot a line for this girl; type="b" # asks to print both a solid line (lty=1; try lty=2,3,... for different # style lines) and a symbol (pch=18 plots diamonds) at the actual # distance observations lines(thist,thisy,type="b",lty=1,pch=18) } # now that the plot is made, superimpose the mean profile on top; # we use a thicker line, accomplished with the lwd option (see # help(par) lines(uage,gmeans,type="b",lty=1,pch=18,lwd=3) # put a title at the top of the plot title("Girls",cex=0.7) # gender = 1 (males) -- do the same thing y <- distance[gender==1] t <- age[gender==1] boyids <- ids[gender==1] uboys <- unique(boyids) plot(t,y,type="n",xlab="age (years)", ylab="distance (mm)", ylim=c(ymin,ymax),cex=0.7) for (j in uboys){ thist <- t[boyids==j] thisy <- y[boyids==j] lines(thist,thisy,type="b",lty=1,pch=18) } lines(uage,bmeans,type="b",lty=1,pch=18,lwd=3) title("Boys",cex=0.7) # shut off the graphics output device so that the graphics file # is created graphics.off() # We can also create a plot with both boys and girls on the same # graph, as in Figure 1 of Chapter 1, using the gender symbol as # the plotting symbol. You can use either postscript or pdf; we # use pdf here # postscript("girlsandboys.ps",height=6) pdf("girlsandboys.pdf",height=5) par(mfrow=c(1,1),cex=0.75,oma=c(2,2,2,0),mar=c(4,4,3,2)) # set up the axes plot(age,distance,type="n",xlab="age (years)", ylab="distance (mm)", ylim=c(ymin,ymax),cex=0.7) # loop through each child for (j in unique(ids)){ thist <- age[ids==j] thisy <- distance[ids==j] thisgender <- unique(gender[ids==j]) lines(thist,thisy,type="b",lty=1,pch=as.character(thisgender)) } title("Dental Study Data",cex=0.7) graphics.off()