/*** The SAS data step:
     The data step is a loop that is executed repeatedly until
     the data are exhausted.  Initially the data are read into
     a vector called the PDV = Program Data Vector.
     By default this happens to the PDV
      (1) At the end of the data step (each time through) the
          contents of the PDV are written as the next line
          of the dataset.
      (2) All non-retained variables are set to missing.

     The PUT statement writes messages and evaluations to the
     log file by default.   PUT _ALL_ ;  writes current PDV.
*/

*****************************
**program 1 default input  **;
*****************************;

/* Writes the PDV to the log before and after INPUT on every pass.
   In-stream data must sit on its own lines after CARDS: one record
   per line, closed by a line holding only a semicolon. */
Data a;
   put "Start:" _ALL_;
   input X Y;
   put "End:" _ALL_;
cards;
1 2
3 4
5 6
;
run;

/* The RETAIN statement is used to hold values from one pass through
   the data step to the next.  You can also initialize values in
   this way.  The RETAIN statement is executed at compile time. */

***************
** program 2 **
***************;

/* Three ways of incrementing, shown side by side in the log:
     Z = Z + 1   Z is neither retained nor initialized
     C = C + 1   C is named on the RETAIN statement with initial value 10
     D + 1       sum statement
   The RETAIN statement is placed after the code that uses C on purpose. */
Data a;
   put "Start:" _ALL_;
   input X Y;
   Z = Z + 1;
   C = C + 1;
   D + 1;
   put "End:" _ALL_ /;
   retain C 10;
cards;
1 2
3 4
5 6
;
proc print;
run;

/* The DROP statement simply "marks" a variable so that it does not
   get placed in the data set. */

***************
** program 3 **
***************;

/* X is dropped, yet it is still used in computations and assigned to
   afterwards. */
Data a;
   input x y;
   drop x;
   xsq = x*x;
   x = 10;
cards;
1 2
3 4
5 6
;
proc print;
run;

/* The KEEP statement really means "drop everything but" */

***************
** program 4 **
***************;

/* Only x is kept; ysq and xsq are computed but not named on KEEP. */
Data a;
   input x y;
   keep x;
   ysq = y*y;
   xsq = x*x;
cards;
1 2
3 4
5 6
;
proc print;
run;

/* DROP and RETAIN are not contradictory !
Loop indices are put into dataset by default */ ;

****************
** Program 5  **
****************;

/* No input data: the DO loop issues five explicit OUTPUTs.
   X is both retained (initial value 10) and dropped. */
Data a;
   Do i = 1 to 5;
      Drop X;
      X = X + 2;
      Y = 5*X;
      output;
   end;
   Retain X 10;
proc print;
run;

/* PDV contains automatically generated variables _n_ and _error_
   These variables are DROPped */

***************
** Program 6 **
***************;

/* Z is assigned only on the pass where _n_ equals 2; Z is not retained. */
Data a;
   input X Y;
   If (_n_=2) then Z=10 ;
cards;
1 2
3 4
5 6
;
proc print;
run;

/* Add a RETAIN statement */

***************
** program 7 **
***************;

/* Same as program 6 except that Z is now retained. */
Data a;
   retain Z;
   input X Y;
   If (_n_=2) then Z=10 ;
cards;
1 2
3 4
5 6
;
proc print;
run;

/* Can jump in programming statements */

****************
** program 8 ***
****************;

/* Leaves the DO loop through GO TO as soon as exp exceeds 50.
   The statements after the label "one" assign result and output
   one more observation. */
Data a;
   do i = 1 to 100;
      exp = 2**i;
      if exp>50 then go to one;
      output ;
   end;
one:
   result = "jumped";
   output;
proc print;
run;

/* These instructions are for a PC environment.  May (?) work
   with UNIX too.

   Programming KEYS
      Tools -> Options -> keys
      F12     out; clear; log; clear; wpgm; sub;

   Programming enhanced editor keys
      Tools -> Options -> Enhanced Editor Keys
      (1) Select "Assign Keys "
      (2) Select "Collapse all Folding Blocks"
      (3) Cursor in "Press key" box lower right
      (4) Press F11, say
      (5) Click "Assign"
   I also assign F10 to Expand all Folding Blocks
*/

/* SAS has ARRAYS.  These are really just arrays of variable names */

****************
** program 9  **
***************;

/* The INPUT statement mixes list input (Y1, Y2), a column pointer (@10)
   and formatted input (Y3 4.2, Y4 4.2).  The informats are deliberately
   written without the colon modifier, as the trailing comments point out.

   NOTE(review): this example is column sensitive, but the original
   column positions of the data records were lost when the file was
   collapsed onto one line.  The split into three records and the
   spacing used below are a reconstruction - confirm against the
   original handout and realign the records under the ruler. */
Data A;
   array Y(4);
   Input Y1 @10 Y2 Y3 4.2 Y4 4.2;    * :4.2 ; *<---Needs a colon!!;
   Do i = 1 to 4;
      Y(i) = Y(i) + 100;
   end;
*23456789 123456789 123456789 123456789;
cards;
1        2 3 4 5
6.12     7.12 8.12 9.12
10       11 12 13
;
proc print;
run;

/* SAS has lots of functions, e.g.
random number generators, statistical functions */

*****************
** program 10  **
****************;

/* Analytic power of a 5 percent F test with 1 numerator degree of
   freedom and noncentrality 5.  The denominator degrees of freedom are
   18 so that this matches the two-sample design simulated in program 11
   (2 groups of 10 observations: 20 - 2 = 18). */
Data A;
   Fcrit = Finv(0.95,1,18);          ** 95th percentile of F(1,18) **;
   Power = 1-ProbF(Fcrit,1,18,5);    ** Power at noncentrality 5   **;
proc print;
run;

/* Run a small simulation to check these power computations */;

*****************
** program 11  **
*****************;

/* 20000 replicates of a two-group comparison, 10 observations per group.
   Group means are 10 and 12, standard deviation 2.  The mean of the 0/1
   variable reject from PROC MEANS is the simulated rejection rate, to be
   compared with Power from program 10.  MSE pools the two corrected sums
   of squares over 18 degrees of freedom, so Fcrit uses F(1,18). */
Data check;
   Fcrit = Finv(0.95,1,18);
   array Y(20);
   do i = 1 to 20000;
      Do j = 1 to 10;
         Y(j)    = 10 + 2*normal(12345);
         Y(j+10) = 12 + 2*normal(12345);
      end;
      /* noncentrality is 2(10)/4 */
      Sum1  = sum(of Y1-Y10);
      Sum2  = sum(of Y11-Y20);
      MStrt = (Sum1-Sum2)**2/20;
      MSE   = ( CSS(of Y1-Y10) + CSS(of Y11-Y20) )/18;
      F     = MStrt/MSE;
      reject = (F>Fcrit);
      output;
   end;
   Keep F reject ;
proc print data=check(obs=2);
proc means;
   var reject;
run;

/* Some simulation hints.
   (1) LOG window may fill up with messages and halt processing.
       To avoid, use OPTIONS NONOTES;
   (2) PROC APPEND can concatenate data with existing data.
*/

*****************
** program 12  **
*****************;

/* Round 1: append A to base data set B. */
Data A;
   input Y X;
   XY = X*Y;
cards;
1 2
3 4
5 6
;
PROC APPEND DATA=A BASE=B;
PROC PRINT DATA=B;
   title "B data first round";
run;

/* Round 2: append a second data set to the same base. */
Data next;
   input X Y XY;
cards;
5 6 7
8 9 10
;
proc append data=next base=b;
   Title "Round 2, (next added)";
proc print;
run;

/* Round 3: same again with notes switched off. */
options nonotes;
data last;
   input X Y XY;
cards;
101 102 103
;
proc append data=last base=b;
proc print;
run;
*** Compare LOG window to previous runs ***;

/* Switch notes back on so that programs submitted later in the same
   session write their usual notes to the log. */
options notes;

/* Can save permanent SAS data sets.  Use two level name and LIBNAME statement.
SASUSER library is always there */

****************
** program 13 **
****************;

Libname dad "C:\TEMP";  ** Library on drive C of a PC **;

/* The same three records are stored twice: once in the user-defined
   library dad and once in SASUSER. */
Data dad.junk;
   Input X Y;
cards;
1 2
3 4
5 6
;
Data sasuser.junk;
   Input X Y;
cards;
1 2
3 4
5 6
;
proc contents data=sasuser._all_ nods;
proc contents data=sasuser.junk;
proc contents data=dad._all_;
run;

/* Delete junk from dad, then list the contents of dad again. */
proc datasets library=dad;
   delete junk;
proc contents data=dad._all_;
run;

/* Can join datasets horizontally */

*****************
** program 13b **
*****************;

/* A and B share the variables X and Z; A also has Y and B also has W.
   The double trailing @ lets several observations be read from one
   data line. */
Data A;
   Input X Y Z @@;
cards;
1 10 0.1   2 11 0.2   4 12 0.3   5 13 .
;
Data B;
   Input X Z W @@;
cards;
1 0.1 90   2 . 91   3 0.9 92   4 0.6 93   5 0.7 94
;
proc print data=A;
   title "A";
proc print;
   title "B";

/* MERGE without a BY statement, in both orders. */
Data AB;
   Merge A B;
proc print;
   title "AB";
run;

Data BA;
   merge B A;
proc print;
   title "BA";
run;

/* MERGE with BY X. */
Data ABbyX;
   merge A B;
   by X;
proc print;
   title "AB by X";
run;

/* Can concatenate vertically */

***************
** program 14 **
****************;

/* Three variations on SET: one SET naming both data sets, two SET
   statements, and two SET statements each followed by OUTPUT. */
Data A_B;
   set A B;
proc print;
   title "set A B";
run;

Data A_B;
   set A;
   set B;
proc print;
   title "set A set B";
run;

Data A_B;
   set A;
   output;
   set B;
   output;
proc print;
   title "set-output-set-output";
run;

/* Can reshape datasets using PROC TRANSPOSE */

****************
** program 15 **
****************;

/* Builds two observations (rep = 1, 2) of 20 uniform random numbers
   each, then transposes Y1-Y20: once over the whole data set and once
   BY rep with the PREFIX= option. */
Data A;
   Array Y(20);
   do rep = 1 to 2;
      do i = 1 to 20;
         Y(i) = ranuni(123);
      end;
      output;
   end;
proc print data=a;
proc transpose data=a out=b;
   var Y1-Y20;
proc print data=B;
proc transpose data=A out=c prefix=response;
   var Y1-Y20;
   by rep;
proc print data=C;
run;