> # housed.r Demonstration of Householder for QR > # step by step > # response vector > y <-c( 4.5, 5.5, 6.5, 8, 10, 12) > # design matrix -- intercept, linear, quadratic > X <- matrix(1,6,3) # get shape and intercept, fix later > X[,2] <- 1:6 > X[,3] <- (1:6)*(1:6) > # put X and y together for easy printing > Xy <- cbind(X,y) # put them together > Xy # write it out y [1,] 1 1 1 4.5 [2,] 1 2 4 5.5 [3,] 1 3 9 6.5 [4,] 1 4 16 8.0 [5,] 1 5 25 10.0 [6,] 1 6 36 12.0 > I6 <- diag(c(1,1,1,1,1,1)) # will need this > # > # first iteration > # > u1 <- Xy[,1] # construct first Householder tfm > s2 <- crossprod(u1,u1) > s <- sqrt(s2)*sign(u1[1]) # choose sign to avoid cancellation > d <- 1/(s2 + s*u1[1]) > u1[1] <- u1[1] + s > U1 <- I6 - outer(d*u1,u1) > Xy <- U1 %*% Xy # first step > Xy # write it out y [1,] -2.449490e+00 -8.5732141 -37.150594 -18.9835455 [2,] 1.387779e-16 -0.7752551 -7.059779 -1.3078317 [3,] 8.326673e-17 0.2247449 -2.059779 -0.3078317 [4,] 8.326673e-17 1.2247449 4.940221 1.1921683 [5,] 9.714451e-17 2.2247449 13.940221 3.1921683 [6,] 1.110223e-16 3.2247449 24.940221 5.1921683 > # coefficients and error ss > Xy[1,4]/Xy[1,1] # should be ybar [1] 7.75 > crossprod(Xy[2:6,4],Xy[2:6,4]) # should be css [,1] [1,] 40.375 > # modify for later use > u1 <- u1/s > # > # second iteration > # > u2 <- Xy[2:6,2] # construct second Householder tfm > s2 <- crossprod(u2,u2) > s <- sqrt(s2)*sign(u2[1]) # choose sign to avoid cancellation > d <- 1/(s2 + s*u2[1]) > u2[1] <- u2[1] + s > U2 <- I6 # start with this and modify > U2[2:6,2:6] <- I6[2:6,2:6] - outer(d*u2,u2) > Xy <- U2 %*% Xy # first step > Xy # write it out y [1,] -2.449490e+00 -8.573214e+00 -37.150594 -18.9835455 [2,] 1.403787e-16 4.183300e+00 29.283101 6.2749502 [3,] 8.319417e-17 1.524659e-17 -3.707008 -0.6515187 [4,] 8.287132e-17 7.302102e-17 -4.036336 -0.6807508 [5,] 9.642626e-17 -9.887924e-17 -2.365665 -0.2099829 [6,] 1.099812e-16 -2.276825e-16 1.305007 0.2607850 > # coefficients and error ss > backsolve(Xy[1:2,1:2],Xy[1:2,4]) # should be two coefs [1] 2.5 1.5 > crossprod(Xy[3:6,4],Xy[3:6,4]) # should be css [,1] [1,] 1 > # modify for later use > u2 <- u2/s > # > # third iteration > # > u3 <- Xy[3:6,3] # construct third Householder tfm > s2 <- crossprod(u3,u3) > s <- sqrt(s2)*sign(u3[1]) # choose sign to avoid cancellation > d <- 1/(s2 + s*u3[1]) > u3[1] <- u3[1] + s > U3 <- I6 # start with this and modify > U3[3:6,3:6] <- I6[3:6,3:6] - outer(d*u3,u3) > Xy <- U3 %*% Xy # first step > Xy # write it out y [1,] -2.449490e+00 -8.573214e+00 -3.715059e+01 -18.98354551 [2,] 1.403787e-16 4.183300e+00 2.928310e+01 6.27495020 [3,] -1.190625e-16 -6.783335e-17 6.110101e+00 0.98198051 [4,] -2.871763e-19 3.886243e-17 8.092892e-16 -0.00913228 [5,] 4.768772e-17 -1.188993e-16 1.153456e-16 0.18364739 [6,] 1.368676e-16 -2.166385e-16 -1.814954e-16 0.04364085 > # coefficients and error ss > backsolve(Xy[1:3,1:3],Xy[1:3,4]) # should be three coefs [1] 4.0000000 0.3750000 0.1607143 > crossprod(Xy[4:6,4],Xy[4:6,4]) # should be css [,1] [1,] 0.03571429 > # modify for later use > u3 <- u3/s > # > # > #*** to understand qr output, first multiply U3*U2*U1 = Q' > # > U3 %*% U2 %*% U1 [,1] [,2] [,3] [,4] [,5] [,6] [1,] -0.40824829 -0.4082483 -0.40824829 -0.4082483 -0.4082483 -0.408248290 [2,] -0.59761430 -0.3585686 -0.11952286 0.1195229 0.3585686 0.597614305 [3,] 0.54554473 -0.1091089 -0.43643578 -0.4364358 -0.1091089 0.545544726 [4,] 0.02715063 0.1686667 -0.66001772 0.6965468 -0.2234604 -0.008886065 [5,] -0.09551508 0.4267771 -0.43546097 -0.2991286 0.6751073 -0.271779698 [6,] -0.41074462 0.6944567 0.05763483 -0.2321982 -0.4326116 0.323462922 > # and print these vectors > u1 [1] 1.4082483 0.4082483 0.4082483 0.4082483 0.4082483 0.4082483 > u2 [1] 1.1853214 -0.0537243 -0.2927700 -0.5318157 -0.7708615 > u3 [1] 1.6067016 0.6606006 0.3871728 -0.2135819 > # > #**** now do all of this using qr function > qrstuff <- qr(X) > qrstuff $qr [,1] [,2] [,3] [1,] -2.4494897 -8.5732141 -37.1505944 [2,] 0.4082483 4.1833001 29.2831009 [3,] 0.4082483 -0.0537243 6.1101009 [4,] 0.4082483 -0.2927700 0.6606006 [5,] 0.4082483 -0.5318157 0.3871728 [6,] 0.4082483 -0.7708615 -0.2135819 $rank [1] 3 $qraux [1] 1.408248 1.185321 1.606702 $pivot [1] 1 2 3 attr(,"class") [1] "qr" > # get Q -- not usually needed -- compare to Q' above > # > qr.Q( qrstuff, complete=TRUE ) [,1] [,2] [,3] [,4] [,5] [,6] [1,] -0.4082483 -0.5976143 0.5455447 0.027150626 -0.09551508 -0.41074462 [2,] -0.4082483 -0.3585686 -0.1091089 0.168666677 0.42677705 0.69445665 [3,] -0.4082483 -0.1195229 -0.4364358 -0.660017723 -0.43546097 0.05763483 [4,] -0.4082483 0.1195229 -0.4364358 0.696546844 -0.29912858 -0.23219822 [5,] -0.4082483 0.3585686 -0.1091089 -0.223460360 0.67510728 -0.43261156 [6,] -0.4082483 0.5976143 0.5455447 -0.008886065 -0.27177970 0.32346292 > # > # use qr.qty to multiply Q'y > Qty <- qr.qty(qrstuff,y) > Qty # write it out [1] -18.98354551 6.27495020 0.98198051 -0.00913228 0.18364739 [6] 0.04364085 > # > # use qr.qy to multiply by Q and get fitted values > yhat <- qr.qy(qrstuff,c(Qty[1:3],rep(0,3)) ) > yhat # write out fitted values [1] 4.535714 5.392857 6.571429 8.071429 9.892857 12.035714 > # > # reverse zeros to get residuals > ehat <- qr.qy(qrstuff,c(rep(0,3),Qty[4:6]) ) > ehat # write out residuals [1] -0.03571429 0.10714286 -0.07142857 -0.07142857 0.10714286 -0.03571429 > # done > rm(list=ls()) # clean up > q() /* regdemo.sas simple regression problem */ options ls=80 ; data a ; input y @@ ; * read across ; x = _n_ ; * so x goes 1 to 6 ; xx = x * x ; * quadratic ; cards ; 4.5 5.5 6.5 8 10 12 ; run ; proc reg data=a ; one: model y = x ; * simple linear regression ; two: model y = x xx ; * quadratic regression ; output out=b p=yhat r=ehat ; run ; proc print data=b ; title 'regression results' ; run ; The SAS System 1 11:02 Tuesday, September 18, 2007 The REG Procedure Model: one Dependent Variable: y Number of Observations Read 6 Number of Observations Used 6 Analysis of Variance Sum of Mean Source DF Squares Square F Value Pr > F Model 1 39.37500 39.37500 157.50 0.0002 Error 4 1.00000 0.25000 Corrected Total 5 40.37500 Root MSE 0.50000 R-Square 0.9752 Dependent Mean 7.75000 Adj R-Sq 0.9690 Coeff Var 6.45161 Parameter Estimates Parameter Standard Variable DF Estimate Error t Value Pr > |t| Intercept 1 2.50000 0.46547 5.37 0.0058 x 1 1.50000 0.11952 12.55 0.0002 The REG Procedure Model: two Dependent Variable: y Number of Observations Read 6 Number of Observations Used 6 Analysis of Variance Sum of Mean Source DF Squares Square F Value Pr > F Model 2 40.33929 20.16964 1694.25 <.0001 Error 3 0.03571 0.01190 Corrected Total 5 40.37500 Root MSE 0.10911 R-Square 0.9991 Dependent Mean 7.75000 Adj R-Sq 0.9985 Coeff Var 1.40786 Parameter Estimates Parameter Standard Variable DF Estimate Error t Value Pr > |t| Intercept 1 4.00000 0.19518 20.49 0.0003 x 1 0.37500 0.12769 2.94 0.0607 xx 1 0.16071 0.01786 9.00 0.0029 regression results 3 11:02 Tuesday, September 18, 2007 Obs y x xx yhat ehat 1 4.5 1 1 4.5357 -0.03571 2 5.5 2 4 5.3929 0.10714 3 6.5 3 9 6.5714 -0.07143 4 8.0 4 16 8.0714 -0.07143 5 10.0 5 25 9.8929 0.10714 6 12.0 6 36 12.0357 -0.03571