#Splus handout: Model selection in Splus Surgical Unit Data set from Chapter 8 of NKNW New Splus functions: leaps: All-Subset Regressions by Leaps and Bounds Leaps does not compute statistics for intercept only model so plots will look slightly different than the plots in the book. stepwise: Stepwise Subset Selection for Multiple Regression Other approaches are available in Splus. For example, see function "step" ________________________________________________________________________ #Introduction #Note that I added column labels to the file which will be used in the #plots below >surg<-read.table("CH08TA01.DAT",header=T) Clot Prog Enz Liver Surv log10Surv 1 6.7 62 81 2.59 200 2.3010 2 5.1 59 66 1.70 101 2.0043 3 7.4 57 83 2.16 204 2.3096 4 6.5 73 41 2.01 101 2.0043 5 7.8 65 115 4.30 509 2.7067 6 5.8 38 72 1.42 80 1.9031 7 5.7 46 63 1.91 80 1.9031 8 3.7 68 81 2.57 127 2.1038 9 6.0 67 93 2.50 202 2.3054 10 3.7 76 94 2.40 203 2.3075 ETC. #**Would typically do EDA here! >surg.lm<-lm(log10Surv~Clot+Prog+Enz+Liver,surg) >summary(surg.lm) Coefficients: Value Std. Error t value Pr(>|t|) (Intercept) 0.4888 0.0502 9.7297 0.0000 Clot 0.0685 0.0054 12.5961 0.0000 Prog 0.0093 0.0004 21.1893 0.0000 Enz 0.0095 0.0004 23.9105 0.0000 Liver 0.0019 0.0097 0.1983 0.8436 Residual standard error: 0.04733 on 49 degrees of freedom Multiple R-Squared: 0.9724 F-statistic: 431.1 on 4 and 49 degrees of freedom, the p-value is 0 ________________________________________________________________________ #LEAPS postscript("splus10.ps",print.it=F) surg.leaps<-leaps(surg[,1:4],surg[,6],method="r2") > surg.leaps $r2: [1] 52.73749 44.23867 35.15171 11.99959 81.29741 68.65344 64.95764 64.57938 [9] 52.78304 43.80532 97.23470 88.29007 71.91890 64.99865 97.23692 $size: [1] 2 2 2 2 3 3 3 3 3 3 4 4 4 4 5 $label: [1] "Liver" "Enz" "Prog" [4] "Clot" "Prog,Enz" "Enz,Liver" [7] "Prog,Liver" "Clot,Enz" "Clot,Liver" [10] "Clot,Prog" "Clot,Prog,Enz" "Prog,Enz,Liver" [13] "Clot,Enz,Liver" "Clot,Prog,Liver" "Clot,Prog,Enz,Liver" $which: [,1] [,2] [,3] [,4] [1,] F F F T [2,] F F T F [3,] F T F F [4,] T F F F [5,] F T T F [6,] F F T T [7,] F T F T [8,] T F T F [9,] T F F T [10,] T T F F [11,] T T T F [12,] F T T T [13,] T F T T [14,] T T F T [15,] T T T T $int: [1] T plot(surg.leaps$size,surg.leaps$r2/100,type="n",ylim=c(0,1), xlab="# of parameters",ylab="R^2") text(surg.leaps$size,surg.leaps$r2/100,surg.leaps$label) title("R^2 Plot for Surgery Data") surg.leaps<-leaps(surg[,1:4],surg[,6],method="adjr2") plot(surg.leaps$size,surg.leaps$adjr2/100,type="n",ylim=c(0,1), xlab="# of parameters",ylab="Adjusted R^2") text(surg.leaps$size,surg.leaps$adjr2/100,surg.leaps$label) title("Adjusted R^2 Plot for Surgery Data") #The plot below isn't very useful in this form for these data, but I'm #including it here for completeness surg.leaps<-leaps(surg[,1:4],surg[,6],method="Cp") plot(surg.leaps$size,surg.leaps$Cp,type="n", xlab="# of parameters",ylab="Cp") text(surg.leaps$size,surg.leaps$Cp,surg.leaps$label) abline(0,1) title("Cp Plot for Surgery Data\n See NKNW for better format of this plot") dev.off() >temp<-cbind(surg.leaps$label,surg.leaps$size,round(surg.leaps$Cp,2)) >as.data.frame(temp[order(surg.leaps$Cp),]) 1 Clot,Prog,Enz 4 3.04 2 Clot,Prog,Enz,Liver 5 5 3 Prog,Enz,Liver 4 161.66 4 Prog,Enz 3 283.67 5 Clot,Enz,Liver 4 451.99 6 Enz,Liver 3 507.9 7 Prog,Liver 3 573.44 8 Clot,Prog,Liver 4 574.71 9 Clot,Enz 3 580.14 10 Liver 2 788.15 11 Clot,Liver 3 789.34 12 Enz 2 938.86 13 Clot,Prog 3 948.55 14 Prog 2 1100.01 15 Clot 2 1510.59 #potential final model >surg.lm<-lm(log10Surv~Clot+Prog+Enz,surg) >summary(surg.lm) Coefficients: Value Std. Error t value Pr(>|t|) (Intercept) 0.4836 0.0426 11.3450 0.0000 Clot 0.0692 0.0041 16.9755 0.0000 Prog 0.0093 0.0004 24.2992 0.0000 Enz 0.0095 0.0003 31.0817 0.0000 Residual standard error: 0.04687 on 50 degrees of freedom Multiple R-Squared: 0.9723 F-statistic: 586 on 3 and 50 degrees of freedom, the p-value is 0 #**Now would do diagnostics here ______________________________________________________________________ #Stepwise (This output would be more interesting if you had more #predictors) >stepwise(surg[,1:4],surg[,6],method="efroymson") $rss: [1] 1.8776320 1.2453274 0.4652086 0.1097705 0.1098586 $size: [1] 1 2 3 4 3 $which: Clot Prog Enz Liver 1(+4) F F F T 2(+3) F F T T 3(+2) F T T T 4(+1) T T T T 3(-4) T T T F $f.stat: [1] 58.02377171 25.89482276 83.84612679 158.66248928 0.03932316 $method: [1] "efroymson"