#Install the packages bootstrap and boot.
#Load the bootstrap package.
library(bootstrap)
#This package is only loaded so that the
#law school data sets can be accessed.
#The entire population of 82 law schools.
law82
#The random sample of 15 law schools.
law
#Some example bootstrap samples.
set.seed(511)
law[sample(1:15,replace=T),]
law[sample(1:15,replace=T),]
law[sample(1:15,replace=T),]
#Load the boot package.
library(boot)
#Write a function that will compute the
#statistic for any bootstrap sample denoted
#by i=indices of resampled observations.
theta.hat=function(d,i)
{
cor(d[i,1],d[i,2])
}
#Perform bootstrapping using the boot function.
set.seed(9373564)
o=boot(data=law,statistic=theta.hat,R=5000)
o
#Examine the sample statistic theta.hat.
#This is the sample correlation coefficient.
o$t0
#Examine the distribution of any bootstrap
#replications of theta.hat. These are the
#sample correlation coefficients computed
#for each of the 5000 bootstrap samples.
hist(o$t,xlab="theta.hat.star",
main="Histogram of the 5000 Bootstrap Replications",
nclass=50,col=4)
box()
#We can examine characteristics of the
#bootstrap distribution for theta.hat.
quantile(o$t,c(.95,.75,.5,.25,.05))
min(o$t)
max(o$t)
mean(o$t)
sd(o$t)
#How does this compare to the true distribution
#theta.hat? If this were a real problem, we
#wouldn't know for sure. However, in this case.
#we know the true population and have previously
#used simulation to get a good approximation of
#the true distribution of theta.hat. Comparing
#back to the simulation results, we see that
#the bootstrap distribution seems shifted to the
#right relative to the true distribution.
#This is perhaps not surprising in this case,
#given that our sample estimate theta.hat is an
#overestimate of the population parameter theta.
o$t0
cor(law82[,2],law82[,3])
#However, note that the standard error estimate
#is pretty good (0.1348 compared to 0.1302).
#The bias of theta.hat is estimated to be
mean(o$t)-o$t0
#The bias corrected estimate is
o$t0-(mean(o$t)-o$t0)
#In this case, our biased corrected estimate
#is further from the true theta than the
#uncorrected estimate.
o$t0-(mean(o$t)-o$t0)
cor(law82[,2],law82[,3])
o$t0
#However, note that our previous simulation
#did demonstrate that the bias of theta.hat
#is negative.
#The mean of 100,000 replications of
#theta.hat was 0.7464. Thus, the true bias
#is approxiated by
0.7464-cor(law82[,2],law82[,3])
#Thus, the bootstrap estimate of bias was in
#the right direction even if correcting for it
#was harmful in this case.
#Let's find an approximate 95% confidence
#interval for theta using the percentile
#bootstrap approach.
boot.ci(o,conf=.95,type="perc")
quantile(o$t,c(0.025,0.975))
#The above results don't exactly agree
#because different algorithms are used
#for finding the quantiles. These differences
#are not important.
abline(v=quantile(o$t,c(0.025,0.975)),col=2,lwd=2)
#Now let's find the BCa interval using the law data.
BCa.int=boot.ci(o,conf=.95,type="bca")
BCa.int
attributes(BCa.int)
BCa.int$bca
sort(o$t)[c(27,28,4693,4694)]
abline(v=BCa.int$bca[1,4:5],col=3,lwd=2)