Resampling methods in R


Phương pháp tập kiểm chứng

library(ISLR)
set.seed(1)
train=sample(392,196)
lm.fit=lm(mpg~horsepower,data=Auto,subset=train)
attach(Auto)
## The following objects are masked from auto01:
##
## acceleration, cylinders, displacement, horsepower, mpg, name,
## origin, weight, year
##
## The following objects are masked from Auto (pos = 16):
##
## acceleration, cylinders, displacement, horsepower, mpg, name,
## origin, weight, year
##
## The following object is masked from package:ggplot2:
##
## mpg
mean((mpg-predict(lm.fit,Auto))[-train]^2)
## [1] 26
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
## [1] 20
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
## [1] 20
set.seed(2)
train=sample(392,196)
lm.fit=lm(mpg~horsepower,subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
## [1] 23
lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
## [1] 19
lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg-predict(lm.fit3,Auto))[-train]^2)
## [1] 19

Kiểm chứng chéo để lại 1 phần tử

glm.fit=glm(mpg~horsepower,data=Auto)
coef(glm.fit)
## (Intercept)  horsepower 
## 39.94 -0.16
lm.fit=lm(mpg~horsepower,data=Auto)
coef(lm.fit)
## (Intercept)  horsepower 
## 39.94 -0.16
library(boot)
## 
## Attaching package: 'boot'
##
## The following object is masked from 'package:car':
##
## logit
glm.fit=glm(mpg~horsepower,data=Auto)
cv.err=cv.glm(Auto,glm.fit)
cv.err$delta
## [1] 24 24
cv.error=rep(0,5)
for (i in 1:5){
glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
cv.error[i]=cv.glm(Auto,glm.fit)$delta[1]
}
cv.error
## [1] 24 19 19 19 19

Kiểm chứng chéo k phần

set.seed(17)
cv.error.10=rep(0,10)
for (i in 1:10){
glm.fit=glm(mpg~poly(horsepower,i),data=Auto)
cv.error.10[i]=cv.glm(Auto,glm.fit,K=10)$delta[1]
}
cv.error.10
##  [1] 24 19 19 19 19 19 19 20 19 20

Tự khởi tạo mẫu

Ước tính độ chính xác của một giá trị thống kê

alpha.fn=function(data,index){
X=data$X[index]
Y=data$Y[index]
return((var(Y)-cov(X,Y))/(var(X)+var(Y)-2*cov(X,Y)))
}
alpha.fn(Portfolio,1:100)
## [1] 0.58
set.seed(1)
alpha.fn(Portfolio,sample(100,100,replace=T))
## [1] 0.6
boot(Portfolio,alpha.fn,R=1000)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Portfolio, statistic = alpha.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 0.58 -7.3e-05 0.089

Ước tính độ chính xác của mô hình hồi quy tuyến tính

boot.fn=function(data,index)
return(coef(lm(mpg~horsepower,data=data,subset=index)))
boot.fn(Auto,1:392)
## (Intercept)  horsepower 
## 39.94 -0.16
set.seed(1)
boot.fn(Auto,sample(392,392,replace=T))
## (Intercept)  horsepower 
## 38.74 -0.15
boot.fn(Auto,sample(392,392,replace=T))
## (Intercept)  horsepower 
## 40.04 -0.16
boot(Auto,boot.fn,1000)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Auto, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 39.94 0.02972 0.8600
## t2* -0.16 -0.00031 0.0074
summary(lm(mpg~horsepower,data=Auto))$coef
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.94 0.7175 56 1.2e-187
## horsepower -0.16 0.0064 -24 7.0e-81
# another boot
boot.fn=function(data,index)
coefficients(lm(mpg~horsepower+I(horsepower^2),data=data,subset=index))
set.seed(1)
boot(Auto,boot.fn,1000)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = Auto, statistic = boot.fn, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 56.9001 6.1e-03 2.09449
## t2* -0.4662 -1.8e-04 0.03341
## t3* 0.0012 1.3e-06 0.00012
summary(lm(mpg~horsepower+I(horsepower^2),data=Auto))$coef
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.9001 1.80043 32 1.7e-109
## horsepower -0.4662 0.03112 -15 2.3e-40
## I(horsepower^2) 0.0012 0.00012 10 2.2e-21