ISL: Chapter 4


Bài 10

library(ISLR)
names(Weekly)
## [1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"      "Lag5"     
## [7] "Volume" "Today" "Direction"
dim(Weekly)
## [1] 1089    9
summary(Weekly)
##       Year           Lag1            Lag2            Lag3      
## Min. :1990 Min. :-18.2 Min. :-18.2 Min. :-18.2
## 1st Qu.:1995 1st Qu.: -1.2 1st Qu.: -1.2 1st Qu.: -1.2
## Median :2000 Median : 0.2 Median : 0.2 Median : 0.2
## Mean :2000 Mean : 0.2 Mean : 0.2 Mean : 0.1
## 3rd Qu.:2005 3rd Qu.: 1.4 3rd Qu.: 1.4 3rd Qu.: 1.4
## Max. :2010 Max. : 12.0 Max. : 12.0 Max. : 12.0
## Lag4 Lag5 Volume Today Direction
## Min. :-18.2 Min. :-18.2 Min. :0.1 Min. :-18.2 Down:484
## 1st Qu.: -1.2 1st Qu.: -1.2 1st Qu.:0.3 1st Qu.: -1.2 Up :605
## Median : 0.2 Median : 0.2 Median :1.0 Median : 0.2
## Mean : 0.1 Mean : 0.1 Mean :1.6 Mean : 0.1
## 3rd Qu.: 1.4 3rd Qu.: 1.4 3rd Qu.:2.1 3rd Qu.: 1.4
## Max. : 12.0 Max. : 12.0 Max. :9.3 Max. : 12.0
pairs(Weekly)

plot of chunk 10

cor(Weekly[,-9])
##          Year    Lag1   Lag2   Lag3    Lag4    Lag5 Volume   Today
## Year 1.000 -0.0323 -0.033 -0.030 -0.0311 -0.0305 0.842 -0.0325
## Lag1 -0.032 1.0000 -0.075 0.059 -0.0713 -0.0082 -0.065 -0.0750
## Lag2 -0.033 -0.0749 1.000 -0.076 0.0584 -0.0725 -0.086 0.0592
## Lag3 -0.030 0.0586 -0.076 1.000 -0.0754 0.0607 -0.069 -0.0712
## Lag4 -0.031 -0.0713 0.058 -0.075 1.0000 -0.0757 -0.061 -0.0078
## Lag5 -0.031 -0.0082 -0.072 0.061 -0.0757 1.0000 -0.059 0.0110
## Volume 0.842 -0.0650 -0.086 -0.069 -0.0611 -0.0585 1.000 -0.0331
## Today -0.032 -0.0750 0.059 -0.071 -0.0078 0.0110 -0.033 1.0000
attach(Weekly)
## The following objects are masked from Smarket:
##
## Direction, Lag1, Lag2, Lag3, Lag4, Lag5, Today, Volume, Year
plot(Volume)

plot of chunk 10

# Logistic regression
glmout=glm(Direction~.-Year-Today,data=Weekly,family=binomial)
summary(glmout)
## 
## Call:
## glm(formula = Direction ~ . - Year - Today, family = binomial,
## data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.695 -1.256 0.991 1.085 1.458
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2669 0.0859 3.11 0.0019 **
## Lag1 -0.0413 0.0264 -1.56 0.1181
## Lag2 0.0584 0.0269 2.18 0.0296 *
## Lag3 -0.0161 0.0267 -0.60 0.5469
## Lag4 -0.0278 0.0265 -1.05 0.2937
## Lag5 -0.0145 0.0264 -0.55 0.5833
## Volume -0.0227 0.0369 -0.62 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500
##
## Number of Fisher Scoring iterations: 4
glmpro=predict(glmout,type="response")
contrasts(Direction)
##      Up
## Down 0
## Up 1
glmpre=rep("Down",1089)
glmpre[glmpro>.5]="Up"
table(glmpre,Direction)
##       Direction
## glmpre Down Up
## Down 54 48
## Up 430 557
mean(glmpre==Direction)
## [1] 0.56
(172+427)/1089
## [1] 0.55
427/(427+312)
## [1] 0.58
# split training data
train=(Year<2008)
Weekly0810=Weekly[!train,]
dim(Weekly0810)
## [1] 156   9
Direction0810=Direction[!train]

# using Logistic Regression
glmout2=glm(Direction~Lag2,data=Weekly,family=binomial,subset=train)
glmpro2=predict(glmout2,Weekly0810,type="response")
glmpre2=rep("Down",156)
glmpre2[glmpro2>.5]="Up"
table(glmpre2,Direction0810)
##        Direction0810
## glmpre2 Down Up
## Down 7 5
## Up 65 79
mean(glmpre2==Direction0810)
## [1] 0.55
# using LDA
library(MASS)
ldaout=lda(Direction~Lag2,data=Weekly,subset=train)
ldapro=predict(ldaout,Weekly0810)
ldapre=ldapro$class
table(ldapre,Direction0810)
##       Direction0810
## ldapre Down Up
## Down 6 5
## Up 66 79
mean(ldapre==Direction0810)
## [1] 0.54
# using QDA
qdaout=qda(Direction~Lag2,data=Weekly,subset=train)
qdapro=predict(qdaout,Weekly0810)
qdapre=qdapro$class
table(qdapre,Direction0810)
##       Direction0810
## qdapre Down Up
## Down 0 0
## Up 72 84
mean(qdapre==Direction0810)
## [1] 0.54
# using KNN, K=1
library(class)
trainK=matrix(Lag2[train])
testK=matrix(Lag2[!train])
trainD=Direction[train]
set.seed(1)
knnpre=knn(trainK,testK,trainD,k=1)
table(knnpre,Direction0810)
##       Direction0810
## knnpre Down Up
## Down 32 38
## Up 40 46
mean(knnpre==Direction0810)
## [1] 0.5
# K=3
knnpre3=knn(trainK,testK,trainD,k=3)
table(knnpre3,Direction0810)
##        Direction0810
## knnpre3 Down Up
## Down 26 28
## Up 46 56
mean(knnpre3==Direction0810)
## [1] 0.53
# K=7
knnpre7=knn(trainK,testK,trainD,k=7)
table(knnpre7,Direction0810)
##        Direction0810
## knnpre7 Down Up
## Down 26 25
## Up 46 59
mean(knnpre7==Direction0810)
## [1] 0.54

Bài 11

auto=read.csv("./ISL/Auto.csv",na.strings="?")
auto=na.omit(auto)
names(auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"  
## [5] "weight" "acceleration" "year" "origin"
## [9] "name"
dim(auto)
## [1] 392   9
summary(auto)
##       mpg       cylinders    displacement   horsepower      weight    
## Min. : 9 Min. :3.0 Min. : 68 Min. : 46 Min. :1613
## 1st Qu.:17 1st Qu.:4.0 1st Qu.:105 1st Qu.: 75 1st Qu.:2225
## Median :23 Median :4.0 Median :151 Median : 94 Median :2804
## Mean :23 Mean :5.5 Mean :194 Mean :104 Mean :2978
## 3rd Qu.:29 3rd Qu.:8.0 3rd Qu.:276 3rd Qu.:126 3rd Qu.:3615
## Max. :47 Max. :8.0 Max. :455 Max. :230 Max. :5140
##
## acceleration year origin name
## Min. : 8.0 Min. :70 Min. :1.00 amc matador : 5
## 1st Qu.:13.8 1st Qu.:73 1st Qu.:1.00 ford pinto : 5
## Median :15.5 Median :76 Median :1.00 toyota corolla : 5
## Mean :15.5 Mean :76 Mean :1.58 amc gremlin : 4
## 3rd Qu.:17.0 3rd Qu.:79 3rd Qu.:2.00 amc hornet : 4
## Max. :24.8 Max. :82 Max. :3.00 chevrolet chevette: 4
## (Other) :365
summary(auto$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 9 17 23 23 29 47
mpg.medi=summary(auto$mpg)["Median"]
mpg01=rep(0,392)
mpg01[auto$mpg>mpg.medi]=1
sum(mpg01)
## [1] 196
auto01=data.frame(auto,mpg01)
pairs(auto01)

plot of chunk autodata

# split training data
trn=(auto01$year<80)
sum(trn)
## [1] 307
auto80=auto01[!trn,]
mpg80=auto01$mpg01[!trn]

# perform LDA
library(MASS)
ldaout=lda(mpg01~horsepower+weight+acceleration,data=auto01,subset=trn)
ldaout
## Call:
## lda(mpg01 ~ horsepower + weight + acceleration, data = auto01,
## subset = trn)
##
## Prior probabilities of groups:
## 0 1
## 0.62 0.38
##
## Group means:
## horsepower weight acceleration
## 0 131 3631 15
## 1 79 2276 16
##
## Coefficients of linear discriminants:
## LD1
## horsepower 0.0030
## weight -0.0018
## acceleration 0.0156
ldapro=predict(ldaout,auto80)
ldapre=ldapro$class
table(ldapre,mpg80)
##       mpg80
## ldapre 0 1
## 0 4 14
## 1 1 66
mean(ldapre==mpg80)
## [1] 0.82
# perform QDA
qdaout=qda(mpg01~horsepower+weight+acceleration,data=auto01,subset=trn)
qdapro=predict(qdaout,auto80)
qdapre=qdapro$class
table(qdapre,mpg80)
##       mpg80
## qdapre 0 1
## 0 5 11
## 1 0 69
mean(qdapre==mpg80)
## [1] 0.87
# perform logistic regression
glmout=glm(mpg01~horsepower+weight+acceleration,data=auto01,subset=trn,family=binomial)
summary(glmout)
## 
## Call:
## glm(formula = mpg01 ~ horsepower + weight + acceleration, family = binomial,
## data = auto01, subset = trn)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.142 -0.253 -0.018 0.298 3.412
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 14.89861 3.33064 4.47 7.7e-06 ***
## horsepower -0.05341 0.02402 -2.22 0.026 *
## weight -0.00331 0.00068 -4.87 1.1e-06 ***
## acceleration -0.06447 0.14000 -0.46 0.645
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 407.08 on 306 degrees of freedom
## Residual deviance: 150.55 on 303 degrees of freedom
## AIC: 158.5
##
## Number of Fisher Scoring iterations: 7
glmpro=predict(glmout,auto80,type="response")
glmpre=rep(0,sum(!trn))
glmpre[glmpro>.5]=1
sum(glmpre)
## [1] 63
table(glmpre,mpg80)
##       mpg80
## glmpre 0 1
## 0 5 17
## 1 0 63
mean(glmpre==mpg80)
## [1] 0.8
# perform KNN
library(class)
attach(auto01)
## The following objects are masked from Auto:
##
## acceleration, cylinders, displacement, horsepower, mpg, name,
## origin, weight, year
##
## The following object is masked from package:ggplot2:
##
## mpg
trnK=cbind(horsepower,weight,acceleration)[trn,]
tstK=cbind(horsepower,weight,acceleration)[!trn,]
trnD=mpg01[trn]
set.seed(1)

# K=1
knnpre=knn(trnK,tstK,trnD,k=1)
table(knnpre,mpg80)
##       mpg80
## knnpre 0 1
## 0 5 19
## 1 0 61
mean(knnpre==mpg80)
## [1] 0.78
# K=3
knnpre=knn(trnK,tstK,trnD,k=3)
table(knnpre,mpg80)
##       mpg80
## knnpre 0 1
## 0 5 30
## 1 0 50
mean(knnpre==mpg80)
## [1] 0.65
# K=5
knnpre=knn(trnK,tstK,trnD,k=7)
table(knnpre,mpg80)
##       mpg80
## knnpre 0 1
## 0 5 24
## 1 0 56
mean(knnpre==mpg80)
## [1] 0.72

Bài 12

Power2=function(x,a){
print(x^a)
}
Power2(2,3)
## [1] 8
Power3=function(x,a){
result=x^a
return(result)
}

Power2(3,8)
## [1] 6561
plot(1:10,y=Power3(1:10,2),col=2,xlab="x",ylab="y=x^2")

plot of chunk writing functions

plot(1:10,y=Power3(1:10,2),col=2,xlab="x",ylab="y=x^2",log="x")

plot of chunk writing functions

plot(1:10,y=Power3(1:10,2),col=2,xlab="x",ylab="y=x^2",log="y")

plot of chunk writing functions

plot(1:10,y=Power3(1:10,2),col=2,xlab="x",ylab="y=x^2",log="xy")

plot of chunk writing functions

PlotPower=function(x,a){
plot(x,Power3(x,a),xlab="x",ylab=paste("x^",a))
}
PlotPower(1:10,1.7)

plot of chunk writing functions