ISL: Chapter 4
Bài 10
library ( ISLR ) names ( Weekly )
## [1] "Year" "Lag1" "Lag2" "Lag3" "Lag4" "Lag5" ## [7] "Volume" "Today" "Direction"
dim ( Weekly )
## [1] 1089 9
summary ( Weekly )
## Year Lag1 Lag2 Lag3 ## Min. :1990 Min. :-18.2 Min. :-18.2 Min. :-18.2 ## 1st Qu.:1995 1st Qu.: -1.2 1st Qu.: -1.2 1st Qu.: -1.2 ## Median :2000 Median : 0.2 Median : 0.2 Median : 0.2 ## Mean :2000 Mean : 0.2 Mean : 0.2 Mean : 0.1 ## 3rd Qu.:2005 3rd Qu.: 1.4 3rd Qu.: 1.4 3rd Qu.: 1.4 ## Max. :2010 Max. : 12.0 Max. : 12.0 Max. : 12.0 ## Lag4 Lag5 Volume Today Direction ## Min. :-18.2 Min. :-18.2 Min. :0.1 Min. :-18.2 Down:484 ## 1st Qu.: -1.2 1st Qu.: -1.2 1st Qu.:0.3 1st Qu.: -1.2 Up :605 ## Median : 0.2 Median : 0.2 Median :1.0 Median : 0.2 ## Mean : 0.1 Mean : 0.1 Mean :1.6 Mean : 0.1 ## 3rd Qu.: 1.4 3rd Qu.: 1.4 3rd Qu.:2.1 3rd Qu.: 1.4 ## Max. : 12.0 Max. : 12.0 Max. :9.3 Max. : 12.0
pairs ( Weekly )
cor ( Weekly [, -9 ])
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today ## Year 1.000 -0.0323 -0.033 -0.030 -0.0311 -0.0305 0.842 -0.0325 ## Lag1 -0.032 1.0000 -0.075 0.059 -0.0713 -0.0082 -0.065 -0.0750 ## Lag2 -0.033 -0.0749 1.000 -0.076 0.0584 -0.0725 -0.086 0.0592 ## Lag3 -0.030 0.0586 -0.076 1.000 -0.0754 0.0607 -0.069 -0.0712 ## Lag4 -0.031 -0.0713 0.058 -0.075 1.0000 -0.0757 -0.061 -0.0078 ## Lag5 -0.031 -0.0082 -0.072 0.061 -0.0757 1.0000 -0.059 0.0110 ## Volume 0.842 -0.0650 -0.086 -0.069 -0.0611 -0.0585 1.000 -0.0331 ## Today -0.032 -0.0750 0.059 -0.071 -0.0078 0.0110 -0.033 1.0000
attach ( Weekly )
## The following objects are masked from Smarket: ## ## Direction, Lag1, Lag2, Lag3, Lag4, Lag5, Today, Volume, Year
plot ( Volume )
# Logistic regression glmout = glm ( Direction ~ . - Year - Today , data = Weekly , family = binomial ) summary ( glmout )
## ## Call: ## glm(formula = Direction ~ . - Year - Today, family = binomial, ## data = Weekly) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -1.695 -1.256 0.991 1.085 1.458 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 0.2669 0.0859 3.11 0.0019 ** ## Lag1 -0.0413 0.0264 -1.56 0.1181 ## Lag2 0.0584 0.0269 2.18 0.0296 * ## Lag3 -0.0161 0.0267 -0.60 0.5469 ## Lag4 -0.0278 0.0265 -1.05 0.2937 ## Lag5 -0.0145 0.0264 -0.55 0.5833 ## Volume -0.0227 0.0369 -0.62 0.5377 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 1496.2 on 1088 degrees of freedom ## Residual deviance: 1486.4 on 1082 degrees of freedom ## AIC: 1500 ## ## Number of Fisher Scoring iterations: 4
glmpro = predict ( glmout , type = "response" ) contrasts ( Direction )
## Up ## Down 0 ## Up 1
glmpre = rep ( "Down" , 1089 ) glmpre [ glmpro > .5 ] = "Up" table ( glmpre , Direction )
## Direction ## glmpre Down Up ## Down 54 48 ## Up 430 557
mean ( glmpre == Direction )
## [1] 0.56
( 172+427 ) / 1089
## [1] 0.55
427 / ( 427+312 )
## [1] 0.58
# split training data train = ( Year < 2008 ) Weekly0810 = Weekly [ ! train ,] dim ( Weekly0810 )
## [1] 156 9
Direction0810 = Direction [ ! train ] # using Logistic Regression glmout2 = glm ( Direction ~ Lag2 , data = Weekly , family = binomial , subset = train ) glmpro2 = predict ( glmout2 , Weekly0810 , type = "response" ) glmpre2 = rep ( "Down" , 156 ) glmpre2 [ glmpro2 > .5 ] = "Up" table ( glmpre2 , Direction0810 )
## Direction0810 ## glmpre2 Down Up ## Down 7 5 ## Up 65 79
mean ( glmpre2 == Direction0810 )
## [1] 0.55
# using LDA library ( MASS ) ldaout = lda ( Direction ~ Lag2 , data = Weekly , subset = train ) ldapro = predict ( ldaout , Weekly0810 ) ldapre = ldapro $ class table ( ldapre , Direction0810 )
## Direction0810 ## ldapre Down Up ## Down 6 5 ## Up 66 79
mean ( ldapre == Direction0810 )
## [1] 0.54
# using QDA qdaout = qda ( Direction ~ Lag2 , data = Weekly , subset = train ) qdapro = predict ( qdaout , Weekly0810 ) qdapre = qdapro $ class table ( qdapre , Direction0810 )
## Direction0810 ## qdapre Down Up ## Down 0 0 ## Up 72 84
mean ( qdapre == Direction0810 )
## [1] 0.54
# using KNN, K=1 library ( class ) trainK = matrix ( Lag2 [ train ]) testK = matrix ( Lag2 [ ! train ]) trainD = Direction [ train ] set.seed ( 1 ) knnpre = knn ( trainK , testK , trainD , k = 1 ) table ( knnpre , Direction0810 )
## Direction0810 ## knnpre Down Up ## Down 32 38 ## Up 40 46
mean ( knnpre == Direction0810 )
## [1] 0.5
# K=3 knnpre3 = knn ( trainK , testK , trainD , k = 3 ) table ( knnpre3 , Direction0810 )
## Direction0810 ## knnpre3 Down Up ## Down 26 28 ## Up 46 56
mean ( knnpre3 == Direction0810 )
## [1] 0.53
# K=7 knnpre7 = knn ( trainK , testK , trainD , k = 7 ) table ( knnpre7 , Direction0810 )
## Direction0810 ## knnpre7 Down Up ## Down 26 25 ## Up 46 59
mean ( knnpre7 == Direction0810 )
## [1] 0.54
Bài 11
auto = read.csv ( "./ISL/Auto.csv" , na.strings = "?" ) auto = na.omit ( auto ) names ( auto )
## [1] "mpg" "cylinders" "displacement" "horsepower" ## [5] "weight" "acceleration" "year" "origin" ## [9] "name"
dim ( auto )
## [1] 392 9
summary ( auto )
## mpg cylinders displacement horsepower weight ## Min. : 9 Min. :3.0 Min. : 68 Min. : 46 Min. :1613 ## 1st Qu.:17 1st Qu.:4.0 1st Qu.:105 1st Qu.: 75 1st Qu.:2225 ## Median :23 Median :4.0 Median :151 Median : 94 Median :2804 ## Mean :23 Mean :5.5 Mean :194 Mean :104 Mean :2978 ## 3rd Qu.:29 3rd Qu.:8.0 3rd Qu.:276 3rd Qu.:126 3rd Qu.:3615 ## Max. :47 Max. :8.0 Max. :455 Max. :230 Max. :5140 ## ## acceleration year origin name ## Min. : 8.0 Min. :70 Min. :1.00 amc matador : 5 ## 1st Qu.:13.8 1st Qu.:73 1st Qu.:1.00 ford pinto : 5 ## Median :15.5 Median :76 Median :1.00 toyota corolla : 5 ## Mean :15.5 Mean :76 Mean :1.58 amc gremlin : 4 ## 3rd Qu.:17.0 3rd Qu.:79 3rd Qu.:2.00 amc hornet : 4 ## Max. :24.8 Max. :82 Max. :3.00 chevrolet chevette: 4 ## (Other) :365
summary ( auto $ mpg )
## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 9 17 23 23 29 47
mpg.medi = summary ( auto $ mpg )[ "Median" ] mpg01 = rep ( 0 , 392 ) mpg01 [ auto $ mpg > mpg.medi ] = 1 sum ( mpg01 )
## [1] 196
auto01 = data.frame ( auto , mpg01 ) pairs ( auto01 )
# split training data trn = ( auto01 $ year < 80 ) sum ( trn )
## [1] 307
auto80 = auto01 [ ! trn ,] mpg80 = auto01 $ mpg01 [ ! trn ] # perform LDA library ( MASS ) ldaout = lda ( mpg01 ~ horsepower + weight + acceleration , data = auto01 , subset = trn ) ldaout
## Call: ## lda(mpg01 ~ horsepower + weight + acceleration, data = auto01, ## subset = trn) ## ## Prior probabilities of groups: ## 0 1 ## 0.62 0.38 ## ## Group means: ## horsepower weight acceleration ## 0 131 3631 15 ## 1 79 2276 16 ## ## Coefficients of linear discriminants: ## LD1 ## horsepower 0.0030 ## weight -0.0018 ## acceleration 0.0156
ldapro = predict ( ldaout , auto80 ) ldapre = ldapro $ class table ( ldapre , mpg80 )
## mpg80 ## ldapre 0 1 ## 0 4 14 ## 1 1 66
mean ( ldapre == mpg80 )
## [1] 0.82
# perform QDA qdaout = qda ( mpg01 ~ horsepower + weight + acceleration , data = auto01 , subset = trn ) qdapro = predict ( qdaout , auto80 ) qdapre = qdapro $ class table ( qdapre , mpg80 )
## mpg80 ## qdapre 0 1 ## 0 5 11 ## 1 0 69
mean ( qdapre == mpg80 )
## [1] 0.87
# perform logistic regression glmout = glm ( mpg01 ~ horsepower + weight + acceleration , data = auto01 , subset = trn , family = binomial ) summary ( glmout )
## ## Call: ## glm(formula = mpg01 ~ horsepower + weight + acceleration, family = binomial, ## data = auto01, subset = trn) ## ## Deviance Residuals: ## Min 1Q Median 3Q Max ## -2.142 -0.253 -0.018 0.298 3.412 ## ## Coefficients: ## Estimate Std. Error z value Pr(>|z|) ## (Intercept) 14.89861 3.33064 4.47 7.7e-06 *** ## horsepower -0.05341 0.02402 -2.22 0.026 * ## weight -0.00331 0.00068 -4.87 1.1e-06 *** ## acceleration -0.06447 0.14000 -0.46 0.645 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## (Dispersion parameter for binomial family taken to be 1) ## ## Null deviance: 407.08 on 306 degrees of freedom ## Residual deviance: 150.55 on 303 degrees of freedom ## AIC: 158.5 ## ## Number of Fisher Scoring iterations: 7
glmpro = predict ( glmout , auto80 , type = "response" ) glmpre = rep ( 0 , sum ( ! trn )) glmpre [ glmpro > .5 ] = 1 sum ( glmpre )
## [1] 63
table ( glmpre , mpg80 )
## mpg80 ## glmpre 0 1 ## 0 5 17 ## 1 0 63
mean ( glmpre == mpg80 )
## [1] 0.8
# perform KNN library ( class ) attach ( auto01 )
## The following objects are masked from Auto: ## ## acceleration, cylinders, displacement, horsepower, mpg, name, ## origin, weight, year ## ## The following object is masked from package:ggplot2: ## ## mpg
trnK = cbind ( horsepower , weight , acceleration )[ trn ,] tstK = cbind ( horsepower , weight , acceleration )[ ! trn ,] trnD = mpg01 [ trn ] set.seed ( 1 ) # K=1 knnpre = knn ( trnK , tstK , trnD , k = 1 ) table ( knnpre , mpg80 )
## mpg80 ## knnpre 0 1 ## 0 5 19 ## 1 0 61
mean ( knnpre == mpg80 )
## [1] 0.78
# K=3 knnpre = knn ( trnK , tstK , trnD , k = 3 ) table ( knnpre , mpg80 )
## mpg80 ## knnpre 0 1 ## 0 5 30 ## 1 0 50
mean ( knnpre == mpg80 )
## [1] 0.65
# K=5 knnpre = knn ( trnK , tstK , trnD , k = 7 ) table ( knnpre , mpg80 )
## mpg80 ## knnpre 0 1 ## 0 5 24 ## 1 0 56
mean ( knnpre == mpg80 )
## [1] 0.72
Bài 12
Power2 = function ( x , a ){ print ( x ^ a ) } Power2 ( 2 , 3 )
## [1] 8
Power3 = function ( x , a ){ result = x ^ a return ( result ) } Power2 ( 3 , 8 )
## [1] 6561
plot ( 1 : 10 , y = Power3 ( 1 : 10 , 2 ), col = 2 , xlab = "x" , ylab = "y=x^2" )
plot ( 1 : 10 , y = Power3 ( 1 : 10 , 2 ), col = 2 , xlab = "x" , ylab = "y=x^2" , log = "x" )
plot ( 1 : 10 , y = Power3 ( 1 : 10 , 2 ), col = 2 , xlab = "x" , ylab = "y=x^2" , log = "y" )
plot ( 1 : 10 , y = Power3 ( 1 : 10 , 2 ), col = 2 , xlab = "x" , ylab = "y=x^2" , log = "xy" )
PlotPower = function ( x , a ){ plot ( x , Power3 ( x , a ), xlab = "x" , ylab = paste ( "x^" , a )) } PlotPower ( 1 : 10 , 1.7 )