Introduction to R
Giới thiệu về ngôn ngữ R
1. Các lệnh căn bản
# Create a numeric vector with c() and print it.
x <- c(1, 3, 2, 5)
x
## [1] 1 3 2 5
# Reassign x and create a second vector y of the same length.
x <- c(1, 6, 2)
x
## [1] 1 6 2
y <- c(1, 4, 3)
length(x)
## [1] 3
length(y)
## [1] 3
# Arithmetic on vectors is element-wise.
x + y
## [1] 2 10 5
# matrix() fills column by column unless byrow = TRUE is given.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
x
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
x <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
# sqrt() and ^ are applied to every element of the matrix.
sqrt(x)
## [,1] [,2]
## [1,] 1.0 1.7
## [2,] 1.4 2.0
x^2
## [,1] [,2]
## [1,] 1 9
## [2,] 4 16
# y is x plus a small noise term centred at 50, so the two are highly
# correlated.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
cor(x, y)
## [1] 0.99
# set.seed() makes the random draws that follow reproducible.
set.seed(1303)
rnorm(50)
## [1] -1.14398 1.34213 2.18539 0.53639 0.06319 0.50223 -0.00042
## [8] 0.56582 -0.57252 -1.11023 -0.04869 -0.69566 0.82892 0.20665
## [15] -0.23567 -0.55631 -0.36475 0.86236 -0.63077 0.31360 -0.93150
## [22] 0.82387 0.52337 0.70692 0.42020 -0.26905 -1.51032 -0.69021
## [29] -0.14347 -1.01353 1.57327 0.01275 0.87265 0.42207 -0.01882
## [36] 2.61575 -0.69314 -0.26632 -0.72064 1.36773 0.26401 0.63219
## [43] -1.33065 0.02689 1.04064 1.31202 -0.03000 -0.25003 0.02341
## [50] 1.65987
set.seed(3)
y <- rnorm(100)
mean(y)
## [1] 0.011
var(y)
## [1] 0.73
# The standard deviation is the square root of the variance.
sqrt(var(y))
## [1] 0.86
sd(y)
## [1] 0.86
Đồ họa
# 2 dimensions: scatterplot of two independent standard-normal samples,
# first with default labels, then with explicit axis labels and a title.
x <- rnorm(100)
y <- rnorm(100)
plot(x, y)
plot(
  x, y,
  xlab = "this is the x-axis",
  ylab = "this is the y-axis",
  main = "Plot of X vs Y"
)
# Two equivalent ways to build the integer sequence 1, 2, ..., 10.
x <- seq(1, 10)
x
## [1] 1 2 3 4 5 6 7 8 9 10
x <- 1:10
x
## [1] 1 2 3 4 5 6 7 8 9 10
# 3 dimensions: evaluate cos(y) / (1 + x^2) on a 50 x 50 grid over
# [-pi, pi] and draw the resulting surface.
x <- seq(-pi, pi, length.out = 50)
y <- x
f <- outer(x, y, function(u, v) cos(v) / (1 + u^2))
contour(x, y, f)
# add = TRUE overlays the finer contour levels on the existing plot.
contour(x, y, f, nlevels = 45, add = TRUE)
# fa is the antisymmetric part of f.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)
image(x, y, fa)
# theta and phi set the viewing angles of the perspective plot.
persp(x, y, fa)
persp(x, y, fa, theta = 30)
persp(x, y, fa, theta = 30, phi = 20)
persp(x, y, fa, theta = 30, phi = 70)
persp(x, y, fa, theta = 30, phi = 40)
Chỉ mục dữ liệu
# A 4 x 4 matrix holding 1..16, filled column by column.
A <- matrix(1:16, nrow = 4, ncol = 4)
A
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
# Single element: row 2, column 3.
A[2, 3]
## [1] 10
# Several rows and columns at once, by index vector or by range.
A[c(1, 3), c(2, 4)]
## [,1] [,2]
## [1,] 5 13
## [2,] 7 15
A[1:3, 2:4]
## [,1] [,2] [,3]
## [1,] 5 9 13
## [2,] 6 10 14
## [3,] 7 11 15
# Leaving an index empty selects every row (or every column).
A[1:2, ]
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
A[, 1:2]
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
## [3,] 3 7
## [4,] 4 8
A[1, ]
## [1] 1 5 9 13
# Negative indices drop the listed rows or columns.
A[-c(1, 3), ]
## [,1] [,2] [,3] [,4]
## [1,] 2 6 10 14
## [2,] 4 8 12 16
A[-c(1, 3), -c(1, 3, 4)]
## [1] 6 8
# dim() returns the number of rows followed by the number of columns.
dim(A)
## [1] 4 4
Tổng quan về dữ liệu
# Load the Auto data set from a local CSV copy. The two commented lines
# below are kept for reference: they download the file to the path that is
# read afterwards.
# fileUrl <- "http://www-bcf.usc.edu/~gareth/ISL/Auto.csv"
# download.file(fileUrl,destfile="./ISL/Auto.csv")
# "?" marks missing values in the file. stringsAsFactors = TRUE keeps `name`
# a factor on R >= 4.0 as well, where read.csv() otherwise returns character
# columns; the factor is needed by pairs(Auto) and by the per-name counts
# printed by summary(Auto) further down.
Auto <- read.csv(
  "./ISL/Auto.csv",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
dim(Auto)
## [1] 397 9
Auto[1:4, ]
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12 70 1
## 2 15 8 350 165 3693 12 70 1
## 3 18 8 318 150 3436 11 70 1
## 4 16 8 304 150 3433 12 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
# Drop the rows that contain missing values.
Auto <- na.omit(Auto)
dim(Auto)
## [1] 392 9
names(Auto)
## [1] "mpg" "cylinders" "displacement" "horsepower"
## [5] "weight" "acceleration" "year" "origin"
## [9] "name"
# attach() puts the columns of Auto on the search path so they can be used
# by bare name in the calls below.
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
# With numeric cylinders, plot() draws a scatterplot.
plot(cylinders, mpg)
# This creates a separate factor copy named `cylinders` in the workspace;
# the column inside Auto is left unchanged. With a factor on the x-axis,
# plot() draws boxplots instead.
cylinders <- as.factor(cylinders)
plot(cylinders, mpg)
plot(cylinders, mpg, col = "red")
plot(cylinders, mpg, col = "red", varwidth = TRUE)
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)
plot(
  cylinders, mpg,
  col = "red", varwidth = TRUE,
  xlab = "cylinders", ylab = "MPG"
)
# Histograms of mpg; col = 2 picks the second colour of the palette.
hist(mpg)
hist(mpg, col = 2)
hist(mpg, col = 2, breaks = 15)
# Scatterplot matrix of every column, then of a chosen subset only.
pairs(Auto)
pairs(
  ~ mpg + displacement + horsepower + weight + acceleration,
  data = Auto
)
# identify() labels points chosen interactively with the car name; it
# returned integer(0) here because no point was selected.
plot(horsepower, mpg)
identify(horsepower, mpg, name)
## integer(0)
# Numerical summary of every column, then of a single variable.
summary(Auto)
## mpg cylinders displacement horsepower weight
## Min. : 9 Min. :3.0 Min. : 68 Min. : 46 Min. :1613
## 1st Qu.:17 1st Qu.:4.0 1st Qu.:105 1st Qu.: 75 1st Qu.:2225
## Median :23 Median :4.0 Median :151 Median : 94 Median :2804
## Mean :23 Mean :5.5 Mean :194 Mean :104 Mean :2978
## 3rd Qu.:29 3rd Qu.:8.0 3rd Qu.:276 3rd Qu.:126 3rd Qu.:3615
## Max. :47 Max. :8.0 Max. :455 Max. :230 Max. :5140
##
## acceleration year origin name
## Min. : 8.0 Min. :70 Min. :1.00 amc matador : 5
## 1st Qu.:13.8 1st Qu.:73 1st Qu.:1.00 ford pinto : 5
## Median :15.5 Median :76 Median :1.00 toyota corolla : 5
## Mean :15.5 Mean :76 Mean :1.58 amc gremlin : 4
## 3rd Qu.:17.0 3rd Qu.:79 3rd Qu.:2.00 amc hornet : 4
## Max. :24.8 Max. :82 Max. :3.00 chevrolet chevette: 4
## (Other) :365
summary(mpg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 17 23 23 29 47