Introduction to R


Giới thiệu về ngôn ngữ R [1].

Các lệnh căn bản

# Build a numeric vector with c() and print it.
x <- c(1, 3, 2, 5)
x
## [1] 1 3 2 5
# Rebind x to a shorter vector.
x <- c(1, 6, 2)
x
## [1] 1 6 2
y <- c(1, 4, 3)
# Confirm both vectors have the same length before adding them element-wise.
length(x)
## [1] 3
length(y)
## [1] 3
x + y
## [1]  2 10  5
# Fill a 2 x 2 matrix column by column (the default fill order).
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
x
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# Same matrix, with the arguments matched by position.
x <- matrix(c(1, 2, 3, 4), 2, 2)
# byrow = TRUE fills row by row instead; the result is printed, not stored.
matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4
# sqrt() and ^ are applied to each element of the matrix.
sqrt(x)
##      [,1] [,2]
## [1,]  1.0  1.7
## [2,]  1.4  2.0
x^2
##      [,1] [,2]
## [1,]    1    9
## [2,]    4   16
# Draw 50 standard-normal values, then build y as x plus noise centred at 50.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
# No seed is set above, so this correlation varies slightly between runs.
cor(x, y)
## [1] 0.99
# Fixing the seed makes the draws that follow reproducible.
set.seed(1303)
rnorm(50)
##  [1] -1.14398  1.34213  2.18539  0.53639  0.06319  0.50223 -0.00042
##  [8]  0.56582 -0.57252 -1.11023 -0.04869 -0.69566  0.82892  0.20665
## [15] -0.23567 -0.55631 -0.36475  0.86236 -0.63077  0.31360 -0.93150
## [22]  0.82387  0.52337  0.70692  0.42020 -0.26905 -1.51032 -0.69021
## [29] -0.14347 -1.01353  1.57327  0.01275  0.87265  0.42207 -0.01882
## [36]  2.61575 -0.69314 -0.26632 -0.72064  1.36773  0.26401  0.63219
## [43] -1.33065  0.02689  1.04064  1.31202 -0.03000 -0.25003  0.02341
## [50]  1.65987
set.seed(3)
y <- rnorm(100)
mean(y)
## [1] 0.011
var(y)
## [1] 0.73
# The standard deviation is the square root of the variance.
sqrt(var(y))
## [1] 0.86
sd(y)
## [1] 0.86

Đồ họa

# 2 dimensions
# Two independent samples of 100 standard-normal values, shown as a scatter plot.
x <- rnorm(100)
y <- rnorm(100)
plot(x, y)

plot of chunk graphic

# Same scatter plot, now with axis labels and a title.
plot(
  x, y,
  xlab = "this is the x-axis",
  ylab = "this is the y-axis",
  main = "Plot of X vs Y"
)

plot of chunk graphic

# 3 dimensions
# seq(1, 10) and 1:10 print the same values.
x <- seq(1, 10)
x
##  [1]  1  2  3  4  5  6  7  8  9 10
x <- 1:10
x
##  [1]  1  2  3  4  5  6  7  8  9 10
# 50 evenly spaced grid points on [-pi, pi]; the argument is spelled out as
# length.out instead of relying on partial matching of `length`.
x <- seq(-pi, pi, length.out = 50)
y <- x
# outer() evaluates the function at every (x, y) pair, giving a 50 x 50 matrix.
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
contour(x, y, f)
# add = TRUE overlays a finer set of contour levels on the existing plot.
# TRUE is used because T is an ordinary variable that can be reassigned.
contour(x, y, f, nlevels = 45, add = TRUE)

plot of chunk 3-d

# fa is half the difference between f and its transpose.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)

plot of chunk 3-d

# Draw fa over the (x, y) grid as a colour-coded image.
image(x, y, fa)

plot of chunk 3-d

# Perspective (3-D surface) plot of fa with the default viewing angles.
persp(x, y, fa)

plot of chunk 3-d

# Same surface, with the theta viewing angle set to 30.
persp(x, y, fa, theta = 30)

plot of chunk 3-d

# Same surface, with theta = 30 and the phi viewing angle set to 20.
persp(x, y, fa, theta = 30, phi = 20)

plot of chunk 3-d

# Same surface, with theta = 30 and phi raised to 70.
persp(x, y, fa, theta = 30, phi = 70)

plot of chunk 3-d

# Same surface, with theta = 30 and phi set to 40.
persp(x, y, fa, theta = 30, phi = 40)

plot of chunk 3-d

Chỉ mục dữ liệu

# 4 x 4 matrix holding 1..16, filled column by column.
A <- matrix(1:16, nrow = 4, ncol = 4)
A
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
# Single element: row 2, column 3.
A[2, 3]
## [1] 10
# Index vectors select several rows and columns at once.
A[c(1, 3), c(2, 4)]
##      [,1] [,2]
## [1,]    5   13
## [2,]    7   15
A[1:3, 2:4]
##      [,1] [,2] [,3]
## [1,]    5    9   13
## [2,]    6   10   14
## [3,]    7   11   15
# An empty index keeps every row (or every column).
A[1:2, ]
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
A[, 1:2]
##      [,1] [,2]
## [1,]    1    5
## [2,]    2    6
## [3,]    3    7
## [4,]    4    8
# Selecting a single row returns a plain vector.
A[1, ]
## [1]  1  5  9 13
# Negative indices exclude the listed rows or columns.
A[-c(1, 3), ]
##      [,1] [,2] [,3] [,4]
## [1,]    2    6   10   14
## [2,]    4    8   12   16
A[-c(1, 3), -c(1, 3, 4)]
## [1] 6 8
dim(A)
## [1] 4 4

Tổng quan về dữ liệu

# To fetch the data set first, uncomment the two lines below.
# fileUrl <- "http://www-bcf.usc.edu/~gareth/ISL/Auto.csv"
# download.file(fileUrl,destfile="./ISL/Auto.csv")

# Read the CSV with a header row; "?" entries are read as missing values (NA).
# TRUE is used because T is an ordinary variable that can be reassigned.
Auto <- read.csv("./ISL/Auto.csv", header = TRUE, na.strings = "?")
dim(Auto)
## [1] 397   9
Auto[1:4, ]
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12 70 1
## 2 15 8 350 165 3693 12 70 1
## 3 18 8 318 150 3436 11 70 1
## 4 16 8 304 150 3433 12 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
# Drop every row that contains an NA (397 rows -> 392 rows).
Auto <- na.omit(Auto)
dim(Auto)
## [1] 392   9
names(Auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"  
## [5] "weight" "acceleration" "year" "origin"
## [9] "name"
# NOTE(review): attach() puts the columns of Auto on the search path so that
# the code below can refer to mpg, cylinders, horsepower and name directly.
# It is kept because the rest of this lab depends on it; the recorded message
# shows it masking ggplot2's mpg object.
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
plot(cylinders, mpg)

plot of chunk add

# Convert cylinders to a factor; the assignment creates a new variable named
# cylinders rather than modifying the column inside Auto.
cylinders <- as.factor(cylinders)
plot(cylinders, mpg)

plot of chunk add

# Same plot of mpg against cylinders, drawn in red.
plot(cylinders, mpg, col = "red")

plot of chunk add

# Red plot of mpg against cylinders with varwidth switched on.
# TRUE is used because T is an ordinary variable that can be reassigned.
plot(cylinders, mpg, col = "red", varwidth = TRUE)

plot of chunk add

# Same plot with both varwidth and horizontal switched on.
# TRUE is used because T is an ordinary variable that can be reassigned.
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)

plot of chunk add

# Same plot with varwidth switched on and explicit axis labels.
# TRUE is used because T is an ordinary variable that can be reassigned.
plot(
  cylinders, mpg,
  col = "red", varwidth = TRUE,
  xlab = "cylinders", ylab = "MPG"
)

plot of chunk add

# Histogram of mpg with default settings.
hist(x = mpg)

plot of chunk add

# Histogram of mpg filled with colour number 2 of the current palette.
hist(mpg, col = 2)

plot of chunk add

# Same histogram, with breaks = 15 passed to hist().
hist(mpg, col = 2, breaks = 15)

plot of chunk add

# Scatter-plot matrix of every column in Auto.
pairs(x = Auto)

plot of chunk add

# Scatter-plot matrix restricted to the five variables named in the formula.
pairs(
  ~ mpg + displacement + horsepower + weight + acceleration,
  data = Auto
)

plot of chunk add

# Plot mpg against horsepower, then call identify() with the car names as
# point labels.
plot(horsepower, mpg)
identify(horsepower, mpg, labels = name)

plot of chunk add

## integer(0)
# Per-column summary of the whole data frame.
summary(object = Auto)
##       mpg       cylinders    displacement   horsepower      weight    
## Min. : 9 Min. :3.0 Min. : 68 Min. : 46 Min. :1613
## 1st Qu.:17 1st Qu.:4.0 1st Qu.:105 1st Qu.: 75 1st Qu.:2225
## Median :23 Median :4.0 Median :151 Median : 94 Median :2804
## Mean :23 Mean :5.5 Mean :194 Mean :104 Mean :2978
## 3rd Qu.:29 3rd Qu.:8.0 3rd Qu.:276 3rd Qu.:126 3rd Qu.:3615
## Max. :47 Max. :8.0 Max. :455 Max. :230 Max. :5140
##
## acceleration year origin name
## Min. : 8.0 Min. :70 Min. :1.00 amc matador : 5
## 1st Qu.:13.8 1st Qu.:73 1st Qu.:1.00 ford pinto : 5
## Median :15.5 Median :76 Median :1.00 toyota corolla : 5
## Mean :15.5 Mean :76 Mean :1.58 amc gremlin : 4
## 3rd Qu.:17.0 3rd Qu.:79 3rd Qu.:2.00 amc hornet : 4
## Max. :24.8 Max. :82 Max. :3.00 chevrolet chevette: 4
## (Other) :365
# Summary of the single variable mpg.
summary(object = mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 9 17 23 23 29 47
  1. http://www-bcf.usc.edu/~gareth/ISL