安装,启动,退出:
# Install R: the r-base and r-base-dev packages, via apt-get
sudo apt-get install r-base r-base-dev
# Start an R session
R
# Quit the R session
q()
安装package:
# Install a single package from CRAN.
install.packages("randomForest")
# install.views() is not part of base R: it is provided by the ctv
# (CRAN Task Views) package. Install ctv first and call the function
# through its namespace so the line works without attaching anything.
install.packages("ctv")
ctv::install.views("MachineLearning")
# Search the online R documentation for functions matching "confusion".
RSiteSearch("confusion", restrict = "functions")
加载package:
# Attach the randomForest package to the current session.
library("randomForest")
# Show information about the session, including the packages already loaded.
sessionInfo()
创建对象:
# Create objects with the assignment operator `<-`.
# Each statement needs its own line (or a `;` separator).
pages <- 97
town <- "Richmond"
基本数据类型和结构:
# The condition of `if` must be wrapped in parentheses.
if (3 > 2) {
  print("greater")
} else {
  print("less")
}
# A comparison yields a logical value that can be stored like any other.
isGreater <- 3 > 2
isGreater
is.logical(isGreater)
# --- Numeric scalar ---------------------------------------------------------
x <- 3.6
is.numeric(x)
is.integer(x)
is.double(x)
typeof(x)

# --- Character string -------------------------------------------------------
y <- "your ad here"
typeof(y)
# Number of characters in y.
nchar(y)
# Look for a substring in the character string: grep() returns the index of
# every matching element, or integer(0) when nothing matches.
grep(pattern = "ad", x = y)
grep(pattern = "my", x = y)
# --- Numeric vector ---------------------------------------------------------
# c() ("combine") builds a vector from its arguments.
weights <- c(90, 150, 111, 123)
is.vector(weights)
typeof(weights)
length(weights)
# Arithmetic is applied to every element:
# [1] 90.25 150.25 111.25 123.25
weights + 0.25
mean(weights)

# --- Character vector -------------------------------------------------------
colors <- c("green", "red", "blue", "red", "white")
grep(pattern = "red", x = colors)
nchar(colors)

# --- Indexing ---------------------------------------------------------------
# Select elements by position.
weights[c(1, 4)]
# A logical vector also works as an index, but it should supply one
# logical value per element.
weights[c(TRUE, TRUE, FALSE, TRUE)]
# --- Missing values ---------------------------------------------------------
# NA marks a missing entry.
probabilities <- c(0.05, 0.67, NA, 0.32, 0.90)
is.na(probabilities)
# NA propagates through summaries ...
mean(probabilities)                # [1] NA
# ... unless it is removed explicitly.
mean(probabilities, na.rm = TRUE)  # [1] 0.485
数据集操作,matrix, data frames:
# Build a 3 x 4 matrix (filled column by column) and label its dimensions.
mat <- matrix(1:12, nrow = 3)
rownames(mat) <- c("row 1", "row 2", "row 3")
colnames(mat) <- c("col1", "col2", "col3", "col4")
mat
#       col1 col2 col3 col4
# row 1    1    4    7   10
# row 2    2    5    8   11
# row 3    3    6    9   12

# Index by position ...
mat[1, 2:3]
# col2 col3
#    4    7

# ... or by row/column name.
mat["row 1", "col3"]
# [1] 7

# Selecting a single row drops the matrix structure and returns a vector.
mat[1, ]
# col1 col2 col3 col4
#    1    4    7   10
is.matrix(mat[1, ])  # [1] FALSE
is.vector(mat[1, ])  # [1] TRUE

# drop = FALSE keeps the result as a one-row matrix.
mat[1, , drop = FALSE]
#       col1 col2 col3 col4
# row 1    1    4    7   10
is.matrix(mat[1, , drop = FALSE])  # [1] TRUE
is.vector(mat[1, , drop = FALSE])  # [1] FALSE
# The colour names stored as a factor; the `Levels:` output below shows that
# the `colors` column is categorical.
colors2 <- factor(c("green", "red", "blue", "red", "white"))

# A data frame holds columns of different types side by side.
df <- data.frame(colors = colors2, time = 1:5)
df
#   colors time
# 1  green    1
# 2    red    2
# 3   blue    3
# 4    red    4
# 5  white    5

dim(df)       # [1] 5 2
colnames(df)  # [1] "colors" "time"
rownames(df)  # [1] "1" "2" "3" "4" "5"

# `$` extracts one column.
df$colors
# [1] green red   blue  red   white
# Levels: blue green red white

# Keep only the rows that satisfy a condition on the columns.
subset(df, colors %in% c("red", "green") & time <= 2)
#   colors time
# 1  green    1
# 2    red    2

# Add missing values to a copy of the data frame.
df2 <- df
df2[1, 1] <- NA
df2[5, 2] <- NA
df2
#   colors time
# 1   <NA>    1
# 2    red    2
# 3   blue    3
# 4    red    4
# 5  white   NA

# TRUE for every row without any missing value.
complete.cases(df2)
# [1] FALSE  TRUE  TRUE  TRUE FALSE
读取csv文件:
# Read a comma-separated file; by default the first line supplies the
# column names.
read.csv("data.csv")
# Arguments can be passed by name in any order; header = FALSE treats the
# first line as data rather than column names.
read.csv(header = FALSE, file = "data.csv")
# Treat "?" entries in the file as missing values (NA).
read.csv("data.csv", na.strings = "?")