# dummyVars函数: dummyVars creates a full set of dummy variables (i.e. a less than full rank parameterization) —— 建立一套完整的虚拟变量
# Build a one-column survey table of ordered sentiment labels and print it.
survey <- data.frame(
  service = c("very unhappy", "unhappy", "neutral", "happy", "very happy")
)
survey
##        service
## 1 very unhappy
## 2      unhappy
## 3      neutral
## 4        happy
## 5   very happy
# We can simply add a `rank` column that encodes each sentiment as a number.
survey <- data.frame(
  service = c("very unhappy", "unhappy", "neutral", "happy", "very happy"),
  rank = c(1, 2, 3, 4, 5)
)
survey
##        service rank
## 1 very unhappy    1
## 2      unhappy    2
## 3      neutral    3
## 4        happy    4
## 5   very happy    5
# caret provides dummyVars(), used below for one-hot style encoding.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2

# Example customer table with two categorical columns (gender, mood) and a
# numeric 0/1 outcome.
# stringsAsFactors = TRUE keeps gender and mood as factors, matching the
# recorded str() output further down; since R 4.0 the default is FALSE, which
# would leave them as character vectors.
customers <- data.frame(
  id = c(10, 20, 30, 40, 50),
  gender = c("male", "female", "female", "male", "female"),
  mood = c("happy", "sad", "happy", "sad", "happy"),
  outcome = c(1, 1, 0, 0, 0),
  stringsAsFactors = TRUE
)
customers
##   id gender  mood outcome
## 1 10   male happy       1
## 2 20 female   sad       1
## 3 30 female happy       0
## 4 40   male   sad       0
## 5 50 female happy       0
# Apply dummyVars to the customers data: `~ .` builds the encoder from every
# column of the data frame.
dmy <- dummyVars(~ ., data = customers)

# Predict on the same data and convert the resulting matrix to a data.frame.
trsf <- data.frame(predict(dmy, newdata = customers))
trsf
##   id gender.female gender.male mood.happy mood.sad outcome
## 1 10             0           1          1        0       1
## 2 20             1           0          0        1       1
## 3 30             1           0          1        0       0
## 4 40             0           1          0        1       0
## 5 50             1           0          1        0       0
# Inspect the column types: gender and mood are factors, outcome is numeric.
str(customers)
## 'data.frame':    5 obs. of  4 variables:
##  $ id     : num  10 20 30 40 50
##  $ gender : Factor w/ 2 levels "female","male": 2 1 1 2 1
##  $ mood   : Factor w/ 2 levels "happy","sad": 1 2 1 2 1
##  $ outcome: num  1 1 0 0 0

# Convert outcome to a factor so it is treated as categorical as well.
customers$outcome <- as.factor(customers$outcome)
str(customers)
## 'data.frame':    5 obs. of  4 variables:
##  $ id     : num  10 20 30 40 50
##  $ gender : Factor w/ 2 levels "female","male": 2 1 1 2 1
##  $ mood   : Factor w/ 2 levels "happy","sad": 1 2 1 2 1
##  $ outcome: Factor w/ 2 levels "0","1": 2 2 1 1 1
# Re-run the prediction with the existing `dmy` encoder now that outcome is a
# factor; outcome is expanded into one indicator column per level.
# NOTE(review): the outcome0/outcome1 names lack the "." separator seen on the
# other columns, presumably because `dmy` was fitted before the conversion --
# confirm.
trsf <- data.frame(predict(dmy, newdata = customers))
trsf
##   id gender.female gender.male mood.happy mood.sad outcome0 outcome1
## 1 10             0           1          1        0        0        1
## 2 20             1           0          0        1        0        1
## 3 30             1           0          1        0        1        0
## 4 40             0           1          0        1        1        0
## 5 50             1           0          1        0        1        0
# Encode a single column only by naming it on the right-hand side of the
# formula; the result contains just the gender indicator columns.
dmy <- dummyVars(~ gender, data = customers)
trfs <- data.frame(predict(dmy, newdata = customers))
trfs
##   gender.female gender.male
## 1             0           1
## 2             1           0
## 3             1           0
## 4             0           1
## 5             1           0
# Encode every column with fullRank = TRUE: as the output shows, each factor
# keeps one indicator column fewer than it has levels (gender.female,
# mood.happy and outcome.0 are dropped).
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
dmy <- dummyVars(~ ., data = customers, fullRank = TRUE)
trfs <- data.frame(predict(dmy, newdata = customers))
trfs
##   id gender.male mood.sad outcome.1
## 1 10           1        0         1
## 2 20           0        1         1
## 3 30           0        0         0
## 4 40           1        1         0
## 5 50           0        0         0