# C5.0 ----
########################### Decision trees
######## C5.0
library(C50)

# NOTE(review): machine-specific absolute path; the script only runs as-is on
# the original author's machine. Kept because later code may rely on the
# working directory being changed.
setwd("/Users/yaozhilin/Downloads/R_edu/data")
orgdata <- read.csv("Allelectronics.csv")
summary(orgdata)
#>       age      income            student          credit_rating      buys_computer
#>  Min.   :1   Length:14          Length:14          Length:14          Length:14
#>  1st Qu.:1   Class :character   Class :character   Class :character   Class :character
#>  Median :2   Mode  :character   Mode  :character   Mode  :character   Mode  :character
#>  Mean   :2
#>  3rd Qu.:3
#>  Max.   :3

# C5.0() needs the outcome to be a factor. Here all five columns are converted
# to factors, so the integer-coded `age` is treated as a category rather than
# as a numeric predictor.
orgdata <- as.data.frame(lapply(orgdata[, 1:5], as.factor))

# Control settings for the tree:
#   minCases        - smallest number of samples that must fall into at least
#                     two of the branches of a split
#   CF              - confidence factor; larger values prune less, giving a
#                     larger model
#   winnow          - whether to pre-select (winnow) predictors before fitting
#   noGlobalPruning - TRUE skips the final global pruning step
tr <- C5.0Control(
  minCases = 1,
  CF = 0.95,
  winnow = FALSE,
  noGlobalPruning = TRUE
)

# trials = 1 fits a single tree (no boosting); rules = FALSE keeps the tree
# form so that it can be plotted.
model <- C5.0(
  buys_computer ~ .,
  data = orgdata,
  trials = 1,
  rules = FALSE,
  control = tr
)
plot(model)
# Generate a rule set: same formula, data and control object as the tree
# model, but with rules = TRUE so the fit is expressed as if-then rules.
rules <- C5.0(
  buys_computer ~ .,
  data = orgdata,
  trials = 1,
  rules = TRUE,
  control = tr
)
summary(rules)
#> Call:
#> C5.0.formula(formula = buys_computer ~ ., data = orgdata, trials = 1, rules = TRUE, control = tr)
#>
#> C5.0 [Release 2.07 GPL Edition]    Wed Nov  4 15:12:38 2020
#> -------------------------------
#>
#> Class specified by attribute `outcome'
#>
#> Read 14 cases (5 attributes) from undefined.data
#>
#> Rules:
#>
#> Rule 1: (3, lift 2.2)
#>     age = 1
#>     student = no
#>     ->  class no  [0.800]
#>
#> Rule 2: (2, lift 2.1)
#>     age = 3
#>     credit_rating = excellent
#>     ->  class no  [0.750]
#>
#> Rule 3: (4, lift 1.3)
#>     age = 2
#>     ->  class yes  [0.833]
#>
#> Rule 4: (3, lift 1.2)
#>     age = 3
#>     credit_rating = fair
#>     ->  class yes  [0.800]
#>
#> Rule 5: (7/1, lift 1.2)
#>     student = yes
#>     ->  class yes  [0.778]
#>
#> Default class: yes
#>
#> Evaluation on training data (14 cases):
#>
#>             Rules
#>       ----------------
#>         No      Errors
#>
#>          5    1( 7.1%)   <<
#>
#>        (a)   (b)    <-classified as
#>       ----  ----
#>          4     1    (a): class no
#>                9    (b): class yes
#>
#>     Attribute usage:
#>
#>      85.71%  age
#>      71.43%  student
#>      35.71%  credit_rating
#>
#> Time: 0.0 secs
# CART method ----
###### Decision tree with CART (rpart)
library(rpart)
library(rpart.plot)

# Growth controls:
#   minsplit  - minimum number of observations a node must contain before a
#               split is attempted
#   minbucket - minimum number of observations allowed in any terminal node
#   cp        - complexity parameter; splits that do not improve the fit by at
#               least this factor are not kept
#   maxdepth  - maximum depth of any node in the final tree
#   xval      - number of cross-validations
tc <- rpart.control(
  minsplit = 1,
  minbucket = 1,
  cp = 0.001,
  maxdepth = 6,
  xval = 10
)

# Classification tree using the Gini index as the splitting criterion.
cmodel <- rpart(
  buys_computer ~ .,
  data = orgdata,
  parms = list(split = "gini"),
  method = "class",
  control = tc
)

# extra = 106: show the fitted probability of the second class plus the
# percentage of observations in each node; faclen = 0 prints factor levels
# in full; under = TRUE places the extra text beneath the node box.
rpart.plot(
  cmodel,
  branch = 1,
  extra = 106,
  under = TRUE,
  faclen = 0,
  cex = 0.8
)
# Prune the fitted tree back to complexity parameter 0.3 and plot the result.
# NOTE(review): cp = 0.3 is hard-coded; printcp(cmodel) lists the cp table if
# the threshold needs to be checked against the cross-validated error.
cmodel_p <- prune(cmodel, cp = 0.3)
rpart.plot(cmodel_p)