zoukankan      html  css  js  c++  java
  • Kaggle bike sharing program: R code translated from the Python code that ranked 9th

    Use caret to do the feature selection for the random forest (rf).
    # Read the train and test CSVs and tag every row with the set it came from,
    # so the two frames can be stacked now and separated again later.
    train_row <- read.csv('E:/kuaipan/Kaggle Project/Bike Sharing Demand/train.csv')
    test_row <- read.csv('e:/kuaipan/Kaggle Project/Bike Sharing Demand/test.csv')
    train_row[["type"]] <- 'train'
    test_row[["type"]] <- 'test'

    colnames(train_row)

    # Give the test frame NA placeholders for the three target columns
    # (presumably absent from test.csv) so rbind() sees matching column names.
    for (target in c('casual', 'registered', 'count')) {
      test_row[[target]] <- NA
    }

    # Stack both sets into one frame and keep an untouched copy of it.
    row_df <- rbind(train_row, test_row)
    row_df_backup <- row_df
    
    # Add a log(x + 1) version of each target column, named <target>_log.
    # All three columns are transformed in one vectorised assignment.
    target_cols <- c('casual', 'registered', 'count')
    log_cols <- paste0(target_cols, '_log')
    row_df[log_cols] <- log(row_df[target_cols] + 1)
    
    # Make sure lubridate is installed, then attach it.
    # install.packages() needs the package name as a string; a bare symbol is
    # looked up as a variable and fails. The package must also be attached
    # after a fresh install, otherwise ymd_hms() and friends are not in scope.
    if (!requireNamespace("lubridate", quietly = TRUE)) {
      install.packages("lubridate")
    }
    library(lubridate)
    
    # Parse the datetime column once, then derive one column per calendar
    # component using the matching lubridate accessor.
    row_df[["dt"]] <- ymd_hms(row_df[["datetime"]])
    date_parts <- list(
      day = day,
      month = month,
      year = year,
      hour = hour,
      weekday = wday,
      week = week
    )
    for (part in names(date_parts)) {
      row_df[[part]] <- date_parts[[part]](row_df[["dt"]])
    }
    
    # season_count: total `count` observed in the training rows of each season.
    # The total is computed from training rows only (test rows have count = NA),
    # but it is written to every row of that season, train and test alike, so
    # the feature is not left as NA on the rows that will need predictions.
    for (s in c(1, 2, 3, 4)) {
      in_season <- row_df$season == s
      train_total <- sum(row_df$count[which(in_season & row_df$type == 'train')])
      row_df[which(in_season), 'season_count'] <- train_total
    }
    
    #head(row_df)
    
    # Set the `workingday` flag for specific calendar dates and set `holiday`
    # to the opposite flag (workingday 1 -> holiday 0, anything else -> holiday 1).
    #
    # Args:
    #   year, month, day: values matched with %in% against the columns of the
    #     same name; every row matching all three is updated.
    #   value: new `workingday` value for the matching rows.
    #   df: data frame to update; defaults to the script-level `row_df`.
    #
    # Returns:
    #   The updated data frame. Assignments made inside a function only change
    #   the function's own copy of the frame, so the caller has to assign the
    #   result back for the change to be kept.
    SetWorkingDay <- function(year, month, day, value, df = row_df) {
      hit <- which(df$year %in% year & df$month %in% month & df$day %in% day)
      df[hit, 'workingday'] <- value
      df[hit, 'holiday'] <- if (value == 1) 0 else 1
      df
    }

    # Apply the per-date corrections, keeping each result.
    row_df <- SetWorkingDay(2011, 4, 15, 1)
    row_df <- SetWorkingDay(2012, 4, 16, 1)
    row_df <- SetWorkingDay(2011, 11, 25, 0)
    row_df <- SetWorkingDay(2012, 11, 23, 0)
    
    # Force holiday = 1 on a fixed list of individual dates (year, month, day).
    extra_holidays <- list(
      c(2011, 11, 25),
      c(2012, 11, 23),
      c(2012, 5, 21),
      c(2012, 6, 1),
      c(2012, 10, 30)
    )
    for (date_key in extra_holidays) {
      on_date <- which(
        row_df$year == date_key[1] &
          row_df$month == date_key[2] &
          row_df$day == date_key[3]
      )
      row_df[on_date, 'holiday'] <- 1
    }

    # Year-independent December overrides.
    in_december <- row_df$month == 12
    row_df[which(in_december & row_df$day %in% c(24, 26, 31)), 'holiday'] <- 1
    # NOTE(review): Dec 24/31 end up with holiday = 1 AND workingday = 1, while
    # SetWorkingDay treats the two flags as opposites; confirm 1 is intended.
    row_df[which(in_december & row_df$day %in% c(24, 31)), 'workingday'] <- 1
    
    # peak = 1 for hours 8, 12, 17 and 18 on working days, and for hours 10..19
    # on non-working days; 0 everywhere else.
    workday_peak <- row_df$workingday == 1 & row_df$hour %in% c(8, 17, 18, 12)
    # R has no chained comparison (`10 <= x <= 19` does not parse), so the
    # range test is written as two comparisons joined with `&`.
    offday_peak <- row_df$workingday == 0 & row_df$hour >= 10 & row_df$hour <= 19
    # `%in% TRUE` maps both FALSE and NA to 0, so rows with missing inputs are
    # treated as non-peak.
    row_df$peak <- as.numeric((workday_peak | offday_peak) %in% TRUE)
    
    # ideal = 1 when temp > 27 and windspeed < 30, otherwise 0.
    # `%in% TRUE` turns both FALSE and NA conditions into 0.
    is_ideal <- row_df$temp > 27 & row_df$windspeed < 30
    row_df$ideal <- as.numeric(is_ideal %in% TRUE)

    # sticky = 1 when humidity >= 60 on a working day, otherwise 0.
    is_sticky <- row_df$humidity >= 60 & row_df$workingday == 1
    row_df$sticky <- as.numeric(is_sticky %in% TRUE)
    
    # Separate the stacked frame back into its train and test parts using the
    # `type` tag; rows with a missing tag fall into neither part.
    from_train <- row_df$type %in% 'train'
    from_test <- row_df$type %in% 'test'
    row_df.train <- row_df[from_train, ]
    row_df.test <- row_df[from_test, ]
    
    library(Metrics)
    # Root mean squared logarithmic error between predictions and actuals:
    #   sqrt(mean((log(pred + 1) - log(actual + 1))^2))
    #
    # Args:
    #   pred: numeric vector of predicted values (on the original scale).
    #   actual: numeric vector of observed values, same length as `pred`.
    #
    # Returns:
    #   A single number; 0 for a perfect prediction.
    #
    # The logarithm is applied exactly once here. Feeding already-logged values
    # into another RMSLE routine and then exponentiating the result does not
    # give the RMSLE (a perfect prediction would score 1 instead of 0).
    get_rmsle <- function(pred, actual) {
      log_diff <- log1p(pred) - log1p(actual)
      sqrt(mean(log_diff^2))
    }
    
    library(caret)
    
    # Recursive feature elimination using caret's random-forest helper
    # functions (rfFuncs), evaluated with ten-fold cross-validation.
    control <- rfeControl(functions = rfFuncs, method = "cv", number = 10)

    # Predictors: every column except the ones dropped here by position.
    # NOTE(review): positions depend on the column order built earlier in the
    # script (CSV columns first, then the derived columns); confirm after any
    # change to the feature engineering.
    dropped_cols <- c(10, 11, 12, 14, 15, 16, 1, 17, 13)
    row_df.features <- row_df.train[, -dropped_cols]

    # The predictors and the response must come from the same rows of the same
    # data set. Column 14 is the response (presumably casual_log; TODO confirm).
    # In addition to the subset sizes listed in `sizes`, rfe() also evaluates
    # the subset containing all features.
    result <- rfe(
      row_df.features,
      row_df.train[, 14],
      sizes = 16:17,
      rfeControl = control
    )
    plot(result, type = c('p', 'l'))

    # the final rf model is result$fit
    

  • 相关阅读:
    Python 脚本退出
    数组对象从大到小:
    小程序中使用倒计时
    倒计时
    将数字转化为汉字
    turn.js中文API 写一个翻页效果的参数详细解释
    前端数据可视化echarts.js使用指南
    视频及MP3 播放浅析 Jplayer参数详细
    https://blog.csdn.net/cddcj/article/details/52193932
    让一些旧浏览器变牛逼的库 ========兼容性
  • 原文地址:https://www.cnblogs.com/rav009/p/5131072.html
Copyright © 2011-2022 走看看