zoukankan      html  css  js  c++  java
  • Kaggle Bike Sharing Demand: R code translated from the Python solution ranked 9th

    Use Caret to do the feature selection for rf.
    # Read the raw train/test CSVs and tag every row with the set it came from.
    train_row <- read.csv('E:/kuaipan/Kaggle Project/Bike Sharing Demand/train.csv')
    test_row <- read.csv('e:/kuaipan/Kaggle Project/Bike Sharing Demand/test.csv')
    train_row$type <- 'train'
    test_row$type <- 'test'

    colnames(train_row)

    # The target columns are added to the test set as NA so that both frames
    # carry the same set of columns and can be stacked with rbind().
    target_cols <- c('casual', 'registered', 'count')
    test_row[target_cols] <- NA

    # Stack train on top of test; keep an untouched copy of the merged frame.
    row_df <- rbind(train_row, test_row)
    row_df_backup <- row_df

    # Add a log(x + 1) version of every target column, named '<target>_log'.
    for (target in target_cols) {
      log_name <- paste0(target, '_log')
      row_df[[log_name]] <- log(row_df[[target]] + 1)
    }
    
    # Make sure lubridate is installed, then attach it.
    # The package name must be passed to install.packages() as a string, and the
    # package has to be attached after a fresh install; otherwise the date
    # helpers used below (ymd_hms, day, month, ...) are not available.
    if (!requireNamespace("lubridate", quietly = TRUE)) {
      install.packages("lubridate")
    }
    library(lubridate)
    
    # Parse the datetime strings once and derive the calendar features from the
    # parsed value. Columns are appended in this exact order (dt, day, month,
    # year, hour, weekday, week) because later code selects columns by position.
    stamp <- lubridate::ymd_hms(row_df$datetime)
    row_df$dt <- stamp
    row_df$day <- lubridate::day(stamp)
    row_df$month <- lubridate::month(stamp)
    row_df$year <- lubridate::year(stamp)
    row_df$hour <- lubridate::hour(stamp)
    row_df$weekday <- lubridate::wday(stamp)
    row_df$week <- lubridate::week(stamp)
    
    # season_count: total 'count' observed in the training rows of each season.
    # The total is computed from train rows only (test counts are NA), but it is
    # written to every row of that season so the test rows carry the feature
    # too instead of being left as NA.
    for (s in c(1, 2, 3, 4)) {
      in_season <- row_df$season == s
      train_total <- sum(row_df[which(in_season & row_df$type == 'train'), 'count'])
      row_df[which(in_season), 'season_count'] <- train_total
    }
    
    #head(row_df)
    
    # Override the workingday/holiday flags for the given calendar date(s).
    #
    # year, month, day: values (or vectors of values) matched with %in% against
    #                   the year/month/day columns of df.
    # value:            new 'workingday' flag for the matching rows; 'holiday'
    #                   is set to the opposite (0 when value is 1, otherwise 1).
    # df:               data frame to update; defaults to the global row_df.
    #
    # Returns the updated data frame. R passes data frames by value, so the
    # caller must assign the result back; modifying df inside the function
    # alone has no effect on the caller's copy.
    SetWorkingDay <- function(year, month, day, value, df = row_df) {
      hit <- which(df$year %in% year & df$month %in% month & df$day %in% day)
      df[hit, 'workingday'] <- value
      df[hit, 'holiday'] <- if (value == 1) 0 else 1
      df
    }

    # Dates forced to working days (holiday flag cleared).
    row_df <- SetWorkingDay(2011, 4, 15, 1)
    row_df <- SetWorkingDay(2012, 4, 16, 1)
    # Dates forced to non-working days (holiday flag set).
    row_df <- SetWorkingDay(2011, 11, 25, 0)
    row_df <- SetWorkingDay(2012, 11, 23, 0)
    
    # Individual dates (year, month, day) that are flagged as holidays.
    extra_holidays <- list(
      c(2011, 11, 25),
      c(2012, 11, 23),
      c(2012, 5, 21),
      c(2012, 6, 1),
      c(2012, 10, 30)
    )
    for (date_parts in extra_holidays) {
      on_date <- which(row_df$year == date_parts[1] &
                         row_df$month == date_parts[2] &
                         row_df$day == date_parts[3])
      row_df[on_date, 'holiday'] <- 1
    }

    # December 24, 26 and 31 (any year) are flagged as holidays; 24 and 31 are
    # additionally flagged as working days.
    # NOTE(review): days 24 and 31 end up with holiday == 1 AND workingday == 1;
    # confirm this combination is intended.
    in_december <- row_df$month == 12
    row_df[which(in_december & row_df$day %in% c(24, 26, 31)), 'holiday'] <- 1
    row_df[which(in_december & row_df$day %in% c(24, 31)), 'workingday'] <- 1
    
    # peak = 1 for hours 8, 12, 17 and 18 on working days, and for hours 10
    # through 19 (inclusive) on non-working days; 0 everywhere else.
    # R has no chained comparison (10 <= x <= 19 does not parse), so the range
    # test is written as two comparisons joined with &.
    row_df[which(row_df$workingday == 1 & row_df$hour %in% c(8, 17, 18, 12)), 'peak'] <- 1
    row_df[which(row_df$workingday == 0 & row_df$hour >= 10 & row_df$hour <= 19), 'peak'] <- 1
    row_df[which(is.na(row_df$peak)), 'peak'] <- 0
    
    # ideal = 1 when temp is above 27 and windspeed is below 30, otherwise 0.
    ideal_rows <- which(row_df$temp > 27 & row_df$windspeed < 30)
    row_df[ideal_rows, 'ideal'] <- 1
    row_df[which(is.na(row_df$ideal)), 'ideal'] <- 0

    # sticky = 1 on working days with humidity of at least 60, otherwise 0.
    sticky_rows <- which(row_df$humidity >= 60 & row_df$workingday == 1)
    row_df[sticky_rows, 'sticky'] <- 1
    row_df[which(is.na(row_df$sticky)), 'sticky'] <- 0
    
    # Split the engineered frame back into its train and test parts using the
    # 'type' tag that was attached before the two sets were merged.
    is_train <- row_df$type == 'train'
    is_test <- row_df$type == 'test'
    row_df.train <- row_df[which(is_train), ]
    row_df.test <- row_df[which(is_test), ]
    
    library(Metrics)
    # Root mean squared logarithmic error between predictions and actual values.
    #
    # pred, actual: numeric vectors of the same length, on the original
    #               (non-log) scale, with values greater than -1.
    # Returns sqrt(mean((log(pred + 1) - log(actual + 1))^2)), which is 0 for a
    # perfect prediction.
    #
    # The log transform is applied exactly once here; the result is not
    # exponentiated afterwards.
    get_rmsle <- function(pred, actual) {
      log_diff <- log(pred + 1) - log(actual + 1)
      sqrt(mean(log_diff^2))
    }
    
    library(caret)
    
    # Recursive feature elimination with random-forest helpers, evaluated by
    # ten-fold cross validation.
    control <- rfeControl(functions = rfFuncs, method = "cv", number = 10)

    # Candidate predictors: drop columns by position.
    # NOTE(review): the positions assume the column order built earlier in this
    # script; confirm with colnames(row_df.train) before changing any step
    # that adds or reorders columns.
    row_df.features <- row_df.train[, -c(10, 11, 12, 14, 15, 16, 1, 17, 13)]

    # The feature set and the response must come from the same rows of the same
    # data set. Besides the requested subset sizes, rfe also evaluates the
    # subset containing all features by default.
    # 'sizes' is spelled out in full rather than relying on partial argument
    # matching of 'size'.
    result <- rfe(row_df.features, row_df.train[, 14], sizes = c(16:17), rfeControl = control)
    plot(result, type = c('p', 'l'))
    
    #the final rf model is result$fit
    

  • 相关阅读:
    ghm一般规则
    沃尔玛强推RFID内外交困:供应商阳奉阴违
    电子商务物流解决方案
    database url
    物流中新技术应用的必要性
    美国物流管理协会更名标志全球物流进入供应链时代
    业内专家激辩物流挑战与机遇
    问的智慧
    调查报告:2003年物流信息化现状及挑战
    查找在菜单里提交的报表所在职责
  • 原文地址:https://www.cnblogs.com/rav009/p/5131072.html
Copyright © 2011-2022 走看看