zoukankan      html  css  js  c++  java
  • 生鲜购物篮模型

    library("arules")
    library("stringr")
    
    # ---- Load the raw inputs ----
    # Order lines exported to CSV. The later steps read the columns
    # parnt_ordr_id and categ_lvl3_id from this table.
    food_o3_df <- read.csv("C:/Users/Jennifer/Desktop/生鲜购物篮模型/fresh.csv")

    # Lower-case the column names so the rest of the script can rely on one
    # spelling regardless of how the export capitalised them.
    names(food_o3_df) <- tolower(names(food_o3_df))
    head(food_o3_df)

    # Category lookup (categ_lvl3_id -> categ_lvl3_name) pasted from the
    # clipboard as tab-separated text with a header row.
    # The separator is written as the escape "\t" rather than a literal tab
    # character: a literal tab is invisible and is silently turned into spaces
    # by many editors, which read.table() then rejects as a separator.
    c3 <- read.table(
      file = "clipboard",
      header = TRUE,
      stringsAsFactors = FALSE,
      sep = "\t"
    )
    names(c3) <- tolower(names(c3))

    # Cache both tables in the working directory ...
    save(food_o3_df, file = "food_o3.Rdata")
    save(c3, file = "c3.Rdata")

    # ... and read them back, so a later session can start from here.
    load("food_o3.Rdata")
    load("c3.Rdata")
    
    
    # Convert the order lines into an arules "transactions" object: one basket
    # per parent order id, holding the level-3 category ids bought in it.
    # Each basket is de-duplicated first, because the same category can occur on
    # several lines of one order and arules does not accept repeated items in a
    # basket cleanly (it errors or warns, depending on the package version).
    food_o3_ds <- as(
      lapply(split(food_o3_df$categ_lvl3_id, food_o3_df$parnt_ordr_id), unique),
      "transactions"
    )
    head(food_o3_ds)
    # save(food_o3_ds, file = "transactions_rdata/food_o3_ds.Rdata")
    
    # Market-basket model: mine association rules with Apriori.
    food_o3_rules <- apriori(
      food_o3_ds,
      parameter = list(
        supp = 0.02,      # minimum support
        conf = 0.5,       # minimum confidence
        minlen = 2,       # minimum number of items in a rule
        target = "rules"
      )
    )
    head(food_o3_rules)

    # Show the first few rules. inspect() prints everything it is handed, so
    # the rule set is shortened with head() before it is inspected (wrapping
    # head() around inspect() would still print every rule).
    inspect(head(food_o3_rules))
    
    
    # Translate the rules into Chinese: swap every level-3 category id that
    # appears in the rule text for its name from the c3 lookup table.
    food_o3_outrule <- as(food_o3_rules, "data.frame")
    temp <- as.character(food_o3_outrule$rules)

    # All ids are replaced in one pass over whole digit runs. Replacing one id
    # at a time as a plain pattern would also hit ids that merely contain it
    # (e.g. "12" inside "123") and digits inside names inserted earlier.
    if (length(temp) > 0) {
      id_pos <- gregexpr("[0-9]+", temp)
      regmatches(temp, id_pos) <- lapply(
        regmatches(temp, id_pos),
        function(ids) {
          labels <- as.character(
            c3$categ_lvl3_name[match(ids, as.character(c3$categ_lvl3_id))]
          )
          # Ids without a lookup entry stay as they are instead of failing.
          unknown <- is.na(labels)
          labels[unknown] <- ids[unknown]
          labels
        }
      )
    }
    temp
    food_o3_outrule[, 1] <- temp
    food_o3_outrule
    
    
    
    # Split every rule "{lhs} => {rhs}" into its two sides.
    # str_split_fixed() always returns a two-column matrix, also for zero or
    # one rule, where t(sapply(...)) would change shape.
    newname <- str_split_fixed(food_o3_outrule[, 1], " => ", n = 2)

    # Strip the surrounding braces. The backslashes must be doubled inside an
    # R string; a single "\{" is rejected by the parser.
    # The two sides are appended as named columns instead of being written to
    # positions 5:6, so no quality-measure column of the rules table is
    # overwritten, however many of them the installed arules version returns.
    food_o3_outrule$lhs <- str_replace_all(newname[, 1], "\\{|\\}", "")
    food_o3_outrule$rhs <- str_replace_all(newname[, 2], "\\{|\\}", "")

    # Export to CSV.
    write.csv(food_o3_outrule, file = "C:/Users/Jennifer/Desktop/rules.csv")
    
    library("arules")
    library("stringr")
    
     # ---- Load the raw inputs for the brand-level model ----
     # Order lines exported to CSV. The later steps read the columns
     # parnt_ordr_id and mg_brand_id from this table.
     food_om_df <- read.csv("C:/Users/Jennifer/Desktop/生鲜购物篮模型/brandid.csv")

     names(food_om_df) <- tolower(names(food_om_df))

     # Brand lookup (mg_brand_id -> mg_brand_name). It is produced by the query
     # below and pasted from the clipboard as tab-separated text with a header.
     # SQL
     # select mg_brand_id, replace(mg_brand_name, chr(39),' ') as mg_brand_name from dw.v_dim_mg_brand_cur
     # where mg_brand_id > 0;

     # The separator is written as the escape "\t" rather than a literal tab
     # character, which is invisible and easily turned into spaces by editors.
     mg <- read.table(
       file = "clipboard",
       header = TRUE,
       stringsAsFactors = FALSE,
       sep = "\t"
     )
     names(mg) <- tolower(names(mg))

     # Cache both tables under exactly the file names that are loaded below.
     # (Saving to a different path than the one loaded, or not saving mg at
     # all, makes the load() calls fail or pick up stale files.)
     save(food_om_df, file = "food_om_df.Rdata")
     save(mg, file = "mg.Rdata")

     load("food_om_df.Rdata")
     load("mg.Rdata")

     # Drop repeated rows so an order does not list the same brand twice.
     food_om_df <- unique(food_om_df)
    
    
    # Build the arules "transactions" object from the in-memory table:
    # the brand ids are grouped into one basket per parent order id.
    food_om_ds <- as(
      split(food_om_df[["mg_brand_id"]], food_om_df[["parnt_ordr_id"]]),
      "transactions"
    )
    # Alternative: read the transactions straight from a CSV file.
    #ds = read.transactions('some.csv', format = "single", sep = ",", cols = c("PARNT_ORDR_ID", "CATEG_LVL3_ID"))

    # save(food_om_ds, file = "transactions_rdata/food_om_ds.Rdata")
    
    
    # Market-basket model: mine brand-level association rules with Apriori.
    food_om_rules <- apriori(
      food_om_ds,
      parameter = list(
        supp = 0.01,      # minimum support
        conf = 0.3,       # minimum confidence
        minlen = 2,       # minimum number of items in a rule
        target = "rules"
      )
    )

    # Show the rules. inspect() takes the rule set itself; to look at only some
    # rules, filter first, e.g.
    #   inspect(subset(food_om_rules, subset = rhs %in% "<item label>"))
    inspect(food_om_rules)
    
    
    
    
    # Translate the rules into Chinese: swap every brand id that appears in the
    # rule text for its name from the mg lookup table.
    food_om_outrule <- as(food_om_rules, "data.frame")
    temp <- as.character(food_om_outrule$rules)

    # All ids are replaced in one pass over whole digit runs. Replacing one id
    # at a time as a plain pattern would also hit ids that merely contain it
    # (e.g. "12" inside "123") and digits inside brand names inserted earlier.
    if (length(temp) > 0) {
      id_pos <- gregexpr("[0-9]+", temp)
      regmatches(temp, id_pos) <- lapply(
        regmatches(temp, id_pos),
        function(ids) {
          labels <- as.character(
            mg$mg_brand_name[match(ids, as.character(mg$mg_brand_id))]
          )
          # Ids without a lookup entry stay as they are instead of failing.
          unknown <- is.na(labels)
          labels[unknown] <- ids[unknown]
          labels
        }
      )
    }
    temp
    food_om_outrule[, 1] <- temp
    food_om_outrule
    
    
    
    # Split every rule "{lhs} => {rhs}" into its two sides.
    # str_split_fixed() always returns a two-column matrix, also for zero or
    # one rule, where t(sapply(...)) would change shape.
    newname <- str_split_fixed(food_om_outrule[, 1], " => ", n = 2)

    # Strip the surrounding braces. The backslashes must be doubled inside an
    # R string; a single "\{" is rejected by the parser.
    # The two sides are appended as named columns instead of being written to
    # positions 5:6, so no quality-measure column of the rules table is
    # overwritten, however many of them the installed arules version returns.
    food_om_outrule$lhs <- str_replace_all(newname[, 1], "\\{|\\}", "")
    food_om_outrule$rhs <- str_replace_all(newname[, 2], "\\{|\\}", "")

    # Export to CSV.
    write.csv(food_om_outrule, file = "C:/Users/Jennifer/Desktop/生鲜购物篮模型/food_om_rules.csv")
  • 相关阅读:
    学习:大文件统计与排序
    共享库SidebySide之Windows Shared Assembly
    Bundle是个好东西
    所谓的代码段、数据段
    [design decision] common case vs. rare case
    如何给C++设计一个GC
    玩一把tesseract
    [design decision]让工具依赖于naming convention是个拙劣的做法
    监控域名可用性并自动发信
    调试lua代码
  • 原文地址:https://www.cnblogs.com/ilxx1988/p/4112733.html
Copyright © 2011-2022 走看看