zoukankan      html  css  js  c++  java
  • 数据分析之两种用户分群方法(RFM和聚类)

    本文由于没有现成的数据,就自己生成了一些商品订单数据,基于该数据进行了RFM和聚类的构建

    1.数据的生成

    数据库表操作

     1 use my_work;
     2 
     3 -- 创建商品订单表
     4 CREATE table goods_orders_ful(
     5     user_id varchar(100),  -- 用户id
     6     order_id varchar(100), -- 订单id
     7     is_paid bool, -- 用户是否实际支付,1支付;0未支付
     8     amount double, -- 订单金额
     9     created_date date, -- 订单生成日期 yyyy-mm-dd
    10     created_time timestamp, -- 订单生成时间 yyyy-mm-dd hh:mm:ss
    11     business_type varchar(10), -- 业务类型
    12     region_name varchar(10), -- 所属区域:如 东部地区
    13     order_source_name varchar(10), -- 订单渠道:Web、H5、App 
    14     is_done bool -- 订单是否完成
    15     );
    16                                             
    17 -- 创建用户订单行为中间表                 
    18 drop table if exists user_info_frm_01;
    19 CREATE table user_info_frm_01
    20 as
    21 select gof.user_id,
    22        sum(gof.amount) all_of_money,
    23        max(gof.created_date) latest_date, 
    24        count(gof.order_id) all_of_orders
    25 FROM goods_orders_ful gof 
    26 where gof.is_paid = 1
    27 and gof.is_done = 1
    28 and gof.created_date >= '2020-01-01'
    29 and gof.created_date < '2020-07-01'
    30 group by gof.user_id;
    31 
    32 SELECT count(*) from user_info_frm_01 uif;
    33 SELECT * from user_info_frm_01 uif limit 10
    34 
    35 -- 创建行为指标均值表
    36 create table if not exists user_info_frm_02
    37 as
    38 select avg(uif.all_of_money) all_of_money_avg,
    39        avg(datediff('2020-07-22', uif.latest_date)) latest_days_avg,
    40        avg(uif.all_of_orders) orders_avg
    41 from user_info_frm_01 uif;
    42 
    43 SELECT * from user_info_frm_02;
    44 -- 消费均值1107.10,最小天数均值86.9,订单数量均值2.1
    45 
    46 -- 将用户进行rfm一级打标
    47 create table if not exists user_info_frm_03
    48 as
    49 SELECT uif.user_id,
    50        case when uif.all_of_money >= 1107.10
    51             then ''
    52             else ''
    53             end money,
    54        case when datediff('2020-07-22', uif.latest_date) >= 86.9
    55             then ''
    56             else ''
    57             end recency,
    58        case when uif.all_of_orders >= 2.1
    59             then ''
    60             else ''
    61             end frequency
    62 from user_info_frm_01 uif;
    63 
    64 -- 将用户进行二级打标
    65 create table if not exists user_info_frm_04
    66 as
    67 select uif.user_id,
    68        uif.recency,
    69        uif.frequency,
    70        uif.money,
    71        case when uif.recency = '' and uif.frequency = '' and uif.money = ''
    72             then '重要价值用户'
    73             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    74             then '重要保持用户'
    75             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    76             then '重要发展用户'
    77             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    78             then '重要挽留用户'
    79             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    80             then '一般价值用户'
    81             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    82             then '一般保持用户'
    83             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    84             then '一般发展用户'
    85             when uif.recency = '' and uif.frequency = '' and uif.money = ''
    86             then '一般挽留用户'
    87             else NULL 
    88             end type
    89             
    90 from user_info_frm_03 uif;

    python 程序生成数据

     1 # _*_ coding: utf-8 _*_ #
     2 # @Time     :2020/7/25 7:30 下午
     3 # @Author   :Zhx
     4 
     5 
     6 import pymysql
     7 import uuid
     8 import random
     9 import time
    10 
    11 
    12 class CreateData(object):
    13 
    14     def __init__(self):
    15         pass
    16 
    17     @staticmethod
    18     def create():
    19         user_id_ = random.randint(1, 5000)
    20         order_id_ = uuid.uuid1()
    21         is_paid_ = random.choice([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])
    22         amount_ = random.uniform(10, 1000)
    23         a1 = (2020, 1, 1, 0, 0, 0, 0, 0, 0)
    24         a2 = (2020, 6, 31, 23, 59, 59, 0, 0, 0)
    25 
    26         start = time.mktime(a1)  # 生成开始时间戳
    27         end = time.mktime(a2)  # 生成结束时间戳
    28 
    29         # 随机生成10个日期字符串
    30         t = random.randint(start, end)  # 在开始和结束时间戳中随机取出一个
    31         date_tuple = time.localtime(t)  # 将时间戳生成时间元组
    32         created_date_ = time.strftime("%Y-%m-%d", date_tuple)  # 将时间元组转成格式化字符串
    33         created_time_ = time.strftime("%Y-%m-%d %H:%M:%S", date_tuple)
    34         business_type_ = random.randint(0, 20)
    35         region_name_ = random.choice(['', '西', '', ''])
    36         order_source_name_ = random.choice(['Web', 'app', 'H5'])
    37         is_done_ = is_paid_
    38         return user_id_, order_id_, is_paid_, amount_, created_date_, created_time_, 
    39             business_type_, region_name_, order_source_name_, is_done_
    40 
    41 
    42 if __name__ == '__main__':
    43     database = 'my_work'
    44     table = 'goods_orders_ful'
    45     counts = 10000
    46     create_data = CreateData()
    47     con = pymysql.connect(database=database, host='localhost',
    48                           user='root', port=3306, password='199498zhx@')
    49     cur = con.cursor()
    50     for i in range(counts):
    51         user_id, order_id, is_paid, amount, created_date, created_time, 
    52              business_type, region_name, order_source_name, is_done = create_data.create()
    53         sql = """insert into %s.%s values('%s', '%s', %d, %f, '%s', '%s', '%s', '%s', '%s', %d)""" % 
    54               (database, table, user_id, order_id, is_paid, amount, created_date, created_time, business_type,
    55                region_name, order_source_name, is_done)
    56         try:
    57             cur.execute(sql)
    58             print(i, i % 1000)
    59             con.commit()
    60         except Exception as e:
    61             print(e)
    62             con.rollback()
    63     con.close()
    64     cur.close()

    源数据字段有:

      user_id varchar(100),  -- 用户id

      order_id varchar(100), -- 订单id

      is_paid bool, -- 用户是否实际支付,1支付;0未支付

          amount double, -- 订单金额

      created_date date, -- 订单生成日期 yyyy-mm-dd

      created_time timestamp, -- 订单生成时间 yyyy-mm-dd hh:mm:ss

      business_type varchar(10), -- 业务类型

      region_name varchar(10), -- 所属区域:如 东部地区

      order_source_name varchar(10), -- 订单渠道:Web、H5、App 

      is_done bool -- 订单是否完成

     

    RFM 模型最终表数据

    最终的可视化分析使用jupyter完成

     

     

     

     

     

     

     

     

     

     

  • 相关阅读:
    IoC就是IoC,不是什么技术,与GoF一样,是一种 设计模式。
    控制反转是Spring框架的核心。
    一种是CI(Constructor Injection)构造方法注入,另一种是SI(Set Injection) set 注入
    IOC 的理解与解释
    java 单例模式5种写法
    AOP(Aspect Oriented Programming),即面向切面编程
    AOP称为面向切面编程,在程序开发中主要用来解决一些系统层面上的问题
    ioc和aop的区别?
    JAVA设计模式之单例模式
    详解JNDI的lookup资源引用 java:/comp/env
  • 原文地址:https://www.cnblogs.com/tttzqf/p/13382546.html
Copyright © 2011-2022 走看看