zoukankan      html  css  js  c++  java
  • 基于python、jupyter-notebook 的金融领域用户交易行为分析

    说明:本文重在说明交易数据统计、分析方法,所有数据均为生成的数据

        时间原因代码未定义成函数

    统计指标:1.用户单日交易行为数据

         2.按小时为计算单位,统计用户行为数据(旨在求得一天24小时中每个小时的交易金额、交易量和后再做统计计算)

    获取数据代码如下:

     1 #!/usr/bin/env python
     2 # -*- coding: utf-8 -*-
     3 __author__ = 'zqf'
     4 
     5 
     6 import pymysql
     7 import random
     8 import time
     9 from conf import test_conf
    10 
    11 
    12 con = pymysql.connect(**test_conf.con_set)
    13 print("连接成功")
    14 cur = con.cursor()
    15 time_start = time.time()
    16 for i in range(1, 100000):
    17     phone_random = random.randint(10000000000, 19999999999)
    18     transaction_amount_random = random.uniform(0, 50000)
    19     transaction_type_random = random.randint(1, 5)
    20     # sql = "select * from my_database.transcation_info"
    21     sql = "insert into my_database.transaction_info(user_id, name, phone, transaction_amount, transaction_type, " 
    22           "transaction_time) values('%d', 'name%d', '%d', '%d', '%d', '2019-05-%d %d:%d:%d')" % 
    23           (random.randint(1, 1000), random.randint(1, 1000), phone_random, transaction_amount_random, 
    24            transaction_type_random, random.randint(1, 31), random.randint(0, 23), random.randint(0, 59), 
    25            random.randint(0, 59))
    26 
    27     cur.execute(sql)
    28 print("execute完毕即将提交")
    29 try:
    30     con.commit()
    31     print("提交成功")
    32 except Exception:
    33     print("插入失败")
    34     con.rollback()
    35 time_stop = time.time()
    36 take_time = time_stop - time_start
    37 print("花费时间:", take_time)
    38 # print(cur.fetchall())

    在juoyter-notebook中

    导入所需包

    # 导入所需包
    import
    pandas as pd import pymysql from datetime import datetime import time from matplotlib import pyplot as plt plt.rcParams['font.family'] = ['sans-serif'] plt.rcParams['font.sans-serif'] = ['SimHei'] # from conf import test_conf

    连接sql数据库

     

     1 # 连接数据库
     2 con = pymysql.connect(**{
     3     'database': 'my_database',
     4     'host': '192.168.**.**',
     5     'port': 3306,
     6     'user': 'root',
     7     'password': '********',
     8     'charset': 'utf8'
     9 })
    10 print("连接成功")
    11 cur = con.cursor()
    12 sql = "select * from transaction_info"
    13 time_start = time.time()
    14 df = pd.read_sql(sql=sql, con=con)
    15 time_stop = time.time()
    16 take_time = time_stop - time_start
    17 print("读取十万条数据花费时间:", take_time)
    18 # print(df)
    连接成功
    读取十万条数据花费时间: 4.09512186050415
    # 用户每天每小时最大交易金额
    df_groupby_hour = df.groupby(['user_id', df['transaction_time'].apply(lambda item: datetime.strftime(item, '%Y-%m-%d %H'))])
    # 计算每天每小时交易金额最大值, 最小值, 平均值, 交易次数, 交易金额总计
    df_calculate_by_hour = df_groupby_hour['transaction_amount'].agg([["daily_max", "max"], ["daily_min", "min"], ["daily_mean", "mean"], ["daily_count", "count"], ["daily_sum", "sum"]])
    # 注:每天每小时交易金额最大值为每个小时中累计金额的最大值,所以后续需对hour_sum计算相应参数,hour_max、hour_min、hour_mean的后续相应
    # 计算均为以天为单位
    df_calculate_by_hour

    # 将user_id, transaction_time层索引设置成列索引
    df_calculate_by_hour_reset_index = df_calculate_by_hour.reset_index()
    df_calculate_by_hour_reset_index

    # 再根据user_id、transaction_time 分组,到天
    df_calculate_by_hour_reset_index_regroup = df_calculate_by_hour_reset_index.groupby(['user_id', df_calculate_by_hour_reset_index['transaction_time'].apply(lambda item: datetime.strftime(pd.to_datetime(item), '%Y-%m-%d'))])
    # 聚合计算
    df_finally = df_calculate_by_hour_reset_index_regroup.agg({'daily_max':['max'], 'daily_min':['min'], 'daily_mean':['mean'], 'daily_count':['sum', 'max', 'min', 'median', 'std', 'mean'], 'daily_sum':['sum', 'max', 'min', 'median', 'std', 'mean']})

    # 获取用户交易信息
    search_user_id = 1
    get_user_message = df_finally.loc[search_user_id]
    
    # 将user_id, transaction_time层索引设置成列索引
    df_finally_reset = df_finally.reset_index()
    
    df_finally.loc[search_user_id]

    # 绘制每天参数图
    
    plt.figure(figsize=(10, 6))
    plt.xticks(rotation=45)
    x = df_finally.loc[search_user_id].index
    li_daily = df_finally.columns.levels[0][:-2]
    colors = ['y', 'k', 'r']
    for index, i in enumerate(li_daily):
        plt.plot(x, df_finally.loc[search_user_id][i][df_finally[i].columns[0]], label=f"{i}", color=colors[index])
    
    plt.title("每天交易统计")
    plt.xlabel("日期")
    plt.ylabel("交易金额")
    plt.legend()
    plt.show()

    # 绘制交易量交易图
    plt.figure(figsize=(10, 6))
    plt.xticks(rotation=45)
    daily_count_str = df_finally.columns.levels[0][3]
    li_counts = df_finally[daily_count_str].columns
    x = df_finally.loc[search_user_id].index
    df_finally.columns.levels[0][3]
    colors = ['b', 'g', 'c', 'y', 'k', 'r']
    for index, i in enumerate(li_counts):
        plt.plot(x, df_finally.loc[search_user_id][daily_count_str][i], label=f"{i}", color=colors[index])
    
    plt.title("每天交易统计")
    plt.xlabel("日期")
    plt.ylabel("交易次数")
    plt.legend()
    plt.show()

    # 绘制每天每小时交易图
    plt.rcParams['font.family'] = ['sans-serif']
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(10, 6))
    plt.xticks(rotation=45)
    x = df_finally.loc[search_user_id].index
    li_daily_hour = df_finally.columns.levels[1]
    daily_sum_str = df_finally.columns.levels[0][4]
    colors = ['b', 'g', 'c', 'y', 'k', 'r']
    for index, i in enumerate(li_daily_hour):
        plt.plot(x, df_finally.loc[search_user_id][daily_sum_str][i], label=f"{i}", color=colors[index])
    
    plt.title("每天每小时交易统计")
    plt.xlabel("日期")
    plt.ylabel("交易金额")
    plt.legend()
    plt.show()

  • 相关阅读:
    复习列表
    20201009 day30 复习2:滑动窗口
    20201009 day30 复习1:扫描线
    20201007day29 模拟(九)
    20201006day28 模拟(八)
    20201005day27 模拟(七)
    20201004 day26 模拟(六)
    20201003day25 模拟(五)
    路由重分布(一)
    RIP路由协议(一)
  • 原文地址:https://www.cnblogs.com/tttzqf/p/10880296.html
Copyright © 2011-2022 走看看