zoukankan      html  css  js  c++  java
  • 生肖属相单变量分析

    import pandas as pd
    import numpy as np
    
    # Load the per-order zodiac feature table. The context manager closes the
    # file handle as soon as pandas has finished parsing it (the bare open()
    # used before left the handle open).
    with open(r'ft_zodiac.txt', encoding='utf-8') as f:
        ft_zodiac = pd.read_csv(f)
    print(ft_zodiac.shape)
    ft_zodiac.head()
    
    (23519, 4)
    
    0 order_id chinese_zodiac zodiac
    0 0 100000081567592448 处女座
    1 0 100000467565182976 双子座
    2 0 100000530945323008 射手座
    3 0 100000556765458432 摩羯座
    4 0 100000598171623424 水瓶座

    pd15作为好坏的分割节点。>15 为坏人,<15为好人?

    15天以上的人为坏,5天以内的人为好。

    # Load the per-order repayment label table. The handle is closed on leaving
    # the `with` block, and it gets a descriptive name instead of the
    # easily-misread single letter `l`.
    with open(r'zodiac_label.txt') as label_file:
        zodiac_label = pd.read_csv(label_file)
    zodiac_label.head()
    
    order_id overdue_days repay_time label
    0 100000081567592448 0 2018-07-09 0
    1 100000467565182976 1 2018-07-09 0
    2 100000530945323008 0 2018-07-09 0
    3 100000556765458432 0 2018-07-09 0
    4 100000598171623424 0 2018-07-09 0
    # Show the distinct values that occur in the `label` column.
    set(zodiac_label.label)
    
    {0, 1, 2}
    
    # Drop the rows whose label equals 2; every row with any other label is kept.
    # NOTE(review): label 2 is presumably the indeterminate group that is neither
    # clearly good nor clearly bad — confirm against the labelling rule.
    ft_label = zodiac_label[zodiac_label['label'] != 2]
    ft_label.head()
    
    order_id overdue_days repay_time label
    0 100000081567592448 0 2018-07-09 0
    1 100000467565182976 1 2018-07-09 0
    2 100000530945323008 0 2018-07-09 0
    3 100000556765458432 0 2018-07-09 0
    4 100000598171623424 0 2018-07-09 0
    # Show the distinct label values that remain after filtering.
    set(ft_label.label)
    
    {0, 1}
    
    # Attach the zodiac features to each labelled order. The inner join keeps
    # only the order_id values present in both tables.
    data = ft_label.merge(ft_zodiac, how='inner', on='order_id')
    data.head()
    
    order_id overdue_days repay_time label 0 chinese_zodiac zodiac
    0 100000081567592448 0 2018-07-09 0 0 处女座
    1 100000467565182976 1 2018-07-09 0 0 双子座
    2 100000530945323008 0 2018-07-09 0 0 射手座
    3 100000556765458432 0 2018-07-09 0 0 摩羯座
    4 100000598171623424 0 2018-07-09 0 0 水瓶座

    badrate = bad/total

    # Distinct values of the `zodiac` column in the merged table.
    zodiac_list = set(data['zodiac'])
    zodiac_list
    
    {'双子座',
     '双鱼座',
     '处女座',
     '天秤座',
     '天蝎座',
     '射手座',
     '巨蟹座',
     '摩羯座',
     '水瓶座',
     '狮子座',
     '白羊座',
     '金牛座'}
    
    # Distinct values of the `chinese_zodiac` column in the merged table.
    chinese_zodiac_list = set(data['chinese_zodiac'])
    chinese_zodiac_list
    
    {'兔', '牛', '狗', '猪', '猴', '羊', '虎', '蛇', '马', '鸡', '鼠', '龙'}
    
    # Zodiac sign: bad rate per sign = bad / (bad + good), where rows with
    # label 1 are counted as bad and rows with label 0 as good.
    zodiac_badrate = {}
    for sign in zodiac_list:
        sign_labels = data.loc[data['zodiac'] == sign, 'label']

        n_bad = (sign_labels == 1).sum()   # rows labelled 1
        n_good = (sign_labels == 0).sum()  # rows labelled 0

        zodiac_badrate[sign] = n_bad / (n_bad + n_good)

    zodiac_badrate
    
    {'双子座': 0.1312410841654779,
     '巨蟹座': 0.1408351026185421,
     '狮子座': 0.12760416666666666,
     '射手座': 0.14480286738351256,
     '水瓶座': 0.140117994100295,
     '白羊座': 0.13455414012738853,
     '双鱼座': 0.14873646209386282,
     '处女座': 0.13035143769968052,
     '天秤座': 0.12461252324860508,
     '天蝎座': 0.12005028284098052,
     '摩羯座': 0.12920489296636087,
     '金牛座': 0.12259059367771781}
    
    # Rank the signs from highest to lowest bad rate and tabulate the result.
    # dict.items() already yields the (sign, rate) pairs, and passing the column
    # labels to the constructor also works when there are no rows at all.
    ranked_zodiac = sorted(zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
    # NOTE: from here on `zodiac_badrate` is the ranked DataFrame, not the dict.
    zodiac_badrate = pd.DataFrame(ranked_zodiac, columns=['星座', 'badrate'])
    zodiac_badrate
    
    星座 badrate
    0 双鱼座 0.148736
    1 射手座 0.144803
    2 巨蟹座 0.140835
    3 水瓶座 0.140118
    4 白羊座 0.134554
    5 双子座 0.131241
    6 处女座 0.130351
    7 摩羯座 0.129205
    8 狮子座 0.127604
    9 天秤座 0.124613
    10 金牛座 0.122591
    11 天蝎座 0.120050
    # Build a pyecharts Line chart titled '星座' from the two columns of
    # `zodiac_badrate` (sign on x, bad rate on y).
    # NOTE(review): `from pyecharts import Line` and `add(name, x, y)` look like
    # the pyecharts 0.x API — confirm the installed version. The first argument
    # `1` is presumably the series name; verify it is accepted as an int.
    from pyecharts import Line
    x = zodiac_badrate['星座']
    y = zodiac_badrate['badrate']
    line = Line('星座')
    line.add(1, x, y)
    
    <div id="c56416b4b8514d2780bb35f9e761fcf5" style="width:800px;height:400px;"></div>
    
    # Chinese zodiac animal: bad rate per animal = bad / (bad + good), where
    # rows with label 1 are counted as bad and rows with label 0 as good.
    chinese_zodiac_badrate = {}
    for animal in chinese_zodiac_list:
        animal_rows = data[data['chinese_zodiac'] == animal]

        # Count of bad rows (label == 1).
        bad_count = animal_rows.loc[animal_rows['label'] == 1, 'label'].count()
        # Count of good rows (label == 0).
        good_count = animal_rows.loc[animal_rows['label'] == 0, 'label'].count()

        chinese_zodiac_badrate[animal] = bad_count / (bad_count + good_count)

    chinese_zodiac_badrate
    
    {'猪': 0.14269406392694065,
     '牛': 0.1578112609040444,
     '虎': 0.15165876777251186,
     '龙': 0.1439084219133279,
     '鼠': 0.1340602950609365,
     '兔': 0.1502843216896832,
     '鸡': 0.12846998063266624,
     '蛇': 0.12789827973074047,
     '羊': 0.11335403726708075,
     '猴': 0.12008141112618724,
     '马': 0.12053872053872054,
     '狗': 0.11052009456264776}
    
    # Rank the animals from highest to lowest bad rate and tabulate the result.
    # dict.items() already yields the (animal, rate) pairs, and passing the
    # column labels to the constructor also works when there are no rows at all.
    ranked_animals = sorted(chinese_zodiac_badrate.items(), key=lambda pair: pair[1], reverse=True)
    # NOTE: from here on `chinese_zodiac_badrate` is the ranked DataFrame, not the dict.
    chinese_zodiac_badrate = pd.DataFrame(ranked_animals, columns=['生肖', 'badrate'])
    chinese_zodiac_badrate
    
    生肖 badrate
    0 牛 0.157811
    1 虎 0.151659
    2 兔 0.150284
    3 龙 0.143908
    4 猪 0.142694
    5 鼠 0.134060
    6 鸡 0.128470
    7 蛇 0.127898
    8 马 0.120539
    9 猴 0.120081
    10 羊 0.113354
    11 狗 0.110520
    # Build a pyecharts Line chart titled '生肖' from the two columns of
    # `chinese_zodiac_badrate` (animal on x, bad rate on y).
    # NOTE(review): `from pyecharts import Line` and `add(name, x, y)` look like
    # the pyecharts 0.x API — confirm the installed version. The first argument
    # `1` is presumably the series name; verify it is accepted as an int.
    from pyecharts import Line
    x = chinese_zodiac_badrate['生肖']
    y = chinese_zodiac_badrate['badrate']
    line = Line('生肖')
    line.add(1,x,y)
    
    <div id="8801efc233e94477a9d56e1162e60a2b" style="width:800px;height:400px;"></div>
    
  • 相关阅读:
    javaScript设计模式探究【1】
    Java基础算法集50题
    DataTable学习笔记排序细则、列隐藏[3]
    javaScript设计模式探究【4】工厂模式
    javaScript设计模式探究【3】
    一次面试感想+js最近学习体会
    DataTable学习笔记范例应用篇[2]
    腾讯2013实习生招聘面经
    初品cakephp
    php编译中配置libxml2的错误
  • 原文地址:https://www.cnblogs.com/chenxiangzhen/p/10902219.html
Copyright © 2011-2022 走看看