zoukankan      html  css  js  c++  java
  • 18.scrapy_maitian_analysis

    1_info.py

    # encoding: utf-8
    import pandas as pd
    
    # Basic information for both datasets: rental listings first
    # ("zufang.json"), then second-hand sale listings ("ershoufang.json").
    for json_path in ("zufang.json", "ershoufang.json"):
        # Load the scraped listings into a DataFrame.
        df = pd.read_json(json_path)
        # Print the summary produced by pandas' describe().
        print(df.describe())
        # Print the number of listings in each district.
        print(df["district"].value_counts())
    

    2_pie_chart.py

    # coding:utf-8
    import numpy as np
    import pandas as pd
    import json
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
    
    from pylab import *
    # Use SimHei as the sans-serif font so the Chinese labels can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # NOTE(review): this font path is specific to one machine; on any other
    # machine the titles that use `myfont` will presumably fail to render
    # unless the file exists at the same location -- confirm before reuse.
    myfont = FontProperties(
        fname='/Users/seancheney/.matplotlib/mpl-data/fonts/ttf/SimHei.ttf')

    # Districts shown in the sale chart, in wedge order.
    sale_districts = (
        '朝阳', '海淀', '昌平', '东城', '大兴', '西城', '丰台', '石景山', '通州', '顺义')
    # The rental chart uses the same districts except the last one (顺义),
    # exactly as the original script did -- presumably because the rental
    # data has no listings there; verify against zufang.json.
    rent_districts = sale_districts[:-1]


    def draw_district_pie(subplot_pos, json_path, districts, title):
        """Draw one pie chart of listing counts per district.

        subplot_pos -- matplotlib subplot position code (e.g. 121).
        json_path   -- JSON file with a 'district' column.
        districts   -- district names to plot, in wedge order.
        title       -- chart title, rendered with `myfont`.

        Raises KeyError if a requested district does not occur in the data.
        """
        # Count the listings per district once and reuse the result for
        # every wedge instead of recomputing value_counts() per district.
        counts = pd.read_json(json_path)['district'].value_counts()
        sizes = [counts[name] for name in districts]
        # Pull only the first wedge out of the pie.
        explode = (0.1,) + (0,) * (len(districts) - 1)
        plt.subplot(subplot_pos)
        plt.pie(
            sizes,
            explode=explode,
            labels=districts,
            autopct='%1.1f%%',
            shadow=True,
            startangle=-90)
        # Equal aspect ratio so the pie is drawn as a circle.
        plt.axis('equal')
        plt.title(title, fontproperties=myfont)


    # Left: second-hand sale listings. Right: rental listings.
    draw_district_pie(121, "ershoufang.json", sale_districts, "房屋出售分布")
    draw_district_pie(122, "zufang.json", rent_districts, "房屋出租分布")
    plt.rc('font', family=['SimHei'])
    plt.show()
    

    3_hist.py

    import numpy as np
    import pandas as pd
    import json
    import matplotlib.pyplot as plt
    from pylab import *
    
    # Use SimHei as the sans-serif font so the Chinese labels can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # Load the second-hand sale listings and show which columns are available.
    listings = pd.read_json("ershoufang.json")
    print(listings.columns)

    # Histogram of the per-square-metre sale price, 25 bins, drawn on an
    # explicit Axes object.
    fig, ax = plt.subplots()
    ax.hist(listings['unitprice'], bins=25)
    ax.set_xlim(0, 200000)
    ax.set_title("房屋出售每平米价格分布")
    # NOTE(review): the label states the unit as 万/平方米 while the axis runs
    # from 0 to 200000, which looks more like 元/平方米 -- confirm against the
    # data before changing the label text.
    ax.set_xlabel('价格(单位:万/平方米)')
    ax.set_ylabel('套数')
    plt.show()
    

    4_ratio.py

    # 售租比
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from pylab import *
    # Use SimHei as the sans-serif font so the Chinese labels can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    # Districts shown on the chart, in bar order (bottom to top).
    district = ('西城', '石景山', '东城', '海淀', '丰台', '昌平', '大兴', '朝阳', '通州')

    # Rental listings: derive the monthly rent per square metre of every
    # listing, then average it per district.
    df_zf = pd.read_json("zufang.json")
    df_zf['unitprice'] = df_zf['price'] / df_zf['area']

    # Aggregate only the 'unitprice' column. Summing the whole frame (as the
    # script did before) also sums every unrelated column, which is wasted
    # work and can fail on non-numeric columns with recent pandas versions.
    # mean() also leaves listings with a missing unit price out of both the
    # total and the count, instead of only out of the total.
    month_price = df_zf.groupby('district')['unitprice'].mean()

    # Second-hand sale listings already carry a 'unitprice' column.
    df_esf = pd.read_json("ershoufang.json")
    sell_price = df_esf.groupby('district')['unitprice'].mean()

    # Sale-to-rent ratio per district: average sale price per square metre
    # divided by average monthly rent per square metre. Selecting with an
    # explicit label list raises KeyError if a district is missing from
    # either dataset.
    wanted = list(district)
    ratio = (sell_price.loc[wanted] / month_price.loc[wanted]).tolist()

    fig, ax = plt.subplots()

    # One horizontal bar per district.
    y_pos = np.arange(len(district))
    ax.barh(y_pos, ratio, align='center', color='green', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(district)
    ax.set_xlabel('售租比(单位:月)')
    ax.set_title('各区房屋售租比')

    plt.show()
    
  • 相关阅读:
    0705-深度网络模型持久化
    0704-使用GPU加速_cuda
    0703-可视化工具tensorboard和visdom
    0702-计算机视觉工具包torchvision
    00-凸优化引言
    MySQL数据库从入门到放弃(目录)
    0701-数据处理
    BurpSuite抓取HTTPS请求&&拦截Android请求
    数学里的e到底指什么
    QGeoPolygon
  • 原文地址:https://www.cnblogs.com/hankleo/p/10809175.html
Copyright © 2011-2022 走看看