zoukankan      html  css  js  c++  java
  • 简单的入门2

    import json
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    if __name__ == "__main__":
        # Exploratory script: load line-delimited JSON records, count the
        # values of the 'tz' field, then split the per-'tz' counts by whether
        # the 'a' field mentions "Windows" and plot the ten largest groups.
        path = "usagov_bitly_data2012-03-16-1331923249.txt"

        # Each line of the file is parsed as one JSON document.  The context
        # manager guarantees the handle is closed even if parsing fails, and
        # iterating the handle avoids materialising all lines up front.
        # NOTE(review): UTF-8 is assumed for the data file -- confirm.
        with open(path, encoding="utf-8") as fp:
            records = [json.loads(line) for line in fp]
        print(len(records))

        frame = pd.DataFrame(records)
        print(frame['tz'])

        # Distinguish records with no 'tz' key (NaN -> 'Missing') from
        # records whose 'tz' is an empty string ('Unknown').
        clean_tz = frame['tz'].fillna('Missing')
        clean_tz[clean_tz == ''] = 'Unknown'
        tz_counts = clean_tz.value_counts()
        print(tz_counts[:20])
        # Optional: bar chart of the ten most frequent 'tz' values.
        #tz_counts[:10].plot(kind='barh',rot=0)
        #plt.show()

        # First whitespace-separated token of every non-null 'a' value
        # (presumably the user-agent string -- verify against the data).
        results = pd.Series([x.split()[0] for x in frame.a.dropna()])
        print(results[:5])

        # Keep only rows that have an 'a' value, then label each row by
        # whether that value contains the substring 'Windows'.
        cframe = frame[frame.a.notnull()]
        operating_system = np.where(
            cframe['a'].str.contains('Windows'), 'Windows', 'not Windows'
        )
        print(operating_system[:10])

        # Row count per ('tz', label) pair, pivoted so the labels become
        # columns; combinations that never occur are filled with 0.
        by_tz_os = cframe.groupby(['tz', operating_system])
        agg_counts = by_tz_os.size().unstack().fillna(0)
        print(agg_counts[:10])

        # argsort of the row totals gives positions in ascending order, so
        # the last ten rows after take() are the ten largest totals.
        indexer = agg_counts.sum(1).argsort()
        print(indexer[:10])
        count_subset = agg_counts.take(indexer)[-10:]
        print(count_subset)
        # Optional: stacked bar chart of the raw counts.
        #count_subset.plot(kind='barh',stacked=True)

        # Divide every row by its own total so the stacked bars show
        # relative shares rather than absolute counts.
        normed_subset = count_subset.div(count_subset.sum(1), axis=0)
        normed_subset.plot(kind='barh', stacked=True)
        plt.show()
        
        
        

  • 相关阅读:
    paip.重装系统需要备份的资料总结..
    poj3078
    poj3009
    poj2151
    poj3274
    poj3436
    VC++:打开、保存文件对话框和浏览文件夹对话框
    目前所有的视频格式都有哪些?
    CMSHFlexGrid 类用法
    Matlab的ActiveX接口_百度文库
  • 原文地址:https://www.cnblogs.com/sklww/p/3655246.html
Copyright © 2011-2022 走看看