zoukankan      html  css  js  c++  java
  • 大数据统计笔记

    # -*- coding: utf-8 -*-
    import io
    import json

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    
    # A Series built from a plain list; np.nan stands in for a missing entry.
    s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
    print(s)

    # The second argument supplies explicit index labels for the values.
    s = pd.Series(data=[1, 3], index=["a", "b"])
    print(s)
    
    # Build an index of six consecutive days starting at 2013-01-01.
    dates = pd.date_range('20130101', periods=6)
    print(dates)

    # 6x4 frame of random normal samples: one row per date, columns A-D.
    df = pd.DataFrame(
        np.random.randn(6, 4),
        index=dates,
        columns=list('ABCD'),
    )

    # First and last three rows (the "LIMIT" equivalent).
    print("LIMIT:============")
    print(df.head(3))
    print(df.tail(3))

    # Transpose: swap rows and columns.
    print("反转XY显示 :============")
    print(df.transpose())

    # Sorting examples (kept disabled, as in the original notes).
    # NOTE: DataFrame.sort was removed from later pandas releases;
    # df.sort_values(by='B') is the replacement.
    #df.sort_index(axis=1, ascending=False)
    #df.sort(columns='B')

    ######################## Selectors #############################
    # Pick specific columns by label.
    print("指定列 :============")
    wanted_columns = ['A', 'B']
    print(df[wanted_columns])

    # Positional row slice: the first three rows.
    print("df[行范围,列范围]:============")
    print(df.iloc[0:3])

    # Label-based row slice on the date index (both endpoints included).
    print("指定索引主键 :============")
    print(df.loc['20130103':'20130104'])

    # Boolean filtering: keep only rows whose A value exceeds 0.5.
    print("布尔过滤 :============")
    a_above_half = df['A'] > 0.5
    print(df[a_above_half])

    # df[0:3, 0:1] is not a valid selector; slicing rows and columns
    # together by position needs df.iloc[0:3, 0:1].
    #print(df[0:3,0:1])

    ######################## Matrix operations #############################
    # Arithmetic and NumPy ufuncs apply element-wise to the whole frame.
    print("距阵操作 :============")
    print(df * 2)
    print(np.exp(df))
    ######################## Table attributes #############################

    # Handy DataFrame attributes (reference only, not executed here):
    #   df.values   - the data as a 2-D array
    #   df.columns  - the column labels
    #   df.index    - the row index
    #   df.dtypes   - the per-column dtypes

    print("字典(JSON)转换距阵 :============")
    # A dict maps directly onto a Series: keys become the index labels.
    sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
    df = pd.Series(sdata)
    print(df)

    # A list of dicts becomes a DataFrame with one row per dict.
    sdata = [{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}]
    # Ask for the column order through `columns=` so each label stays attached
    # to its own data. Assigning to df.columns afterwards only relabels the
    # columns positionally, which mislabels values whenever the inferred
    # order differs from the list (older pandas sorted the dict keys).
    df = pd.DataFrame(sdata, columns=['Ohio', 'Texas', 'Oregon', 'Utah'])
    print(df)
    df = df.fillna(0)  # replace every missing value with 0

    # Getting started
    #http://pandas.pydata.org/pandas-docs/stable/10min.html
    #http://pda.readthedocs.org/en/latest/chp5.html
    # Blog
    #http://cloga.info/#wat_e_eb3d32d8-f59a-4a08-bf96-6f706d89c097_zss_

    # Big data
    #http://www.17bigdata.com/?cat=22
    # Optimisation notes
    #http://1.aisensiy.sinaapp.com/2014/03/%E6%9C%80%E8%BF%91%E4%BD%BF%E7%94%A8-pandas-%E7%9A%84%E6%80%BB%E7%BB%93/

    # Raw string: in a normal literal the "\f" of "\foo" is a form-feed escape
    # (and "\p" is an invalid escape), so the file would get a corrupted name.
    csv_path = r'E:\py\foo.csv'
    df.to_csv(csv_path)
    
    
    ###################### JSON ###########################

    print("josn====================")
    # One record whose two fields ('0' and '1') each hold a nested object.
    j = [{'0': {"a": "a"}, '1': {"b": "b"}}]
    elevations = json.dumps(j)
    # Wrap the JSON text in StringIO: passing a literal JSON string to
    # read_json is deprecated (pandas 2.1+) and later versions treat a bare
    # str as a file path / URL instead of as JSON content.
    df = pd.read_json(io.StringIO(elevations))
    print(df)
  • 相关阅读:
    引用 AspNetCoreRateLimit => StatusCode cannot be set because the response has already started.
    Sublime Json 格式化
    gitlab 建立本地仓库
    R语言 启动报错 *** glibc detected *** /usr/lib64/R/bin/exec/R: free(): invalid next size (fast): 0x000000000263a420 *** 错误 解决方案
    范数
    SparkR-Install
    R语言扩展包dplyr——数据清洗和整理
    R语言与机器学习学习笔记
    sparkR原理
    data.frame类型数据如何将第一列值替换为行号
  • 原文地址:https://www.cnblogs.com/solq/p/3884980.html
Copyright © 2011-2022 走看看