zoukankan      html  css  js  c++  java
  • DataFrame

    个人见解:DataFrame 类似于 Excel 表格,是带行索引和列名的二维数据表

    import numpy as np
    import pandas as pd
    print(np.array([1,2,3,4,5]))
    [1 2 3 4 5]
    print(np.arange(1,10,1))
    [1 2 3 4 5 6 7 8 9]
    print(np.array(np.arange(10)))
    [0 1 2 3 4 5 6 7 8 9]
    

    myList = [[0,1],[1,2],[2,3]]
    print(np.array(myList))
    [[0 1]
     [1 2]
     [2 3]]
    myList1= [[0,5],[1,6],[2,7]]
    print(np.array(myList1))
    [[0 5]
     [1 6]
     [2 7]]
    
    #相加(两个形状相同的数组对应元素逐个相加)
    List1 = np.array(myList)
    List2 = np.array(myList1)
    print(List1+List2)
    [[ 0  6]
     [ 2  8]
     [ 4 10]]
    #合并(axis=1 表示横向按列拼接,效果与 np.hstack 相同)
    print(np.concatenate((List1,List2),axis=1))
    [[0 1 0 5]
     [1 2 1 6]
     [2 3 2 7]]
    print(np.hstack((List1,List2)))
    [[0 1 0 5]
     [1 2 1 6]
     [2 3 2 7]]
    

    #索引
    ser1 = np.array([1,2,3,4,5])
    pd1 = pd.Series(ser1,index=np.arange(5))
    print(pd1)
    0    1
    1    2
    2    3
    3    4
    4    5
    dtype: int32
    ser2 = np.array([6,7,8,9,10])
    pd2 = pd.Series(ser2,index=np.arange(5))
    print(pd2)
    0     6
    1     7
    2     8
    3     9
    4    10
    dtype: int32

    print(pd.DataFrame(ser2,index=np.arange(5),columns=['apple']))
       apple
    0      6
    1      7
    2      8
    3      9
    4     10

    print(pd.Series([3,2,0,1],index=np.arange(4)))
    print(pd.Series([0,3,7,2],index=np.arange(4)))
    0    3
    1    2
    2    0
    3    1
    dtype: int64
    0    0
    1    3
    2    7
    3    2
    dtype: int64
    
    myList2 = [[3,0],[2,3],[0,7],[1,2]]
    print(pd.DataFrame(myList2,index=np.arange(4),columns=['apples','oranges']))
       apples  oranges
    0       3        0
    1       2        3
    2       0        7
    3       1        2

    
    
    import pandas as pd
    from pandas import Series,DataFrame
    x1 = Series([1,2,3,4])
    x2 = Series(data=[1,2,3,4],index=['a','b','c','d'])
    mydata = {'a':1,'b':2,'c':3,'d':4}#使用字典创建
    x3 = Series(mydata)
    print(x1)
    0    1
    1    2
    2    3
    3    4
    dtype: int64
    print(x2)
    a    1
    b    2
    c    3
    d    4
    dtype: int64
    print(x3)
    a    1
    b    2
    c    3
    d    4
    dtype: int64
    print(x3.count())
    4
    print(x3.max())
    4
    print(x3.min())
    1
    print(x3.mean())
    2.5
    print(x3.sum())
    10
    print(x3.median())
    2.5
    print(x3.argmax())
    3
    print(x3.var())
    1.6666666666666667
    print(x3.describe())
    count    4.000000
    mean     2.500000
    std      1.290994
    min      1.000000
    25%      1.750000
    50%      2.500000
    75%      3.250000
    max      4.000000
    dtype: float64
    df1 = DataFrame({'name':['zhangfei','guanyu','a','b','c'],'data1':range(1,6)})
    df2 = DataFrame({'name':['zhangfei','guanyu','A','B','C'],'data2':range(1,6)})
    df3 = pd.merge(df1,df2,on='name')
    print(df1)
    print(df2)
    print(df3)
           name  data1
    0  zhangfei      1
    1    guanyu      2
    2         a      3
    3         b      4
    4         c      5
           name  data2
    0  zhangfei      1
    1    guanyu      2
    2         A      3
    3         B      4
    4         C      5
           name  data1  data2
    0  zhangfei      1      1
    1    guanyu      2      2

    df3 = pd.merge(df1,df2,how='inner')
    print(df3)
           name  data1  data2
    0  zhangfei      1      1
    1    guanyu      2      2
    #左连接:以第一个表(df1)为主,保留 df1 的所有行,df2 中无匹配的值为 NaN
    df3 = pd.merge(df1,df2,how='left')
    print(df3)
           name  data1  data2
    0  zhangfei      1    1.0
    1    guanyu      2    2.0
    2         a      3    NaN
    3         b      4    NaN
    4         c      5    NaN
    #右连接:以第二个表(df2)为主,保留 df2 的所有行,df1 中无匹配的值为 NaN
    df3 = pd.merge(df1,df2,how='right')
    print(df3)
           name  data1  data2
    0  zhangfei    1.0      1
    1    guanyu    2.0      2
    2         A    NaN      3
    3         B    NaN      4
    4         C    NaN      5
    #外连接:保留两个表的所有行,任一侧无匹配的值为 NaN
    df3 = pd.merge(df1,df2,how='outer')
    print(df3)
           name  data1  data2
    0  zhangfei    1.0    1.0
    1    guanyu    2.0    2.0
    2         a    3.0    NaN
    3         b    4.0    NaN
    4         c    5.0    NaN
    5         A    NaN    3.0
    6         B    NaN    4.0
    7         C    NaN    5.0
    
    data = {'Chinese': [66, 95, 93, 90,80], 'Math': [30, 98, 96, 77, 90], 'English': [65, 85, 92, 88, 90]}
    df = DataFrame(data, index=['ZhangFei', 'GuanYu', 'LiuBei', 'DianWei', 'XuChu'], columns=['Chinese', 'Math', 'English'])
    print(df)
              Chinese  Math  English
    ZhangFei       66    30       65
    GuanYu         95    98       85
    LiuBei         93    96       92
    DianWei        90    77       88
    XuChu          80    90       90
    print(df.loc['ZhangFei'])
    Chinese    66
    Math       30
    English    65
    Name: ZhangFei, dtype: int64
    print(df.iloc[0])
    Chinese    66
    Math       30
    English    65
    Name: ZhangFei, dtype: int64
    print(df.columns)
    Index(['Chinese', 'Math', 'English'], dtype='object')
    print(df.iloc[2]['Math'])
    96
    print(df.iloc[2]['Chinese'])
    93
    


















  • 相关阅读:
    设置eclipse启动时所需要的jdk
    Mvc ModelState.isValid为false时,检查时那个字段不符合规则的代码
    360急速浏览器JS的调试
    Python3 安装第三方包
    Sqlserver生成带数据的脚本
    Spring(八)核心容器
    Spring(七)核心容器
    Spring(六)核心容器
    Spring(五)核心容器
    Spring(四)核心容器
  • 原文地址:https://www.cnblogs.com/Cookie-Jing/p/13604570.html
Copyright © 2011-2022 走看看