zoukankan      html  css  js  c++  java
  • Python数据分析与机器学习-Pandas_1

    import pandas
    # Read the CSV file into a DataFrame.
    food_info = pandas.read_csv("food_info.csv")
    # Report the type of the loaded object, then the dtype of each column.
    print(type(food_info))
    print(food_info.dtypes)
    
    <class 'pandas.core.frame.DataFrame'>
    NDB_No               int64
    Shrt_Desc           object
    Water_(g)          float64
    Energ_Kcal           int64
    Protein_(g)        float64
    Lipid_Tot_(g)      float64
    Ash_(g)            float64
    Carbohydrt_(g)     float64
    Fiber_TD_(g)       float64
    Sugar_Tot_(g)      float64
    Calcium_(mg)       float64
    Iron_(mg)          float64
    Magnesium_(mg)     float64
    Phosphorus_(mg)    float64
    Potassium_(mg)     float64
    Sodium_(mg)        float64
    Zinc_(mg)          float64
    Copper_(mg)        float64
    Manganese_(mg)     float64
    Selenium_(mcg)     float64
    Vit_C_(mg)         float64
    Thiamin_(mg)       float64
    Riboflavin_(mg)    float64
    Niacin_(mg)        float64
    Vit_B6_(mg)        float64
    Vit_B12_(mcg)      float64
    Vit_A_IU           float64
    Vit_A_RAE          float64
    Vit_E_(mg)         float64
    Vit_D_mcg          float64
    Vit_D_IU           float64
    Vit_K_(mcg)        float64
    FA_Sat_(g)         float64
    FA_Mono_(g)        float64
    FA_Poly_(g)        float64
    Cholestrl_(mg)     float64
    dtype: object
    
    # Alternative kept for reference (disabled):
    #     first_rows = food_info.head()
    #     print(first_rows)
    # The two calls below are bare expressions: their results are neither
    # assigned nor printed, and no rows appear in the output that follows.
    food_info.head(3)
    food_info.tail(4)
    # Print the column labels, then the shape tuple (rows, columns).
    print(food_info.columns)
    print(food_info.shape)
    
    Index(['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)',
           'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)',
           'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)',
           'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)',
           'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)',
           'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)',
           'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg',
           'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)',
           'Cholestrl_(mg)'],
          dtype='object')
    (8618, 36)
    
    # Label slice with .loc: both endpoints are included, so this selects
    # the rows labelled 3, 4, 5 and 6.
    rows_3_to_6 = food_info.loc[3:6]
    print(rows_3_to_6)
    print("--------------------------------")
    # A list of labels selects exactly those rows: 2, 5 and 10.
    picked_rows = food_info.loc[[2, 5, 10]]
    print(picked_rows)
    
       NDB_No         Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  
    3    1004       CHEESE BLUE      42.41         353        21.40   
    4    1005      CHEESE BRICK      41.11         371        23.24   
    5    1006       CHEESE BRIE      48.42         334        20.75   
    6    1007  CHEESE CAMEMBERT      51.80         300        19.80   
    
       Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  
    3          28.74     5.11            2.34           0.0           0.50  ...   
    4          29.68     3.18            2.79           0.0           0.51  ...   
    5          27.68     2.70            0.45           0.0           0.45  ...   
    6          24.26     3.68            0.46           0.0           0.46  ...   
    
       Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  
    3     721.0      198.0        0.25        0.5      21.0          2.4   
    4    1080.0      292.0        0.26        0.5      22.0          2.5   
    5     592.0      174.0        0.24        0.5      20.0          2.3   
    6     820.0      241.0        0.21        0.4      18.0          2.0   
    
       FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
    3      18.669        7.778        0.800            75.0  
    4      18.764        8.598        0.784            94.0  
    5      17.410        8.013        0.826           100.0  
    6      15.259        7.023        0.724            72.0  
    
    [4 rows x 36 columns]
    --------------------------------
        NDB_No             Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  
    2     1003  BUTTER OIL ANHYDROUS       0.24         876         0.28   
    5     1006           CHEESE BRIE      48.42         334        20.75   
    10    1011          CHEESE COLBY      38.20         394        23.76   
    
        Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  
    2           99.48     0.00            0.00           0.0           0.00  ...   
    5           27.68     2.70            0.45           0.0           0.45  ...   
    10          32.11     3.36            2.57           0.0           0.52  ...   
    
        Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  
    2     3069.0      840.0        2.80        1.8      73.0          8.6   
    5      592.0      174.0        0.24        0.5      20.0          2.3   
    10     994.0      264.0        0.28        0.6      24.0          2.7   
    
        FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
    2       61.924       28.732        3.694           256.0  
    5       17.410        8.013        0.826           100.0  
    10      20.218        9.280        0.953            95.0  
    
    [3 rows x 36 columns]
    
    # Indexing the DataFrame with a single column label yields that column
    # as a Series; here, the "NDB_No" column.
    ndb_col = food_info["NDB_No"]
    # print(ndb_col)
    # The label can equally be supplied through a string variable; this
    # rebinds ndb_col to the same column selected above.
    col_name = "NDB_No"
    ndb_col = food_info[col_name]
    print(ndb_col)
    
    0        1001
    1        1002
    2        1003
    3        1004
    4        1005
    5        1006
    6        1007
    7        1008
    8        1009
    9        1010
    10       1011
    11       1012
    12       1013
    13       1014
    14       1015
    15       1016
    16       1017
    17       1018
    18       1019
    19       1020
    20       1021
    21       1022
    22       1023
    23       1024
    24       1025
    25       1026
    26       1027
    27       1028
    28       1029
    29       1030
            ...  
    8588    43544
    8589    43546
    8590    43550
    8591    43566
    8592    43570
    8593    43572
    8594    43585
    8595    43589
    8596    43595
    8597    43597
    8598    43598
    8599    44005
    8600    44018
    8601    44048
    8602    44055
    8603    44061
    8604    44074
    8605    44110
    8606    44158
    8607    44203
    8608    44258
    8609    44259
    8610    44260
    8611    48052
    8612    80200
    8613    83110
    8614    90240
    8615    90480
    8616    90560
    8617    93600
    Name: NDB_No, Length: 8618, dtype: int64
    
    # Indexing with a list of column labels returns a DataFrame holding
    # only those columns.
    columns = ["Zinc_(mg)", "Copper_(mg)"]
    zinc_copper = food_info[columns]
    print(zinc_copper)
    
          Zinc_(mg)  Copper_(mg)
    0          0.09        0.000
    1          0.05        0.016
    2          0.01        0.001
    3          2.66        0.040
    4          2.60        0.024
    5          2.38        0.019
    6          2.38        0.021
    7          2.94        0.024
    8          3.43        0.056
    9          2.79        0.042
    10         3.07        0.042
    11         0.40        0.029
    12         0.33        0.040
    13         0.47        0.030
    14         0.51        0.033
    15         0.38        0.028
    16         0.51        0.019
    17         3.75        0.036
    18         2.88        0.032
    19         3.50        0.025
    20         1.14        0.080
    21         3.90        0.036
    22         3.90        0.032
    23         2.10        0.021
    24         3.00        0.032
    25         2.92        0.011
    26         2.46        0.022
    27         2.76        0.025
    28         3.61        0.034
    29         2.81        0.031
    ...         ...          ...
    8588       3.30        0.377
    8589       0.05        0.040
    8590       0.05        0.030
    8591       1.15        0.116
    8592       5.03        0.200
    8593       3.83        0.545
    8594       0.08        0.035
    8595       3.90        0.027
    8596       4.10        0.100
    8597       3.13        0.027
    8598       0.13        0.000
    8599       0.02        0.000
    8600       0.09        0.037
    8601       0.21        0.026
    8602       2.77        0.571
    8603       0.41        0.838
    8604       0.05        0.028
    8605       0.03        0.023
    8606       0.10        0.112
    8607       0.02        0.020
    8608       1.49        0.854
    8609       0.19        0.040
    8610       0.10        0.038
    8611       0.85        0.182
    8612       1.00        0.250
    8613       1.10        0.100
    8614       1.55        0.033
    8615       0.19        0.020
    8616       1.00        0.400
    8617       1.00        0.250
    
    [8618 rows x 2 columns]
    
    # Show every column label and the first two rows for reference.
    print(food_info.columns)
    print(food_info.head(2))
    col_names = food_info.columns.tolist()
    # Keep only the labels that end with "(g)". The suffix test does not
    # match labels ending in "(mg)" or "(mcg)", so only gram-valued
    # columns are selected.
    gram_columns = [name for name in col_names if name.endswith("(g)")]
    gram_df = food_info[gram_columns]
    print(gram_df.head(3))
    
    Index(['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)',
           'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)',
           'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)',
           'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)',
           'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)',
           'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)',
           'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg',
           'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)',
           'Cholestrl_(mg)'],
          dtype='object')
       NDB_No                 Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  
    0    1001          BUTTER WITH SALT      15.87         717         0.85   
    1    1002  BUTTER WHIPPED WITH SALT      15.87         717         0.85   
    
       Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  ...  
    0          81.11     2.11            0.06           0.0           0.06  ...   
    1          81.11     2.11            0.06           0.0           0.06  ...   
    
       Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  Vit_K_(mcg)  
    0    2499.0      684.0        2.32        1.5      60.0          7.0   
    1    2499.0      684.0        2.32        1.5      60.0          7.0   
    
       FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
    0      51.368       21.021        3.043           215.0  
    1      50.489       23.426        3.012           219.0  
    
    [2 rows x 36 columns]
       Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  
    0      15.87         0.85          81.11     2.11            0.06   
    1      15.87         0.85          81.11     2.11            0.06   
    2       0.24         0.28          99.48     0.00            0.00   
    
       Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
    0           0.0           0.06      51.368       21.021        3.043  
    1           0.0           0.06      50.489       23.426        3.012  
    2           0.0           0.00      61.924       28.732        3.694
  • 相关阅读:
    kubernetes案例 tomcat+mysql
    elasticsearch+logstash+kibana部署
    elasticsearch集群部署以及head插件安装
    Rhel7.4系统部署cobbler
    部署Hadoop2.0高性能集群
    使用haproxy实现负载均衡集群
    nginx实现动静分离的负载均衡集群
    heartbeat实现web服务器高可用
    keepalived+lvs
    LVS集群之IP TUN模式以及网站压力测试
  • 原文地址:https://www.cnblogs.com/SweetZxl/p/11124183.html
Copyright © 2011-2022 走看看