zoukankan      html  css  js  c++  java
  • 【数据分析学习】Pandas学习记录

    import pandas as pd
    # Load the sample CSV into a DataFrame and show its type and column dtypes.
    # NOTE(review): the path below contains no directory separators; they may
    # have been lost when the snippet was published -- confirm before running.
    path = r'F:数据分析专用数据分析与机器学习food_info.csv'
    with open(path, 'r') as f:
        data = pd.read_csv(f)
        print(type(data))
        print(data.dtypes)
        # help() writes the documentation itself and returns None, so it is
        # called directly; wrapping it in print() only appended a stray "None".
        help(pd.read_csv)
    文件操作
    # Quick look at the table: first three rows, last three rows, column labels.
    for preview in (data.head(3), data.tail(3), data.columns):
        print(preview)
    查看数据
    carb_column = 'Carbohydrt_(g)'
    # Sort the frame in place by the carbohydrate column, first ascending and
    # then descending, printing that column after each sort.
    for ascending_order in (True, False):
        data.sort_values(carb_column, inplace=True, ascending=ascending_order)
        print(data[carb_column])
    数据排序
    # Count the missing entries of the Age column using a boolean mask.
    age = t_s['Age']
    age_is_null = age.isnull()
    age_null_true = age.loc[age_is_null]
    age_null_count = age_null_true.shape[0]
    print(age_null_count)
    #----------------------------------------
    # Same count done by hand: a NaN compares unequal to itself.
    count_list = [value for value in age if value != value]
    print(len(count_list))
    数据的筛选
    # Naive mean over every entry; a NaN anywhere in the column propagates
    # into the sum and therefore into the result.
    age_column = t_s['Age']
    mean_age = sum(age_column) / len(age_column)
    print(mean_age)
    #--------------------------------------------------
    # Mean over the non-null entries only, reusing the null mask built earlier.
    good_ages = t_s['Age'][~age_is_null]
    correct_mean_age = sum(good_ages) / len(good_ages)
    print(correct_mean_age)
    #---------------------------------------------
    # Built-in Series.mean(), which skips missing values by default.
    correct_mean_age = t_s['Age'].mean()
    print(correct_mean_age)
    数据的处理方法(平均数)
    passenger_classes = {1, 2, 3}
    # Map each Pclass value to the mean of the Fare column over the rows
    # belonging to that class.
    faces_by_class = {
        this_class: t_s[t_s['Pclass'] == this_class]['Fare'].mean()
        for this_class in passenger_classes
    }
    print(faces_by_class)
    #--------------------------------------------------------
    # Mean of Survived for each Pclass. The aggregation is given by name so the
    # snippet does not depend on a numpy import and avoids the deprecated
    # practice of passing numpy callables as aggfunc.
    passenger_s = t_s.pivot_table(index='Pclass', values='Survived', aggfunc='mean')
    print(passenger_s)
    #--------------------------------------------------------
    # Mean Age for each Pclass (pivot_table aggregates with the mean by default).
    passenger_age = t_s.pivot_table(index='Pclass', values='Age')
    print(passenger_age)
    #--------------------------------------------------------
    # Mean Fare for each Pclass, again using the default aggregation.
    passenger_price = t_s.pivot_table(index='Pclass', values='Fare')
    print(passenger_price)
    #--------------------------------------------------------
    # Totals of Fare and Survived for each Embarked value.
    port_stats = t_s.pivot_table(index='Embarked', values=['Fare', 'Survived'], aggfunc='sum')
    print(port_stats)
    数据透视表

    def hundredth_row(column):
        """Return the entry of ``column`` stored under index label 99."""
        return column.loc[99]
    
    
    # Run the helper once per column of t_r and print the collected values.
    # Note: the result is stored under the helper's own name, so the function
    # is no longer reachable as `hundredth_row` after this line.
    hundredth_row = t_r.apply(hundredth_row, axis='index')
    print(hundredth_row)
    自定义函数
    def which_class(row):
        """Translate a row's ``Pclass`` value into a readable class label.

        Returns 'Unknown' when the value is missing, the matching label for
        1, 2 or 3, and None for any other value.
        """
        pclass = row['Pclass']
        if pd.isnull(pclass):
            return 'Unknown'
        labels = {1: "First Class", 2: "Second Class", 3: "Third Class"}
        return labels.get(pclass)
    
    
    # Label every row of t_r by passing it, row by row, to which_class.
    classes = t_r.apply(which_class, axis='columns')
    print(classes)
    自定义函数

    import pandas as pd
    # Load the score comparison CSV and pull out two of its columns.
    path = r'F:数据分析专用数据分析与机器学习fandango_score_comparison.csv'
    with open(path, mode='r', encoding='utf-8') as csv_file:
        data = pd.read_csv(csv_file)
        # Selecting a single column from the frame; its type is printed below.
        series_film = data['FILM']
        print(type(series_film))
        series_rt = data['RottenTomatoes']
        # Show the first five scores.
        print(series_rt[:5])
    #--------------------------------------------------------------
    from pandas import Series
    # Rebuild the scores as a Series whose index is the film titles, so that
    # entries can be looked up by title.
    film_names = series_film.values
    print(type(film_names))
    rt_scores = series_rt.values
    series_custom = Series(rt_scores, index=film_names)
    # A bare expression is only echoed in an interactive session; print the
    # selection so a script run shows it like the other results.
    print(series_custom[['Minions (2015)', 'Leviathan (2014)']])
    Series结构
    Win a contest, win a challenge
  • 相关阅读:
    年度回忆录(2012.10—2013.01)
    Java中的Annotation(1)三个基本Annotation
    Java7中的文件和目录管理Path类
    Struts1和Struts2核心控制器的执行原理
    java中的IO基础3
    动态代理(2)动态代理和AOP
    java中的IO基础
    《嫌疑犯x的献身》看完了。。。
    像NHibernate致敬ado.net entity framework的范型DAO和open session in view实现
    我的MBTI职业性格测试
  • 原文地址:https://www.cnblogs.com/pandaboy1123/p/9681966.html
Copyright © 2011-2022 走看看