zoukankan      html  css  js  c++  java
  • pandas

    import pandas as pd
    import numpy as np

    # Load the Titanic training data into a DataFrame.
    titanic_survival = pd.read_csv("titanic_train.csv")

    # Take the "Age" column as its own Series and print its first 10 values.
    # head(10) is positional; a label slice such as age.loc[0:10] includes
    # both endpoints and would print 11 values instead of 10.
    age = titanic_survival["Age"]
    print(age.head(10))

    # Boolean mask over the "Age" column: True where the value is missing,
    # False where it is present. The mask can be used directly as an index.
    age_is_null = age.isnull()
    print(age_is_null)

    # Indexing with the mask keeps only the positions marked True, so this
    # selects exactly the missing entries; their number is the missing count.
    age_null_true = age[age_is_null]
    print(age_null_true)
    age_null_count = age_null_true.shape[0]
    print(age_null_count)

    # Naive mean over the whole column. The column contains missing values,
    # so the built-in sum() yields NaN and this prints nan.
    mean_age = sum(titanic_survival["Age"]) / len(titanic_survival["Age"])
    print(mean_age)

    # Keep only the ages that are not missing (invert the null mask) and
    # average those instead.
    good_ages = titanic_survival["Age"][~age_is_null]
    mean_age = sum(good_ages) / len(good_ages)
    print(mean_age)

    # Shortcut: let pandas compute the mean of the column directly.
    correct_mean = titanic_survival["Age"].mean()
    print(correct_mean)

     

    # Compute the mean fare for each passenger class with an explicit loop.
    passenger_classes = [1, 2, 3]
    fares_by_class = {}
    for this_class in passenger_classes:
        # Rows belonging to the current class only.
        pclass_rows = titanic_survival[titanic_survival["Pclass"] == this_class]
        pclass_fares = pclass_rows["Fare"]
        fare_for_class = pclass_fares.mean()
        fares_by_class[this_class] = fare_for_class
    # Print once, after every class has been processed.
    print(fares_by_class)

    # Same kind of per-class aggregation, done in one call with pivot_table:
    # the mean of "Survived" for each "Pclass".
    # The aggregation is named by string; recent pandas versions emit a
    # FutureWarning when a NumPy callable such as np.mean is passed here.
    passenger_survival = titanic_survival.pivot_table(
        index="Pclass", values="Survived", aggfunc="mean"
    )
    print(passenger_survival)

    # Mean "Age" per passenger class via pivot_table. "mean" is also what
    # pivot_table uses when aggfunc is omitted.
    passenger_age = titanic_survival.pivot_table(
        index="Pclass", values="Age", aggfunc="mean"
    )
    print(passenger_age)

    # Per embarkation port: total fare collected and the sum of "Survived"
    # (i.e. the number of passengers rescued).
    # The aggregation is named by string; recent pandas versions emit a
    # FutureWarning when a NumPy callable such as np.sum is passed here.
    port_stats = titanic_survival.pivot_table(
        index="Embarked", values=["Fare", "Survived"], aggfunc="sum"
    )
    print(port_stats)

    # DataFrame.dropna with axis="columns" (same as axis=1) removes every
    # COLUMN that contains at least one missing value.
    drop_na_columns = titanic_survival.dropna(axis="columns")

    # With axis="index" (same as axis=0) rows are removed instead; `subset`
    # limits the check to "Age" and "Sex", so a row is dropped only when one
    # of those two fields is missing.
    new_titanic_survival = titanic_survival.dropna(
        axis="index", subset=["Age", "Sex"]
    )

    print(drop_na_columns)
    print(new_titanic_survival)

    # .loc[row_label, column_label] addresses a single cell of the frame.
    row_index_83_age = titanic_survival.loc[83, "Age"]
    row_index_766_pclass = titanic_survival.loc[766, "Pclass"]
    # One value per line, in the order they were looked up.
    print(row_index_83_age, row_index_766_pclass, sep="\n")

    # Sort the passengers by "Age" in descending order and show the first
    # ten rows of the sorted frame (positional slice).
    new2_titanic_survival = titanic_survival.sort_values(by="Age", ascending=False)
    print(new2_titanic_survival.iloc[:10])

    # After sorting, the rows still carry their original index labels.
    # reset_index(drop=True) discards them and renumbers the rows from 0.
    titanic_reindexed = new2_titanic_survival.reset_index(drop=True)
    print("---------------")
    # NOTE: .loc slices by label and includes both endpoints, so this shows
    # rows 0 through 10 (11 rows).
    print(titanic_reindexed.loc[0:10])

  • 相关阅读:
    [转载]安装SQL Server 2008 R2遇到“...Setup has stopped working.”
    WPF验证错误显示
    说一下我对Mvvm模式的理解
    [转载]C#深拷贝的方法
    Windows Phone 开发(一):入门指南 — 安装开发环境:Windows Phone SDK
    DateTime.ToString() Patterns
    Log4net 根据日志类别保存到不同的文件,并按照日期生成不同文件名称
    使用Visual Studio 2010进行UI自动化测试
    WPF触发器之数据触发器(A)
    Getting The imported project "C:\Program Files\MSBuild\Microsoft\Silverlight for Phone\v4.0\Microsoft.Silverlight..Overrides.targets" was not found
  • 原文地址:https://www.cnblogs.com/zaizaiaipython/p/8169156.html
Copyright © 2011-2022 走看看