zoukankan      html  css  js  c++  java
  • python篇自动化办公

    # -!- coding:utf-8 -!-
    import pandas as pd
    from datetime import date, timedelta


    # 1. Create an Excel workbook containing a small sample table.
    #    (With no data supplied, only an empty workbook would be created.)
    sample_rows = {'ID': [1, 2, 3], 'name': ['AA', 'BB', 'CC']}
    # Use ID as the index; otherwise pandas keeps its default index.
    df = pd.DataFrame(sample_rows).set_index('ID')
    df.to_excel('E:/test/test.xlsx')
    print('done')

    # 2. Give a header-less sheet column names and save it under a new name.
    column_names = ['ID', 'Type', 'FirstName', 'MiddleName', 'LastName']
    people = pd.read_excel('E:/test/test.xlsx', header=None)  # header=None: the sheet has no header row
    people.columns = column_names
    # set_index returns a new frame, so rebind the name to keep the result.
    people = people.set_index('ID')
    print(people.columns)
    people.to_excel('E:/test/test1.xlsx')

    # 3. When the index column is known up front, name it via index_col while reading.
    people = pd.read_excel('E:/test/test1.xlsx', index_col='ID')

    # 4. Fill a table from three Series that share the same index labels.
    shared_index = [1, 2, 3]
    s1 = pd.Series([1, 2, 3], index=shared_index, name='A')
    s2 = pd.Series([10, 20, 30], index=shared_index, name='B')
    s3 = pd.Series([100, 200, 300], index=shared_index, name='C')
    # Each Series becomes one column, keyed by the Series' own name.
    df = pd.DataFrame({series.name: series for series in (s1, s2, s3)})
    print(df)

    # 5,前面带有空行,空列的表格,填充序列,时间,另存为。。。
    def add_month(d, md):
        """Return the date ``md`` calendar months after ``d``.

        Args:
            d: The starting ``datetime.date``.
            md: Number of months to add; may be zero or negative.

        Returns:
            A ``datetime.date`` in the target month. The day of month is kept
            when it exists there; otherwise it is clamped to that month's last
            day (for example Jan 31 + 1 month gives the last day of February).
        """
        import calendar

        # Count months from year 0 so year roll-over is a single divmod.
        total_months = d.year * 12 + (d.month - 1) + md
        year, month_index = divmod(total_months, 12)
        month = month_index + 1
        # Clamp the day so a short target month does not raise ValueError.
        last_day = calendar.monthrange(year, month)[1]
        return date(year, month, min(d.day, last_day))

    # Skip the three blank rows at the top and read only columns C..F.
    # ID, YESORNO and date are read as strings (the original author notes that
    # ID cannot be read directly as an integer here).
    books = pd.read_excel('E:/test/test3.xlsx', skiprows=3, usecols='C:F',
                          dtype={'ID': str, 'YESORNO': str, 'date': str})
    start = date(2018, 1, 1)
    for i in books.index:
        # Write with books.at[row, column] so the assignment goes to the frame
        # itself rather than through an intermediate column object.
        books.at[i, 'ID'] = i + 1
        books.at[i, 'YESORNO'] = 'Yes' if i % 2 == 0 else 'NO'
        # Three alternative ways of filling the date column. Each assignment
        # overwrites the previous one, so only the last is kept in the output.
        # Day + 1 per row
        books.at[i, 'date'] = start + timedelta(days=i)
        # Year + 1 per row
        books.at[i, 'date'] = date(start.year + i, start.month, start.day)
        # Month + 1 per row
        books.at[i, 'date'] = add_month(start, i)

    print(books)
    books.set_index('ID', inplace=True)
    books.to_excel('E:/test/test4.xlsx')

    6,表格中的算法
    import pandas as pd
    def add_2(x):
        """Return ``x`` plus 2."""
        return x + 2

    # Given the unit price and the quantity, compute the total price.
    books = pd.read_excel('E:/test/book.xlsx', index_col='ID')
    # Total price for every row at once.
    books['Price'] = books['listprice'] * books['count']
    # Total price for the rows labelled 5..9 only.
    for i in range(5, 10):
        # books.at[row, column] writes to the frame itself rather than through
        # an intermediate column object.
        books.at[i, 'Price'] = books.at[i, 'listprice'] * books.at[i, 'count']
    # Three equivalent ways of adding 2 to the unit price. All three run, so
    # the printed price ends up 6 higher than the value read from the file.
    books['listprice'] = books['listprice'] + 2
    # apply() takes the function object itself, so add_2 is passed without ().
    books['listprice'] = books['listprice'].apply(add_2)
    # The same with an anonymous function.
    books['listprice'] = books['listprice'].apply(lambda x: x + 2)
    print(books)

    7,排序
    books = pd.read_excel('E:/test/book.xlsx', index_col='ID')
    # Sort by unit price, highest first. `by` names the sort column and
    # ascending=False sorts from large to small.
    books = books.sort_values(by='listprice', ascending=False)
    # Sort by unit price (descending), then by count (ascending).
    sort_columns = ['listprice', 'count']
    sort_directions = [False, True]
    books = books.sort_values(by=sort_columns, ascending=sort_directions)

    8, 筛选
    def age_18_to_40(a):
        """Return True when ``a`` is at least 18 and strictly below 40."""
        return 18 <= a < 40
    def level_a(s):
        """Return True when ``s`` lies in the inclusive range 85..100."""
        return 85 <= s <= 100
    students = pd.read_excel('E:/test/student.xlsx', index_col='ID')
    # Keep students aged 18 (inclusive) to 40 (exclusive) whose score is 85..100.
    age_mask = students['Age'].apply(age_18_to_40)
    score_mask = students['Score'].apply(level_a)
    students = students.loc[age_mask].loc[score_mask]
    # The same filter written with anonymous functions.
    # (students['Age'] can also be written as students.Age.)
    age_mask = students['Age'].apply(lambda a: 18 <= a < 40)
    score_mask = students['Score'].apply(lambda s: 85 <= s <= 100)
    students = students.loc[age_mask].loc[score_mask]

    # -- coding:utf-8 --
    import pandas as pd
    import matplotlib.pyplot as plt

    9,柱状图
    student = pd.read_excel('E:/test/zhuzhuangtu.xlsx', index_col='ID')
    student.sort_values(by='Number', inplace=True, ascending=False)
    # Alternative using pandas' own plotting:
    #   student.plot.bar(x='Field', y='Number', color='orange',
    #                    title='International Students by Field')
    # Below, the chart is drawn with matplotlib directly.
    plt.bar(student.Field, student.Number, color='orange')
    # rotation is given as a number of degrees (not the string '90').
    plt.xticks(student.Field, rotation=90)
    plt.xlabel('Field')
    plt.ylabel('Number')
    plt.title('International Students by Field', fontsize=16)
    plt.tight_layout()  # without this the axis labels get cut off
    plt.show()  # display the bar chart
    print(student)
    效果:

     10,两组柱状图比较 (更多细节可以参考上面的9优化)

    students = pd.read_excel('E:/test/zhuzhuangtu2.xlsx')
    print(students)
    # One bar group per Field, one bar per listed year column.
    # A color list such as color=['orange', 'red'] may be passed as well.
    year_columns = ['year2020', 'year2021']
    students.plot.bar(x='Field', y=year_columns)
    plt.show()

     11,计算长方形外接圆面积

    import pandas as pd
    import numpy as np

    def get_circumcircle_area(l, w):
        """Return the area of the circle circumscribed about an ``l`` by ``w`` rectangle.

        Args:
            l: Length of the rectangle.
            w: Width of the rectangle.

        Returns:
            The circle's area, ``pi * r**2``, where ``r`` is half the
            rectangle's diagonal.
        """
        # The rectangle's diagonal is the diameter of its circumscribed circle.
        r = np.sqrt(l**2 + w**2) / 2
        return r**2 * np.pi
    def wrapper(row):
        """Return the circumcircle area for one table row.

        Reads the rectangle's sides from ``row['lenth']`` and ``row['width']``
        and passes them to ``get_circumcircle_area``.
        """
        return get_circumcircle_area(row['lenth'], row['width'])

    rects = pd.read_excel('E:/test/yuan.xlsx', index_col='ID')
    # axis=1 walks the table row by row; axis=0 would walk it column by column.
    circle_areas = rects.apply(wrapper, axis=1)
    rects['ca'] = circle_areas
    # Same thing with an anonymous function:
    #   rects.apply(lambda row: get_circumcircle_area(row['lenth'], row['width']), axis=1)
    print(rects)

     12,饼图

    # Pie chart
    students = pd.read_excel('E:/test/bingtu.xlsx', index_col='ID')
    print(students)
    # The plain version is simply students['year2017'].plot.pie().
    # Styled version: sort the values ascending first, then set font size and start angle.
    sorted_2017 = students['year2017'].sort_values(ascending=True)
    sorted_2017.plot.pie(fontsize=8, startangle=270)
    plt.title('AAAAAA', fontsize=16, fontweight='bold')
    plt.ylabel('year2017', fontsize=12, fontweight='bold')
    plt.show()
     
    13,折线图,区域叠加图,叠加柱状图
    weeks = pd.read_excel('E:/test/week.xlsx')
    print(weeks)
    print(weeks.columns)
    # The y list names the columns that are drawn as separate series.
    year_columns = ['year2019', 'year2020', 'year2021']
    # Line chart
    weeks.plot(y=year_columns)
    # Stacked area chart
    weeks.plot.area(y=year_columns)
    # Stacked bar chart
    weeks.plot.bar(y=year_columns, stacked=True)
    # Extra styling (the original note says it is meant for the line chart).
    # NOTE(review): these pyplot calls presumably act on the most recently
    # created figure - confirm which chart is meant to be styled.
    plt.title('Sales Weekly Trend', fontsize=16, fontweight='bold')
    plt.xticks(weeks.index, fontsize=8)
    plt.ylabel('Total', fontsize=12, fontweight='bold')
    plt.show()

            

    14,散点图,面积分布图,

    # Show every column when printing instead of collapsing part of the data.
    pd.options.display.max_columns = 777
    homes = pd.read_excel('E:/test/home_data.xlsx', index_col='Id')
    print(homes)
    # Optional charts, left disabled as in the original:
    #   scatter plot:      homes.plot.scatter(x='sqft_living', y='price')
    #   area distribution: homes.sqft_living.plot.hist(bins=5)
    #   density plot:      homes.sqft_living.plot.kde()
    # Correlation between the data columns.
    correlations = homes.corr()
    print(correlations)

    plt.show()

                                     

          

     15,多表杂碎数据合并

    # --  coding:utf-8 --
    import pandas as pd

    workbook_path = 'E:/test/student_score.xlsx'
    students = pd.read_excel(workbook_path, sheet_name='student')
    scores = pd.read_excel(workbook_path, sheet_name='score')
    # Left-merge the score sheet onto the student sheet by ID, then replace
    # the missing values left by the merge with 0.
    table = students.merge(scores, how='left', on='ID')
    table = table.fillna(0)
    # A join-based alternative: students.join(scores, how='left').fillna(0)
    table['Score'] = table['Score'].astype(int)

    print(table)

    16,数据校验
    # --  coding:utf-8 --
    import pandas as pd

    def score_validation(row):
        """Print a message when ``row.Score`` is not a valid score.

        A score is valid when it lies in the inclusive range 0..100. A score
        that cannot be compared with numbers at all is reported as invalid too.

        Args:
            row: An object exposing ``ID``, ``Name`` and ``Score`` attributes,
                such as a DataFrame row passed by ``apply(..., axis=1)``.

        Returns:
            None. The function only prints.
        """
        # An explicit check is used instead of `assert`, because assert
        # statements are removed when Python runs with -O.
        try:
            is_valid = 0 <= row.Score <= 100
        except TypeError:
            is_valid = False
        if not is_valid:
            # The tab keeps the messages aligned when some IDs have one digit
            # and others have two.
            print(f'#{row.ID}\t student{row.Name} has an invalid score {row.Score}.')


    students = pd.read_excel('E:/test/student2.xlsx')
    # axis='columns' (the same as axis=1) checks the table one row at a time;
    # axis=0 would go through it column by column.
    students.apply(score_validation, axis='columns')

        表格数据:                                         运行结果:

                 

     17,读取csv,tsv,txt中的数据

      import pandas as pd

    # Read data from csv, tsv and txt files; ID is the index column in each.
    students = pd.read_csv('E:/test/student.csv', index_col='ID')
    # Fields are separated by a tab character.
    students2 = pd.read_csv('E:/test/student.tsv', index_col='ID', sep='\t')
    # Fields are separated by a vertical bar.
    students3 = pd.read_csv('E:/test/student.txt', index_col='ID', sep='|')



  • 相关阅读:
    Android笔记(三) 使得Activity之间可以跳转---Intent
    Python jQuery
    Python JavaScript BOM和DOM以及window对象
    Python JavaScript
    Python 前端CSS样式
    Python 前端CSS
    Python 前端 HTTP HTML标签
    Python mysql中的:视图 触发器 事务 存储过程 mysql内置函数 流程控制 b+树 索引 慢日志查询 权限管理
    Python pymysql模块
    Django进阶Model篇005
  • 原文地址:https://www.cnblogs.com/147258llj/p/15714072.html
Copyright © 2011-2022 走看看