zoukankan html css js c++ java

python篇自动化办公

# -*- coding: utf-8 -*-
import pandas as pd
from datetime import date, timedelta


# 1. Create an Excel workbook and put a little data in it.
# Passing no data to DataFrame() would just produce an empty workbook.
# ID is made the index right away; without set_index pandas writes its
# default positional index instead.
df = pd.DataFrame({'ID': [1, 2, 3], 'name': ['AA', 'BB', 'CC']}).set_index('ID')
df.to_excel('E:/test/test.xlsx')
print('done')

# 2. Give the sheet a header row and save it under a new name.
# header=None tells read_excel that the file has no header row.
people = pd.read_excel('E:/test/test.xlsx', header=None)
# Assign the column headers.
people.columns = ['ID', 'Type', 'FirstName', 'MiddleName', 'LastName']
# Make ID the index; set_index returns a new frame, so rebind the name.
people = people.set_index('ID')
print(people.columns)
people.to_excel('E:/test/test1.xlsx')

# 3. When the index column is known up front, name it with index_col
#    while reading instead of calling set_index afterwards.
people = pd.read_excel(
    'E:/test/test1.xlsx',
    index_col='ID',
)

# 4. Fill a table from Series objects.
# Each Series shares the row labels 1..3 and becomes one column, keyed by
# the Series' own name.
s1 = pd.Series([1, 2, 3], index=[1, 2, 3], name='A')
s2 = pd.Series([10, 20, 30], index=[1, 2, 3], name='B')
s3 = pd.Series([100, 200, 300], index=[1, 2, 3], name='C')
df = pd.DataFrame({column.name: column for column in (s1, s2, s3)})
print(df)

# 5,前面带有空行,空列的表格，填充序列，时间，另存为。。。
def add_month(d, md):
    """Return the date that lies ``md`` calendar months after ``d``.

    Args:
        d: The starting ``datetime.date``.
        md: Number of months to add; may be zero or negative.

    Returns:
        A ``date`` in the target month. The day-of-month of ``d`` is kept
        when the target month has that day; otherwise it is clamped to the
        last day of the target month (e.g. Jan 31 + 1 month -> Feb 28/29)
        instead of raising ``ValueError``.

    Raises:
        ValueError: If the resulting year is outside the range ``date``
            supports.
    """
    from calendar import monthrange  # local import: only this helper needs it

    # Count months from year 0 with a zero-based month so that a single
    # divmod yields both the target year and the target month.
    year, month_index = divmod(d.year * 12 + (d.month - 1) + md, 12)
    month = month_index + 1
    # monthrange()[1] is the number of days in the target month.
    day = min(d.day, monthrange(year, month)[1])
    return date(year, month, day)

# Read a sheet that has blank rows/columns in front of the data:
# skiprows=3 skips the three leading blank rows, usecols='C:F' reads only
# columns C..F. ID, YESORNO and date are read as str; the original note says
# ID cannot be read as an integer type directly here
# (presumably because the cells are still empty -- confirm against the file).
books = pd.read_excel('E:/test/test3.xlsx', skiprows=3, usecols='C:F',
                      dtype={'ID': str, 'YESORNO': str, 'date': str})
start = date(2018, 1, 1)
for i in books.index:
    # Write through books.at[row, column]. The chained form
    # books[column].at[i] = ... assigns into an intermediate Series and does
    # not update ``books`` under pandas Copy-on-Write.
    books.at[i, 'ID'] = i + 1
    books.at[i, 'YESORNO'] = 'Yes' if i % 2 == 0 else 'NO'
    # Three alternative ways of filling the date column follow; each one
    # overwrites the previous, so the last assignment is what gets saved.
    # Day increases by one per row.
    books.at[i, 'date'] = start + timedelta(days=i)
    # Year increases by one per row. ``date`` must be called with
    # parentheses; subscripting it with [] raises TypeError.
    books.at[i, 'date'] = date(start.year + i, start.month, start.day)
    # Month increases by one per row.
    books.at[i, 'date'] = add_month(start, i)

print(books)
books.set_index('ID', inplace=True)
books.to_excel('E:/test/test4.xlsx')

6，表格中的算法

import pandas as pd
def add_2(x):
    """Return ``x`` increased by 2."""
    increment = 2
    return x + increment

# Given a unit price and a quantity, compute the total price.
books = pd.read_excel('E:/test/book.xlsx', index_col='ID')
# Total price for every row at once (column-wise arithmetic).
books['Price'] = books['listprice'] * books['count']
# Total price for selected rows only (index labels 5..9).
# books.at[row, column] writes straight into the frame; the chained form
# books['Price'].at[i] = ... assigns into an intermediate Series and does
# not update ``books`` under pandas Copy-on-Write.
for i in range(5, 10):
    books.at[i, 'Price'] = books.at[i, 'listprice'] * books.at[i, 'count']
# Three ways of adding 2 to the unit price, applied one after another
# (so the column ends up 6 higher in total).
# 1) plain column arithmetic
books['listprice'] = books['listprice'] + 2
# 2) apply with a named function -- pass add_2 itself, without ()
books['listprice'] = books['listprice'].apply(add_2)
# 3) apply with an anonymous function
books['listprice'] = books['listprice'].apply(lambda x: x + 2)
print(books)

7，排序
books = pd.read_excel('E:/test/book.xlsx', index_col='ID')
# Sort by unit price, highest first. ``by`` names the sort column and
# ascending=False gives descending order; the sorted frame is bound back to
# ``books`` so the name refers to the sorted data.
books = books.sort_values(by='listprice', ascending=False)
# Sort by unit price (descending) first, then by quantity (ascending).
books = books.sort_values(
    by=['listprice', 'count'],
    ascending=[False, True],
)

8， 筛选
def age_18_to_40(a):
    """Return whether ``a`` lies in the half-open range [18, 40)."""
    return 18 <= a and a < 40
def level_a(s):
    """Return whether ``s`` lies in the closed range [85, 100]."""
    return 85 <= s and s <= 100
students = pd.read_excel('E:/test/student.xlsx', index_col='ID')
# Keep students with 18 <= Age < 40 and 85 <= Score <= 100.
# Both boolean masks are computed on the same frame and combined with &.
# Chaining a second .loc with a mask built from the unfiltered frame only
# works through implicit index re-alignment of that mask.
age_mask = students['Age'].apply(age_18_to_40)
score_mask = students['Score'].apply(level_a)
students = students.loc[age_mask & score_mask]
# The same filter written with anonymous functions.
# (students['Age'] can also be spelled students.Age.)
students = students.loc[
    students['Age'].apply(lambda a: 18 <= a < 40)
    & students['Score'].apply(lambda s: 85 <= s <= 100)
]

# -*- coding: utf-8 -*-
import pandas as pd
import matplotlib.pyplot as plt

9，柱状图
student = pd.read_excel('E:/test/zhuzhuangtu.xlsx', index_col='ID')
# Largest Number first so the bars are drawn in descending order.
student.sort_values(by='Number', inplace=True, ascending=False)
# Drawing the chart with pandas' own plot.bar:
# student.plot.bar(x='Field',y='Number',color='orange',title='International Students by Field')
# Drawing the chart with matplotlib directly:
plt.bar(student.Field, student.Number, color='orange')
# rotation must be a number (or 'vertical'/'horizontal'); the string '90'
# is rejected by current matplotlib releases.
plt.xticks(student.Field, rotation=90)
plt.xlabel('Field')
plt.ylabel('Number')
plt.title('International Students by Field', fontsize=16)
# Without tight_layout the axis labels get cut off.
plt.tight_layout()
# Display the chart window (e.g. when run from PyCharm).
plt.show()
print(student)
效果：

10，两组柱状图比较（更多细节可以参考上面的9优化）

students = pd.read_excel('E:/test/zhuzhuangtu2.xlsx')
print(students)
# Variant with explicit colours and differently named year columns:
# students.plot.bar(x='Field',y=['2020','2021'],color=['orange','red'])
# One bar per year column, grouped side by side for each Field.
students.plot(kind='bar', x='Field', y=['year2020', 'year2021'])
plt.show()

11，计算长方形外切圆面积

import pandas as pd
import numpy as np

def get_circumcircle_area(l, w):
    """Return the area of the circle circumscribed about an l-by-w rectangle.

    The circle's diameter is the rectangle's diagonal, so the radius is half
    of sqrt(l**2 + w**2).
    """
    diagonal = np.sqrt(l**2 + w**2)
    radius = diagonal / 2
    return np.pi * radius**2
def wrapper(row):
    """Adapt get_circumcircle_area to a row: read 'lenth' and 'width' from it."""
    length, width = row['lenth'], row['width']
    return get_circumcircle_area(length, width)

rects = pd.read_excel('E:/test/yuan.xlsx', index_col='ID')
# axis='columns' is the same as axis=1: wrapper is called once per row.
# axis=0 would call it once per column instead.
rects['ca'] = rects.apply(wrapper, axis='columns')

# Equivalent one-liner with an anonymous function:
# rects['ca'] = rects.apply(lambda row:get_circumcircle_area(row['lenth'],row['width']),axis=1)
print(rects)

12，饼图

# Pie chart
students = pd.read_excel('E:/test/bingtu.xlsx', index_col='ID')
print(students)
# The plain version:
# students['year2017'].plot.pie()
# A styled version: sort the slices ascending (the sort_values default),
# use a smaller label font and start drawing at 270 degrees.
students['year2017'].sort_values().plot(kind='pie', fontsize=8, startangle=270)
plt.title('AAAAAA', fontweight='bold', fontsize=16)
plt.ylabel('year2017', fontweight='bold', fontsize=12)
plt.show()

13,折线图，区域叠加图，叠加柱状图

weeks = pd.read_excel('E:/test/week.xlsx')
print(weeks)
print(weeks.columns)
# Line chart; ``y`` lists the columns drawn as lines. The returned Axes is
# kept so the styling further down can target this chart explicitly.
line_ax = weeks.plot(y=['year2019', 'year2020', 'year2021'])
# Stacked area chart.
weeks.plot.area(y=['year2019', 'year2020', 'year2021'])
# Stacked bar chart.
weeks.plot.bar(y=['year2019', 'year2020', 'year2021'], stacked=True)
# Add some styling to the line chart. This goes through line_ax rather than
# plt.title/plt.xticks/plt.ylabel, because those pyplot calls act on the
# most recently created axes -- here the stacked bar chart, not the line
# chart.
line_ax.set_title('Sales Weekly Trend', fontsize=16, fontweight='bold')
line_ax.set_xticks(weeks.index)
line_ax.tick_params(axis='x', labelsize=8)
line_ax.set_ylabel('Total', fontsize=12, fontweight='bold')
plt.show()

14，散点图，面积分布图，

# Raise the column display limit so printing shows every column instead of
# collapsing part of the data.
pd.set_option('display.max_columns', 777)
homes = pd.read_excel('E:/test/home_data.xlsx', index_col='Id')
print(homes)
# Scatter plot
# homes.plot.scatter(x='sqft_living',y='price')
# Histogram of living area
# homes.sqft_living.plot.hist(bins = 5)
# Density plot
# homes.sqft_living.plot.kde()
# Pairwise correlation between the columns
correlations = homes.corr()
print(correlations)

plt.show()

15，多表杂碎数据合并

# -*- coding: utf-8 -*-
import pandas as pd

students = pd.read_excel('E:/test/student_score.xlsx', sheet_name='student')
scores = pd.read_excel('E:/test/student_score.xlsx', sheet_name='score')
# Left-merge the score sheet onto the student sheet by ID, then replace the
# missing values left by unmatched rows with 0.
table = pd.merge(students, scores, how='left', on='ID')
table = table.fillna(0)
# Alternative using join:
# table = students.join(scores,how='left').fillna(0) # join 合并，scores
# Cast Score to int.
table['Score'] = table['Score'].astype(int)

print(table)

16，数据校验

# -*- coding: utf-8 -*-
import pandas as pd

def score_validation(row):
    """Print a message when ``row.Score`` is not a valid score in [0, 100].

    Args:
        row: An object exposing ``ID``, ``Name`` and ``Score`` attributes
            (a DataFrame row when used with ``apply(..., axis=1)``).

    Returns:
        None. Invalid rows are only reported on stdout.
    """
    # An explicit check is used instead of ``assert`` inside a bare
    # ``except``: asserts are stripped when Python runs with -O, which would
    # silently disable the validation, and a bare except hides every error.
    try:
        is_valid = 0 <= row.Score <= 100
    except TypeError:
        # A score that cannot be compared with numbers (e.g. text) is invalid.
        is_valid = False
    if not is_valid:
        # The \t keeps the messages aligned when some IDs have one digit and
        # others have two.
        print(f'#{row.ID}\t student{row.Name} has an invalid score {row.Score}.')

# The same check can also be written as a plain conditional:
# if not 0<=row.Score<=100:
#     print(f'#{row.ID}\t student{row.Name} has an invalid score {row.Score}.')



students = pd.read_excel('E:/test/student2.xlsx')
# axis='columns' is the same as axis=1: the rows are validated one at a time.
# axis=0 would hand over one column at a time instead.
students.apply(score_validation, axis='columns')

表格数据：运行结果：

17，读取csv，tsv，txt中的数据

import pandas as pd

# Read data from csv, tsv and txt files; ID is used as the index each time.
# Comma-separated (the read_csv default separator).
students = pd.read_csv('E:/test/student.csv', sep=',', index_col='ID')
# Tab-separated.
students2 = pd.read_csv('E:/test/student.tsv', index_col='ID', delimiter='\t')
# Pipe-separated.
students3 = pd.read_csv('E:/test/student.txt', index_col='ID', delimiter='|')

查看全文

相关阅读:
Android笔记（三）使得Activity之间可以跳转---Intent
Python jQuery
Python JavaScript BOM和DOM以及window对象
 Python JavaScript
Python 前端CSS样式
 Python 前端CSS
Python 前端 HTTP HTML标签
 Python mysql中的：视图触发器事务存储过程 mysql内置函数流程控制 b+树索引慢日志查询权限管理
 Python pymysql模块
 Django进阶Model篇005

原文地址：https://www.cnblogs.com/147258llj/p/15714072.html