目的:以学院为单位,统计各学院录取学生中本科毕业学校为“一本”的占比,以及“第一志愿”录取的占比。
判定规则:毕业学校名称中含“学院”的(如XX学院)视为二本,其余(如XX大学)视为一本;备注为“一志愿”表示第一志愿录取,“调剂”表示调剂之后录取。
原始数据:
代码:
import pandas as pd

# File locations and encoding used by the script entry point.
INPUT_CSV = "2020.csv"
OUTPUT_CSV = "2020年录取率详情.csv"
CSV_ENCODING = "gbk"


def build_admission_summary(df):
    """Return per-college admission statistics.

    The input frame must provide the columns ``学院代码`` (college code),
    ``学院名称`` (college name), ``毕业学校`` (undergraduate school) and
    ``备注`` (remark).

    Classification rules:

    * A school whose name contains ``学院`` is counted as second tier;
      any other school name is counted as first tier.
    * A remark equal to ``一志愿`` is a first-choice admission and a remark
      equal to ``调剂`` is an adjusted admission.

    Rows whose school name is missing, or whose remark is neither of the two
    known values, still count towards ``总录取人数`` but are left out of the
    numerator and denominator of the corresponding rate, so one bad row no
    longer aborts the whole run.  A college with no classifiable rows gets a
    NaN rate.

    Colleges are taken from the codes present in the data (sorted
    ascending) instead of a fixed 1..24 range.

    Args:
        df: Raw admission records, one row per admitted student.

    Returns:
        A new DataFrame with one row per college code, a fresh 0..n-1 index
        and the columns ``学院代码``, ``学院名称``, ``一本学校录取人数``,
        ``总录取人数``, ``一本录取率``, ``第一志愿录取人数``, ``一志愿录取率``.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    school = df["毕业学校"]
    remark = df["备注"]

    # Per-row 0/1 indicators; integers so that group sums are exact counts.
    school_known = school.map(lambda name: isinstance(name, str)).astype(int)
    tier_one = school.map(
        lambda name: isinstance(name, str) and "学院" not in name
    ).astype(int)
    is_first_choice = remark == "一志愿"
    remark_known = (is_first_choice | (remark == "调剂")).astype(int)
    first_choice = is_first_choice.astype(int)

    flags = pd.DataFrame(
        {
            "学院代码": df["学院代码"],
            "学院名称": df["学院名称"],
            "tier_one": tier_one,
            "school_known": school_known,
            "first_choice": first_choice,
            "remark_known": remark_known,
        }
    )

    # One pass over the data per statistic instead of re-filtering the frame
    # for every college code.
    grouped = flags.groupby("学院代码", sort=True)
    counts = grouped[
        ["tier_one", "school_known", "first_choice", "remark_known"]
    ].sum()

    summary = pd.DataFrame(
        {
            # First name seen for each code, as in the original lookup.
            "学院名称": grouped["学院名称"].first(),
            "一本学校录取人数": counts["tier_one"],
            "总录取人数": grouped.size(),
            "一本录取率": counts["tier_one"] / counts["school_known"],
            "第一志愿录取人数": counts["first_choice"],
            "一志愿录取率": counts["first_choice"] / counts["remark_known"],
        }
    )
    # Move the college code from the index back into the first column.
    return summary.reset_index()


def main():
    """Read the raw admission CSV and write the per-college summary CSV."""
    df = pd.read_csv(INPUT_CSV, encoding=CSV_ENCODING)
    summary = build_admission_summary(df)
    # The index is written as well, matching the original output layout.
    summary.to_csv(OUTPUT_CSV, encoding=CSV_ENCODING)


if __name__ == "__main__":
    main()
结果:
数据源不对外公布。