zoukankan      html  css  js  c++  java
  • 考勤清洗

    import pandas as pd
    import numpy as np
    import os

    def work_attendance(file_path,out_path):
    data = pd.read_excel(file_path, sheet_name='2020.1.2') # 填文件路径
    # data.replace(' ', "", inplace=True)
    # data.dropna(axis=0)
    # 将姓名填满那一行
    for i in data.index:
    if i % 2 == 0:
    data.iloc[i:i + 2:2] = data[11].at[i]
    # 构造空数组
    list1 = []
    for i in range(0, len(data[11]), 2):
    for j in range(1, len(data.columns) + 1):
    list1.append(j)
    aa = pd.DataFrame((x for x in list1), columns=["日期"])
    aa["姓名"] = pd.DataFrame((str(x) for x in list1))
    aa["时间"] = pd.DataFrame((str(x) for x in list1))
    name = []
    for i in range(len(data[11])):
    for j in range(1, len(data.columns) + 1):
    if i % 2 == 0:
    name.append(str(data[j].at[i]))
    # 将打卡时间按顺序提取放到列表
    time = []
    for i in range(len(data[11])):
    for j in range(1, len(data.columns) + 1):
    if i % 2 != 0:
    time.append(str(data[j].at[i]))
    for i in range(0, len(aa["时间"])):
    aa["时间"].at[i] = time[i]
    aa["姓名"].at[i] = name[i]
    aa['时间'] = aa['时间'].str.replace(' ', '-')
    aa['时间'] = aa['时间'].str.replace(' ', '-')
    aa2 = pd.DataFrame((str(x).split('-') for x in aa['时间'])
    ) # 时间分列,"columns=" 是分列后的列名
    df = pd.merge(aa, aa2, how='left', left_index=True, right_index=True)
    df["上班"] = None
    df['下班'] = pd.DataFrame((str(x) for x in df.index), index=aa.index)
    df['下班'] = None
    df['上班点'] = None
    df.fillna('', axis=0, inplace=True)
    df.replace('nan', "", inplace=True)
    df.to_excel(out_path) # 输出路径
    try:
    for a in df.index:
    if len(df[0].at[a]) > 0 and pd.to_datetime(df[0].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
    format='%H:%M:%S'):
    df["上班"].at[a] = df[0].at[a]
    elif len(df[1].at[a]) > 0 and pd.to_datetime(df[1].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
    format='%H:%M:%S'):
    df["上班"].at[a] = df[1].at[a]

    except Exception as e:
    print(e)
    print(a)
    print(df[1].at[a])
    for j in range(len(aa2.columns) - 2, -1, -1):
    for i in df.index:
    if len(df[j].at[i]) > 0:
    if pd.to_datetime(df[j].at[i], format='%H:%M') > pd.to_datetime('17:00:00', format='%H:%M:%S'):
    df["下班"].at[i] = df[j].at[i]
    # else:
    # df["下班"].at[i] = '无'
    # else:
    # df["下班"].at[i] = ''
    for i in df.index:
    if len(df["上班"].at[i]) > 0:
    if pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('00:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '00:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '06:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '06:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '07:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '07:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('08:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '08:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '09:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '09:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '10:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '10:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('11:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '11:00:00'
    # df['上班点'] = df['上班']
    for aa in df.index:
    if len(df["时间"].at[aa]) < 1:
    df.drop([aa], axis=0, inplace=True)
    df.to_excel(out_path) # 输出路径
    if __name__ == '__main__':
    path = 'C:/Users/mgxx/Desktop/仓库考勤'
    fileList = os.listdir(path)
    for file in fileList:
    file_path = path + "/" + file
    out_path = path + "/" + '清洗' + file
    work_attendance(file_path, out_path)
  • 相关阅读:
    LeetCode 560. Subarray Sum Equals K (子数组之和等于K)
    25、LinkedList特有方法
    24、List三个子类的特点
    23、数据结构之数组和链表
    22、Vector简介
    21、List遍历时修改元素的问题
    20、List集合中特有的方法
    19、集合概述
    18、Random类简介
    17、enum简介
  • 原文地址:https://www.cnblogs.com/snackpython/p/11994700.html
Copyright © 2011-2022 走看看