zoukankan      html  css  js  c++  java
  • 考勤清洗

    import pandas as pd
    import numpy as np
    import os

    def work_attendance(file_path,out_path):
    data = pd.read_excel(file_path, sheet_name='2020.1.2') # 填文件路径
    # data.replace(' ', "", inplace=True)
    # data.dropna(axis=0)
    # 将姓名填满那一行
    for i in data.index:
    if i % 2 == 0:
    data.iloc[i:i + 2:2] = data[11].at[i]
    # 构造空数组
    list1 = []
    for i in range(0, len(data[11]), 2):
    for j in range(1, len(data.columns) + 1):
    list1.append(j)
    aa = pd.DataFrame((x for x in list1), columns=["日期"])
    aa["姓名"] = pd.DataFrame((str(x) for x in list1))
    aa["时间"] = pd.DataFrame((str(x) for x in list1))
    name = []
    for i in range(len(data[11])):
    for j in range(1, len(data.columns) + 1):
    if i % 2 == 0:
    name.append(str(data[j].at[i]))
    # 将打卡时间按顺序提取放到列表
    time = []
    for i in range(len(data[11])):
    for j in range(1, len(data.columns) + 1):
    if i % 2 != 0:
    time.append(str(data[j].at[i]))
    for i in range(0, len(aa["时间"])):
    aa["时间"].at[i] = time[i]
    aa["姓名"].at[i] = name[i]
    aa['时间'] = aa['时间'].str.replace(' ', '-')
    aa['时间'] = aa['时间'].str.replace(' ', '-')
    aa2 = pd.DataFrame((str(x).split('-') for x in aa['时间'])
    ) # 时间分列,"columns=" 是分列后的列名
    df = pd.merge(aa, aa2, how='left', left_index=True, right_index=True)
    df["上班"] = None
    df['下班'] = pd.DataFrame((str(x) for x in df.index), index=aa.index)
    df['下班'] = None
    df['上班点'] = None
    df.fillna('', axis=0, inplace=True)
    df.replace('nan', "", inplace=True)
    df.to_excel(out_path) # 输出路径
    try:
    for a in df.index:
    if len(df[0].at[a]) > 0 and pd.to_datetime(df[0].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
    format='%H:%M:%S'):
    df["上班"].at[a] = df[0].at[a]
    elif len(df[1].at[a]) > 0 and pd.to_datetime(df[1].at[a], format='%H:%M') < pd.to_datetime('12:00:00',
    format='%H:%M:%S'):
    df["上班"].at[a] = df[1].at[a]

    except Exception as e:
    print(e)
    print(a)
    print(df[1].at[a])
    for j in range(len(aa2.columns) - 2, -1, -1):
    for i in df.index:
    if len(df[j].at[i]) > 0:
    if pd.to_datetime(df[j].at[i], format='%H:%M') > pd.to_datetime('17:00:00', format='%H:%M:%S'):
    df["下班"].at[i] = df[j].at[i]
    # else:
    # df["下班"].at[i] = '无'
    # else:
    # df["下班"].at[i] = ''
    for i in df.index:
    if len(df["上班"].at[i]) > 0:
    if pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('00:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '00:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '06:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('06:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '06:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '07:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('07:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '07:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('08:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '08:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '09:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('09:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '09:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '10:00:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('10:30:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '10:30:00'
    elif pd.to_datetime(df['上班'].at[i], format='%H:%M') <= pd.to_datetime('11:00:00', format='%H:%M:%S'):
    df['上班点'].at[i] = '11:00:00'
    # df['上班点'] = df['上班']
    for aa in df.index:
    if len(df["时间"].at[aa]) < 1:
    df.drop([aa], axis=0, inplace=True)
    df.to_excel(out_path) # 输出路径
    if __name__ == '__main__':
    path = 'C:/Users/mgxx/Desktop/仓库考勤'
    fileList = os.listdir(path)
    for file in fileList:
    file_path = path + "/" + file
    out_path = path + "/" + '清洗' + file
    work_attendance(file_path, out_path)
  • 相关阅读:
    【磁盘/文件系统】第五篇:CentOS7.x__btrfs文件系统详解
    【python】-- IO多路复用(select、poll、epoll)介绍及实现
    【python】-- 事件驱动介绍、阻塞IO, 非阻塞IO, 同步IO,异步IO介绍
    【python】-- 协程介绍及基本示例、协程遇到IO操作自动切换、协程(gevent)并发爬网页
    【python】-- 多进程的基本语法 、进程间数据交互与共享、进程锁和进程池的使用
    【python】-- 队列(Queue)、生产者消费者模型
    【python】-- 信号量(Semaphore)、event(红绿灯例子)
    【python】-- GIL锁、线程锁(互斥锁)、递归锁(RLock)
    【python】-- 继承式多线程、守护线程
    【python】-- 进程与线程
  • 原文地址:https://www.cnblogs.com/snackpython/p/11994700.html
Copyright © 2011-2022 走看看