zoukankan      html  css  js  c++  java
  • excelLoader.py

    from DataCleaning.library.functions.floatFormatter import *
    from DataCleaning.library.functions.list2SQL import *
    from DataCleaning.library.functions.excelDateFormatter import *
    from DataCleaning.library.functions.getStartAndEndLine import *
    from DataCleaning.library.functions.processBar import *


    def excelLoader(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False, firstCol = 0):
    print("generating table " + tempTableName)
    logging.info("generating table " + tempTableName)
    start = datetime.datetime.now()
    if firstCell == "":
    firstCell = str(sh.cell(0, firstCol).value)
    firstLine, lastLine = getStartAndEndLine(sh, firstCell, firstCol)
    if len(colsKeep) == 0:
    rounds = list(range(firstCol, sh.ncols))
    else:
    rounds = colsKeep
    SQLtempList = []
    processBar(context='preparing data...')
    for i in range(firstLine, lastLine):
    row = [""] * len(rounds)
    for j in range(len(rounds)):
    k = rounds[j]
    if "DATETIME" in typeList[j]:
    if str(sh.cell(i, k).value) != "":
    row[j] = str(sh.cell(i, k).value)
    else:
    row[j] = "NULL"
    elif "DATE" in typeList[j]:
    row[j] = excelDateFormatter(sh, i, k, sep="-")
    elif "TIME" in typeList[j]:
    row[j] = excelTimeFormatter(sh, i, k, sep=":")
    elif "FLOAT" in typeList[j]:
    if str(sh.cell(i, k).value) != "":
    row[j] = str(float(sh.cell(i, k).value))
    elif "DECIMAL" in typeList[j]:
    digits = int(typeList[j].split(",")[1][0])
    try:
    row[j] = str(round(float(sh.cell(i, k).value), digits))
    except:
    row[j] = "NULL"
    elif "INT" in typeList[j]:
    row[j] = floatFormatter(sh.cell(i, k).value)
    else:
    row[j] = floatFormatter(sh.cell(i, k).value).replace(".", " ").replace("'", "''").replace(",", " ").strip()
    if row[j] == "" or sh.cell(i, k).ctype == 5:
    row[j] = "NULL"
    if not emptyLine(row):
    SQLtempList.append(row)
    processBar(i, lastLine, firstLine)
    # write 2 sqlserver
    list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList, appendOnly)
    print("successfully generated table " + tempTableName)
    logging.info("successfully generated table " + tempTableName)
    end = datetime.datetime.now()
    logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))



    def emptyLine(row):
    sum = 0
    for ele in row:
    if ele != "NULL":
    sum += len(ele)
    if sum == 0:
    return True
    else:
    return False



    from DataCleaning.library.functions.floatFormatter import *
    from DataCleaning.library.functions.list2SQL import *
    from DataCleaning.library.functions.excelDateFormatter import *
    from DataCleaning.library.functions.getStartAndEndLine import *


    def excelLoaderExchangeRate(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False):
    print("generating table " + tempTableName)
    logging.info("generating table " + tempTableName)
    start = datetime.datetime.now()
    if firstCell == "":
    firstCell = str(sh.cell(0, 0).value)
    firstLine, lastLine = getStartAndEndLine(sh, firstCell)
    if len(colsKeep) == 0:
    rounds = list(range(sh.ncols))
    else:
    rounds = colsKeep
    SQLtempList = []
    processBar(context='preparing data...')
    for i in range(firstLine, lastLine):
    row = [""] * len(rounds)
    for j in range(len(rounds)):
    k = rounds[j]
    if "DATE" in typeList[j]:
    if sh.cell(i, j).value != "":
    tempDate = str(sh.cell(i, j).value).replace(".", "")
    row[j] = "-".join([tempDate[-4:], tempDate[2:4], tempDate[:2]])
    elif "TIME" in typeList[j]:
    row[j] = excelTimeFormatter(sh, i, k, sep=":")
    elif "DECIMAL" in typeList[j]:
    digits = int(typeList[j].split(",")[1][0])
    try:
    row[j] = str(round(float(sh.cell(i, k).value.replace("/", "").replace(".", "").replace(",", ".")), digits))
    except:
    row[j] = "NULL"
    elif "INT" in typeList[j]:
    row[j] = floatFormatter(sh.cell(i, k).value)
    else:
    row[j] = floatFormatter(sh.cell(i, k).value).replace(".", ":").replace("'", "''").strip()
    if row[j] == "" or sh.cell(i, k).ctype == 5:
    row[j] = "NULL"
    if not emptyLine(row):
    SQLtempList.append(row)
    processBar(i, lastLine, firstLine)
    # write 2 sqlserver
    list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList)
    print("successfully generated table " + tempTableName)
    logging.info("successfully generated table " + tempTableName)
    end = datetime.datetime.now()
    logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))


    def excelLoaderPN_TOPMOST_MAPPING(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False):
    print("generating table " + tempTableName)
    logging.info("generating table " + tempTableName)
    start = datetime.datetime.now()
    if firstCell == "":
    firstCell = str(sh.cell(0, 0).value)
    firstLine, lastLine = getStartAndEndLine(sh, firstCell)
    if len(colsKeep) == 0:
    rounds = list(range(sh.ncols))
    else:
    rounds = colsKeep
    PN2TOPMOST_Dic = {}
    SQLtempList = []
    processBar(context='preparing data...')
    for i in range(firstLine, lastLine):
    row = [""] * len(rounds)
    for j in range(len(rounds)):
    k = rounds[j]
    if "DATETIME" in typeList[j]:
    if str(sh.cell(i, k).value) != "":
    row[j] = str(sh.cell(i, k).value)
    else:
    row[j] = "NULL"
    elif "DATE" in typeList[j]:
    row[j] = excelDateFormatter(sh, i, k, sep="-")
    elif "TIME" in typeList[j]:
    row[j] = excelTimeFormatter(sh, i, k, sep=":")
    elif "FLOAT" in typeList[j]:
    if str(sh.cell(i, k).value) != "":
    row[j] = str(float(sh.cell(i, k).value))
    elif "DECIMAL" in typeList[j]:
    digits = int(typeList[j].split(",")[1][0])
    try:
    row[j] = str(round(float(sh.cell(i, k).value), digits))
    except:
    row[j] = "NULL"
    elif "INT" in typeList[j]:
    row[j] = floatFormatter(sh.cell(i, k).value)
    else:
    row[j] = floatFormatter(sh.cell(i, k).value).replace(".", " ").replace("'", "''").replace(",", " ").strip()
    if row[j] == "" or sh.cell(i, k).ctype == 5:
    row[j] = "NULL"
    if row[1] in PN2TOPMOST_Dic.keys():
    PN2TOPMOST_Dic[row[1]].append(row[0])
    else:
    PN2TOPMOST_Dic[row[1]] = [row[0]]

    if not emptyLine(row):
    SQLtempList.append(row)
    processBar(i, lastLine, firstLine)
    for i in range(len(SQLtempList)):
    SQLtempList[i].append(";".join(PN2TOPMOST_Dic[SQLtempList[i][1]]))
    # write 2 sqlserver
    list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList, appendOnly)
    print("successfully generated table " + tempTableName)
    logging.info("successfully generated table " + tempTableName)
    end = datetime.datetime.now()
    logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))  
  • 相关阅读:
    一个基于STM32F429 HAL库的学习工程模板
    STM32F4 SPI 学习笔记
    STM32 相同头文件名称的路径问题
    STM32F429基于CUBEMX的串口中断接收
    STM32F429 使用ESP8266通讯心得
    Zabbix 配置通过sendEmail发送邮件报警
    CentOS 7 下安装Zabbix 3.0
    centOS 6.5下Zabbix RPM安装
    Java安装和配置(jdk)
    centos7.2 系统基础优化
  • 原文地址:https://www.cnblogs.com/zhulimin/p/15369463.html
Copyright © 2011-2022 走看看