zoukankan      html  css  js  c++  java
  • excel 数据入库

    import logging
    import pymongo
    import os
    import pandas as pd
    import numpy as np
    from tqdm import tqdm


    def excel_mongo(file_path, db_name, table_name, sheet_num=True, ip_address='127.0.0.1', port=27017):
    """
    file_path excel 文件的路径
    db_name 数据库名称
    table_name 数据库表的名称
    sheet_num 是否多个sheet导入
    ip_address 导入数据库的ip地址
    """
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
    logging.basicConfig(filename='mongo_insert.log', level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
    excel_data_list = []
    logging.info("------- " + str(ip_address) + ' 数据库导入ip地址----')
    if os.path.isfile(file_path):
    file = os.path.splitext(file_path)
    _, type = file
    if type !='.xls' and type != '.xlsx':
    logging.info("------- " + str(file_path) + ' 非excel文件类型----')
    return
    logging.info("------- "+str(file_path) + ' 路径下的excel数据开始读取----')
    if sheet_num:
    """单个sheet 导入mongo"""
    df = pd.read_excel(file_path, keep_default_na=False)
    excel_header = list(df.columns)
    for data in df.values:
    row_data = dict(zip(excel_header, data))
    excel_data_list.append(row_data)
    else:
    """多 sheet 导入"""
    df = pd.read_excel(file_path, keep_default_na=False, sheet_name=None)
    for data_obj in df.values():
    excel_header = list(data_obj.columns)
    data_array = np.array(data_obj)
    for data in data_array:
    row_data = dict(zip(excel_header, data))
    excel_data_list.append(row_data)
    client = pymongo.MongoClient(ip_address, port=port)
    db_insert = client[db_name][table_name]
    try:
    db_insert.insert_many(excel_data_list)
    logging.info(f'{table_name}---数据入库成功')
    except Exception as e:
    logging.error(f'{table_name}入库失败--数据表中有部分没有表头,请查看! 错误类型:{e}')
    print(f'{table_name}入库失败--数据表中有部分没有表头,请查看!')
    dn_count = db_insert.find().count()
    excel_count = df.shape[0]
    if dn_count == excel_count:
    logging.info(f'{table_name}---数据没有遗失')
    else:
    logging.error(f'{table_name}---数据遗失,删除数据库')
    db_insert.drop()
    else:
    logging.warning("------- " + str(file_path) + ' 非文件类型----')


    def to_mongo(path, ip_address, db='BiaoQian_3'):
    for file in tqdm(os.listdir(path)):
    file_path = os.path.join(path, file)
    if os.path.isfile(file_path):
    cate = path.split('\')[-1]
    file_name, _ = os.path.splitext(file)
    table_name = cate+'/'+file_name
    excel_mongo(file_path, db, table_name, ip_address=ip_address)
    else:
    continue
    print(f'{path}--excel 表格全部入库')


    if __name__ == '__main__':
    path = r'E:标签组数据第五次文旅'
    ip_address = '192.168.0.147'
    # db, 可指定数据库名称: 默认BiaoQian_3
    db = 'BiaoQian_5'
    to_mongo(path, ip_address,db)
  • 相关阅读:
    随笔2
    随笔
    关于updateElement接口
    随笔1
    本地访问正常,服务器访问乱码 记录
    Redis (error) NOAUTH Authentication required.解决方法
    tomcat启动很慢 停留在 At least one JAR was scanned for TLDs yet contained no TLDs.
    微信公众号消息回复
    微信公众号 报token验证失败
    idea中web.xml报错 Servlet should have a mapping
  • 原文地址:https://www.cnblogs.com/lqn404/p/13612201.html
Copyright © 2011-2022 走看看