zoukankan      html  css  js  c++  java
  • excel 数据入库

    import logging
    import pymongo
    import os
    import pandas as pd
    import numpy as np
    from tqdm import tqdm


    def excel_mongo(file_path, db_name, table_name, sheet_num=True, ip_address='127.0.0.1', port=27017):
    """
    file_path excel 文件的路径
    db_name 数据库名称
    table_name 数据库表的名称
    sheet_num 是否多个sheet导入
    ip_address 导入数据库的ip地址
    """
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    DATE_FORMAT = "%m/%d/%Y %H:%M:%S %p"
    logging.basicConfig(filename='mongo_insert.log', level=logging.INFO, format=LOG_FORMAT, datefmt=DATE_FORMAT)
    excel_data_list = []
    logging.info("------- " + str(ip_address) + ' 数据库导入ip地址----')
    if os.path.isfile(file_path):
    file = os.path.splitext(file_path)
    _, type = file
    if type !='.xls' and type != '.xlsx':
    logging.info("------- " + str(file_path) + ' 非excel文件类型----')
    return
    logging.info("------- "+str(file_path) + ' 路径下的excel数据开始读取----')
    if sheet_num:
    """单个sheet 导入mongo"""
    df = pd.read_excel(file_path, keep_default_na=False)
    excel_header = list(df.columns)
    for data in df.values:
    row_data = dict(zip(excel_header, data))
    excel_data_list.append(row_data)
    else:
    """多 sheet 导入"""
    df = pd.read_excel(file_path, keep_default_na=False, sheet_name=None)
    for data_obj in df.values():
    excel_header = list(data_obj.columns)
    data_array = np.array(data_obj)
    for data in data_array:
    row_data = dict(zip(excel_header, data))
    excel_data_list.append(row_data)
    client = pymongo.MongoClient(ip_address, port=port)
    db_insert = client[db_name][table_name]
    try:
    db_insert.insert_many(excel_data_list)
    logging.info(f'{table_name}---数据入库成功')
    except Exception as e:
    logging.error(f'{table_name}入库失败--数据表中有部分没有表头,请查看! 错误类型:{e}')
    print(f'{table_name}入库失败--数据表中有部分没有表头,请查看!')
    dn_count = db_insert.find().count()
    excel_count = df.shape[0]
    if dn_count == excel_count:
    logging.info(f'{table_name}---数据没有遗失')
    else:
    logging.error(f'{table_name}---数据遗失,删除数据库')
    db_insert.drop()
    else:
    logging.warning("------- " + str(file_path) + ' 非文件类型----')


    def to_mongo(path, ip_address, db='BiaoQian_3'):
    for file in tqdm(os.listdir(path)):
    file_path = os.path.join(path, file)
    if os.path.isfile(file_path):
    cate = path.split('\')[-1]
    file_name, _ = os.path.splitext(file)
    table_name = cate+'/'+file_name
    excel_mongo(file_path, db, table_name, ip_address=ip_address)
    else:
    continue
    print(f'{path}--excel 表格全部入库')


    if __name__ == '__main__':
    path = r'E:标签组数据第五次文旅'
    ip_address = '192.168.0.147'
    # db, 可指定数据库名称: 默认BiaoQian_3
    db = 'BiaoQian_5'
    to_mongo(path, ip_address,db)
  • 相关阅读:
    search
    longestValidParentheses
    nextPermutation
    linux下通过安装xampp搭建环境卸载xampp、以及网站搬家-64位系统Apache启动不了的问题
    对拍器 ADC-ACMDataCreater
    Educational Codeforces Round 86 (Rated for Div. 2) A~D
    Codeforces Round #637 (Div. 2) A~D
    Codeforces Round #636 (Div. 3) A~D
    Codeforces Round #635 (Div. 2) A~D
    原型设计工具比较及实践 基于WolframAlphaAPI的科学计算器原型设计
  • 原文地址:https://www.cnblogs.com/lqn404/p/13612201.html
Copyright © 2011-2022 走看看