zoukankan      html  css  js  c++  java
  • Python 读取本地txt文件生成excel

    import os
    import re
    import logging
    from bs4 import BeautifulSoup
    from openpyxl import  Workbook
    from openpyxl.utils import get_column_letter
    import datetime
    
    # Configure the root logger to write to a log file.
    logging.basicConfig(
        # File the log records are written to.
        filename='food.log',
        # 'w' truncates the log on every run; 'a' appends to the existing
        # log (append is also what is used when no mode is given).
        filemode='a',
        # Minimum severity that gets recorded.
        level=logging.INFO,
        # Layout of each log record.
        format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
    )
    
    def Insert2Excel(allinfo):
        """Write the collected rows to the workbook ``restaurantsinfo.xlsx``.

        A sheet titled ``restaurants`` is created with the header row
        ``['name', 'time', 'score']``; every item of ``allinfo`` is then
        appended as one row below it.

        Args:
            allinfo: Iterable of rows; each row is passed unchanged to
                ``ws.append``.

        Returns:
            ``'Insert Excel succcessfully!'`` when the workbook was saved,
            ``'Insert Excel failed!'`` when building or saving it raised.
            (The spelling of the success message is kept as-is so existing
            callers comparing against it keep working.)
        """
        tableTitle = ['name', 'time', 'score']
        work_name = 'restaurantsinfo.xlsx'
        try:
            wb = Workbook()
            ws = wb.active
            ws.title = 'restaurants'
            ws.append(tableTitle)
            # Give every header column the same fixed width.
            for i in range(1, len(tableTitle) + 1):
                ws.column_dimensions[get_column_letter(i)].width = 15
            for info in allinfo:
                ws.append(info)
            wb.save(work_name)
        except Exception:
            # Keep the best-effort contract (return a status string), but
            # record the traceback instead of hiding the cause, and do not
            # intercept KeyboardInterrupt/SystemExit as a bare except would.
            logging.exception('Failed to write %s', work_name)
            return 'Insert Excel failed!'
        return 'Insert Excel succcessfully!'
    if __name__ == '__main__':
        # Script entry point: parse the saved page in food.txt, collect
        # [name, time, score] for each restaurant entry, and write the rows
        # to Excel via Insert2Excel.
        start = datetime.datetime.now().replace(microsecond=0)
        print('Start: ', start)
        allinfo = []
        # url = 'https://food.grab.com/sg/en/restaurants'
        #########################  test  ##################
        # Read the page from a local file as raw bytes; BeautifulSoup
        # handles the decoding.
        with open('food.txt', 'rb') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
        if tag is None:
            # find() returns None when the container is not in the page.
            logging.warning('Restaurant list container not found in food.txt')
            restaurants = []
        else:
            # Only direct child elements are entries; iterating the tag
            # itself would also yield text nodes, which cannot be searched
            # with find(..., attrs=...).
            restaurants = tag.find_all(True, recursive=False)
        print(len(restaurants))

        # r'\d+' matches a run of digits (the plain pattern "d+" would match
        # the letter "d" instead).
        digits = re.compile(r'\d+')
        for restaurant in restaurants:
            name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
            if name_tag is None:
                logging.warning('Skipping entry without a name element')
                continue
            name = name_tag.get_text()
            lst = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
            if len(lst) == 2:
                # Two number cells: the first is the score, the second
                # carries the time.
                score = lst[0].get_text()
                time_text = lst[1].get_text()
            elif lst:
                # Otherwise the first cell carries the time and the score
                # defaults to '0'.
                score = '0'
                time_text = lst[0].get_text()
            else:
                # No number cells at all: nothing to extract.
                score = '0'
                time_text = ''
            found = digits.search(time_text)
            time = found.group() if found else ''
            allinfo.append([name, time, score])
        print(Insert2Excel(allinfo))
        end = datetime.datetime.now().replace(microsecond=0)
        print('End:', end)
        print('Running time: %s Seconds' % (end - start))
  • 相关阅读:
    【5.3】dict的子类
    【5.2】dict的常用方法
    【5.1】dict的abc继承关系
    【4.5】列表推导式、生成器表达式、字典推导式
    【4.4】bisect维护已排序序列
    【4.3】实现可切片的对象
    【4.2】Python序列中+、+=和extend的区别
    【4.1】Python中的序列分类
    【3.12】contextlib简化上下文管理器
    【3.11】Python中的with语句
  • 原文地址:https://www.cnblogs.com/ouzai/p/13739321.html
Copyright © 2011-2022 走看看