zoukankan      html  css  js  c++  java
  • Python 读取本地txt文件生成excel

    import os
    import re
    import logging
    from bs4 import BeautifulSoup
    from openpyxl import  Workbook
    from openpyxl.utils import get_column_letter
    import datetime
    
    # Route log records to the file food.log instead of the console.
    logging.basicConfig(
        # Record layout: timestamp - source path[line:N] - level: message.
        format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
        # 'a' appends to the existing log file; 'w' would overwrite it on
        # every run. Append is also the default when filemode is omitted.
        filemode='a',
        filename='food.log',
        # Minimum severity that gets recorded.
        level=logging.INFO,
    )
    
    def Insert2Excel(allinfo):
        """Write restaurant rows to the workbook ``restaurantsinfo.xlsx``.

        A fresh workbook is created with a single sheet named ``restaurants``
        whose first row is the header ``name, time, score``. Every header
        column is given a width of 15, then each item of ``allinfo`` is
        appended as one row and the workbook is saved under a relative path
        (i.e. in the current working directory).

        Args:
            allinfo: Iterable of rows; each row is passed unchanged to
                ``ws.append``.

        Returns:
            ``'Insert Excel succcessfully!'`` when the file was saved,
            ``'Insert Excel failed!'`` when any step raised. The strings are
            kept exactly as they were so existing callers see no change.
        """
        tableTitle = ['name', 'time', 'score']
        work_name = 'restaurantsinfo.xlsx'
        try:
            wb = Workbook()
            ws = wb.active
            ws.title = 'restaurants'
            ws.append(tableTitle)
            # max_column equals the header length here because only the
            # header row has been written so far.
            for i in range(1, ws.max_column + 1):
                ws.column_dimensions[get_column_letter(i)].width = 15
            for info in allinfo:
                ws.append(info)
            wb.save(work_name)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still propagate, and keep the traceback in the log
            # instead of discarding the cause of the failure.
            logging.exception('Failed to write %s', work_name)
            return 'Insert Excel failed!'
        return 'Insert Excel succcessfully!'
    if __name__ == '__main__':
        # Script entry point: parse the saved restaurant listing in food.txt,
        # collect [name, delivery time, score] for every restaurant and export
        # the rows to Excel through Insert2Excel.
        start = datetime.datetime.now().replace(microsecond=0)
        print('Start: ', start)
        allinfo = []
        # NOTE(review): food.txt is presumably a saved copy of the page below,
        # used for offline testing - confirm.
        # url = 'https://food.grab.com/sg/en/restaurants'
        # Read raw bytes and let BeautifulSoup work out the encoding.
        with open('food.txt', 'rb') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
        if tag is None:
            # The list container is missing; export just the header row
            # instead of crashing on len(None).
            logging.error('Restaurant list container not found in food.txt')
            restaurants = []
        else:
            # Direct child elements only. Iterating the tag itself would also
            # yield text nodes (e.g. whitespace between elements), which do
            # not support a tag-style find().
            restaurants = tag.find_all(True, recursive=False)
        print(len(restaurants))
        for restaurant in restaurants:
            name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
            lst = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
            if name_tag is None or not lst:
                logging.warning('Skipping entry without name or numbers: %r',
                                restaurant.get_text())
                continue
            if len(lst) == 2:
                # Two number cells: the first is the score, the second the time.
                score = lst[0].get_text()
                time_text = lst[1].get_text()
            else:
                # Otherwise the first cell is taken as the time and the score
                # defaults to '0'.
                score = '0'
                time_text = lst[0].get_text()
            # r"\d+" extracts the leading run of digits; the previous pattern
            # "d+" matched literal 'd' characters and raised IndexError on
            # text that contained none.
            numbers = re.findall(r"\d+", time_text)
            if not numbers:
                logging.warning('Skipping entry with no numeric time: %r', time_text)
                continue
            allinfo.append([name_tag.get_text(), numbers[0], score])
        print(Insert2Excel(allinfo))
        end = datetime.datetime.now().replace(microsecond=0)
        print('End:', end)
        print('Running time: %s Seconds' % (end - start))
  • 相关阅读:
    tomcat log
    关于 终端 ls 命令 不能区分文件和目录的问题
    画幅
    透视
    焦距和等效焦距
    滚动条插件mCustomScrollbar
    网页优化总结
    CSS3中translate、transform和translation的区别和联系
    Less的学习和使用
    Koala工具的使用说明
  • 原文地址:https://www.cnblogs.com/ouzai/p/13739321.html
Copyright © 2011-2022 走看看