zoukankan      html  css  js  c++  java
  • Python 读取本地txt文件生成excel

    import os
    import re
    import logging
    from bs4 import BeautifulSoup
    from openpyxl import  Workbook
    from openpyxl.utils import get_column_letter
    import datetime
    
    # Route log records of level INFO and above to the file food.log
    # (because a filename is given, records go to that file, not the console).
    logging.basicConfig(
        filename='food.log',
        # 'a' appends to the existing log; 'w' would truncate it on every run.
        # Append is also what basicConfig uses when filemode is omitted.
        filemode='a',
        level=logging.INFO,
        # Record layout: timestamp - source path[line:N] - level: message
        format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
    )
    
    def Insert2Excel(allinfo):
        """Write the collected rows to ``restaurantsinfo.xlsx``.

        A new workbook is created with a single sheet named ``restaurants``.
        The first row is the header ``name``, ``time``, ``score``; each item
        of ``allinfo`` is appended below it as one row. Every header column
        is given a width of 15. An existing file of the same name is
        overwritten by the save.

        Args:
            allinfo: Iterable of rows; each row is a sequence of cell values
                (the caller in this file passes ``[name, time, score]``).

        Returns:
            ``'Insert Excel succcessfully!'`` when the file was saved,
            ``'Insert Excel failed!'`` when any step raised an exception.
            The spelling of the success message is kept exactly as it was so
            that anything matching on the text keeps working.
        """
        tableTitle = ['name', 'time', 'score']
        work_name = 'restaurantsinfo.xlsx'
        try:
            wb = Workbook()
            ws = wb.active
            ws.title = 'restaurants'
            ws.append(tableTitle)
            # max_column reflects the header just written, so this sizes
            # exactly the header columns.
            for i in range(1, ws.max_column + 1):
                ws.column_dimensions[get_column_letter(i)].width = 15
            for info in allinfo:
                ws.append(info)
            wb.save(work_name)
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate, and record the
            # traceback so the reason for the failure is not lost.
            logging.exception('Failed to write %s', work_name)
            return 'Insert Excel failed!'
        return 'Insert Excel succcessfully!'
    if __name__ == '__main__':
        # Script entry point: parse the saved HTML page in food.txt, collect
        # one [name, time, score] row per restaurant card, write the rows to
        # Excel via Insert2Excel, and print start/end timestamps.
        start = datetime.datetime.now().replace(microsecond=0)
        print('Start: ', start)
        allinfo = []
        # url = 'https://food.grab.com/sg/en/restaurants'
        #########################  test  ##################
        # Read as bytes and let BeautifulSoup detect the encoding.
        with open('food.txt', 'rb') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'html.parser')
        tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
        if tag is None:
            # find() returns None when the container is absent; continue with
            # an empty list instead of crashing on len(None).
            logging.warning('Restaurant list container not found in food.txt')
            restaurants = []
        else:
            # Only direct child *tags*: iterating the container itself also
            # yields text nodes (e.g. whitespace), which cannot be searched.
            restaurants = tag.find_all(True, recursive=False)
        print(len(restaurants))

        for restaurant in restaurants:
            name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
            if name_tag is None:
                # Child element without a name heading: not a usable card.
                logging.warning('Skipping list entry without a name element')
                continue
            name = name_tag.get_text()

            lst = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
            if len(lst) == 2:
                # Two cells: the first is the score, the second holds the time.
                score = lst[0].get_text()
                time_text = lst[1].get_text()
            else:
                # Otherwise there is no score cell; default the score to '0'
                # and read the time from the first cell if there is one.
                score = '0'
                time_text = lst[0].get_text() if lst else ''

            # r"\d+" matches runs of digits. The previous pattern "d+" matched
            # the literal letter "d", so it never extracted the number.
            digits = re.findall(r"\d+", time_text)
            time = digits[0] if digits else ''

            allinfo.append([name, time, score])

        print(Insert2Excel(allinfo))
        end = datetime.datetime.now().replace(microsecond=0)
        print('End:', end)
        print('Running time: %s Seconds' % (end - start))
  • 相关阅读:
    Asp.Net Web API 2第八课——Web API 2中的属性路由
    Asp.Net Web API 2第七课——Web API异常处理
    Asp.Net Web API 2第六课——Web API路由和动作选择
    Asp.Net Web API 2第五课——Web API路由
    开始学习python
    BMI 小程序 购物车
    深浅copy 文件操作
    字典 dict 集合set
    基本数据类型 (str,int,bool,tuple,)
    python 运算符
  • 原文地址:https://www.cnblogs.com/ouzai/p/13739321.html
Copyright © 2011-2022 走看看