zoukankan      html  css  js  c++  java
  • 抓取模板

    import pandas as pd
    from lxml import etree
    import json,requests,random
    import os,time,shutil,traceback
    
    def get_data(url, headers, timeout=10):
        """Fetch a JSON document from ``url`` and yield each item's ``'title'``.

        This is a generator, so the HTTP request is only sent once iteration
        starts. Failures are reported and end the iteration rather than being
        raised, so a failed scrape yields an empty sequence.

        Args:
            url: Address requested with HTTP GET.
            headers: Mapping of HTTP request headers sent with the request.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block forever on a stalled server.

        Yields:
            The ``'title'`` value of every item in the decoded JSON body, in
            response order. Each value is also printed as it is produced.
        """
        try:
            store_res = requests.get(url=url, headers=headers, timeout=timeout)
            if store_res.status_code != 200:
                # Report the failure so an empty result is distinguishable
                # from a response that legitimately contained no items.
                print('Request failed: HTTP {0} for {1}'.format(
                    store_res.status_code, url))
                return
            # The body must decode to an iterable of mappings that each carry
            # a 'title' key; anything else lands in the handler below.
            for s in store_res.json():
                result = s['title']
                print(result)
                yield result
        except Exception:
            # Best-effort scraping: print the traceback and stop yielding.
            traceback.print_exc()
    
    def save_data(data, sheet, head):
        """Write ``data`` to a dated Excel workbook and copy the script beside it.

        The output lands in ``./<YYYYMMDD>/<sheet><YYYYMMDD>/`` relative to the
        current working directory, where the date is today's local date.

        Args:
            data: Iterable of rows; it is fully consumed to build the table.
            sheet: Base name used for the output folder, the workbook file,
                the worksheet, and the ``<sheet>.py`` file that gets copied.
            head: Column labels passed to ``pandas.DataFrame``.
        """
        # Materialise the rows first: when ``data`` is a generator this is the
        # point at which it actually runs.
        frame = pd.DataFrame(list(data), columns=head)

        stamp = time.strftime("%Y%m%d", time.localtime())
        os.makedirs(stamp, exist_ok=True)
        out_dir = './{0}/'.format(stamp) + sheet + stamp
        os.makedirs(out_dir, exist_ok=True)

        # Keep a snapshot of ``<sheet>.py`` (looked up in the current working
        # directory) next to the data it produced.
        script_name = sheet + '.py'
        shutil.copy(script_name, out_dir + '/' + script_name)

        workbook = out_dir + '/{0}{1}.xlsx'.format(sheet, stamp)
        frame.to_excel(workbook, index=False, sheet_name=sheet)
        print('Done!')
    
    def main():
        """Scrape the configured URL and save the results to an Excel workbook.

        The output is named after this script file (its base name without the
        extension). ``url`` and ``head`` are blank placeholders in this
        template and need to be filled in before use.
        """
        headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        url = ' '  # placeholder: target URL to scrape
        data = get_data(url, headers)

        # The original bound this value to ``sku`` but then passed the
        # undefined name ``sheet`` to save_data, raising NameError on every run.
        sheet = os.path.splitext(os.path.basename(__file__))[0]
        head = [' ']  # placeholder: column labels for the output table
        save_data(data, sheet, head)
    
    # Run the scraper only when this file is executed directly as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main()
    
  • 相关阅读:
    实验一 软件开发文档与工具的安装与使用
    ATM管理系统
    举例分析流程图与活动图的区别与联系
    四则运算
    机器学习 实验三
    机器学习 实验四
    机器视觉实验二
    实验三
    实验二
    实验一
  • 原文地址:https://www.cnblogs.com/hankleo/p/11687204.html
Copyright © 2011-2022 走看看