zoukankan      html  css  js  c++  java
  • 抓取模板

    import pandas as pd
    from lxml import etree
    import json,requests,random
    import os,time,shutil,traceback
    
    def get_data(url, headers, timeout=10):
        """Fetch a JSON document from *url* and yield the ``title`` of each item.

        This is a generator: the HTTP request is only sent once iteration
        starts. Each title is also printed as it is produced.

        Args:
            url: Address to send the GET request to.
            headers: Mapping of HTTP request headers.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block forever on a stalled
                connection.

        Yields:
            The value stored under the ``'title'`` key of each element of the
            decoded JSON payload (assumes the payload is an iterable of
            mappings -- confirm against the real endpoint).

        Any exception raised while requesting, decoding or iterating is
        caught, its traceback is printed, and the generator simply ends.
        A non-200 response produces no items.
        """
        try:
            store_res = requests.get(url=url, headers=headers, timeout=timeout)
            if store_res.status_code != 200:
                # Nothing usable came back; end the generator without items.
                return
            for item in store_res.json():
                result = item['title']
                print(result)
                yield result
        except Exception:
            # Best-effort scraping: report the failure and stop yielding.
            traceback.print_exc()
    
    def save_data(data, sheet, head):
        """Write *data* to a dated Excel workbook and archive the script beside it.

        The rows in *data* are loaded into a DataFrame with *head* as column
        names. A directory named after today's date (``YYYYMMDD``) is created
        in the current working directory, with a ``<sheet><date>`` directory
        inside it. The file ``<sheet>.py`` from the current working directory
        is copied there, and the DataFrame is written to
        ``<sheet><date>.xlsx`` in a worksheet named *sheet*.

        Args:
            data: Iterable of rows; it is fully consumed before any
                directory is created.
            sheet: Base name used for the output folder, workbook, worksheet
                and the ``.py`` file that gets copied.
            head: Column names for the DataFrame.
        """
        # Materialise the rows first so a lazy source runs before any file I/O.
        frame = pd.DataFrame(list(data), columns=head)

        today = time.strftime("%Y%m%d", time.localtime())
        os.makedirs(today, exist_ok=True)

        target_dir = './{0}/'.format(today) + sheet + today
        os.makedirs(target_dir, exist_ok=True)

        # Keep a copy of the script next to the data it produced.
        script_name = sheet + '.py'
        shutil.copy(script_name, target_dir + '/' + script_name)

        workbook_path = target_dir + '/{0}{1}.xlsx'.format(sheet, today)
        frame.to_excel(workbook_path, index=False, sheet_name=sheet)
        print('Done!')
    
    def main():
        """Run the scrape: fetch the titles and save them to a dated workbook.

        The output name is derived from this script's own file name, so the
        workbook, its folder and the archived script copy all share it.
        """
        headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        # Template placeholder -- presumably to be replaced with the target URL.
        url = ' '
        data = get_data(url, headers)

        # The original stored this in `sku` but passed the undefined name
        # `sheet` to save_data, raising NameError; bind it to `sheet` instead.
        sheet = os.path.splitext(os.path.basename(__file__))[0]
        # Template placeholder -- presumably to be replaced with column names.
        head = [' ']
        save_data(data, sheet, head)
    
    # Run the scrape only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()
    
  • 相关阅读:
    nodeJs爬虫小程序练习
    promise
    node-并发控制
    高性能Js—数据存取
    javascript测试框架mocha
    npm、模块暴露,小知识点区别
    高性能Js-加载和执行
    Request对象获得参数方法:query和body方法
    nvm工具
    在express中提供静态文件笔记
  • 原文地址:https://www.cnblogs.com/hankleo/p/11687204.html
Copyright © 2011-2022 走看看