zoukankan      html  css  js  c++  java
  • 动态网页爬取方法

    import csv
    import json
    import re

    import requests
    from bs4 import BeautifulSoup

    # Scrape a dynamically loaded page: request the JSON(P) endpoint the page
    # itself calls, extract its "trends" array, and store the rows in a CSV file.
    # The endpoint URL and the request headers can be found in the browser's
    # F12 "Network" panel and copied from there.

    Cookie = "HAList=a-sz-300520-%u79D1%u5927%u56FD%u521B; em_hq_fls=js; st_si=07173812051945; st_asi=delete; st_pvi=43571872917364; st_sp=2019-08-19%2016%3A08%3A52; st_inirUrl=https%3A%2F%2Fcn.bing.com%2F; st_sn=4; st_psi=20190920155504470-111000300841-5541718974; qgqp_b_id=8305e24557b9f3a02c7da68092480aca"
    url = "http://push2.eastmoney.com/api/qt/stock/trends2/get?secid=1.000002&fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58&ut=e1e6871893c6386c5ff6967026016627&iscr=0&cb=cb_1568966339259_56575186&cb_1568966339259_56575186=cb_1568966339259_56575186"
    # No explicit 'Host' entry: the one previously listed here
    # ('query.sse.com.cn') named a different site than `url`. Leaving it out
    # lets requests derive the Host header from the URL.
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36',
        'Cookie': Cookie,
        'Connection': 'keep-alive',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Referer': 'http://quote.eastmoney.com/center/hszs.html',
    }


    def _parse_trends(text):
        """Return the rows held in the "trends" array of a response body.

        The first JSON array that follows a ``"trends":`` key in *text* is
        decoded, and every entry is split on commas into a list of fields.
        Working on the array alone means the surrounding JSONP wrapper never
        has to be parsed and cannot leak into the last field.

        Args:
            text: Raw response body (plain JSON or JSONP).

        Returns:
            A list of rows, each row being a list of field strings.

        Raises:
            ValueError: If *text* contains no "trends" array.
        """
        # NOTE(review): assumes every entry is a comma-separated string that
        # contains no ']' character - confirm against a live response.
        match = re.search(r'"trends":\s*(\[[\s\S]*?\])', text)
        if match is None:
            raise ValueError('no "trends" array found in response')
        return [entry.split(',') for entry in json.loads(match.group(1))]


    # requests has no default timeout, so set one to avoid hanging forever,
    # and fail loudly on an HTTP error instead of parsing an error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    data = _parse_trends(response.text)
    print(data)

    # Raw string: in a normal literal '\3' is an octal escape, which would
    # corrupt the file name. The context manager closes (and flushes) the file.
    with open(r'e:\shuju\3.csv', 'w', encoding='gb18030', newline="") as f:
        writer = csv.writer(f)
        # The column names are placeholders; they were never matched against
        # the fields the endpoint actually returns.
        writer.writerow(('日期', '最新价', '单位净值', '累计净值', '日增长额', '日增长率', '申购', '赎回'))
        writer.writerows(data)
  • 相关阅读:
    本地计算机 上的 Redis Server 服务启动后停止
    RabbitMQ 命令行
    mysql ORDER BY 中文出现错误问题
    使用javascript纯前端导出excel
    软件测试概念学习
    excel控件只为简单写入数据表--github找到ExcelUtil笔记
    快速创建Spring web项目
    PQGrid商业化的表格组件
    mybatis传入参数为0被误认为是空字符串的解决方法
    MyBatis like报错
  • 原文地址:https://www.cnblogs.com/zsf-note/p/11598579.html
Copyright © 2011-2022 走看看