import requests
from bs4 import BeautifulSoup
import json
import re
import csv

Cookie = "yfx_c_g_u_id_10000042=_ck19090210233412150518192701183; VISITED_MENU=%5B%228451%22%2C%228466%22%2C%2211169%22%2C%228467%22%2C%228468%22%2C%228489%22%2C%228488%22%2C%2211171%22%2C%2211172%22%2C%229881%22%2C%229880%22%5D; yfx_f_l_v_t_10000042=f_t_1567391014184__r_t_1587432339504__v_t_1587439706300__r_c_7"
url = "http://query.sse.com.cn/commonQuery.do?jsonCallBack=jsonpCallback34451&isPagination=true&sqlId=COMMON_BOND_SCSJ_SCTJ_TJYB_JYQK_L&pageHelp.pageSize=20&pageHelp.cacheSize=1&pageHelp.pageNo=1&pageHelp.beginPage=1&pagecache=false&TRADEDATE=2020-03&_=1587439706736"
headers = {
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
    'Cookie': Cookie,
    'Connection': 'keep-alive',
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Host': 'query.sse.com.cn',
    'Referer': 'http://www.sse.com.cn/market/bonddata/overview/monthly/'
}
# The dynamic data URL can be found in the browser's F12 Network panel; just copy it over

# gethtml(url, headers):
req = requests.get(url, headers=headers)
req.encoding = 'utf-8'
req = req.text

# Cut the "result" array out of the JSONP response and strip the wrapper characters
data = re.search(r'"result":([\s\S]*?)$', req).group(1) \
    .replace('[', '').replace(']', '').replace('-', '') \
    .replace(',"sqlId":"COMMON_BOND_SCSJ_SCTJ_TJYB_JYQK_L","texts":null,"type":"","validateCode":""})', '')
data = data.replace(',{', ',,{').split(',,')
print(data)

# Fields: TRADE_DATE, TYPE (type), VOLUME (number of trades), AMOUNT (turnover, 10k yuan), AVG_PRICE (weighted average price)
amount = []
date = []
cjbs = []
jqpjjg = []
types = []
for i in range(len(data)):
    das = eval(data[i])  # each record string is a dict literal, so eval turns it into a dict
    date.append(das["TRADE_DATE"])
    cjbs.append(das["VOLUME"])
    jqpjjg.append(das["AVG_PRICE"])
    amount.append(das["AMOUNT"])
    types.append(das["TYPE"])

# Save the data to CSV
f = open(r'e:\shuju\3.csv', 'w', newline="")
writer = csv.writer(f)
writer.writerow(('时间', '类型', '成交笔数', '成交金额(万元)', '加权平均价格'))
for i in range(len(data)):
    writer.writerow((date[i], types[i], cjbs[i], amount[i], jqpjjg[i]))
f.close()
The key step is converting each record string into a dict with eval.
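Since json is already imported, a cleaner option is to parse the whole JSONP payload with json.loads instead of regex surgery plus eval. Below is a minimal sketch under the assumption that the response is the usual jsonpCallbackXXXX({...}) wrapper and that "result" holds a list of records with the same field names used above; the helper name fetch_records is just for illustration.

import json
import re
import requests

def fetch_records(url, headers):
    resp = requests.get(url, headers=headers)
    resp.encoding = 'utf-8'
    # Keep only the JSON object between the outermost parentheses of the JSONP wrapper
    body = re.search(r'\((.*)\)\s*$', resp.text, re.S).group(1)
    payload = json.loads(body)
    return payload.get('result', [])

# records = fetch_records(url, headers)
# for rec in records:
#     print(rec['TRADE_DATE'], rec['TYPE'], rec['VOLUME'], rec['AMOUNT'], rec['AVG_PRICE'])

json.loads also avoids running eval on text that comes back from the server, which is safer if the response format ever changes.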