下面使用雪球网爬取股票的各项数值。
从雪球网爬取到的信息存储方式比较特殊,需要从中提取出字典形式的数据。这里以提取沪市的部分股票为例:由于雪球网的各项数据基本都可以通过"先查找关键词,再取其索引 +1 处的文本"来获取,所以这里只使用简单的 XPath。
注意:之前尝试用正则表达式提取数据时踩过坑,因为各项数据的 text 分布并不一致,所以最好用查找关键词的方法来筛选自己想要的数据。
下面是提取页面元素的主要函数。其中的 URL 也可以换成科技股或其他股票的页面,因为用于检索的关键词是一样的:
# Label text found in the quote table's <td> cells -> key used in the record.
# The value belonging to a label is the text node that follows it.
_TD_LABEL_TO_KEY = {
    "最高:": 'highest',
    "最低:": 'lowerest',
    "今开:": 'today_start',
    "涨停:": 'high_stop',
    "成交量:": 'ok_total',
    "昨收:": 'lastday_over',
    "跌停:": 'low_stop',
    "成交额:": 'ok_test',
    "量比:": 'liang_percent',
    "换手:": 'change_hand',
    "市盈率(动):": 'shiying_dong',
    "市盈率(TTM):": 'shiying_TTM',
    "委比:": 'wei_percent',
    "振幅:": 'zheng_way',
    "市盈率(静):": 'shiying_jin',
    "市净率:": 'shijin_percent',
    "每股收益:": 'per_stock_fit',
    "股息(TTM):": 'stock_xi_TTM',
    "总股本:": 'total_stock_ben',
    "总市值:": 'total_stock_value',
    "52周最高:": 'highest_52',
    "52周最低:": 'lowerest_52',
    "货币单位:": 'cop',
}

# Longest labels first, so "52周最高:" is recognised before its substring "最高:".
_TD_LABELS_LONGEST_FIRST = sorted(_TD_LABEL_TO_KEY, key=len, reverse=True)

# Seconds to wait for the server before giving up on one request.
_REQUEST_TIMEOUT = 10


def _match_td_label(cell_text):
    """Return the record key for the most specific label contained in cell_text, or None."""
    for label in _TD_LABELS_LONGEST_FIRST:
        if label in cell_text:
            return _TD_LABEL_TO_KEY[label]
    return None


def _extract_quote(stock_html):
    """Build the record dict for one parsed stock page.

    Raises IndexError when one of the fixed-position nodes (name, price,
    status, time) is missing from the page.
    """
    itemlist = {}
    # Stock name.
    itemlist['stockitem_name'] = stock_html.xpath('//div[@class="stock-name"]/text()')[0]
    # Current price: it is not inside the <td> table, so it is taken by its
    # position among the <strong> nodes. NOTE(review): index 8 depends on the
    # page layout -- confirm if the site changes.
    itemlist['stockitem_result'] = stock_html.xpath('//body//strong/text()')[8]
    # The first span holds the current status, the second the time shown on the page.
    time_spans = stock_html.xpath('//div[@class="stock-time"]/span/text()')
    itemlist['stock_status'] = time_spans[0]
    itemlist['stock_time'] = time_spans[1]

    # Walk (cell, next cell) pairs: whenever a cell contains a known label,
    # the following text node is stored as its value. Pairing with zip means a
    # trailing label without a value is ignored rather than raising.
    stock_items = stock_html.xpath('//td//text()')
    for cell_text, next_text in zip(stock_items, stock_items[1:]):
        key = _match_td_label(cell_text)
        if key is not None:
            itemlist[key] = next_text
    return itemlist


def get_website():
    """Fetch every stock in the module-level stock_list and append one CSV row each.

    Reads the module-level names stock_list and headers. Each record is
    printed and passed to writer_to_csv_item. A stock whose request fails or
    whose page lacks the expected nodes is reported and skipped, so one bad
    page does not abort the rest of the batch.
    """
    for stock_number in stock_list:
        stock_URL = "https://xueqiu.com/S/SH{}".format(str(stock_number))
        try:
            # A timeout keeps one hung connection from blocking the whole job.
            response = requests.get(stock_URL, headers=headers, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as err:
            print("request failed for {}: {}".format(stock_number, err))
            continue

        stock_html = etree.HTML(response.text)
        if stock_html is None:
            print("empty page for {}".format(stock_number))
            continue

        try:
            itemlist = _extract_quote(stock_html)
        except IndexError as err:
            print("unexpected page layout for {}: {}".format(stock_number, err))
            continue

        print(itemlist)
        writer_to_csv_item(itemlist)
调用主函数,其中的股票列表可替换或用input自己添加:
这里每 30 秒记录一次,但各条记录中的时间可能会不一样(记录的是页面上显示的时间)。
if __name__ == '__main__':
    # Stock codes to track (requested with the Shanghai "SH" prefix by
    # get_website). Replace them, or append a code read from input().
    stock_list = [
        '688157', '600018', '600072', '600635', '600497',
        '603658', '688208', '600550', '600498', '600703',
        '603815', '600127', '600305', '603777', '688004',
    ]
    # Browser-like User-Agent sent with every request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/73.0.3683.75 Safari/537.36"
        ),
    }

    # Write the CSV column titles once before the periodic job starts.
    writer_ss = write_to_csv_header()

    # Run get_website on a 30-second interval; start() is the last statement.
    scheduler = BlockingScheduler()
    scheduler.add_job(get_website, 'interval', seconds=30)
    scheduler.start()
写入csv函数:(好丑)还是有键名
# Keys of one scraped record, in CSV column order.
_CSV_FIELDNAMES = [
    'stockitem_name', 'stockitem_result', 'stock_status', 'stock_time',
    'highest', 'lowerest', 'today_start', 'high_stop', 'ok_total',
    'lastday_over', 'low_stop', 'ok_test', 'liang_percent', 'change_hand',
    'shiying_dong', 'shiying_TTM', 'wei_percent', 'zheng_way',
    'shiying_jin', 'shijin_percent', 'per_stock_fit', 'stock_xi_TTM',
    'total_stock_ben', 'total_stock_value', 'highest_52', 'lowerest_52',
    'cop',
]

# Human-readable column titles, aligned one-to-one with _CSV_FIELDNAMES.
_CSV_HEADER = [
    '股票名称', '现在价格', '现在状态', '当前记录时间', '最高', '最低',
    '今日开盘价', '涨停价格', '成交量', '昨日收盘价', '跌停价格', '成交额',
    '量比', '换手', '市盈率(动)', '市盈率(TTM)', '委比', '振幅',
    '市盈率(静)', '市净率', '每股收益', '股息(TTM)', '总股本', '总市值',
    '52周最高', '52周最低', '货币单位',
]


def write_to_csv_header():
    """Write the column-title row to stock.csv if the file is still empty.

    The file is opened in append mode, so existing rows are kept. The title
    row is written only when the file has no content yet, which avoids a
    second title row in the middle of the data when the program is restarted.

    Returns:
        The csv.writer that was used. Its underlying file is already closed
        when this function returns, so it cannot be used for further writes;
        it is returned only to keep the original interface.
    """
    with open('stock.csv', 'a', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        # In append mode the position starts at the end of the file, so a
        # position of 0 means the file is new or empty.
        if f.tell() == 0:
            writer.writerow(_CSV_HEADER)
    return writer


def writer_to_csv_item(itemlist):
    """Append one record to stock.csv as a single data row.

    Args:
        itemlist: dict mapping the keys in _CSV_FIELDNAMES to values. Missing
            keys produce empty cells; a key outside _CSV_FIELDNAMES makes
            csv.DictWriter raise ValueError.
    """
    with open('stock.csv', 'a', encoding='utf-8-sig', newline='') as w:
        writer_1 = csv.DictWriter(w, fieldnames=_CSV_FIELDNAMES)
        # No writeheader() here: the title row is written once by
        # write_to_csv_header, not before every record.
        writer_1.writerow(itemlist)