# 时隔多年，开盘啦APP龙虎榜席位标签爬虫再次上路。代码如下：非专业开发，很业余，数据解析和存储环节还有很多不到位的地方，欢迎留言交流。
# -*- coding:utf-8 -*-
import pymysql
import datetime
import pandas as pd
import akshare as ak
import requests
import json
import pymysql
from sqlalchemy import create_engine
# Output column name -> key of a SellList/BuyList entry in the API response.
_SEAT_FIELDS = (
    ('营业部ID', 'ID'),
    ('营业部名称', 'Name'),
    ('营业部标签', 'YouZiIcon'),
    ('资金席位ID', 'GroupID'),
    ('资金席位名称', 'GroupIcon'),
)


def _seat_to_frame(seat):
    """Convert one SellList/BuyList entry into a DataFrame with the output columns."""
    record = {column: seat.get(key) for column, key in _SEAT_FIELDS}
    if any(isinstance(value, (list, tuple)) for value in record.values()):
        # NOTE(review): the response schema is not visible here. If a field is
        # list-valued, keep pandas' dict-of-columns behaviour (scalars are
        # broadcast, one row per list element, an empty list yields no rows).
        return pd.DataFrame(record)
    # A dict holding only scalars must be wrapped in a list: passing it
    # directly makes pandas raise "If using all scalar values, you must pass
    # an index".
    return pd.DataFrame([record])


def spider_lhb_sales_department(date, code):
    """Fetch the seats of one stock on one day and append them to the database.

    Posts a ``GetNewOneStockInfo`` request to the kaipanla API, takes the first
    record of the returned ``List``, flattens its ``SellList`` entries followed
    by its ``BuyList`` entries into a DataFrame, prints it and appends it to
    the ``ods_basic_department_info`` table through the module-level
    ``engine1``.

    The call is best-effort: every failure is reported with ``print`` and the
    function returns without raising, so one bad (date, code) pair does not
    stop the caller.

    Args:
        date: Value sent as the ``Time`` form field.
        code: Value sent as the ``StockID`` form field.

    Returns:
        None.
    """
    # The original literal ended in " HTTP/1.1", which belongs to the request
    # line of a captured HTTP request, not to the URL.
    url = 'https://lhb.kaipanla.com/w1/api/index.php?apiv=w28&PhoneOSNew=1&VerSion=5.2.0.1'
    data = {
        'c': 'Stock',
        'a': 'GetNewOneStockInfo',
        'Type': 0,
        'Time': date,
        'StockID': code,
        'DeviceID': 'ffffffff-f916-2186-0000-00000cdf9093',
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; VOG-AL00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.70 Mobile Safari/537.36;kaipanla 5.2.0.1',
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.post(url=url, data=data, headers=headers, timeout=30)
        # json.loads instead of eval: eval would execute whatever the server
        # sends and fails on the JSON literals true/false/null.
        payload = json.loads(response.text)
    except (requests.RequestException, ValueError) as error:
        print('request failed:', date, code, repr(error))
        return

    records = payload.get('List') if isinstance(payload, dict) else None
    if not records:
        print('no data returned:', date, code)
        return
    detail = records[0]

    # Same order as before: sell-side seats first, then buy-side seats.
    seats = list(detail.get('SellList') or []) + list(detail.get('BuyList') or [])
    if not seats:
        print('no seats returned:', date, code)
        return

    try:
        df = pd.concat([_seat_to_frame(seat) for seat in seats], ignore_index=True)
        print(df)
        # engine1 is the module-level SQLAlchemy engine created by the script
        # entry point.
        df.to_sql('ods_basic_department_info', con=engine1, if_exists='append', index=False)
    except Exception as error:
        # Per-item boundary: report the failure instead of silently dropping it.
        print('parse/store failed:', date, code, repr(error))
if __name__ == '__main__':
    # Today's date as YYYY-MM-DD. NOTE(review): not used by any code visible
    # in this file; kept in case something else relies on it.
    today = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
    # SQLAlchemy engines: engine1 is the target database read as a global by
    # spider_lhb_sales_department, engine2 is the source of the stock list.
    # NOTE(review): credentials are hard-coded in the connection strings;
    # consider loading them from the environment or a config file.
    engine1 = create_engine('mysql+pymysql://root:123456@localhost/stock_ods_db?charset=utf8')
    engine2 = create_engine('mysql+pymysql://root:123456@localhost/stock_dwd_db?charset=utf8')
    # Distinct (t_date, v_code) pairs to fetch.
    lhb_df = pd.read_sql('select distinct t_date,v_code from dwd_stock_special_lhb', con=engine2)
    # Walk the rows pairwise. Nesting one loop per column would request every
    # date x code combination, most of which are not rows of the query result.
    for date, code in zip(lhb_df['t_date'].values, lhb_df['v_code'].values):
        print('开始')
        spider_lhb_sales_department(date, code)
        print('结束')