```python
# -*- coding: utf-8 -*-
import os
import re

import requests
from bs4 import BeautifulSoup


# Fetch a page and return its text ('' on any failure).
def get_html_text(url):
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception:
        return ''


# Collect the stock codes from the listing page.
def get_stock_list(url):
    html = get_html_text(url)
    soup = BeautifulSoup(html, 'html.parser')
    # Every stock link is an <a> tag; the code is embedded in its href.
    a = soup.find_all('a')
    lst = []
    for i in a:
        try:
            href = i.attrs['href']
            # Match codes such as sh600000 / sz000001.
            lst.append(re.findall(r'[s][hz]\d{6}', href)[0])
        except Exception:
            continue
    return lst


# Fetch each stock's detail page and append its info to a file.
def get_and_write_stock_info(lst):
    desktop = os.path.join(os.path.expanduser("~"), 'Desktop')
    for i, stock in enumerate(lst):
        try:
            url = STOCK_URL + stock + '.html'
            html = get_html_text(url)
            if html == '':
                continue
            soup = BeautifulSoup(html, 'html.parser')
            stock_info = soup.find('div', attrs={'class': 'stock-bets'})
            info_dict = {'股票代碼': stock}
            # Stock name.
            name = stock_info.find_all(attrs={'class': 'bets-name'})[0]
            info_dict.update({'股票名稱': name.text.split()[0]})
            # The remaining fields come as <dt>key</dt><dd>value</dd> pairs.
            key_list = stock_info.find_all('dt')
            value_list = stock_info.find_all('dd')
            if len(key_list) == 0:
                continue
            for k, v in zip(key_list, value_list):
                info_dict[k.text] = v.text
            # Append this stock's record to the output file.
            with open(os.path.join(desktop, SAVE_FILE_PATH), 'a',
                      encoding='utf-8') as f:
                f.write(str(info_dict) + '\n')
            print('\r當前進度: {:.2f}%'.format(i * 100 / len(lst)), end='')
        except Exception:
            continue


if __name__ == '__main__':
    # Stock-code listing page on eastmoney.com.
    STOCK_LIST_URL = 'http://quote.eastmoney.com/stocklist.html'
    # Per-stock detail pages on Baidu Stocks.
    STOCK_URL = 'https://gupiao.baidu.com/stock/'
    # Output file (written to the desktop).
    SAVE_FILE_PATH = '股票信息.txt'

    stock_list = get_stock_list(STOCK_LIST_URL)
    get_and_write_stock_info(stock_list)
```
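The regex in `get_stock_list` relies on each listing href carrying the exchange prefix (`sh`/`sz`) plus a six-digit code. A quick check with a sample href (the exact URL shape is an assumption based on how the eastmoney listing links looked):

```python
import re

# Hypothetical href in the shape the listing page used.
href = 'http://quote.eastmoney.com/sh600000.html'
print(re.findall(r'[s][hz]\d{6}', href))  # -> ['sh600000']
```

Since each record is written with `str(info_dict)`, the output file can be parsed back with `ast.literal_eval`. A minimal read-back sketch, assuming the same desktop path and file name as the constants above:

```python
import ast
import os

desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
# Each line in the output file is the repr of one info dict.
with open(os.path.join(desktop, '股票信息.txt'), encoding='utf-8') as f:
    records = [ast.literal_eval(line) for line in f if line.strip()]

# Print the code and name of the first few records.
for rec in records[:3]:
    print(rec['股票代碼'], rec.get('股票名稱', ''))
```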
Reference:
"Python小爬虫，爬取当前全部股票信息" (Python mini-crawler: scrape info for all current stocks), https://blog.csdn.net/weixin_44521703/article/details/95525861