zoukankan      html  css  js  c++  java
  • Python爬蟲獲取股票信息代碼分享

    # -*- coding: utf-8 -*-
    
    import requests
    import traceback
    import re
    import os
    from bs4 import BeautifulSoup
    
    
    # 獲取網頁內容
    def get_html_text(url, timeout=10):
        """Download a page and return its decoded text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests`` may block indefinitely on a stalled
                connection.

        Returns:
            The response body as text, or an empty string if the request
            fails (connection error, timeout, invalid URL, or HTTP error
            status).
        """
        try:
            r = requests.get(url, timeout=timeout)
            # Turn 4xx/5xx responses into exceptions so they hit the fallback.
            r.raise_for_status()
            # Use the encoding detected from the body instead of the one
            # guessed from the headers.
            r.encoding = r.apparent_encoding
            return r.text
        except requests.RequestException:
            # Best-effort fetch: callers treat '' as "page unavailable".
            return ''
    
    
    # 獲取股票代碼列表
    def get_stock_list(url):
        """Collect stock codes from the hyperlinks of a listing page.

        The page at ``url`` is fetched with ``get_html_text`` and every ``<a>``
        tag's ``href`` is searched for a code of the form ``sh``/``sz``
        followed by six digits.

        Args:
            url: Address of the page that links to the individual stocks.

        Returns:
            A list with the first stock code found in each matching link, in
            document order. Links without an ``href`` or without a code are
            skipped. Empty if the page could not be fetched.
        """
        html = get_html_text(url)
        soup = BeautifulSoup(html, 'html.parser')
        # `\d` (not a bare `d`) is required to match the six digits.
        code_pattern = re.compile(r's[hz]\d{6}')
        lst = []
        for link in soup.find_all('a'):
            href = link.attrs.get('href')
            if not href:
                continue
            found = code_pattern.search(href)
            if found:
                lst.append(found.group(0))
        return lst
    
    
    # 獲取並寫入每隻個股的信息
    def get_and_write_stock_info(lst):
        """Fetch the detail page of each stock and append its data to a file.

        For every code in ``lst`` the page ``STOCK_URL + code + '.html'`` is
        downloaded, the ``div.stock-bets`` element is parsed, and one line
        holding the ``str()`` of the collected dict is appended to
        ``SAVE_FILE_PATH`` inside the user's ``Desktop`` directory. Progress
        is printed on a single, continuously rewritten console line.

        Reads the module-level names ``STOCK_URL`` and ``SAVE_FILE_PATH``.

        Args:
            lst: Iterable sequence of stock codes (as returned by
                ``get_stock_list``).

        Returns:
            None. Stocks whose page cannot be fetched or parsed are skipped.
        """
        desktop = os.path.join(os.path.expanduser("~"), 'Desktop')
        # os.path.join picks the right separator on every platform.
        save_path = os.path.join(desktop, SAVE_FILE_PATH)
        total = len(lst)
        for i, stock in enumerate(lst):
            try:
                url = STOCK_URL + stock + '.html'
                html = get_html_text(url)
                if html == '':
                    continue
                soup = BeautifulSoup(html, 'html.parser')
                stock_info = soup.find('div', attrs={'class': 'stock-bets'})
                if stock_info is None:
                    # Page has no quote panel; nothing to record.
                    continue
                info_dict = {}
                info_dict.update({'股票代碼': stock})
                # The first whitespace-separated token of the name element is
                # the stock name.
                name = stock_info.find_all(attrs={'class': 'bets-name'})[0]
                info_dict.update({'股票名稱': name.text.split()[0]})
                # Remaining fields come as <dt>label</dt><dd>value</dd> pairs.
                key_list = stock_info.find_all('dt')
                value_list = stock_info.find_all('dd')
                if not key_list:
                    continue
                for k, v in zip(key_list, value_list):
                    info_dict[k.text] = v.text
                with open(save_path, 'a', encoding='utf-8') as f:
                    f.write(str(info_dict) + '\n')
                # '\r' returns to the line start so the progress overwrites
                # itself; i + 1 because this stock is now finished.
                print("\r當前進度: {:.2f}%".format((i + 1) * 100 / total), end="")
            except Exception:
                # Best-effort crawl: a malformed page or write error for one
                # stock must not abort the remaining ones.
                continue
    
    
    # 主函數
    if __name__ == "__main__":
        # Name of the output file; get_and_write_stock_info reads this
        # module-level name and places the file in the user's Desktop folder.
        SAVE_FILE_PATH = "股票信息.txt"
        # Base URL of the per-stock detail pages; get_and_write_stock_info
        # appends "<code>.html" to it.
        STOCK_URL = "https://gupiao.baidu.com/stock/"
        # Listing page whose links carry the stock codes.
        STOCK_LIST_URL = "http://quote.eastmoney.com/stocklist.html"

        # Step 1: collect the codes; step 2: fetch and store each stock.
        stock_list = get_stock_list(STOCK_LIST_URL)
        get_and_write_stock_info(stock_list)

    参考:

    Python小爬虫,爬取当前全部股票信息

    https://blog.csdn.net/weixin_44521703/article/details/95525861

    Python小爬虫,爬取当前全部股票信息

    https://blog.csdn.net/weixin_44521703/article/details/95525861

  • 相关阅读:
    pm3
    算法交易系列研究之一
    CDS究竟是个什么鬼?它直接导致了次贷危机?
    源特定组播(SSM:Source Specific Multicast)
    投资交易系统常用表
    交易系统解析(六)前台报盘应用设计要点
    人物
    句子
    康德拉季耶夫周期
    非标
  • 原文地址:https://www.cnblogs.com/ilovecpp/p/12750170.html
Copyright © 2011-2022 走看看