zoukankan      html  css  js  c++  java
  • python简单爬虫(股票信息)

     1 
     2 import requests
     3 from bs4 import BeautifulSoup
     4 import traceback
     5 import re
     6 
     7 def getHTMLText(url, code="utf-8"):   #获取股票页面信息
     8     try:
     9         r = requests.get(url)
    10         r.raise_for_status()
    11         r.encoding = code
    12         return r.text
    13     except:
    14         return ""
    15 
    16 def getStockList(lst, stockURL):      #获取股票列表
    17     html = getHTMLText(stockURL, "GB2312")
    18     soup = BeautifulSoup(html, 'html.parser') 
    19     a = soup.find_all('a')
    20     for i in a:
    21         try:
    22             href = i.attrs['href']
    23             lst.append(re.findall(r"[s][hz]d{6}", href)[0])
    24         except:
    25             continue
    26 
    27 def getStockInfo(lst, stockURL, fpath):  #获取单支股票信息
    28     count = 0
    29     for stock in lst:
    30         url = stockURL + stock + ".html"
    31         html = getHTMLText(url)
    32         try:
    33             if html=="":
    34                 continue
    35             infoDict = {}
    36             soup = BeautifulSoup(html, 'html.parser')
    37             stockInfo = soup.find('div',attrs={'class':'stock-bets'})
    38 
    39             name = stockInfo.find_all(attrs={'class':'bets-name'})[0]
    40             infoDict.update({'股票名称': name.text.split()[0]})
    41             
    42             keyList = stockInfo.find_all('dt')
    43             valueList = stockInfo.find_all('dd')
    44             for i in range(len(keyList)):
    45                 key = keyList[i].text
    46                 val = valueList[i].text
    47                 infoDict[key] = val
    48             
    49             with open(fpath, 'a', encoding='utf-8') as f:   #写入文件
    50                 f.write( str(infoDict) + '
    ' )
    51                 count = count + 1
    52                 print("
    当前进度: {:.2f}%".format(count*100/len(lst)),end="")
    53         except:
    54             count = count + 1
    55             print("
    当前进度: {:.2f}%".format(count*100/len(lst)),end="")
    56             continue
    57 
    58 def main():
    59     stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
    60     stock_info_url = 'https://gupiao.baidu.com/stock/'
    61     output_file = 'C:/BaiduStockInfo.txt'
    62     slist=[]
    63     getStockList(slist, stock_list_url)
    64     getStockInfo(slist, stock_info_url, output_file)
  • 相关阅读:
    json页面解析
    map判断
    将页面中所有的checkbox设成单选得
    配置两个环境变量:
    一个input框边输入,另外一个input框中边显示的触发事件
    页面tr和td的隐藏与显示
    判断声明出来的list为空的时候,list!=null
    从一个表中往另外一个表中插入数据用到的SQL
    final使用方法
    Android学习笔记(23):列表项的容器—AdapterView的子类们
  • 原文地址:https://www.cnblogs.com/ouzai/p/13048595.html
Copyright © 2011-2022 走看看