zoukankan      html  css  js  c++  java
  • python爬取新浪股票数据—绘图【原创分享】

    目标:不做蜡烛图,只用折线图绘图,绘出开盘价、最高价、收盘价、最低价四条线之间的关系。

    注:未使用官方接口,仅供爬虫学习之用,不做任何违法操作。

      1 """
      2     新浪财经,爬取历史股票数据
      3 """
      4 
      5 # -*- coding:utf-8 -*-
      6 
      7 import numpy as np
      8 import urllib.request, lxml.html
      9 from urllib.request import urlopen
     10 from bs4 import BeautifulSoup
     11 import re, time
     12 import matplotlib.pyplot as plt
     13 from datetime import datetime
     # Plot settings for displaying Chinese text: select the SimHei font and
     # turn off the Unicode minus sign on the axes.
     plt.rcParams.update({
         'font.sans-serif': ['SimHei'],
         'axes.unicode_minus': False,
     })
     17 
     18 
     def public(link, timeout=500):
         """Request *link* with fixed browser-like headers and parse the reply.

         Shared helper: builds the request, attaches a User-Agent and a Cookie
         header, downloads the body and hands it to BeautifulSoup.

         Args:
             link: URL to fetch.
             timeout: Timeout in seconds passed to ``urlopen``. Defaults to
                 500, the value this helper has always used.

         Returns:
             A ``BeautifulSoup`` object built from the response body with the
             ``lxml`` parser.
         """
         request = urllib.request.Request(link)

         user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
         request.add_header('User-Agent', user_agent)

         # NOTE(review): this looks like a session cookie copied from a browser;
         # it has presumably expired -- confirm whether the site needs it at all.
         # The pieces are joined by implicit string-literal concatenation, which
         # requires the surrounding parentheses to span several lines.
         cookie = (
             "SUB=_2AkMsqZjif8NxqwJRmfkRxG7nZYpzyg_EieKa9Wk5JRMyHRl-yD83qkJatRB6Bym2DDqPE870e3uMsySIjHjrMbMNxNqk; "
             "SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9WFXmxLGpAG5k05lCJw6qgYe; "
             "SINAGLOBAL=172.16.92.24_1542789082.401113; "
             "Apache=172.16.92.24_1542789082.401115; UOR=www.baidu.com,blog.sina.com.cn,; "
             "ULV=1542789814434:1:1:1:172.16.92.24_1542789082.401115:; U_TRS1=000000d1.1f4d3546.5bf53673.955fa32e; "
             "U_TRS2=000000d1.1f593546.5bf53673.736853cc; FINANCE2=661413ac85cadaab72ec7e3d842d6a3a; _s_upa=1"
         )
         request.add_header("Cookie", cookie)

         # Use the response as a context manager so the connection is closed
         # even if reading fails.
         with urllib.request.urlopen(request, timeout=timeout) as response:
             html = response.read()

         # Convert the raw HTML into a BeautifulSoup object.
         return BeautifulSoup(html, "lxml")
     41 
     42 
     def shares_price(code, year, quarter):
         """Fetch one quarter of historical prices for a stock.

         Downloads the market-history page for *code* through ``public`` and
         reads every ``<div align="center">`` element as one table cell.

         Args:
             code: Stock code, substituted into the URL with ``%s``.
             year: Year, substituted into the URL with ``%d``.
             quarter: Quarter number, substituted into the URL with ``%d``.

         Returns:
             A 6-tuple ``(date_list, open_list, high_list, close_list,
             low_list, jpg_title)``. The first five are lists of strings with
             the first row (the table header) removed and the remaining rows
             reversed, so that dates run from oldest to newest. ``jpg_title``
             is the list of regex matches taken from the page title. All five
             lists are empty when the page yields no complete data row.
         """
         link = "http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/%s.phtml?year=%d&jidu=%d" % (code, year, quarter)

         bsObj = public(link)

         # Stock information from the page title: everything up to each
         # closing parenthesis. The ")" must be escaped; unescaped it is an
         # unbalanced group and re raises an error.
         title_tag = bsObj.title
         title_text = title_tag.text if title_tag is not None else ""
         jpg_title = re.findall(r"(.*?)\)", title_text)

         # One string per cell, with the surrounding whitespace removed.
         cells = [
             cell.text.strip()
             for cell in bsObj.find_all(name='div', attrs={"align": 'center'})
         ]

         # Cells are consumed seven at a time and only the first five of each
         # group are used (date, open, high, close, low). An incomplete
         # trailing group is ignored.
         # NOTE(review): assumes the page lays out seven such cells per row --
         # confirm against the live markup.
         cells_per_row = 7
         used_per_row = 5
         rows = [
             cells[start:start + used_per_row]
             for start in range(0, len(cells) - cells_per_row + 1, cells_per_row)
         ]

         # Drop the header row, then reverse so rows run from the oldest date
         # to the newest. Slicing (unlike pop) is safe when no row was found.
         data_rows = rows[1:]
         data_rows.reverse()

         date_list, open_list, high_list, close_list, low_list = (
             [row[column] for row in data_rows] for column in range(used_per_row)
         )

         return date_list, open_list, high_list, close_list, low_list, jpg_title
     84 
     85 
     # Stock code, year and quarter to plot.
     code = "002925"
     year = "2018"
     quarter = 4
     # Manual input mode; the fixed values above are the default because they
     # are more convenient while debugging.
     # code = input("code:")  # 002925
     # year = input("year:")    # 2018
     # quarter = int(input("quarter:"))

     # Fetch the page once and unpack every column from that single result;
     # each call to shares_price performs its own HTTP request.
     date_strs, open_strs, high_strs, close_strs, low_strs, jpg_title = shares_price(code, int(year), quarter)
     if not date_strs:
         raise SystemExit("no price rows found for %s (%s Q%s)" % (code, year, quarter))

     # Convert the date strings into date objects for the x axis.
     x = [datetime.strptime(d, '%Y-%m-%d').date() for d in date_strs]
     # Convert the scraped price strings into floats.
     open_list = [float(i) for i in open_strs]
     high_list = [float(i) for i in high_strs]
     close_list = [float(i) for i in close_strs]
     low_list = [float(i) for i in low_strs]

     # One line per price series; only the label, data and line colour differ.
     for series_label, series_values, series_color in (
         ('open', open_list, 'red'),
         ('high', high_list, 'green'),
         ('close', close_list, 'blue'),
         ('low', low_list, 'black'),
     ):
         plt.plot(x, series_values, label=series_label, linewidth=1, color=series_color,
                  marker='o', markerfacecolor='blue', markersize=2)

     # Use the lowest and highest prices, padded by 1, as the y-axis limits.
     plt.ylim(min(low_list) - 1, max(high_list) + 1)
     plt.gcf().autofmt_xdate()  # rotate the date tick labels automatically

     # Axis labels and chart title.
     plt.xlabel('time')
     plt.ylabel('price')
     # jpg_title[0] is the first match taken from the page title; fall back to
     # the stock code when the title produced no match.
     plt.title('gp_1_{0}.jpg'.format(jpg_title[0] if jpg_title else code))
     plt.legend()
     plt.show()

    效果如下:

    是不是有了另一种看盘的视角?例如:黑线(最低价)下跌后转而向上的第一个大拐点,可视为买入点。

  • 相关阅读:
    [linux] 将socket设置为非阻塞(nonblocking)
    翻译—IvorHorton的Begining Visual C++ 2005 [第一章]
    深入解析ATL(第二版ATL8.0)(1.11.3节)
    gdb使用初步
    编程其实就是一个不断做出选择的过程
    windows和linux下多线程的一些区别
    makefile编写入门
    深入解析ATL(第二版ATL8.0)(1.41.7节)
    深入解析ATL(第二版ATL8.0)(1.81.10节)
    用gdb调试子进程
  • 原文地址:https://www.cnblogs.com/4wheel/p/10162564.html
Copyright © 2011-2022 走看看