zoukankan html css js c++ java

Python股票信息抓取~

本来想把股票的涨跌抓取出来，用汇通网的股票为例，就找了国际外汇为例。

页面里有xhr请求，每个xhr请求的url形如 http://api.q.fx678.com/history.php?symbol=USD&limit=288&resolution=5&codeType=8100&st=0.43342772855649625

然后请求里面有一个limit=288，表示288个时间段；汇通网将5分钟作为一个时间段，所以一天共有288个时间间隔。

xhr请求中的st参数好像是跟页面的请求的过期与否有关，因为有的股票可以通过该st值获得数据，但是有的股票就不可以。然后此xhr请求可以将从该时间段为止的24小时内的所有股票的变动返回，包括该时间段的开盘，收盘，最高，最低，然后依次来解析。

处理的时候只是将该股票所有整点数据的最高点解析出来，xhr请求中的时间戳是秒级的，然后将之转换成北京时间，并且将时间进行排序以得到正确的顺序，其他的数据并没有进行处理。程序中存储时间和最高点都是用数组存储的，并且该程序是单进程，会更新~

只想说xpath是真心好用，第二次用，解析参数的时候很方便。

#-*-coding:utf-8 -*-
import urllib
import re
import json
import urllib2
from lxml import etree
import requests
import time
from Queue import Queue
import matplotlib.pyplot as plt
# Listing page whose links carry the symbol codes to request.
URL = "http://quote.fx678.com/exchange/WH"

# Work queues shared by the functions in this script:
#   nation_que - symbol codes scraped from the listing page, not yet requested
#   nation     - symbol codes whose history request returned data
#   high       - the 'h' series of each such response
#   start_time - the 't' (timestamp) series of each such response
nation_que, nation, high, start_time = (Queue() for _ in range(4))

# Not read or written by any function visible in this script.
Chart = list()
def __cmp__(self, other):
    """Three-way compare two objects by their ``time`` attribute.

    Returns -1 when ``self.time`` is smaller than ``other.time``, 0 when
    the two are equal, and 1 in every other case.
    """
    mine, theirs = self.time, other.time
    if mine < theirs:
        return -1
    return 0 if mine == theirs else 1
class Share:
    """One quote sample: a time and the value recorded at that time.

    Instances compare and sort by ``time`` only; ``score`` is ignored when
    ordering.
    """

    def __init__(self, time, score):
        self.time = time
        self.score = score

    def __getitem__(self, key):
        """Return the field named *key* (``'time'`` or ``'score'``).

        Raises:
            KeyError: if *key* is not one of the two field names.
        """
        if key in ('time', 'score'):
            return getattr(self, key)
        raise KeyError(key)

    def __cmp__(self, other):
        """Return -1, 0 or 1 according to how ``time`` compares to *other*'s."""
        # Spelled out rather than calling cmp(), which Python 3 removed.
        return (self.time > other.time) - (self.time < other.time)

    # Python 3 ignores __cmp__, so the rich comparisons are defined as well;
    # this keeps sorted()/min()/max() on Share objects working everywhere.
    def __eq__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time == other.time

    def __ne__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time != other.time

    def __lt__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time < other.time

    def __le__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time <= other.time

    def __gt__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time > other.time

    def __ge__(self, other):
        if not isinstance(other, Share):
            return NotImplemented
        return self.time >= other.time

    def __str__(self):
        """Return ``"<time> <score>"``."""
        # str() on both parts so a numeric time does not raise TypeError.
        return str(self.time) + " " + str(self.score)

def download(url, headers, num_try=2, timeout=10):
    """Fetch *url* with ``requests`` and return the response body as text.

    Args:
        url: Address to request.
        headers: Mapping of HTTP request headers to send.
        num_try: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        The response text, or ``None`` when every attempt failed or
        ``num_try`` is not positive.
    """
    while num_try > 0:
        num_try -= 1
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        # requests raises its own exception hierarchy, never
        # urllib2.URLError, so this is what must be caught for the retry
        # loop to actually retry.
        except requests.RequestException as e:
            print('Download error: %s' % e)
        else:
            return response.text

    return None


def get_type_url():
    """Scrape the listing page for symbol codes and start processing them.

    Downloads ``URL``, reads the ``href`` of every ``<a class="mar_name">``
    link, puts the last path segment of each one on ``nation_que`` and then
    calls ``get_precent()`` to work through that queue. Returns early,
    without queuing anything, when the page could not be downloaded or
    parsed.
    """
    headers = {
        # The HTTP header is spelled 'User-Agent'; the previous 'User_agent'
        # key was sent as an unrelated header, so no browser UA was presented.
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
        'Referer': 'http://quote.fx678.com/exchange/WH',
        # NOTE(review): hard-coded cookie, presumably captured from a browser
        # session - confirm whether the site still needs or accepts it.
        'Cookie': 'io=-voMclEjiizK9nWKALqB; UM_distinctid=15f5938ddc72db-089cf9ba58d9e5-31657c00-fa000-15f5938ddc8b24; Hm_lvt_d25bd1db5bca2537d34deae7edca67d3=1509030420; Hm_lpvt_d25bd1db5bca2537d34deae7edca67d3=1509031023',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept': '*/*'
    }
    content = download(URL, headers)
    if not content:
        # download() returns None once its attempts are used up; there is
        # nothing to parse in that case.
        print('Download error: no content from %s' % URL)
        return

    html = etree.HTML(content)
    if html is None:
        print('Download error: could not parse %s' % URL)
        return

    for href in html.xpath('//a[@class="mar_name"]/@href'):
        print(href)
        # The symbol code is the last path segment; a trailing slash would
        # otherwise produce an empty code.
        symbol = href.rstrip('/').split('/')[-1]
        if symbol:
            nation_que.put(symbol)

    get_precent()

def get_precent():
    """Request the history of every queued symbol and plot each one.

    For each symbol code on ``nation_que`` the history endpoint is requested
    (up to two attempts). When the JSON response has non-empty ``'h'`` and
    ``'t'`` series, the symbol and both series are put on ``nation``,
    ``high`` and ``start_time`` and ``draw_pict()`` is called to plot them.
    Symbols whose request fails or returns no data are skipped.
    """
    # Identical for every symbol, so built once outside the loop.
    headers = {'Accept':'application/json, text/javascript, */*; q=0.01',
               'Accept-Encoding':'gzip, deflate',
               'Accept-Language':'zh-CN,zh;q=0.8',
               'Connection':'keep-alive',
               'Host':'api.q.fx678.com',
               'Origin':'http://quote.fx678.com',
               'Referer':'http://quote.fx678.com/symbol/USD',
               'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
               }

    while not nation_que.empty():
        ss = nation_que.get(False)
        print(ss)
        url = 'http://api.q.fx678.com/history.php?symbol=' + ss +'&limit=288&resolution=5&codeType=8100&st=0.43342772855649625'
        print(url)

        queued = False
        num_try = 2
        while num_try > 0:
            num_try -= 1
            try:
                content = requests.get(url, headers=headers, timeout=10)
                html = json.loads(content.text)
                st = html['h']
                T_time = html['t']
            except requests.RequestException as e:
                # requests never raises urllib2.URLError; catch its own
                # base class so a network failure leads to a retry.
                print('Download error: %s' % e)
                continue
            except (ValueError, KeyError, TypeError) as e:
                # Body was not JSON, or lacked the expected 'h'/'t' keys.
                print('Bad response for %s: %r' % (ss, e))
                continue
            if st and T_time:
                nation.put(ss)
                high.put(st)
                start_time.put(T_time)
                queued = True
            break
        nation_que.task_done()
        # draw_pict() takes from the three result queues without blocking,
        # so only call it when this symbol actually put something there.
        if queued:
            draw_pict()


def sub_sort(array,array1,low,high):
    """Partition ``array[low..high]`` in place around its first element.

    ``array1`` is rearranged with exactly the same moves, so entries at the
    same index stay paired. After the call, every entry left of the returned
    index is smaller than the pivot and every entry right of it is greater
    than or equal to the pivot.

    Returns the index at which the pivot ends up.
    """
    pivot = array[low]
    pivot_mate = array1[low]
    left, right = low, high
    while left < right:
        if array[right] >= pivot:
            # Already on the correct side: shrink the window from the right.
            right -= 1
        elif array[right] < pivot:
            # Drop the small entry into the gap at ``left``, then move the
            # gap one step right by copying that next entry out to ``right``.
            array[left], array1[left] = array[right], array1[right]
            left += 1
            array[right], array1[right] = array[left], array1[left]
    array[left] = pivot
    array1[left] = pivot_mate
    return left


def quick_sort(array,array1,low,high):
    """Sort ``array[low..high]`` ascending in place, keeping ``array1`` paired.

    Both bounds are inclusive. Each entry of ``array1`` moves together with
    the entry of ``array`` at the same index; entries with equal keys keep
    their original relative order. Nothing happens when ``low >= high``.
    Both lists are expected to cover the whole ``low..high`` range.

    The built-in sort is used instead of hand-written recursion, so large
    or already-sorted input cannot exhaust the recursion limit.
    """
    if low >= high:
        return
    pairs = sorted(zip(array[low:high + 1], array1[low:high + 1]),
                   key=lambda pair: pair[0])
    for offset, (key, mate) in enumerate(pairs):
        array[low + offset] = key
        array1[low + offset] = mate

def draw_pict():
    """Plot the on-the-hour values of the next queued symbol.

    Takes one entry from each of ``nation``, ``high`` and ``start_time``,
    keeps the samples whose timestamp (converted with ``time.localtime``,
    i.e. in the machine's local time zone) falls on minute 0, orders them
    by hour of day and shows a line plot titled with the symbol code.
    Returns without plotting when any of the three queues is empty.
    """
    if nation.empty() or high.empty() or start_time.empty():
        return

    symbol = nation.get(False)
    highs = high.get(False)
    timestamps = start_time.get(False)

    points = []
    for stamp, value in zip(timestamps, highs):
        moment = time.localtime(float(stamp))
        if moment.tm_min == 0:
            points.append((moment.tm_hour, value))
    # Order by hour only; the stable sort keeps samples that share an hour
    # in their original order.
    points.sort(key=lambda point: point[0])
    hours = [hour for hour, _ in points]
    values = [value for _, value in points]

    plt.plot(hours, values, marker='*')
    # The title has to be set before show(); set afterwards it would not
    # appear on the figure that was displayed.
    plt.title(symbol)
    plt.show()

    nation.task_done()
    high.task_done()
    start_time.task_done()


# Script entry point: get_type_url() scrapes the symbol codes and then calls
# get_precent() itself to request each symbol.
if __name__ == '__main__':
    get_type_url()

查看全文

相关阅读:
4.9版本的linux内核中实时时钟芯片pt7c4338的驱动源码在哪里
 4.9版本的linux内核中eeprom存储芯片at24c512的驱动源码在哪里
 4.9版本的linux内核中温度传感器adt7461的驱动源码在哪里
 4.9版本linux内核的ina220电流检测芯片源码在哪里
 nxp的layerscape系列芯片中的rcw指定了一些什么信息
 openwrt的编译系统是如何制作根文件系统的
 openwrt的编译系统在哪里对程序进行开机自动启动配置
 makefile中的word函数作用是什么
 jquery 动态增加table行，动态删除table行
 使用jQuery创建可删除添加行的动态表格，超级简单实用的方法

原文地址：https://www.cnblogs.com/chenyang920/p/7741053.html