zoukankan      html  css  js  c++  java
  • Python股票信息抓取(三)

    最近在学习 MongoDB,掌握了一些最基本的操作,于是想结合股票信息的抓取,把抓到的数据存储到 MongoDB 中。MongoDB 与关系型数据库最大的区别是不需要事先建表,可以直接存储;不过在选定集合并插入数据时,需要先把 str 格式的字符串转换成 JSON 格式(即 Python 字典)再插入。这一点我在插入数据时调试了十多分钟,一直以为是自己字符串本身的问题,后来对照了插入数据的格式并百度了一下才发现。数据是插入到本机的 test.Share 集合中的,其他需要注意的地方就没有什么了~代码写得很丑,冗余也很大,之后还会继续更新~另外程序是单进程进行数据抓取的~嗯~很蠢~

      1 #-*-coding:utf-8 -*-
      2 import urllib
      3 import re
      4 import json
      5 import urllib2
      6 from lxml import etree
      7 import requests
      8 import time
      9 from Queue import Queue
     10 from pymongo import MongoClient
     11 import matplotlib.pyplot as plt
     12 URL = 'http://quote.fx678.com/exchange/WH'
     13 nation_que = Queue()
     14 client = MongoClient('localhost',27017)
     15 db = client.test
     16 Share = db.Share
     17 
     18 def sub_sort(array,array1,low,high):
     19     key = array[low]
     20     key1 = array1[low]
     21     while low < high:
     22         while low < high and array[high] >= key:
     23             high -= 1
     24         while low < high and array[high] < key:
     25             array[low] = array[high]
     26             array1[low] = array1[high]
     27             low += 1
     28             array[high] = array[low]
     29             array1[high] = array1[low]
     30     array[low] = key
     31     array1[low] = key1
     32     return low
     33 
     34 
     35 def quick_sort(array,array1,low,high):
     36      if low < high:
     37         key_index = sub_sort(array,array1,low,high)
     38         quick_sort(array,array1,low,key_index)
     39         quick_sort(array,array1,key_index+1,high)
     40 
     41 def download(url, headers, num_try=2):
     42     while num_try >0:
     43         num_try -= 1
     44         try:
     45             content = requests.get(url, headers=headers)
     46             return content.text
     47 
     48         except urllib2.URLError as e:
     49             print 'Download error', e.reason
     50 
     51     return None
     52 
     53 current_quto = Queue()
     54 open_quto = Queue()
     55 high_quto = Queue()
     56 low_quto = Queue()
     57 close_quto = Queue()
     58 update_time = Queue()
     59 def get_type_url():
     60     headers = {
     61         'User_agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
     62         'Referer': 'http://quote.fx678.com/exchange/WH',
     63         'Cookie': 'io=-voMclEjiizK9nWKALqB; UM_distinctid=15f5938ddc72db-089cf9ba58d9e5-31657c00-fa000-15f5938ddc8b24; Hm_lvt_d25bd1db5bca2537d34deae7edca67d3=1509030420; Hm_lpvt_d25bd1db5bca2537d34deae7edca67d3=1509031023',
     64         'Accept-Language': 'zh-CN,zh;q=0.8',
     65         'Accept-Encoding': 'gzip, deflate',
     66         'Accept': '*/*'
     67     }
     68     content = download(URL,headers)
     69     html = etree.HTML(content)
     70     result = html.xpath('//a[@class="mar_name"]/@href')
     71     result1 = html.xpath('//td/text()')
     72     num = 0
     73     for each in result1:
     74 
     75         if num%6 == 0:
     76             current_quto.put(each)
     77             num += 1
     78         elif num%6 == 1:
     79             open_quto.put(each)
     80             num += 1
     81         elif num%6 == 2:
     82             high_quto.put(each)
     83             num += 1
     84         elif num%6 == 3:
     85             low_quto.put(each)
     86             num += 1
     87         elif num %6 == 4:
     88             close_quto.put(each)
     89             num +=1
     90         elif num %6 == 5:
     91             update_time.put(each)
     92             num +=1
     93     #while not
     94     for each in result:
     95         st = each.split('/')
     96         nation_que.put(st[len(st)-1])
     97 
     98     get_precent()
     99 
    100 def get_precent():
    101 
    102     while not nation_que.empty():
    103         if not update_time.empty():
    104             time_update = update_time.get(False)
    105             update_time.task_done()
    106         if not current_quto.empty():
    107             new_rates = current_quto.get(False)
    108             current_quto.task_done()
    109         if not open_quto.empty():
    110             opening = open_quto.get(False)
    111             open_quto.task_done()
    112         if not high_quto.empty():
    113             high = high_quto.get(False)
    114             high_quto.task_done()
    115         if not low_quto.empty():
    116             low = low_quto.get(False)
    117             low_quto.task_done()
    118         if not close_quto.empty():
    119             closing = close_quto.get(False)
    120             close_quto.task_done()
    121 
    122         ss = nation_que.get(False)
    123         print ss
    124         print low
    125         print high
    126         print time_update
    127         print new_rates
    128         print opening
    129 
    130         url = 'http://api.q.fx678.com/history.php?symbol=' + ss +'&limit=288&resolution=5&codeType=8100&st=0.8274405615006541'
    131         print url
    132         headers = {'Accept':'application/json, text/javascript, */*; q=0.01',
    133                 'Accept-Encoding':'gzip, deflate',
    134                 'Accept-Language':'zh-CN,zh;q=0.8',
    135                 'Connection':'keep-alive',
    136                 'Host':'api.q.fx678.com',
    137                 'Origin':'http://quote.fx678.com',
    138                 'Referer':'http://quote.fx678.com/symbol/USD',
    139                 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
    140                }
    141         num_try = 2
    142         while num_try >0:
    143             num_try -= 1
    144             try:
    145                 content = requests.get(url, headers=headers)
    146                 html = json.loads(content.text)
    147                 st = html['h']
    148                 T_time = html['t']
    149                 if  len(st) > 0 and len(T_time) > 0:
    150                     draw_pict(ss,T_time,st,time_update,new_rates,opening,high,low,closing)
    151                 break
    152             except urllib2.URLError as e:
    153                 print 'Download error', e.reason
    154         nation_que.task_done()
    155 List = []
    156 def draw_pict(name,T_time1,high_rate,time_update,new_rate,opening,high,low,closing):
    157 
    158     High = T_time1
    159     Time = high_rate
    160     High_Rate = []
    161     T_time = []
    162     mmap = "{"Type":"%s","Current_quto":"%s","Opening_quto":"%s","High_quto":"%s","low_quto":"%s","Closing_quto":"%s","Update_Time":"%s","Real_TIme_infor":{" % (    name, new_rate, opening, high, low, closing, time_update)
    163     print mmap
    164     flag = 0
    165     for each,high1 in zip(T_time1,high_rate):
    166         if flag == 1:
    167             mmap += ","
    168         else:
    169             flag = 1
    170         mm = ""%s":"%s""%(each,high1)
    171 
    172 
    173         st = time.localtime(float(each))
    174         mmap += mm
    175         if st.tm_min == 0:
    176             T_time.append(st.tm_hour)
    177             High_Rate.append(high1)
    178         else:
    179             pass
    180     mmap += "}}"
    181     mmap1 = json.loads(mmap)
    182     print mmap1
    183     Share.insert(mmap1)
    184     if len(T_time) == len(High_Rate):
    185         quick_sort(T_time,High_Rate,0,len(High_Rate)-1)
    186         List.append(High_Rate)
    187 
    188 def draw_picture():
    189     colu = len(List)
    190 
    191     num = 1
    192     for each in List:
    193         plt.subplot(colu/2 + 1,2,num)
    194         num+=1
    195 
    196         list = each
    197         T_time = []
    198         for i in range(len(list)):
    199             T_time.append(i)
    200         print len(list)
    201         print len(T_time)
    202         plt.plot(T_time, list, marker='*')
    203 
    204     plt.show()
    205     plt.title('Share Message')
    206 
    207 if __name__ == '__main__':
    208     get_type_url()
    209     draw_picture()
  • 相关阅读:
    [洛谷P3674]小清新人渣的本愿
    [洛谷P2698][USACO12MAR]花盆Flowerpot
    [洛谷P4329][COCI2006-2007#1] Bond
    [洛谷P3203][HNOI2010]弹飞绵羊
    [洛谷P1407][国家集训队]稳定婚姻
    [洛谷P3388]【模板】割点(割顶)
    TX2_安装view_team
    tx2的一些系统命令
    tensorflow-cnnn-mnist
    mnist数据集tensorflow实现
  • 原文地址:https://www.cnblogs.com/chenyang920/p/7795089.html
Copyright © 2011-2022 走看看