import re
import json
from odps import ODPS
from threading import Thread
import threading
from urllib import parse
import datetime
from lxml import etree
import random
import requests
import time
from models import *
# def write_txt(html_data):
#     f = open("a.txt", 'a+')
#     f.write(html_data)
#     f.write("\n")
#     f.close()
def get_nodes_json():
    """Pop one entry from ``qunar.com:ticket_url`` and process it.

    The value returned by ``r.lpop`` is always printed. When it is falsy
    nothing else happens; otherwise it is handed to
    ``process_response_data``.
    """
    # NOTE(review): `r` comes from `models`; presumably a Redis-style client — confirm.
    next_url = r.lpop('qunar.com:ticket_url')
    print(next_url)
    if not next_url:
        return
    process_response_data(next_url)
def _first_int(text):
    """Return the first run of decimal digits in *text* as an int, or None."""
    found = re.search(r"\d+", text)
    return int(found.group()) if found else None


def _parse_price(text):
    """Convert a scraped price string to float; return 0 when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        return 0


def _format_city(districts):
    """Join the first two "·"-separated parts of *districts* with a space."""
    return " ".join(districts.split("·")[:2])


def process_response_data(url, timeout=30):
    """Fetch one listing page and insert every sight on it as a ``Tickets`` row.

    The district name is taken from the ``list_<name>.html`` part of *url*
    and percent-decoded. The page is requested with POST, parsed with lxml,
    and each ``sight_item`` element under ``result_list`` becomes one row
    saved with ``force_insert=True``.

    Example URL:
    https://piao.qunar.com/ticket/list_%E6%96%97%E9%97%A8%E5%8C%BA.html?from=mps_search_suggest_c&keyword=%E6%96%97%E9%97%A8%E5%8C%BA&page=1

    Args:
        url: Listing page URL containing ``list_<district>.html``.
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises instead of blocking forever.

    Raises:
        ValueError: If *url* has no ``list_<district>.html`` segment.
        KeyError: If a sight element lacks an expected ``data-*`` attribute.
    """
    if isinstance(url, bytes):
        # NOTE(review): the queue client may return bytes — confirm against models.r
        url = url.decode("utf-8")

    # The dot before "html" is escaped so it only matches a literal ".".
    found = re.search(r"list_(.*)\.html", url)
    if found is None:
        raise ValueError("no list_<district>.html segment in URL: {!r}".format(url))
    district = parse.unquote(found.group(1))

    response = requests.request("POST", url, timeout=timeout)
    html = etree.HTML(response.text)
    sights = html.xpath('//div[@class="result_list"]/div[contains(@class,"sight_item")]')

    # The pager number is only printed, so a missing pager must not abort the
    # page. Presumably this is the total ticket count — TODO confirm.
    pagers = html.xpath('//div[contains(@id,"pager-container")]')
    if pagers:
        ticket_num = _first_int(str(pagers[0].attrib))
        if ticket_num is not None:
            print(ticket_num)

    level_xpath = './div[@class="sight_item_detail clrfix"]/div[@class="sight_item_about"]/div[@class="sight_item_info"]/div[@class="clrfix"]/span[@class="level"]/text()'
    price_xpath = './div[@class="sight_item_detail clrfix"]/div[@class="sight_item_pop"]/table/tr/td/span[@class="sight_item_price"]/em/text()'

    for sight in sights:
        attrs = sight.attrib
        ticket = Tickets()
        ticket.ticket_name = attrs['data-sight-name']
        ticket.ticket_pic = attrs['data-sight-img-u-r-l']
        # Scenic-area level; an absent level element yields an empty string.
        ticket.ticket_level = "".join(sight.xpath(level_xpath))
        # Listed price; an absent or non-numeric price is stored as 0.
        ticket.ticket_price = _parse_price("".join(sight.xpath(price_xpath)))
        ticket.ticket_city = _format_city(attrs['data-districts'])
        # The district comes from the URL, not from the item's data-districts.
        ticket.ticket_district = district
        ticket.ticket_location = attrs['data-address']
        ticket.ticket_sales = attrs['data-sale-count']
        ticket.create_time = datetime.datetime.now()
        ticket.save(force_insert=True)
class parse_qunar_url_Thread(Thread):
    """Worker thread that calls ``get_nodes_json`` in an endless loop."""

    def run(self):
        """Call ``get_nodes_json`` repeatedly; the loop has no exit condition."""
        while True:
            get_nodes_json()
# Save the final data
if __name__ == "__main__":
    # Handle a single queued URL in the main thread.
    get_nodes_json()
    # Disabled alternative: start 10 worker threads instead.
    # for i in range(10):
    #     parse_qunar_url_thread = parse_qunar_url_Thread()
    #     parse_qunar_url_thread.start()