zoukankan      html  css  js  c++  java
  • 每日日报2021.6.2

    今天完成内容:

    学习web

     学习 Python 爬虫:爬取航班信息并存入数据库

    #coding:utf-8
    import requests
    from lxml import etree
    import pymysql
    import random
    def create():
        """Drop and recreate the ``LINE`` table in the ``jichang`` database.

        Any existing ``LINE`` table (and its rows) is discarded first. The
        table has an auto-increment ``ID`` plus eleven ``CHAR(255)`` columns
        that match the column list used by ``insert``.

        The connection is always closed, even if one of the statements fails;
        database errors propagate to the caller.
        """
        db = pymysql.connect(host="localhost", user="root", password="123456", db="jichang")  # connect to the database
        try:
            with db.cursor() as cursor:
                cursor.execute("DROP TABLE IF EXISTS LINE")

                # The '#' comments inside the statement are sent to the server
                # as part of the SQL text (departure city, destination city,
                # departure date, departure time, arrival time, departure
                # airport, arrival airport, flight info, cabin class, discount
                # rate, price).
                sql = """CREATE TABLE LINE (
    ID INT PRIMARY KEY AUTO_INCREMENT,
    dp CHAR(255), # 出发地
    ap CHAR(255), # 目的地
    ndate CHAR(255), #出发日期
    dtime CHAR(255), # 出发时间
    atime CHAR(255), # 到达时间
    dname CHAR(255), # 出发机场
    aname CHAR(255), # 到达机场
    flightname CHAR(255), # 航班信息
    flightspace CHAR(255), # 舱位
    rate CHAR(255), # 折扣率
    price CHAR(255) # 价格
    )"""

                cursor.execute(sql)
        finally:
            # Release the connection even when DROP/CREATE raised.
            db.close()

    def insert(value):
        """Insert one fare row into the ``LINE`` table (best effort).

        Args:
            value: Sequence of eleven items, in the column order
                dp, ap, ndate, dtime, atime, dname, aname, flightname,
                flightspace, rate, price.

        A new connection is opened for every call. On success the row is
        committed and a success message is printed; on failure the
        transaction is rolled back and a failure message (with the cause) is
        printed. Insert failures are not re-raised, but a failure to connect
        still propagates. The connection is always closed.
        """
        db = pymysql.connect(host="localhost", user="root", password="123456", db="jichang")
        sql = "INSERT INTO LINE(dp,ap,ndate,dtime,atime,dname,aname,flightname,flightspace,rate,price) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        try:
            with db.cursor() as cursor:
                cursor.execute(sql, value)
            db.commit()
            print('插入数据成功')
        except Exception as exc:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not swallowed, and report the
            # cause instead of hiding it.
            db.rollback()
            print("插入数据失败", exc)
        finally:
            db.close()




    def chaxun(start,end,date,timeout=10):
        """Download the flight search-result page for one route and date.

        Args:
            start: Departure city as used in the URL path (the script passes
                pinyin names such as ``'shanghai'``).
            end: Destination city, same form as ``start``.
            date: Departure date sent as the ``godate`` query parameter
                (the script passes ``'YYYY-MM-DD'``).
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` can block indefinitely.

        Returns:
            The response body decoded as UTF-8 text.

        Raises:
            requests.exceptions.RequestException: on connection errors or
                when the timeout expires.
            UnicodeDecodeError: if the body is not valid UTF-8.
        """
        # NOTE(review): 'arrCityPy' receives the departure city and
        # 'depCityPy' the destination, which looks inverted given the key
        # names -- confirm against the site before changing.
        cookies = {
            'arrCityPy': start,
            'depCityPy': end,
        }

        headers = {
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            # Backslash escaped explicitly: '\^' is an invalid escape
            # sequence and triggers a warning on newer Python versions.
            # The runtime value is unchanged.
            'sec-ch-ua': '^\\^',
            'sec-ch-ua-mobile': '?0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        }

        params = (
            ('unionId', '427'),
            ('godate', date),
            ('searchType', '0'),
        )

        response = requests.get(
            f'https://jipiao.114piaowu.com/{start}-{end}.html',
            headers=headers,
            params=params,
            cookies=cookies,
            timeout=timeout,
        )
        return response.content.decode('utf-8')

    def jianxi(html,start,end,time):
        """Parse a search-result page, store every fare, and return the flights.

        For each flight row found under the ``jp_list`` container the
        departure/arrival times, airports and flight name are extracted; for
        each fare (``yd_list``) of that flight the cabin, price and discount
        are extracted and one row is written to the database via ``insert``.

        Args:
            html: Page markup as returned by ``chaxun``.
            start: Departure city, stored in the ``dp`` column.
            end: Destination city, stored in the ``ap`` column.
            time: Departure date, stored in the ``ndate`` column and echoed in
                the returned dictionaries.

        Returns:
            A list with one dict per flight. Each dict holds the flight fields
            plus a list of fare dicts under the '机票' key. Empty when the
            page is blank or contains no flight rows.

        Raises:
            IndexError: if a flight row or fare lacks one of the expected
                nodes (the page layout differs from what the XPaths assume).
        """
        def _squash(text):
            # Remove every whitespace character, including inner ones.
            return ''.join(text.split())

        # A blank document cannot be parsed into a usable tree; treat it as
        # "no flights" instead of failing on the first XPath query.
        if not html or not html.strip():
            return []
        xp = etree.HTML(html)
        if xp is None:
            return []

        hangban_data = []
        hangban_list = xp.xpath('//*[@class="jp_list"]//*[@class="mainDiv66"]')
        # Rows are addressed positionally as the i-th <div> child of jp_list;
        # this assumes the count of mainDiv66 nodes equals the number of
        # flight rows -- TODO confirm against the live page.
        for i in range(1, len(hangban_list) + 1):
            row = f'//*[@class="jp_list"]/div[{i}]'
            info = f'{row}//*[@class="aboutfj"]'

            start_time = _squash(xp.xpath(f'{info}/li[1]/b/text()')[0])  # departure time
            end_time = _squash(xp.xpath(f'{info}/li[1]/p/text()')[0])  # arrival time
            start_address = _squash(xp.xpath(f'{info}/li[2]/p[1]/text()')[0])  # departure airport
            end_address = _squash(xp.xpath(f'{info}/li[2]/p[2]/text()')[0])  # arrival airport
            hangban = xp.xpath(f'{info}/li[3]/p[1]//text()')  # flight: two text nodes
            hangban = _squash(hangban[0]) + _squash(hangban[1])

            jipiao = []  # fares of this flight
            jipiao_list = xp.xpath(f'{row}//*[@class="yd_list"]')
            for j in range(1, len(jipiao_list) + 1):
                fare = f'{row}//*[@class="yd_list"][{j}]'
                cangwei = _squash(xp.xpath(f'{fare}/li[2]/b/text()')[0])  # cabin class
                price = xp.xpath(f'{fare}/li[3]/b//text()')  # price: two text nodes
                price = _squash(price[0]) + _squash(price[1])
                zhekou = _squash(xp.xpath(f'{fare}/li[3]/em/text()')[0])  # discount
                jipiao.append({'舱位':cangwei,'价格':price,'折扣':zhekou})
                # One database row per fare, in the column order expected by insert().
                value = [start,end,time,start_time,end_time,start_address,end_address,hangban,cangwei,zhekou,price]
                insert(value)

            hangban_data.append({'起飞时间': start_time, '到达时间': end_time, '出发日期': time, '出发地点': start_address, '抵达地点': end_address, '航班': hangban,'机票': jipiao})

        return hangban_data

    if __name__ == '__main__':
        # Script entry point: rebuild the LINE table, then scrape one
        # hard-coded route and date and print what was found.
        # (An interactive-prompt variant and a multi-city loop used to sit
        # here as inert triple-quoted strings; they were never executed.)
        create()

        depart_date = '2021-06-17'
        origin = 'shanghai'
        destination = 'beijing'

        page = chaxun(origin, destination, depart_date)
        flights = jianxi(page, origin, destination, depart_date)

        print(len(flights))
        if len(flights) == 0:
            print('未查询到相关数据')
        else:
            for flight in flights:
                print(flight)



    看视频

    遇到问题:

    明日目标:

    学习web端的开发

    学习css

  • 相关阅读:
    axios 讲解 和vue搭建使用
    在过去一年中做了很多项目。纪念一下
    使用vue遇到坑
    python 学习总结
    webpack 学习
    gulp-rev 添加版本号
    解决微信转发到朋友圈没有获取页面头像以图标展示。
    对象-数组-字符串:
    mobiscroll时间控件
    Chrome Adobe flash player已过期怎么办
  • 原文地址:https://www.cnblogs.com/leiyu1905/p/14913731.html
Copyright © 2011-2022 走看看