zoukankan      html  css  js  c++  java
  • Python爬取酒店信息

    一、代码是跟着网上一个视频教学敲的,还有一部分待优化

    二、全部源码

    import requests#网络请求
    import re#正则 
    import time
    import random
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    # Actual endpoint the hotel list page calls asynchronously for its results.
    url = 'http://hotel.elong.com/ajax/list/asyncsearch'

    # Request headers sent with every POST to the endpoint above.
    # NOTE(review): 'Content-Length' is a fixed value here although the form
    # body is built elsewhere in the script - confirm whether this header
    # should be sent explicitly at all.
    header = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Length': '1599',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'hotel.elong.com',
        'Origin': 'http://hotel.elong.com',
        'Pragma': 'no-cache',
        'Referer': 'http://hotel.elong.com/beijing/',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/67.0.3396.79 Safari/537.36'
        ),
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Form fields submitted with every search request. Only
    # 'listRequest.pageIndex' differs between pages, so the dict is built once
    # here and copied per iteration; the placeholder keeps the key in its
    # original position in the encoded form body.
    form_fields = {
        'code': '9254658',
        'listRequest.areaID': '',
        'listRequest.bookingChannel': '1',
        'listRequest.cardNo': '192928',
        'listRequest.checkInDate': '2018-06-19 00:00:00',
        'listRequest.checkOutDate': '2018-06-20 00:00:00',
        'listRequest.cityID': '0101',
        'listRequest.cityName': '北京市',
        'listRequest.customLevel': '11',
        'listRequest.distance': '20',
        'listRequest.endLat': '0',
        'listRequest.endLng': '0',
        'listRequest.facilityIds': '',
        'listRequest.highPrice': '0',
        'listRequest.hotelBrandIDs': '',
        'listRequest.isAdvanceSave': 'false',
        'listRequest.isAfterCouponPrice': 'true',
        'listRequest.isCoupon': 'false',
        'listRequest.isDebug': 'false',
        'listRequest.isLimitTime': 'false',
        'listRequest.isLogin': 'false',
        'listRequest.isMobileOnly': 'true',
        'listRequest.isNeed5Discount': 'true',
        'listRequest.isNeedNotContractedHotel': 'false',
        'listRequest.isNeedSimilarPrice': 'false',
        'listRequest.isReturnNoRoomHotel': 'true',
        'listRequest.isStaySave': 'false',
        'listRequest.isTrace': 'false',
        'listRequest.isUnionSite': 'false',
        'listRequest.keywords': '',
        'listRequest.keywordsType': '0',
        'listRequest.language': 'cn',
        'listRequest.listType': '0',
        'listRequest.lowPrice': '0',
        'listRequest.orderFromID': '50',
        'listRequest.pageIndex': 0,  # overwritten for each page below
        'listRequest.pageSize': '20',
        'listRequest.payMethod': '0',
        'listRequest.personOfRoom': '0',
        'listRequest.poiId': '0',
        'listRequest.promotionChannelCode': '0000',
        'listRequest.proxyID': 'ZD',
        'listRequest.rankType': '0',
        'listRequest.returnFilterItem': 'true',
        'listRequest.sellChannel': '1',
        'listRequest.seoHotelStar': '0',
        'listRequest.sortDirection': '1',
        'listRequest.sortMethod': '1',
        'listRequest.starLevels': '',
        'listRequest.startLat': '0',
        'listRequest.startLng': '0',
        'listRequest.taRecommend': 'false',
        'listRequest.themeIds': '',
        'listRequest.ctripToken': 'c3502aec-c095-4f09-b122-5d5d6dfb6a8f',
        'listRequest.elongToken': 'a7af9982-c0fb-4bcf-ba63-b9f70e801680',
    }

    # Request page indexes 0..19 and append each page's (name, price) rows
    # to the CSV file.
    for n in range(20):
        dat = dict(form_fields)
        dat['listRequest.pageIndex'] = n

        # A timeout keeps a stalled connection from hanging the script forever.
        html = requests.post(url, data=dat, headers=header, timeout=30)

        # The JSON response carries the hotel list as an HTML fragment.
        content = html.json()['value']['hotelListHtml']

        hotel_pri = re.findall('n class="h_pri_num ">(.*?)</span', content)
        hotol_name = re.findall(' target="_blank" title="(.*?)"><span', content)

        # Pair names with prices by position. zip() stops at the shorter list,
        # so a page with fewer than 20 matches no longer raises IndexError.
        data = list(zip(hotol_name, hotel_pri))
        data2 = pd.DataFrame(data)

        # Raw string: in a normal literal '\U' starts a Unicode escape (a
        # SyntaxError in Python 3) and '\a' / '\1' would become control
        # characters, corrupting the Windows path.
        data2.to_csv(r'C:\Users\你若成风618\Desktop\aa\1.csv',
                     header=False, index=False, mode='a+')
    
    
    


  • 相关阅读:
    关于商业智能(Business Intelligence,简称BI)的认识
    Python连接mysql数据库和关闭数据库的方法
    Python 列表list方法clear( )和直接list [ ]的区别
    截止今天学习大数据技术的笔记
    【已解决】hive导出mysql报错:Container [pid=3962,containerID=container_1632883011739_0002_01_000002] is running 270113280B beyond the 'VIRTUAL' memory limit.
    sqoop安装配置以及简单使用
    大数据相关常用命令行或操作
    阿里巴巴数据库设计规范
    【已解决】linux环境jps命令不显示进程
    【已解决】初始化 Hive 元数据库报错slf4j-log4j12-1.7.25.jar包冲突
  • 原文地址:https://www.cnblogs.com/tuboshu/p/10752377.html
Copyright © 2011-2022 走看看