zoukankan      html  css  js  c++  java
  • 地区列车经过查询

     1 # coding:utf-8
     2 # 
     3 # 把qq.ip138.com/train/上面的列车时刻表抓取解析出来,输出在命令行显示,并存入一个文件train_time.txt
     4 # 
     5 import requests
     6 import time
     7 from bs4 import BeautifulSoup
     8 import random
     9 
    10 BSLIB = 'html5lib'
    11 BASE_URL = 'http://qq.ip138.com'
    12 UA = ["Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0", "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0", "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0", "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"]
    13 
    14 
    15 def get_province(province, url,file):
    16     print(province)
    17     file.write("%s
    " % province)
    18     HEADERS = {'user-agent': random.choice(UA)}
    19     r = requests.get(url, headers=HEADERS)
    20     s = BeautifulSoup(r.text.encode(r.encoding).decode('gbk'), BSLIB)
    21     C = s.select('div > table > tbody > tr > td > a')
    22     for c in C: # 每个城市
    23         get_city(c.text, BASE_URL+c.get('href'),file)
    24     time.sleep(random.random()*30) # 防止因访问频繁而被拒绝请求
    25 
    26 
    27 def get_city(city, url,file):
    28     print('  %s' % city)
    29     file.write("  %s
    " % city)
    30     HEADERS = {'user-agent': random.choice(UA)}
    31     r = requests.get(url, headers=HEADERS)
    32     s = BeautifulSoup(r.text.encode(r.encoding).decode('gbk'), BSLIB)
    33     T = s.select('div#checilist > table > tbody > tr')
    34     for t in T: # 每个车次
    35         t_text = "	"
    36         tt = t.select('td')
    37         for i in tt: # 每个车次的具体每个信息用	隔开
    38             t_text += "%s	" % i.text
    39         print(t_text)
    40         file.write('%s
    ' % t_text)
    41     time.sleep(random.random()*4)# 防止因访问频繁而被拒绝请求
    42 
    43 if __name__=='__main__':
    44     out_file = open('train_time.txt', 'w')
    45     url = BASE_URL+'/train/'
    46     HEADERS = {'user-agent': random.choice(UA)}
    47     r = requests.get(url, headers=HEADERS)
    48     s = BeautifulSoup(r.text.encode(r.encoding).decode('gbk'), BSLIB)
    49     P = s.select('table[width="600"] > tbody > tr > td > a')
    50     for p in P: # 每个省份
    51         get_province(p.text, BASE_URL+p.get('href'), out_file)
    52         
    53         
    54         
    55 
    56 地区列车经过查询
  • 相关阅读:
    Matplotlib如何绘制子图
    数据挖掘的葵花宝典
    Matplotlib如何显示中文
    python绘制WordCloud词云图
    Selenium实现微博自动化运营:关注、点赞、评论
    从小白视角理解<数据挖掘十大算法>
    Laravel模型自动转换类型
    python数据分析常用图大集合
    数据分析常见概念
    Pandas数据分析基础之时间序列
  • 原文地址:https://www.cnblogs.com/qinxiaoqin/p/9058007.html
Copyright © 2011-2022 走看看