zoukankan      html  css  js  c++  java
  • Python 爬取12306火车票

    获取火车站

    stations.py

    #import certifi
    #import urllib3
    import re
    import requests
    from pprint import pprint
    
    
    # Address of the station-name JavaScript resource served by 12306.
    url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9001'
    # NOTE(review): verify=False turns off TLS certificate verification; it is kept
    # from the original script -- confirm whether it is still required.
    # The timeout keeps an unresponsive server from hanging the import forever.
    response = requests.get(url, verify=False, timeout=10)
    # Pair every run of Chinese characters with the upper-case code that follows a
    # literal '|' separator. The '|' must be escaped, otherwise it is regex
    # alternation and every match comes back as a half-empty tuple.
    # Exposed as a dict so importers can look a code up with stations.get(name).
    stations = dict(re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', response.text))
    print(stations)

    根据出发地,目的地,时间查询

    #!/usr/bin/env python
    #coding:utf-8
    """命令行的火车票查看器
    Usage:
        12306 [-gdtkz] <from> <to> <date>
    
    Options:
        -h,--help  显示帮助
        -g           高铁
        -d         动车
        -t         特快
        -k         快速
        -z         直达
    
    Example:
        12306 上海 北京 2016-12-16
        12306 -dg 上海 北京 2016-12-16
        
    pip install requests prettytable docopt colorama
    #prettytable格式化工具
    #docopt是python3命令行的位置参数解析工具
    #colorama是添加显示颜色
    
    """
    import requests
    from docopt import docopt
    import sys
    #sys.path.append(r"/py/stations")
    from stations import stations
    import json
    from prettytable import PrettyTable
    
    class TrainsCollections:
        """Filter raw train records by train type and render them as a table.

        Each record is a mapping read through the keys used in ``trains``
        (``station_train_code``, ``from_station_name``, ``lishi``, ...).
        """

        # Column titles, in the same order as the cells of each row from ``trains``.
        header = '车次 车站 时间 历时 一等 二等 软卧 硬卧 硬座 无座'.split()

        def __init__(self, available_tranins, options):
            """Store the records and the train-type filter.

            Args:
                available_tranins: iterable of train records (mappings).
                options: container of selected flags. A train is kept when the
                    lower-cased first character of its code is ``in`` this
                    value; a falsy value disables filtering.
            """
            self.available_tranins = available_tranins
            self.options = options

        def _get_duration(self, raw_train):
            """Return the record's ``lishi`` value with ':' replaced by '小时'."""
            return raw_train.get('lishi').replace(':', '小时')

        @property
        def trains(self):
            """Yield one row (a list of ten cells) for every train passing the filter.

            The station and time cells hold two values joined by a newline so
            that they are stacked inside a single table cell.
            """
            for raw_train in self.available_tranins:
                train_no = raw_train['station_train_code']
                initial = train_no[0].lower()
                # Skip trains whose type letter was not requested.
                if self.options and initial not in self.options:
                    continue
                yield [
                    train_no,
                    '\n'.join([raw_train['from_station_name'], raw_train['to_station_name']]),
                    '\n'.join([raw_train['start_time'], raw_train['arrive_time']]),
                    self._get_duration(raw_train),
                    raw_train['zy_num'],
                    raw_train['ze_num'],
                    raw_train['rw_num'],
                    raw_train['yw_num'],
                    raw_train['yz_num'],
                    raw_train['wz_num'],
                ]

        def pretty_print(self):
            """Print all filtered trains as one PrettyTable."""
            table = PrettyTable()
            # Public property instead of the private _set_field_names method.
            table.field_names = self.header
            for train in self.trains:
                table.add_row(train)
            # Print once, after every row has been added.
            print(table)
        
    def cli():
        """Parse the command line, query 12306 for tickets and print the result.

        ``docopt`` parses the module docstring and returns a dict such as::

            {'-d': True, '-g': True, '-k': False, '-t': False, '-z': False,
             '<date>': '2017-03-23', '<from>': '海口', '<to>': '北京'}

        Raises:
            SystemExit: when ``<from>`` or ``<to>`` is not a key of ``stations``.
        """
        arguments = docopt(__doc__)
        from_station = stations.get(arguments['<from>'])
        to_station = stations.get(arguments['<to>'])
        # Without this check an unknown name would be sent to the server as "None".
        if from_station is None or to_station is None:
            sys.exit('Unknown station name: check <from> and <to>.')
        date = arguments['<date>']
        # Concatenate the flags that were switched on (e.g. '-d-g');
        # TrainsCollections tests a train's type letter for membership in it.
        options = ''.join(k for k, v in arguments.items() if v is True)
        url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT'.format(date,from_station,to_station)
        # NOTE(review): verify=False turns off TLS certificate verification; it is
        # kept from the original script -- confirm whether it is still required.
        r = requests.get(url, verify=False, timeout=10)
        # Each entry of 'data' wraps the actual train record under 'queryLeftNewDTO'.
        available_tranins = [entry['queryLeftNewDTO'] for entry in r.json()['data']]
        TrainsCollections(available_tranins, options).pretty_print()
    
    # Run the command-line entry point only when this file is executed as a script.
    if __name__ == '__main__':
        cli()
  • 相关阅读:
    windows下面Nginx日志切割
    C#通过DocX创建word
    leetcode 189 Rotate Array
    leetcode 172 Factorial Trailing Zeroes
    leetcode 169 Majority Element 冰山查询
    leetcode 165 Compare Version Numbers
    leetcode 160 Intersection of Two Linked Lists
    【windows-》linux】SCP
    【设计】B端和C端区别
    【Flask】部署
  • 原文地址:https://www.cnblogs.com/linyouyi/p/11409910.html
Copyright © 2011-2022 走看看