zoukankan      html  css  js  c++  java
  • xpath案例-全国城市名爬取

    #!/usr/bin/python
    
    import requests 
    from lxml import etree 
    # Project requirement: parse out all city names from https://www.aqistudy.cn/historydata/
    
    if __name__ == "__main__":
        # headers = {
        #     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
        # }
    
        # url = 'https://www.aqistudy.cn/historydata/'
        # page_text = requests.get(url=url,headers=headers).text
    
        # tree = etree.HTML(page_text)
        # host_li_list = tree.xpath('//div[@class="bottom"]/ul/li')
        # all_city_names = []
        # #解析到了热门城市的名称
        # for li in host_li_list:
        #     hot_city_name = li.xpath('./a/text()')[0]
        #     all_city_names.append(hot_city_name)
    
    
        # #解析的是全部城市的名称
        # city_names_list = tree.xpath('//div[@class="bottom"]/ul/div[2]/li')
        # for li in city_names_list:
        #     city_name = li.xpath('./a/text()')[0]
        #     all_city_names.append(city_name)
    
    
        # print(all_city_names,len(all_city_names))
    
    
    
    
        headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2626.106 Safari/537.36'
        }
    
        url = 'https://www.aqistudy.cn/historydata/'
        page_text = requests.get(url=url,headers=headers).text
    
        tree = etree.HTML(page_text)
        #解析到热门城市和所有城市对应的a标签
        #   //div[@class="bottom"]/ul/li/          热门城市a标签的层级关系
        #   //div[@class="bottom"]/ul/div[2]/li/a  所有城市a标签的层级关系
        a_list= tree.xpath('//div[@class="bottom"]/ul/li/a | //div[@class="bottom"]/ul/div[2]/li/a')
        all_city_names = []
        for a in a_list:
            city_name = a.xpath('./text()')[0]
            all_city_names.append(city_name)
        print(all_city_names,len(all_city_names))
    
  • 相关阅读:
    C++ delete file
    C++ get file size
    C++ file copy
    C++跨类调用类成员的方法之一
    Linux下C语言实现回调函数的例子
    error: atomic: 没有那个文件或目录
    libpng warning: iCCP: known incorrect sRGB profile告警处理
    picker多级选择器的使用————小程序
    JQ的简单使用(基础)——————JQ
    选择器与过滤器(全)————JQ
  • 原文地址:https://www.cnblogs.com/gerenboke/p/13389036.html
Copyright © 2011-2022 走看看