zoukankan      html  css  js  c++  java
  • python基本语法1.4--初识爬虫

    import requests
    import time
    import xml.etree.ElementTree as ET
    
    from multiprocessing.dummy import Pool as ThreadPoo;
    from xml.parsers.expat import ParserCreate
    
    class DefaultSaxHandler(object):
        """Expat callback holder that collects ``(title, href)`` pairs.

        Every start tag other than ``map`` that carries both a ``title`` and an
        ``href`` attribute is appended, as a ``(title, href)`` tuple, to the
        list handed to the constructor.
        """

        def __init__(self, provinces):
            """Keep a reference to the caller-owned list that receives results."""
            self.provinces = provinces

        def start_element(self, name, attrs):
            """Record the element's title and href unless it is the ``map`` tag.

            ``attrs`` is expected to be the attribute mapping that expat passes
            to a start-element handler. Elements missing either attribute are
            skipped instead of raising ``KeyError`` inside the parser callback.
            """
            if name == 'map':
                return
            title = attrs.get('title')
            href = attrs.get('href')
            if title is None or href is None:
                return
            self.provinces.append((title, href))

        def end_element(self, name):
            """Ignore end tags; nothing needs to be closed out."""
            pass

        def char_data(self, text):
            """Ignore character data; only attributes are of interest."""
            pass
        
    def get_provinces(url):
        """Download ``url`` and return the ``(title, href)`` pairs of its image map.

        The response bytes are decoded as GB2312, the fragment running from
        ``<map name="map_86" id="map_86">`` to the following ``</map>`` is cut
        out and printed, and that fragment is fed to an expat parser whose
        callbacks come from ``DefaultSaxHandler``.

        Returns:
            The list filled in by the handler, or an empty list when the map
            fragment cannot be located in the page.

        Raises:
            UnicodeDecodeError: if the page is not valid GB2312.
            xml.parsers.expat.ExpatError: if the fragment is not well-formed XML.
            Whatever ``requests.get`` raises on connection failure or timeout.
        """
        open_tag = '<map name="map_86" id="map_86">'
        close_tag = '</map>'

        # A timeout keeps an unresponsive server from blocking forever.
        content = requests.get(url, timeout=10).content.decode('gb2312')

        provinces = []
        start = content.find(open_tag)
        if start == -1:
            return provinces
        # Search after the opening tag so an earlier, unrelated </map> on the
        # page cannot produce an empty or wrong slice.
        end = content.find(close_tag, start)
        if end == -1:
            return provinces

        content = content[start:end + len(close_tag)].strip()
        print(content)

        handler = DefaultSaxHandler(provinces)
        parser = ParserCreate()
        parser.StartElementHandler = handler.start_element
        parser.EndElementHandler = handler.end_element
        parser.CharacterDataHandler = handler.char_data
        # The whole document is passed in one call, so mark it as final to let
        # expat report a truncated fragment.
        parser.Parse(content, True)
        return provinces
    
    # Script driver: scrape the postal-code index page and show what was found.
    post_index_url = 'http://www.ip138.com/post'
    provinces = get_provinces(post_index_url)
    print(provinces)
  • 相关阅读:
    Excel Sheet Column Title
    Add Two Numbers
    Add Binary
    Excel Sheet Column Number
    Lowest Common Ancestor of a Binary Search Tree
    Invert Binary Tree
    Move Zeroes
    Contains Duplicate
    Maximum Depth of Binary Tree
    Java实现二叉树的构建与遍历
  • 原文地址:https://www.cnblogs.com/xiaoyingying/p/7689841.html
Copyright © 2011-2022 走看看