zoukankan      html  css  js  c++  java
  • 1.4.1python下载网页(每天一更)

    # -*- coding: utf-8 -*-
    
    '''
    Created on 2019年4月27日
    
    @author: lenovo
    '''
    
    # import urllib3
    # def download(url):
    #     return urllib3.connection_from_url(url)
    # 
    # print(download('http://now.qq.com'))
    
    
    
    
    
    # 在Python 3中,urllib2被urllib.request所取代
    
    # import urllib.request
    # def download(url):
    #     return urllib.request.urlopen(url).read()
    # 
    # print(download('https://baijiahao.baidu.com/s?id=1632775818269407606&wfr=spider&for=pc'))
    
    
    # import urllib.request
    # def download(url):
    #     print("Downloading:" + url)
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #     return html
    # 
    # print(download("htp://www.baidu.co"))
    
    
    # import urllib.request
    # def download(url, num_retries=2):
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #         if num_retries > 0 :
    #             if hasattr(e, "code") and 500 <= e.code < 600 :
    #                 return download(url, num_retries-1)
    #     return html
    #     
    # # print(download("http://httpstat.us/500"))
    # print(download("http://www.meetup.com/"))
    
    import urllib.request
    def download(url, user_agent="wswp", num_retries=2):
        """Download the page at *url* and return its raw body.

        Args:
            url: Address of the page to fetch.
            user_agent: Value sent in the ``User-agent`` request header.
            num_retries: Number of additional attempts made when the request
                fails with a 5xx HTTP status code. Other failures are not
                retried.

        Returns:
            The response body as returned by ``read()``, or ``None`` when the
            download failed and no retry succeeded.
        """
        print("Downloading: " , url)
        headers = {'User-agent': user_agent}
        request = urllib.request.Request(url, headers=headers)
        try:
            # The context manager closes the response (and its socket) as soon
            # as the body has been read instead of leaving it to the GC.
            with urllib.request.urlopen(request) as response:
                return response.read()
        except urllib.request.URLError as e:
            print('Download error:' , e.reason)
            # Only errors carrying a 5xx ``code`` are retried; 4xx responses and
            # errors without a status code are treated as final.
            # ``num_retries`` counts the retries still allowed, so the last
            # retry happens when it equals 1 (hence ``> 0``).
            if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
                return download(url, user_agent, num_retries - 1)
        return None
    
    # Demo: fetch the Meetup home page and print what download() returns
    # (the raw response body, or None if the download failed).
    print(download("http://www.meetup.com/"))
  • 相关阅读:
    Cordova插件:InAppBrowser
    Redux入门学习
    【转】浅谈React、Flux 与 Redux
    .Net学习难点讨论系列17
    《集体智慧编程》读书笔记4
    《集体智慧编程》读书笔记3
    《集体智慧编程》读书笔记2
    《集体智慧编程》读书笔记1
    C#与C++的发展历程第四
    C#与C++的发展历程第三
  • 原文地址:https://www.cnblogs.com/xww115/p/10822196.html
Copyright © 2011-2022 走看看