zoukankan      html  css  js  c++  java
  • 1.4.1python下载网页(每天一更)

    # -*- coding: utf-8 -*-
    
    '''
    Created on 2019年4月27日
    
    @author: lenovo
    '''
    
    # import urllib3
    # def download(url):
    #     return urllib3.connection_from_url(url)
    # 
    # print(download('http://now.qq.com'))
    
    
    
    
    
    # 在 Python 3 中，urllib2 已被 urllib.request 所取代
    
    # import urllib.request
    # def download(url):
    #     return urllib.request.urlopen(url).read()
    # 
    # print(download('https://baijiahao.baidu.com/s?id=1632775818269407606&wfr=spider&for=pc'))
    
    
    # import urllib.request
    # def download(url):
    #     print("Downloading:" + url)
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #     return html
    # 
    # print(download("htp://www.baidu.co"))
    
    
    # import urllib.request
    # def download(url, num_retries=2):
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #         if num_retries > 0 :
    #             if hasattr(e, "code") and 500 <= e.code < 600 :
    #                 return download(url, num_retries-1)
    #     return html
    #     
    # # print(download("http://httpstat.us/500"))
    # print(download("http://www.meetup.com/"))
    
    import urllib.request
    def download(url, user_agent="wswp", num_retries=2):
        """Download the page at ``url`` and return its body.

        Args:
            url: Address of the page to fetch.
            user_agent: Value sent in the ``User-agent`` request header.
            num_retries: Number of additional attempts to make when the
                server answers with a 5xx status code. ``0`` disables retrying.

        Returns:
            The response body as returned by ``read()``, or ``None`` when the
            download failed and no retries remain (or the error is not a 5xx).
        """
        print("Downloading: " , url)
        headers = {'User-agent': user_agent}
        request = urllib.request.Request(url, headers=headers)
        try:
            # The context manager closes the connection once the body is read.
            with urllib.request.urlopen(request) as response:
                return response.read()
        except urllib.request.URLError as e:
            print('Download error:' , e.reason)
            # Only server-side (5xx) errors are retried; errors without an
            # HTTP status code or with a 4xx code will not succeed on a retry.
            # Compare against 0 so that ``num_retries`` really is the number
            # of extra attempts (2 by default), not one fewer.
            if num_retries > 0:
                if hasattr(e, 'code') and 500 <= e.code < 600:
                    return download(url, user_agent, num_retries - 1)
        return None
    
    # Fetch the Meetup home page and print whatever download() returned.
    page = download("http://www.meetup.com/")
    print(page)
  • 相关阅读:
    软件工程5
    软件工程3
    软件工程4
    软件工程2
    2020软件工程作业01
    2020软件工程个人作业06——软件工程实践总结作业
    个人作业——04
    清风不知道——冲刺日志(第一天)
    清风不知道——凡是预则立
    2020软件工程作业05
  • 原文地址:https://www.cnblogs.com/xww115/p/10822196.html
Copyright © 2011-2022 走看看