zoukankan      html  css  js  c++  java
  • 1.4.1python下载网页(每天一更)

    # -*- coding: utf-8 -*-
    
    '''
    Created on 2019年4月27日
    
    @author: lenovo
    '''
    
    # import urllib3
    # def download(url):
    #     return urllib3.connection_from_url(url)
    # 
    # print(download('http://now.qq.com'))
    
    
    
    
    
    # In Python 3, urllib2 has been replaced by urllib.request
    
    # import urllib.request
    # def download(url):
    #     return urllib.request.urlopen(url).read()
    # 
    # print(download('https://baijiahao.baidu.com/s?id=1632775818269407606&wfr=spider&for=pc'))
    
    
    # import urllib.request
    # def download(url):
    #     print("Downloading:" + url)
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #     return html
    # 
    # print(download("htp://www.baidu.co"))
    
    
    # import urllib.request
    # def download(url, num_retries=2):
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #         if num_retries > 0 :
    #             if hasattr(e, "code") and 500 <= e.code < 600 :
    #                 return download(url, num_retries-1)
    #     return html
    #     
    # # print(download("http://httpstat.us/500"))
    # print(download("http://www.meetup.com/"))
    
    import urllib.request
    def download(url, user_agent="wswp", num_retries=2):
        """Download the page at *url* and return its raw body.

        The request is sent with a custom ``User-agent`` header. When the server
        answers with a 5xx status the download is attempted again, up to
        *num_retries* additional times; any other failure gives up immediately.

        Args:
            url: Address of the page to fetch.
            user_agent: Value sent in the ``User-agent`` request header.
            num_retries: Maximum number of retries after a 5xx response.
                Zero or a negative value means a single attempt.

        Returns:
            The response body as returned by ``read()``, or ``None`` when the
            download failed.
        """
        headers = {'User-agent': user_agent}
        # One initial attempt plus at most ``num_retries`` retries.
        for _ in range(max(num_retries, 0) + 1):
            print("Downloading: " , url)
            request = urllib.request.Request(url, headers=headers)
            try:
                # The context manager closes the connection once the body is read.
                with urllib.request.urlopen(request) as response:
                    return response.read()
            except urllib.request.URLError as e:
                print('Download error:' , e.reason)
                # Only HTTP errors carry a status code; 5xx means the server
                # itself failed, so trying again may succeed.
                server_error = hasattr(e, 'code') and 500 <= e.code < 600
                if not server_error:
                    return None
        # Every attempt ended in a 5xx error.
        return None
    
    # Demo: fetch the Meetup home page and print whatever download() returns
    # (the raw response body, or None if the download failed).
    print(download("http://www.meetup.com/"))
  • 相关阅读:
    asd
    自勉
    php cli 参数
    编译
    WebMisSharp的协同开发
    WebMisDeveloper4.5.0 基于Ext.net,Extjs,MVC/简单三层的自动生成开发利器
    WebMisDeveloper更新专版
    外设驱动库开发笔记37:S13365BQ光敏二极管作为光度计驱动
    Java学习笔记之多线程 生产者 消费者
    Java学习笔记之IO流&Properties集合
  • 原文地址:https://www.cnblogs.com/xww115/p/10822196.html
Copyright © 2011-2022 走看看