zoukankan      html  css  js  c++  java
  • 1.4.1python下载网页(每天一更)

    # -*- coding: utf-8 -*-
    
    '''
    Created on 2019年4月27日
    
    @author: lenovo
    '''
    
    # import urllib3
    # def download(url):
    #     return urllib3.connection_from_url(url)
    # 
    # print(download('http://now.qq.com'))
    
    
    
    
    
    # 在 Python 3 中,urllib2 被 urllib.request 所取代
    
    # import urllib.request
    # def download(url):
    #     return urllib.request.urlopen(url).read()
    # 
    # print(download('https://baijiahao.baidu.com/s?id=1632775818269407606&wfr=spider&for=pc'))
    
    
    # import urllib.request
    # def download(url):
    #     print("Downloading:" + url)
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #     return html
    # 
    # print(download("htp://www.baidu.co"))
    
    
    # import urllib.request
    # def download(url, num_retries=2):
    #     try:
    #         html = urllib.request.urlopen(url).read()
    #     except urllib.request.URLError as e:
    #         print("Download error:" , e.reason)
    #         html = None
    #         if num_retries > 0 :
    #             if hasattr(e, "code") and 500 <= e.code < 600 :
    #                 return download(url, num_retries-1)
    #     return html
    #     
    # # print(download("http://httpstat.us/500"))
    # print(download("http://www.meetup.com/"))
    
    import urllib.request
    def download(url, user_agent="wswp", num_retries=2):
        """Fetch *url* and return the raw response body.

        Args:
            url: Address to fetch.
            user_agent: Value sent in the ``User-agent`` request header.
            num_retries: Number of additional attempts made when the failure
                carries a 5xx status code. ``0`` disables retrying.

        Returns:
            The response body as ``bytes``, or ``None`` when the request
            failed with a ``URLError`` and no (remaining) retry succeeded.

        Only ``URLError`` (including its ``HTTPError`` subclass) is handled
        here; any other exception propagates to the caller.
        """
        print("Downloading: " , url)
        headers = {'User-agent': user_agent}
        request = urllib.request.Request(url, headers=headers)
        try:
            # The context manager closes the response once the body is read.
            with urllib.request.urlopen(request) as response:
                return response.read()
        except urllib.request.URLError as e:
            print('Download error:' , e.reason)
            # Only errors that expose a 5xx ``code`` are retried; errors
            # without a status code and 4xx responses are returned as failures.
            server_error = hasattr(e, 'code') and 500 <= e.code < 600
            if num_retries > 0 and server_error:
                return download(url, user_agent, num_retries - 1)
            return None
    
    # Demo: fetch a sample page and show the result (bytes, or None on failure).
    page = download("http://www.meetup.com/")
    print(page)
  • 相关阅读:
    委托学习笔记一(调用委托)
    委托和事件— 一个虚构的故事
    WPF中控制窗口状态
    oracle创建DBLINK
    添加路由 route add
    通过脚本执行sql语句
    c#对声音系统的控制
    ProcessBuilder调用外部脚本
    sql server 重新编译所有视图
    silverlight 隐藏ChildWindow 右上角的关闭按钮
  • 原文地址:https://www.cnblogs.com/xww115/p/10822196.html
Copyright © 2011-2022 走看看