zoukankan      html  css  js  c++  java
  • 模仿浏览器访问网页

    基于 Python 2(使用 urllib2 模块;在 Python 3 中对应的是 urllib.request)

    # -*- coding: utf-8 -*-
    import urllib2
    import random
    # Pool of desktop browser User-Agent strings to choose from.
    # Source list: http://www.360doc.com/content/12/1012/21/7662927_241124973.shtml
    # NOTE: product tokens are written "Name/version" with no spaces around the
    # slash; a spaced form such as "Mozilla / 5.0" matches no real browser.
    user_agents = [
        # Safari 5.1 - Mac
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        # Safari 5.1 - Windows
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        # IE 9.0 (closing parenthesis restored; the source list had it truncated)
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        # IE 8.0
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
        # IE 7.0
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)',
        # IE 6.0
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        # Firefox 4.0.1 - Mac
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        # Firefox 4.0.1 - Windows
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        # Opera 11.11 - Mac
        'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
        # Opera 11.11 - Windows
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
        # Chrome 17.0 - Mac
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        # Maxthon
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)',
        # Tencent TT
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)',
        # The World 2.x
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        # The World 3.x
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)',
        # Sogou Browser 1.x
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
        # 360 Browser
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
        # Avant
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',
        # Green Browser (same string as The World 2.x in the source list)
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
    ]
    
    # Page to fetch. '****' looks like a placeholder; substitute the real blog path.
    url = 'http://blog.csdn.net/****'

    # Request headers that make the request look like it comes from a browser.
    # The HTTP method and target go on the request line, which urllib2 builds
    # from the URL itself, so there is no 'GET' entry among the headers.
    my_headers = {
        # A browser identity is picked at random each time this module runs.
        'User-Agent': random.choice(user_agents),
        'Host': 'blog.csdn.net',
        'Referer': 'http://blog.csdn.net/',
    }
    
    
    def get_content(url_add, headers):
        """Download a page and return its body.

        Args:
            url_add: URL of the page to request.
            headers: Dict mapping HTTP header names to values; sent with the
                request.

        Returns:
            The response body exactly as returned by the response's ``read()``.

        Raises:
            urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
                request fails (``urllib2.HTTPError`` is a subclass).
        """
        # Request the URL the caller passed in, not the module-level ``url``.
        req = urllib2.Request(url_add, headers=headers)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            # urllib2 responses cannot be used in a ``with`` block on
            # Python 2, so release the connection explicitly.
            response.close()
    
    # Fetch the configured page and write its raw content to standard output.
    print(get_content(url, my_headers))
    
  • 相关阅读:
    【Java每日一题】20161227
    【Java每日一题】20161226
    【Java每日一题】20161223
    【Java每日一题】20161222
    【Java每日一题】20161221
    【Java每日一题】20161220
    【Java每日一题】20161219
    【Java每日一题】20161216
    【Java每日一题】20161215
    【Java每日一题】20161214
  • 原文地址:https://www.cnblogs.com/keer2345/p/6008844.html
Copyright © 2011-2022 走看看