zoukankan      html  css  js  c++  java
  • 爬虫day 04_01(爬百度页面)

    import urllib.request
    import http.cookiejar
    from lxml import etree
    head = {
        'Connection': 'Keep-Alive',
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    }
    # 给opener加上cookie
    def makeMyOpener(head):
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
        header = []
        for key, value in head.items():
            elem = (key, value)
            header.append(elem)
        opener.addheaders = header
        return opener
    # 通过cookie 爬百度
    oper=makeMyOpener(head)
    url="https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=1&rsv_idx=1&tn=baidu&wd=python%20str%20%E8%BD%AC%20int&oq=python%2520str%2520%25E8%25BD%25AC%2520int&rsv_pq=c24aa0760000154b&rsv_t=c323uk7fLXupzfPqhHcqM%2F6l8k7Re4K90ZvzI33LDwW0kHYMiSED9rhKzCg&rqlang=cn&rsv_enter=0&prefixsug=python%2520str%2520%25E8%25BD%25AC%2520int&rsp=0"
    uop=oper.open(url,timeout=1000)
    data=uop.read()
    html=data.decode();
    print(html)
  • 相关阅读:
    Django(四)
    Django(三)
    Django(二)
    Django 基础篇
    jQuery
    JDK,JRE,JVM区别与联系
    webdriver API中文文档
    selenium及webdriver的原理
    JAVA IO流结构图
    抽象工厂与工厂方法的区别
  • 原文地址:https://www.cnblogs.com/qieyu/p/7818516.html
Copyright © 2011-2022 走看看