zoukankan      html  css  js  c++  java
  • urllib爬取实例

    # Percent-encode a (possibly non-ASCII) search keyword, join several query
    # parameters into one URL, download the result page and save it to disk as
    # "<keyword>.html".

    from urllib import parse
    from urllib import request

    BASE_URL = "http://www.baidu.com/s?"

    # The header name must be spelled "User-Agent". With the key "user_agent"
    # urllib sends an unrelated "User_agent" header and still adds its default
    # "Python-urllib/x.y" User-Agent, so the request is not disguised at all.
    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
    }


    def build_query_string(keyword):
        """Return the URL-encoded query string for *keyword*.

        ``urlencode`` percent-encodes the UTF-8 bytes of every value and joins
        the pairs with ``&``, e.g. ``wd=%E5%85%84%E5%BC%9F%E8%BF%9E&rsv_sp=1``.
        """
        return parse.urlencode({"wd": keyword, "rsv_sp": 1})


    def build_search_url(keyword):
        """Return the full search URL for *keyword*."""
        return BASE_URL + build_query_string(keyword)


    def fetch_html(url, timeout=10):
        """Download *url* and return the body decoded as UTF-8 text.

        Raises ``urllib.error.URLError`` on network failure (including a
        timeout after *timeout* seconds) and ``UnicodeDecodeError`` if the
        body is not valid UTF-8.
        """
        req = request.Request(url, headers=HEADERS)
        # The context manager closes the connection even if read() fails.
        with request.urlopen(req, timeout=timeout) as response:
            return response.read().decode("utf-8")


    def main():
        """Prompt for a keyword, fetch the search page and save it to a file."""
        content = input("请输入你要搜索的内容:")

        qs = build_query_string(content)
        print(qs)

        html = fetch_html(BASE_URL + qs)

        # NOTE: the keyword is used verbatim as the file name, so a keyword
        # containing a path separator will make open() fail.
        with open(content + ".html", "w", encoding="utf-8") as f:
            f.write(html)


    if __name__ == "__main__":
        main()
  • 相关阅读:
    bzoj1098 1301
    bzoj3237
    bzoj3170
    bzoj4008
    一些题解
    bzoj4028
    bzoj3196
    redis学习
    quartz学习
    电商618 压测、优化、降级预案
  • 原文地址:https://www.cnblogs.com/zhangboblogs/p/8542082.html
Copyright © 2011-2022 走看看