zoukankan      html  css  js  c++  java
  • python爬虫学习(3):使用User-Agent和代理ip

    使用User-Agent
    方法一,先建立head,作为参数传进去

    import json
    import urllib.parse
    import urllib.request

    # Approach 1: build the header dict first and hand it to Request().
    # urllib.parse is imported explicitly because urlencode() lives there;
    # relying on urllib.request to pull it in is an implementation detail.

    content = input("请输入需要翻译的内容:")
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

    # Form fields sent as the POST body. 'salt' and 'sign' are fixed values
    # here -- presumably captured from a browser session; TODO confirm the
    # server still accepts them.
    form = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '1520575049536',
        'sign': '4514c46c320493ba8c034eaa8d9decaf',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
        'typoResult': 'false',
        'ue': 'utf-8',
    }
    # urlopen() needs the POST body as bytes, so encode the urlencoded string.
    data = urllib.parse.urlencode(form).encode('utf-8')

    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36',
    }

    # Request accepts the headers (which must be a dict) as its third
    # positional parameter, after the URL and the POST data.
    req = urllib.request.Request(url, data, head)
    # The with-block closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    target = json.loads(html)
    print("翻译结果:%s" % target['translateResult'][0][0]['tgt'])

    
    

    ################################################################################

    方法二,先建立Request对象,再使用add_header添加User-Agent


    import json
    import urllib.parse
    import urllib.request

    # Approach 2: create the Request first, then attach the User-Agent with
    # Request.add_header(). urllib.parse is imported explicitly because
    # urlencode() lives there; relying on urllib.request to pull it in is an
    # implementation detail.

    content = input("请输入需要翻译的内容:")
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

    # Form fields sent as the POST body. 'salt' and 'sign' are fixed values
    # here -- presumably captured from a browser session; TODO confirm the
    # server still accepts them.
    form = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '1520575049536',
        'sign': '4514c46c320493ba8c034eaa8d9decaf',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
        'typoResult': 'false',
        'ue': 'utf-8',
    }
    # urlopen() needs the POST body as bytes, so encode the urlencoded string.
    data = urllib.parse.urlencode(form).encode('utf-8')

    req = urllib.request.Request(url, data)
    # Add the header after construction instead of passing a dict up front.
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36',
    )
    # The with-block closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    target = json.loads(html)
    print("翻译结果:%s" % target['translateResult'][0][0]['tgt'])

    
    

    ###########################################################################################

    #使用代理ip

    # `import urllib` alone does not load the `request` submodule, so import
    # it explicitly; this still binds the name `urllib`.
    import urllib.request

    def main():
        """Fetch a page through an HTTP proxy and print the decoded HTML."""
        # URL to visit
        url = 'http://www.whatismyip.com.tw/'
        # Proxy address, keyed by URL scheme
        proxy = {'http': '106.46.136.112:808'}
        # Create the ProxyHandler
        proxy_support = urllib.request.ProxyHandler(proxy)
        # Build an opener that uses the proxy handler
        opener = urllib.request.build_opener(proxy_support)
        # Add the User-Agent header to the opener
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
        # Install the opener so that urllib.request.urlopen() uses it
        urllib.request.install_opener(opener)
        # Open the URL with the installed opener; the with-block closes the
        # response once the body has been read.
        with urllib.request.urlopen(url) as response:
            # Read the response body and decode it
            html = response.read().decode('utf-8')
        print(html)

    # The guard must compare against '__main__'; comparing against
    # '__name__' is never true, so main() would never run.
    if __name__ == '__main__':
        main()

     
  • 相关阅读:
    嵌入式工程师C语言面试常见的0x10个问题
    C语言初学者网站推荐
    strlen和sizeof
    基于Docker搭建GitLab和Maven私服
    linux暴露端口可以被外部访问
    MySQL新增用户及赋予权限
    Docker添加域名解析
    Netstat 网络命令详解
    Mysql索引太长导致同步数据结构失败解决方法
    完美解决Cannot download "https://github.com/sass/node-sass/releases/download/binding.nod的问题
  • 原文地址:https://www.cnblogs.com/imzscilovecode/p/8537946.html
Copyright © 2011-2022 走看看