zoukankan      html  css  js  c++  java
  • urllib url解析学习

    #!/usr/bin/env python  
    # encoding: utf-8  
    from urllib.parse import *
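    # urlparse:   split a URL into six parts (scheme, netloc, path, params, query, fragment)
    # urlsplit:   like urlparse, but does not split params out of the path (five parts)
    # urlunsplit: takes a single iterable of exactly five items and joins them back into a URL
    # urljoin:    resolve a (possibly relative) URL against a base URL
    # parse_qs:   parse a query string into a dict mapping each name to a list of values
    # parse_qsl:  parse a query string into a list of (name, value) tuples
    # urlencode:  encode GET request parameters into a query string
    # quote:      percent-encode a string, e.g. when the URL contains Chinese characters
    # unquote:    decode a percent-encoded string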
    # Sample Baidu search URL used throughout this demo. The literal is split
    # across lines with implicit string concatenation purely for readability;
    # the resulting value is a single URL string.
    URL = (
        "http://www.baidu.com/s"
        "?wd=falsk%20%E6%95%B0%E6%8D%AE%E6%93%8D%E4%BD%9C"
        "&rsv_spt=1&rsv_iqid=0x869d926300006682&issp=1&f=8&rsv_bp=1&rsv_idx=2"
        "&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&oq=kwargs&inputT=6623"
        "&rsv_t=121540vUUWCKfciS1Ir0wlIBRBImaOVodO0fliZnJ%2BDCLWcUQ%2BHb5lhrg204XikhTHMY"
        "&rsv_pq=ef5d22fb0001200c&sug=python%2520utf-8"
        "&rsv_sug3=125&rsv_sug1=36&rsv_sug7=100&rsv_sug2=0&rsv_sug4=7658"
    )

    # Parse the URL into its components. `scheme` is only a fallback that is
    # used when the URL itself carries no scheme (http/https); this URL already
    # has one, so the empty default has no effect here.
    result1 = urlparse(URL, scheme='', allow_fragments=True)
    # For this URL: scheme='http', netloc='www.baidu.com', path='/s',
    # params='', fragment='', and query holds everything after the '?'.

    # Rebuild only the site root: keep scheme and netloc, and leave the path,
    # query and fragment slots empty (urlunsplit expects exactly five items).
    origin_parts = (result1.scheme, result1.netloc, "", "", "")
    print(urlunsplit(origin_parts))
    # Output: http://www.baidu.com
    

      

  • 相关阅读:
    应用默认编码不对的问题定位
    以http server为例简要分析netty3实现
    用qemu+gdb tcp server+CDT调试linux内核启动-起步
    用virtualbox+模拟串口+CDT调试linux内核 TCP/IP协议栈-起步
    【转】常见容错机制
    python文档注释参数获取
    scrapy爬取图片
    xpath语法
    python爬虫爬取赶集网数据
    爬虫小总结
  • 原文地址:https://www.cnblogs.com/c-x-a/p/9019228.html
Copyright © 2011-2022 走看看