zoukankan      html  css  js  c++  java
  • python xpath基础 02

    from lxml import etree
    
    # Parse the local file test.html with lxml's HTML parser. Every XPath
    # example below is evaluated against the resulting tree. The examples are
    # left commented out so they can be enabled one at a time; the outputs
    # shown are the ones recorded for the sample test.html used by the author.
    html = etree.parse('./test.html', etree.HTMLParser())

    # --- Select every <li> node in the document; xpath() returns a list.
    # result = html.xpath('//li')
    # print(result)
    # print(result[0])
    #
    # Output:
    # [<Element li at 0x119b71b88>, <Element li at 0x119b71bc8>, <Element li at 0x119b71c08>, <Element li at 0x119b71c48>, <Element li at 0x119b71c88>]
    # <Element li at 0x119b71b88>

    # --- Select all <li> elements whose class attribute equals "item-0"
    # --- (the result is a list of elements).
    # result = html.xpath('//li[@class="item-0"]')
    # print(result)
    #
    # Output:
    # [<Element li at 0x1162f0d08>, <Element li at 0x1162f0d48>]

    # --- Text nodes that are direct children of the <li class="item-0">
    # --- elements. Only the whitespace after the nested <a> is a direct
    # --- child, so the link text itself is not returned.
    # result = html.xpath('//li[@class="item-0"]/text()')
    # print(result)
    #
    # Output:
    # ['\n     ']

    # --- Text of the <a> elements directly inside the <li class="item-0">
    # --- elements.
    # result = html.xpath('//li[@class="item-0"]/a/text()')
    # print(result)
    #
    # Output:
    # ['first item', 'fifth item']

    # --- All descendant text nodes of the <li class="item-0"> elements
    # --- (the link text plus the trailing whitespace).
    # result = html.xpath('//li[@class="item-0"]//text()')
    # print(result)
    #
    # Output:
    # ['first item', 'fifth item', '\n     ']

    # --- href attribute of every <a> that is a direct child of an <li>.
    # result = html.xpath('//li/a/@href')
    # print(result)
    #
    # Output:
    # ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']

    # --- class attribute of the parent of the <a href="link4.html"> element,
    # --- reaching the parent with "..".
    # result = html.xpath('//a[@href="link4.html"]/../@class')
    # print(result)
    #
    # Output:
    # ['item-1']

    # --- Same query, using the explicit parent:: axis instead of "..".
    # result = html.xpath('//a[@href="link4.html"]/parent::*/@class')
    # print(result)
    #
    # Output:
    # ['item-1']

    # --- class attribute of every <li>.
    # result = html.xpath('//li/@class')
    # print(result)
    #
    # Output:
    # ['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0']

    # --- Every <a> that is a direct child of an <li>.
    # result = html.xpath('//li/a')
    # print(result)
    #
    # Output:
    # [<Element a at 0x113e35c88>, <Element a at 0x113e35cc8>,
    #  <Element a at 0x113e35d08>, <Element a at 0x113e35d48>, <Element a at 0x113e35d88>]

    # --- Every <a> that is a descendant (at any depth) of a <ul>.
    # result = html.xpath('//ul//a')
    # print(result)
    #
    # Output:
    # [<Element a at 0x117874c88>, <Element a at 0x117874cc8>, <Element a at 0x117874d08>,
    #  <Element a at 0x117874d48>, <Element a at 0x117874d88>]

    # --- Every <a> that is a DIRECT child of a <ul>. The recorded result is
    # --- empty: "/" matches direct children only, while "//" above matches
    # --- descendants at any depth.
    # result = html.xpath('//ul/a')
    # print(result)
    #
    # Output:
    # []
  • 相关阅读:
    Flask&&人工智能AI -- 12
    Flask&&人工智能AI -- 11
    Flask&&人工智能AI -- 10
    Flask&&人工智能AI -- 9
    Flask&&人工智能AI -- 8
    Flask&&人工智能AI -- 8 HTML5+ 初识,HBuilder,夜神模拟器,Webview
    Flask&&人工智能AI -- 7 MongoDB
    Flask&&人工智能AI -- 6 人工智能初识,百度AI,图灵机器人
    Flask&&人工智能AI --5 Flask-session、WTForms、数据库连接池、Websocket
    [转]八款开源Android游戏引擎
  • 原文地址:https://www.cnblogs.com/liangliangzz/p/10176267.html
Copyright © 2011-2022 走看看