zoukankan      html  css  js  c++  java
  • python xpath基础 02

    from lxml import etree
    
    # Parse the local file ./test.html with lxml's HTML parser; the
    # commented-out XPath examples below query the resulting tree.
    html = etree.parse('./test.html', parser=etree.HTMLParser())
    # result = html.xpath('//li')#选取所有的li节点,是一个列表的形式
    # print(result)
    # print(result[0])
    #
    #
    # #输出结果
    # '''
    # [<Element li at 0x119b71b88>, <Element li at 0x119b71bc8>, <Element li at 0x119b71c08>, <Element li at 0x119b71c48>, <Element li at 0x119b71c88>]
    # <Element li at 0x119b71b88>
    #
    # '''
    
    # result = html.xpath('//li[@class="item-0"]')
    #
    # '''
    # 选取当前文档所有属性
    #
    # class的值等于item-0的li标签内容的列表
    #
    # '''
    # print(result)
    # '''
    # 输出结果:
    # [<Element li at 0x1162f0d08>, <Element li at 0x1162f0d48>]
    # '''
    # result = html.xpath('//li[@class="item-0"]/text()')
    # '''
    # 选取当前文档所有属性
    #
    # class的值等于item-0的li标签里面的文本内容
    #
    # '''
    # print(result)
    #
    # '''
    # 输出结果:
    # ['\n     ']
    # '''
    # result = html.xpath('//li[@class="item-0"]/a/text()')
    # '''
    # 选取当前文档所有属性
    #
    # class的值等于item-0的li标签里面的a标签里面的文本内容
    #
    # '''
    # print(result)
    #
    # '''
    # 输出结果:
    # ['first item', 'fifth item']
    # '''
    # result = html.xpath('//li[@class="item-0"]//text()')
    # print(result)
    # '''
    # 输出结果是:['first item', 'fifth item', '\n     ']
    #
    # '''
    # result = html.xpath('//li/a/@href')
    # print(result)
    # '''
    # 输出结果:['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
    #
    # '''
    # result = html.xpath('//a[@href="link4.html"]/../@class')
    
    # print(result)
    # '''
    # 输出结果:['item-1']
    #
    # '''
    # result = html.xpath('//a[@href="link4.html"]/parent::*/@class')
    # print(result)
    # '''
    # 输出结果:
    # ['item-1']
    #
    # '''
    #
    # result=html.xpath('//li/@class')
    # print(result)
    # '''
    # 输出结果:['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0']
    #
    # '''
    # result = html.xpath('//li/a')
    # print(result)
    # '''
    # 输出结果:
    # [<Element a at 0x113e35c88>, <Element a at 0x113e35cc8>,
    #  <Element a at 0x113e35d08>, <Element a at 0x113e35d48>, <Element a at 0x113e35d88>]
    # '''
    # result = html.xpath('//ul//a')
    # print(result)
    # '''
    # 输出结果:[<Element a at 0x117874c88>, <Element a at 0x117874cc8>, <Element a at 0x117874d08>,
    # <Element a at 0x117874d48>, <Element a at 0x117874d88>]
    #
    # '''
    # result = html.xpath('//ul/a')
    # print(result)
    # '''
    # 输出结果:[]
    # '''
  • 相关阅读:
    TextBox 只有下划线
    can't find web control library(web控件库)
    “DropDownListSalesAC”有一个无效 SelectedValue,因为它不在项目列表中。
    IDE、SATA、SCSI、SAS、FC、SSD 硬盘类型
    如何打印1px表格
    CSS控制打印 分页
    Virtual Server could not open its emulated Ethernet switch driver. To fix this problem, reenable the Virtual Server Emulated Et
    Xml中SelectSingleNode方法中的xpath用法
    热带水果莫入冰箱?水果存放冰箱大法
    探索Asp.net的Postback机制
  • 原文地址:https://www.cnblogs.com/liangliangzz/p/10176267.html
Copyright © 2011-2022 走看看