zoukankan      html  css  js  c++  java
  • xpath的基本用法

    from lxml import etree

    # Inline sample document used by the examples below.
    # Note: the last <li> is missing its closing tag.
    text = """
    <div>
    <ul>
    <li class="item-0"><a href="link1,html">one</a></li>
    <li class="item-1"><a href="link1,html">two</a></li>
    <li class="item-inactive"><a href="link1,html">three</a></li>
    <li class="item-1"><a href="link1,html">four</a></li>
    <li class="item-0"><a href="link1,html">five</a>
    </ul>
    </div>
    """
    # Build a tree from the inline string first so there is always a
    # document to query.
    html = etree.HTML(text)
    # Read from a file instead when test.html is available (this is what the
    # script queried originally). If the file cannot be read, keep the tree
    # parsed from the inline sample rather than aborting the whole demo.
    try:
        html = etree.parse('test.html', etree.HTMLParser())
    except OSError:
        # test.html is missing or unreadable: fall back to the inline sample.
        pass
    result = etree.tostring(html)  # serialized document, as bytes
    # print(result)
    # print(result.decode('utf-8'))  # decoded to str

    # Matching nodes
    # rp1 = html.xpath('//*')  # every node in the document
    # print(rp1)

    # rp2 = html.xpath('//li')  # every <li> node
    # print(rp2)

    # Matching text
    # Text of the <a> that is a direct child of each <li class="item-0">.
    rp3 = html.xpath("//li[@class='item-0']/a/text()")
    print(rp3)
    # Every text node anywhere beneath each <li class="item-0">.
    rp4 = html.xpath("//li[@class='item-0']//text()")
    print(rp4)
    # Reading attributes: the href of every <a> below an <li>.
    rp5 = html.xpath('//li//a/@href')
    print(rp5)

    # An attribute holding several values must be matched with contains().
    # NOTE: no class in the inline sample contains '-first', so against that
    # sample this query returns an empty list.
    rp6 = html.xpath("//li[contains(@class, '-first')]/a/text()")
    print(rp6)

    # Matching on several attributes at once: join the conditions with `and`.
    # NOTE: the inline sample has no `name` attribute, so against that sample
    # this query returns an empty list.
    rp7 = html.xpath("//li[contains(@class, 'li') and @name='item']//text()")
    print(rp7)
    # Selecting by order inside the predicate: a number, position(), last(), etc.
    rp8 = html.xpath("//li[1]/a/text()")  # first <li>
    rp9 = html.xpath("//li[position()>2]/a/text()")  # third <li> onwards
    rp10 = html.xpath("//li[last()-1]/a/text()")  # second-to-last <li>
    print(rp8)
    print(rp9)
    print(rp10)
    

      

    你不能把坏习惯扔出窗外 但你可以一步步赶下电梯
  • 相关阅读:
    软件测试工具
    Linux笔记
    Google Test
    字典dict()
    元组tuple 可迭代对象
    列表list
    一些总结
    format()
    列表list
    format() expandtabs() 输入表格数据
  • 原文地址:https://www.cnblogs.com/Ychao/p/9372216.html
Copyright © 2011-2022 走看看