// 双斜杠:从根节点开始对整个文档进行扫描,选取文档中所有符合条件的节点,并以列表的形式返回。
// 从当前匹配的节点开始选取文档中的节点,而不考虑它们在文档中的位置。
# !/usr/bin/env python
# -*- coding: utf-8 -*-
from lxml import etree
# 获取文件元素
from lxml import etree
# 获取文件元素
# Parse the XML file into an element tree ('text.xml' is resolved relative
# to the current working directory).
htmlEmt = etree.parse('text.xml')
# Select every <li> element anywhere in the document; the XPath query
# returns the matches as a list.
result = htmlEmt.xpath('//li')
print(result)
print(type(result))
for x in result:
    # For each matched element, show the element object, its type, and
    # its text content. Single-argument print(...) calls behave the same
    # on Python 2 and Python 3.
    print(x)
    print(type(x))
    print(x.text)
C:\Python27\python.exe C:/Users/TLCB/PycharmProjects/untitled/xpath/l1.py
[<Element li at 0x26da9e0>, <Element li at 0x26da198>, <Element li at 0x26da030>, <Element li at 0x26da0d0>, <Element li at 0x26da238>]
<type 'list'>
<Element li at 0x26da9e0>
<type 'lxml.etree._Element'>
a01
<Element li at 0x26da198>
<type 'lxml.etree._Element'>
b02
<Element li at 0x26da030>
<type 'lxml.etree._Element'>
c03
<Element li at 0x26da0d0>
<type 'lxml.etree._Element'>
d04
<Element li at 0x26da238>
<type 'lxml.etree._Element'>
e05
Process finished with exit code 0