zoukankan html css js c++ java

lxml获取节点属性并转换为字典参数

#-*- coding: UTF-8 -*-

from lxml import etree

# Sample HTML fragment used by the demo below.
# NOTE(review): the closing </div> is missing from the fixture; lxml's HTML
# parser is documented as recovering from broken markup, so parsing still
# succeeds -- left unchanged on purpose.
source = u'''
<div><p class="p1" data-a="1">测试数据1</p>
<p class="p1" data-a="2">测试数据2</p>
<p class="p1" data-a="3" style="height:100px;">
<strong class="s">测试数据3</strong></p>
<p class="p1" data-a="4" width="200"><img src="1.jpg" class="img"/><br/>
图片</p>
'''

# Parse the markup from a string; `page` is the root of the resulting tree.
page = etree.HTML(source)

# Element list: every <p> element in the document.
ps = page.xpath("//p")
for p in ps:
    # p.attrib is lxml's dict-like attribute proxy; dict() turns it into a
    # plain dictionary that can be passed around as ordinary parameters.
    print(u"属性：%s" % dict(p.attrib))
    # p.text only holds the text that precedes the first child element, so
    # it is None for the <p> whose content starts with <img>.
    print(u"文本：%s" % p.text)

# Text list: the text nodes that are direct children of each <p>.
ts = page.xpath("//p/text()")
for t in ts:
    print(t)

# XPath positioning: <img> children of the last <p class="p1"> among its
# siblings.
ls = page.xpath('//p[@class="p1"][last()]/img')
for img in ls:
    print(dict(img.attrib))

查看全文

相关阅读:
topcoder srm 445 div1
topcoder srm 440 div1
topcoder srm 435 div1
topcoder srm 430 div1
topcoder srm 400 div1
topcoder srm 380 div1
topcoder srm 370 div1
topcoder srm 425 div1
WKWebView强大的新特性
 Runtime那些事

原文地址：https://www.cnblogs.com/liuliu-word/p/9574895.html