1. 在 Firefox 下可以使用 FirePath 插件找到每个属性的 XPath。
2. 感觉 XPath 非常好用,简单,适合提取网页中结构化的数据。
1 import sys 2 import lxml.html as HTML 3 4 file=sys.argv[1] 5 doc = HTML.fromstring(open(file).read()) 6 7 table = doc.xpath(".//*[@id='infoTable']/tbody/tr") 8 for i in range(1,len(table)): 9 tr = table[i] 10 for tds in tr: 11 td = tds.text_content().strip(" \t\r\n") 12 if td != "": 13 print td,"\t", 14 print