# 基础命令 (basic XPath selector commands)
# Each assignment below is an independent example; `hxs` is rebound every time.
# The trailing "-> ..." notes say what kind of node each query yields.

# Every <a> element at any depth of the document.                -> elements
hxs = response.xpath('//a')
# Every <a> that is the second <a> child of its parent; the original note
# records that this yields no result on the page being scraped.   -> elements
hxs = response.xpath('//a[2]')
# Every <a> that has an id attribute (any value).                 -> elements
hxs = response.xpath('//a[@id]')
# The id attribute of every <a>.                                  -> attribute values
hxs = response.xpath('//a/@id')
# Every <a> whose id attribute equals "i1".                       -> elements
hxs = response.xpath('//a[@id="i1"]')
# Chained predicates act as logical AND: href equals "link.html"
# and an id attribute is present.                                 -> elements
hxs = response.xpath('//a[@href="link.html"][@id]')
# href contains the substring "link".                             -> elements
hxs = response.xpath('//a[contains(@href, "link")]')
# href starts with "link".                                        -> elements
hxs = response.xpath('//a[starts-with(@href, "link")]')
# Regular-expression match on id: "i" followed by digits (e.g. "i1").
# Raw strings keep `\d` from being read as a Python escape sequence.
hxs = response.xpath(r'//a[re:test(@id, "i\d+")]')           # -> elements
hxs = response.xpath(r'//a[re:test(@id, "i\d+")]/text()')    # -> text nodes
hxs = response.xpath(r'//a[re:test(@id, "i\d+")]/@href')     # -> attribute values
# Absolute path from the document root.                           -> attribute values
hxs = response.xpath('/html/body/ul/li/a/@href')
# <body> anywhere in the document, then the ul/li/a child chain.  -> attribute values
hxs = response.xpath('//body/ul/li/a/@href')
# Inspecting a result:
# print(hxs)                  # the object returned by xpath()
# print(hxs.extract())        # extract every match
# print(hxs.extract_first())  # extract only the first match
# 循环 (looping over matched elements)
# Select the <li> children of <ul> children of <body>, then run a query
# relative to each <li> in turn.
ul_list = Selector(response=response).xpath('//body/ul/li')
for item in ul_list:
    print(item.extract())
    # Three ways to write a path relative to the current <li>:
    # v = item.xpath('./a/span')  # 1: <span> children of <a> children, explicit "./"
    # v = item.xpath('a/span')    # 2: same path as 1 without the leading "./"
    # 3: <span> at any depth below this <li>. The leading "." matters: a bare
    # '//span' would search the whole document instead of this <li>.
    v = item.xpath('.//span')
    print(v.extract())