"""Fetch the agri.cn listing page and print the headline embedded in it.

The script downloads ``url``, parses the HTML with lxml, takes the text of
the first ``<script>`` node found at a fixed XPath, prints that text, then
prints the list of anchor titles extracted from it with a regular
expression.
"""
import re

import requests
from lxml import etree

headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}

url = 'http://www.agri.cn/V20/SC/jcyj_1/'

# Issue the request through a Session object so that cookies returned by the
# server are retained on the session (the original author's stated intent).
sessions = requests.Session()

# Without a timeout, requests waits indefinitely on an unresponsive server.
ret = sessions.get(url=url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
ret.raise_for_status()
# Force UTF-8 decoding of the body regardless of the declared charset.
ret.encoding = "utf-8"
ret_text = ret.text
tree = etree.HTML(ret_text)

# Text content of the <script> nodes at the expected position in the layout.
script_texts = tree.xpath(
    '/html/body/table[3]//tr/td[1]/table[2]//tr/td/table[2]//tr/td[1]/a/script/text()'
)
if not script_texts:
    # Same exception type as the bare ``[0]`` used to raise, but with a
    # message that says what actually went wrong.
    raise IndexError(
        'no <script> text found at the expected XPath; '
        'the page layout may have changed'
    )
li_list = script_texts[0]
print(li_list)
# Example of the printed script text:
# document.write(tit1_('<a href="./202005/t20200513_7387962.htm">5月份第1周畜产品和饲料集贸市场价格情况</a>'));

# Non-greedy capture so that several anchors on one line yield one title each
# rather than a single match spanning from the first anchor to the last.
dta = re.findall(r'htm">(.*?)</a>', li_list)
print(dta)
# Example of the printed result:
# ['5月份第1周畜产品和饲料集贸市场价格情况']