"""Scrape a two-column table from a WeChat article and print each row as one line."""
import requests
from lxml import html
from bs4 import BeautifulSoup

etree = html.etree

url = "https://mp.weixin.qq.com/s/drle9K4jgVWxm4v14ETbpQ"

# Timeout so a stalled connection cannot hang the script forever;
# raise_for_status() so an HTTP error page is not silently parsed as content.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, features='html.parser')
content = soup.prettify()
html_content = etree.HTML(content)

# Table of interest (reference XPath from the article):
# //*[@id="js_content"]/section[2]/section/section[2]/table
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/text()")
    con1 = item.xpath("./td[2]/text()")
    # Guard: header rows or rows with merged cells may lack one of the two
    # <td> text nodes; the original indexed con[0]/con1[0] unconditionally
    # and crashed with IndexError on such rows.
    if not (con and con1):
        continue
    # A bare .strip() removes ALL Unicode whitespace (including the
    # non-breaking / full-width spaces the original chained strip(" ")
    # calls were aimed at), so one call replaces the whole chain.
    print(con[0].strip() + con1[0].strip())
"""Scrape question/answer rows from a WeChat article table and print them.

Column 1 holds the question split across two <span>s; column 2 holds the
answer as a <p> text node plus a nested <span>.
"""
import requests
from lxml import html
from bs4 import BeautifulSoup

etree = html.etree

url = "https://mp.weixin.qq.com/s/Zt2K7aOfSr8mrSdArfzWAg"

# Timeout so a stalled connection cannot hang the script forever;
# raise_for_status() so an HTTP error page is not silently parsed as content.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, features='html.parser')
content = soup.prettify()
html_content = etree.HTML(content)

# Reference XPaths from the article:
# //*[@id="js_content"]/section[2]/section/section[2]/table
# //*[@id="js_content"]/section[2]/section/section[2]/section[2]/section/section/table/tbody/tr[6]/td[1]/p/span[1]
ret_data = html_content.xpath('//tr')
for item in ret_data:
    con = item.xpath("./td[1]/p/span[1]/text()")    # question, first span
    con1 = item.xpath("./td[1]/p/span[2]/text()")   # question, second span (may be absent)
    con2 = item.xpath("./td[2]/p/text()")           # answer, bare text
    con3 = item.xpath("./td[2]/p/span/text()")      # answer, nested span text

    # Guard: skip rows with no question text at all; the original indexed
    # con[0] unconditionally and crashed with IndexError on such rows.
    if not con:
        continue

    # .strip() with no argument subsumes the original chained strip(" ")
    # calls (it removes all Unicode whitespace, incl. NBSP / full-width).
    question = con[0].strip()

    if con1:
        # Two-span question: append second half, then the answer pieces.
        # con2/con3 may individually be empty, so index them defensively
        # (the original assumed both were always present).
        answer = (con2[0].strip() if con2 else "") + (con3[0].strip() if con3 else "")
        print(question + con1[0].strip() + " 答案→" + answer)
    else:
        # Single-span question row: nothing to append.
        print(question)
下载 CSV 文件,并读取其内容
"""Download a CVE advisory CSV and print each parsed row."""
import csv

import requests

CSV_URL = 'https://www.remedy-cloud.com/download/csv/CVE-2020-1938'

with requests.Session() as s:
    # Timeout so a stalled download cannot hang the script; raise_for_status()
    # so an HTTP error page is not silently fed to the CSV parser.
    download = s.get(CSV_URL, timeout=30)
    download.raise_for_status()

decoded_content = download.content.decode('utf-8')

# csv.reader handles quoting/embedded commas correctly; iterate it directly
# instead of materializing an intermediate list first.
for row in csv.reader(decoded_content.splitlines(), delimiter=','):
    print(row)