"""Print the titles of CNVD vulnerability entries that match the keyword "tomcat".

The script replays a captured browser search request against
https://www.cnvd.org.cn/flaw/list.htm, one request per result page, and
prints the ``title`` attribute of every vulnerability link found in the
result table.
"""
import re

import hackhttp
from bs4 import BeautifulSoup as BS

LIST_URL = 'https://www.cnvd.org.cn/flaw/list.htm?flag=true'

# Matches the href of anchors that point at a vulnerability detail page.
# Compiled once here instead of on every call to tomcat().
CNVD_LINK_RE = re.compile('/flaw/show/CNVD-.*?')

# Locates the Content-Length header line inside a raw request.
CONTENT_LENGTH_RE = re.compile(r'^Content-Length:[ \t]*\d+[ \t]*$', re.MULTILINE)


def tomcat(raw):
    """Send one raw search request and print the titles of the listed entries.

    Args:
        raw: Complete raw HTTP request (request line, headers, blank line,
            form body) that is handed to hackhttp unchanged.

    Returns:
        None. One title is printed per matching link. Nothing is printed
        when the response contains no ``<tbody>`` element.
    """
    hh = hackhttp.hackhttp()
    _code, _head, html, _redirect, _log = hh.http(url=LIST_URL, raw=raw)
    soup = BS(html, 'lxml')

    table_body = soup.tbody
    if table_body is None:
        # No result table in the response; there is nothing to list.
        return

    # Search the <tbody> tag directly rather than re-parsing its string form.
    # 'title': True keeps only anchors that actually carry a title attribute,
    # so the lookup below cannot raise KeyError.
    links = table_body.find_all(
        name='a',
        attrs={'href': CNVD_LINK_RE, 'title': True},
    )
    for link in links:
        print(link['title'])


# Captured search request. The form body ends with "offset=" so that the page
# offset can be appended per request.
# NOTE(review): the Cookie header below looks like a captured browser session
# and presumably has to be refreshed once it expires -- confirm before relying
# on this script.
raw_start = '''
POST /flaw/list.htm?flag=true HTTP/1.1
Host: www.cnvd.org.cn
User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
Accept-Encoding: gzip, deflate
Referer: https://www.cnvd.org.cn/flaw/list.htm?flag=true
Content-Type: application/x-www-form-urlencoded
Content-Length: 417
Connection: close
Cookie: __jsluid=6ff239d5330d7cd7d5e868e806ecb36c; bdshare_firstime=1555326104784; __jsluid_h=dd2d406deb91951802e54ebb9219e6cf; __jsluid_s=8d912e302fdbe184f1a11997dfd56a6b; JSESSIONID=63D22544AB60228489EDAB88B34068E2; Hm_lvt_d7682ab43891c68a00de46e9ce5b76aa=1586437424; Hm_lpvt_d7682ab43891c68a00de46e9ce5b76aa=1586437614
Upgrade-Insecure-Requests: 1

number=%E8%AF%B7%E8%BE%93%E5%85%A5%E7%B2%BE%E7%A1%AE%E7%BC%96%E5%8F%B7&startDate=&endDate=&flag=%5BLjava.lang.String%3B%4018cb0a99&field=&order=&keyword=tomcat&condition=1&keywordFlag=0&cnvdId=&cnvdIdFlag=0&baseinfoBeanbeginTime=&baseinfoBeanendTime=&baseinfoBeanFlag=0&refenceInfo=&referenceScope=-1&manufacturerId=-1&categoryId=-1&editionId=-1&causeIdStr=&threadIdStr=&serverityIdStr=&positionIdStr=&max=10&offset='''


def _build_raw(offset):
    """Return the raw search request for the result page starting at *offset*.

    The captured ``Content-Length: 417`` only matches a one-digit offset
    (the body is 416 bytes plus the offset digits), so the header is
    rewritten to the real body length of each request.
    """
    raw = raw_start + str(offset)
    _headers, separator, body = raw.partition('\n\n')
    if not separator:
        # No header/body separator found; leave the request untouched.
        return raw
    return CONTENT_LENGTH_RE.sub('Content-Length: %d' % len(body), raw, count=1)


# Offsets 0, 10, ..., 120: thirteen result pages of ten entries each (max=10).
for pages_count in range(0, 121, 10):
    raw = _build_raw(pages_count)
    tomcat(raw)
# Requires the lxml parser: python2 -m pip install lxml