zoukankan      html  css  js  c++  java
  • XML爬取

    # Fetch the Tenable plugin feed (XML), pull name / id / synopsis /
    # description / solution out of every <item>, and store each plugin that
    # is not yet present in the "CrawlDataForIDbyNessus" collection.
    url_str = 'https://www.tenable.com/plugins/feeds?sort=updated'
    # A timeout keeps the crawler from hanging forever on a stalled connection.
    respose_str = requests.get(url_str, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error page as a feed.
    respose_str.raise_for_status()
    soup = BeautifulSoup(respose_str.text, 'xml')
    soup_items = soup.find_all('item')
    for soup_item in soup_items:
        title_tag = soup_item.find("title")
        link_tag = soup_item.find("link")
        total_description = soup_item.find("description")
        if title_tag is None or link_tag is None or total_description is None:
            # Malformed entry: without these fields there is nothing to store.
            continue

        # get_text() returns the decoded text content, so HTML entities such
        # as "&amp;" are not left escaped in the stored values.
        name = title_tag.get_text()
        link = link_tag.get_text().strip()
        # The id is taken from the last "/"-separated segment of the link;
        # rstrip guards against a trailing slash producing an empty id.
        nessus_id = link.rstrip("/").split("/")[-1]
        if not nessus_id:
            continue

        # The <description> payload is itself HTML; its first three <span>
        # elements are read as synopsis, description and solution, in order.
        soup1 = BeautifulSoup(total_description.text, "lxml")
        span = soup1.find_all("span")
        if len(span) < 3:
            # Unexpected layout: skip rather than crash on a missing section.
            continue
        synopsis, description, solution = (tag.get_text() for tag in span[:3])

        up_dic = {
            "name": name,
            "nessus_id": nessus_id,
            "synopsis": synopsis,
            "description": description,
            "solution": solution,
        }
        # Insert only when no record with this nessus_id is found.
        # NOTE(review): assumes mdb.get_one returns a falsy value when nothing
        # matches -- confirm against the mdb helper.
        ne_item = mdb.get_one("CrawlDataForIDbyNessus", {"nessus_id": nessus_id})
        print(ne_item)
        if not ne_item:
            mdb.add("CrawlDataForIDbyNessus", up_dic)
    

      

  • 相关阅读:
    Spring MVC多动作控制器
    Spring MVC简单URL处理程序映射
    Spring MVC控制器类名称处理映射
    Spring MVC文件上传处理
    再探Tomcat
    Git教程之工作区和暂存区
    linux系统启动级别
    浅析JAVA_HOME,CLASSPATH和PATH的作用
    *Linux之rm命令
    @CentOS环境下Java开发环境的搭建
  • 原文地址:https://www.cnblogs.com/weidaijie/p/14097431.html
Copyright © 2011-2022 走看看