zoukankan      html  css  js  c++  java
  • [工具开发] Perl 爬虫脚本从美国国家漏洞数据库抓取实时信息

    一、简介

    美国国家漏洞数据库收集了操作系统,应用软件的大量漏洞信息,当有新的漏洞出现时,它也会及时发布出来.

    由于信息量巨大,用户每次都需要到它的网站进行搜索,比较麻烦.如果能有个工具,每天自动分析它发布的漏洞数据库,一旦发现所需要的新漏洞信息,就通过邮件自动发送给公司的系统或者安全管理员,那就方便多了.

    下面我写的这个工具就是起到这个作用的.图片是工具自动发送的邮件截图:

    它每天都会根据用户设置的关键字自动抓取 NVD 数据,然后和前一天抓取的数据进行对比分析,当发现今天有新的数据时就发送邮件给用户,否则不发送.

    二、效果截图

    三、源代码

    #!/usr/bin/perl -w
    #hahp@qq.com
    use 5.10.1;
    use strict;
    use LWP::Simple;
    use Net::SMTP;
    use MIME::Base64;
    use Encode qw/ decode encode /;
     
    # Directory holding one snapshot file per day (nvd_<YYYY-MM-DD>.txt).
    my $REC_DIR = '/home/hupeng/nvd';
    # Keywords searched on NVD; one search request is issued per keyword.
    my @query_keywords = qw/ kernel tomcat apache spring /;

    # Today's and yesterday's dates (YYYY-MM-DD) as printed by date(1).
    # They are only used to name the snapshot files and to stamp the mail.
    chomp(my $TO_DAY   = `date +%Y-%m-%d`);
    chomp(my $LAST_DAY = `date +%Y-%m-%d -d '-1 days'`);

    # Start of the search window: the calendar month before the current one.
    # Month and year are derived together from a single localtime() reading:
    #   * in January the window must start in December of the PREVIOUS year
    #     (pairing last month with the current year would put the start date
    #     in the future and the search would return nothing);
    #   * pure month arithmetic avoids the day-of-month overflow that
    #     "date -d '-1 months'" can hit on the 29th-31st, where the result
    #     may land back in the current month.
    # $THIS_MONTH is 1..12 without a leading zero, $THIS_YEAR has four digits.
    my ($cur_mon0, $cur_year) = (localtime)[4, 5];   # month is 0-based, year is offset from 1900
    $cur_year += 1900;
    my $THIS_MONTH = $cur_mon0 == 0 ? 12            : $cur_mon0;
    my $THIS_YEAR  = $cur_mon0 == 0 ? $cur_year - 1 : $cur_year;

    # Not referenced by the rest of this script; kept with their original
    # values (current month, zero-padded; year one month from now).
    chomp(my $NEXT_MONTH = `date +%m`);
    chomp(my $NEXT_YEAR  = `date +%Y -d '+1 months'`);

    # Snapshot files compared against each other: yesterday's and today's.
    my $nvdfile_lastday = "$REC_DIR/nvd_$LAST_DAY.txt";
    my $nvdfile_today = "$REC_DIR/nvd_$TO_DAY.txt";
     
    # Prefix of the NVD detail-page URL; a vulnerability ID is appended to it
    # to build the links placed in the mail.
    my $nvd_url_pre = 'http://web.nvd.nist.gov/view/vuln/detail?vulnId=';
    #my $sev_base = 'MEDIUM_HIGH';

    # Outgoing mail settings. The 'XXXX' values are placeholders.
    my ($theSmtpServer, $theSmtpUser) = ('XXXX', 'XXXX');
    my ($theSmtpPasswd, $theSmtpSend) = ('XXXXX', 'XXXXX');
    my @theSmtpTo = ('hupeng@test2.com','hupeng@test.com');

    # Subject line: fixed label followed by today's date.
    my $theSmtpSubject = join '', 'NVD 新记录 ', $TO_DAY;

    # The mail body starts with a heading and the comma-separated keyword
    # list; the new records are appended later.
    my $query_keywords_str = arr2str0(@query_keywords);
    my $theSmtpBody = join '',
        '<p>NVD 新记录</p><br><p>关键字:', $query_keywords_str, '</p><br>';
     
    # Split a multi-line string into a sorted list of its lines.
    #
    # Parameters:
    #   $str - text to split on "\n"; may be undefined.
    # Returns:
    #   The lines of $str without their newlines, in Perl's default (string)
    #   sort order. An undefined or empty $str yields an empty list.
    sub str2arr {
        my ($str) = @_;

        # The caller feeds this the result of an HTTP fetch, which can be
        # undefined; treat that as "no lines" instead of tripping
        # "uninitialized value" warnings under -w.
        return () unless defined $str;

        # Drop a leading newline and the trailing newline(s) so that split
        # does not produce an empty first field.
        $str =~ s/^\n|\n$//g;

        my @arr = split /\n/, $str;
        @arr = sort @arr;
        return @arr;
    }
     
    # Render a list as text, one element per line.
    #
    # Parameters:
    #   @_ - the elements to render.
    # Returns:
    #   A single string made of the elements in sorted (string) order, each
    #   followed by "\n"; the empty string for an empty list.
    sub arr2str {
        my @ordered = sort @_;
        return join '', map { $_ . "\n" } @ordered;
    }
     
    # Render a list as a single comma-separated line.
    #
    # Parameters:
    #   @_ - the elements to render.
    # Returns:
    #   The elements in sorted (string) order joined by ", " with no trailing
    #   separator; the empty string for an empty list.
    sub arr2str0 {
        my @ordered = sort @_;
        return join ', ', @ordered;
    }
     
    # Run one NVD search per keyword and collect the result-list lines.
    #
    # Parameters:
    #   @_ - the keywords to search for. When called without arguments the
    #        file-level @query_keywords list is used (the original code always
    #        read that list and ignored its arguments).
    # Returns:
    #   A sorted list without duplicates. Each element is
    #   "<keyword>Anchor_<rest of the matched line>\n", where the line has had
    #   all whitespace removed before matching.
    #
    # A keyword whose page cannot be fetched is skipped with a warning.
    sub getContent {
        my @keywords = @_ ? @_ : @query_keywords;
        my %seen;

        foreach my $query_keyword (@keywords) {
            #my $url = "http://web.nvd.nist.gov/view/vuln/search-results?adv_search=true\&cves=on\&query=$query_keyword\&pub_date_start_month=$start_month\&pub_date_start_year=$start_year\&cvss_sev_base=$sev_base\&cve_id=";
            #my $url = "http://web.nvd.nist.gov/view/vuln/search-results?adv_search=true\&cves=on\&query=$query_keyword";
            my $url = "http://web.nvd.nist.gov/view/vuln/search-results?adv_search=true\&cves=on\&query=$query_keyword\&pub_date_start_month=$THIS_MONTH\&pub_date_start_year=$THIS_YEAR\&cve_id=";

            my $page = get($url);
            unless (defined $page) {
                warn "getContent: could not fetch search results for '$query_keyword'\n";
                next;
            }

            foreach my $line (str2arr($page)) {
                # Matching is done on the line with every whitespace character
                # removed, so the captured text is whitespace-free too.
                (my $compact = $line) =~ s/\s+//g;
                if ( $compact =~ m/BodyPlaceHolder_cplPageContent_plcZones_lt_zoneCenter_VulnerabilitySearchResults_VulnResultsRepeater_[\w]+(Anchor_.*$)/ ) {
                    # Prefix with the keyword; the hash removes duplicates.
                    $seen{ $query_keyword . $1 . "\n" } = 1;
                }
            }
        }

        # De-duplicate and sort once, after all keywords have been processed.
        my @content = sort keys %seen;
        return @content;
    }
     
    # Load a snapshot file and report its lines and the greatest line.
    #
    # Parameters:
    #   $nvd_file - path of the snapshot file to read.
    # Returns:
    #   A hash (as a key/value list) with
    #     maxnvd - the greatest line by string comparison (newline included),
    #              or '0' when the file is empty or cannot be opened;
    #     nvds   - array reference holding every line, newlines included.
    #
    # A file that cannot be opened is not treated as an error; the result is
    # then maxnvd '0' and an empty nvds list.
    #
    # NOTE(review): "gt" compares strings, so IDs whose numeric part differs
    # in length (e.g. ...-9999 vs ...-10000) are not ordered numerically.
    sub getNvd {
        my ($nvd_file) = @_;
        my $maxnvd = '0';
        my @nvds = ();

        # Three-argument open with a lexical handle: the file name is taken
        # literally and the handle cannot clash with another bareword FILE.
        if ( open(my $fh, '<', $nvd_file) ) {
            @nvds = <$fh>;
            close $fh;
            foreach my $line (@nvds) {
                $maxnvd = $line if $line gt $maxnvd;
            }
        }

        my %result = ('maxnvd' => $maxnvd, 'nvds' => [@nvds]);
        return %result;
    }
     
    # Write the vulnerability IDs found in the scraped lines to a snapshot file.
    #
    # Parameters:
    #   $content  - array reference of scraped lines; an ID is taken from each
    #               line of the form  ...Anchor_<digits>">ID</a>  where ID
    #               consists of word characters and hyphens.
    #   $nvd_file - path of the snapshot file; it is overwritten.
    # Returns:
    #   Nothing useful.
    #
    # Each ID is written once, in order of first appearance, even if several
    # lines (e.g. from different keywords) carry the same ID. If the file
    # cannot be opened or closed a warning is printed and the script goes on.
    sub putNvd {
        my ($content, $nvd_file) = @_;

        # Three-argument open with a lexical handle: the file name is taken
        # literally instead of being parsed for mode characters.
        my $fh;
        unless ( open($fh, '>', $nvd_file) ) {
            warn "putNvd: cannot write '$nvd_file': $!\n";
            return;
        }

        my %written;
        foreach my $line (@{$content}) {
            if ( $line =~ m/[\w-]+Anchor_[\d]+">([\w-]+)<\/a>/ ) {
                my $id = $1;
                next if $written{$id}++;
                print {$fh} $id . "\n";
            }
        }

        # Buffered write errors only surface when the handle is closed.
        close($fh) or warn "putNvd: error closing '$nvd_file': $!\n";
        return;
    }
     
    # Build the HTML lines for the records that are new compared to yesterday.
    #
    # Parameters:
    #   $maxNvd_lastday - greatest snapshot line of the previous day ('0' when
    #                     there is none); compared as a string.
    #   $nvdsToday      - array reference of today's snapshot lines (one ID per
    #                     line, newline included).
    #   $content        - array reference of the scraped lines for today.
    # Returns:
    #   One HTML fragment per new ID: a link to the detail page followed, when
    #   a matching score line exists in $content, by the CVSS score and
    #   severity word. Each ID is reported once.
    #
    # A record is "new" when its snapshot line is string-greater than
    # $maxNvd_lastday.
    # NOTE(review): string comparison does not order IDs numerically when the
    # numeric parts differ in length (e.g. ...-9999 vs ...-10000).
    sub getNewNvdRds {
        my ($maxNvd_lastday, $nvdsToday, $content) = @_;
        my @newNvds = ();
        my %reported;

        foreach my $entry (@{$nvdsToday}) {
            next unless $entry gt $maxNvd_lastday;

            my $id = $entry;
            chomp($id);
            next if $reported{$id}++;

            # Default when no score line is found: the link alone. Without
            # this the record would show up as an empty entry in the mail.
            my $nvd = '<a href="'.$nvd_url_pre.$id.'">'.$id.'</a><br>';

            foreach my $ln1 (@{$content}) {
                # Line that carries this ID; its "<keyword>Anchor_<n>">" prefix
                # identifies the result row. \Q..\E keeps the ID literal.
                next unless $ln1 =~ m/^([\w-]+Anchor_[\d]+\">)\Q$id\E<\/a>$/;
                my $row_prefix = $1;

                foreach my $ln2 (@{$content}) {
                    # Line of the same row holding "<score></a><severity>".
                    if ( $ln2 =~ m/^\Q$row_prefix\E([\d.]+)<\/a>([\w]+)$/ ) {
                        $nvd = '<a href="'.$nvd_url_pre.$id.'">'.$id.'</a>  CVSS Severity:  '.encode('UTF-8',$1).'  '.encode('UTF-8',$2).'<br>';
                    }
                }
            }
            push(@newNvds, $nvd);
        }
        return @newNvds;
    }
     
    # ---- main flow ---------------------------------------------------------

    # Greatest ID recorded in yesterday's snapshot ('0' if there is none).
    my %tmpHsh = ();
    %tmpHsh = getNvd($nvdfile_lastday);
    my $maxNvd_lastday = $tmpHsh{'maxnvd'};

    # Today's scraped lines: the detailed information of the NVD records.
    my @content = getContent(@query_keywords);

    # Store today's IDs in today's snapshot file.
    putNvd([@content], $nvdfile_today);

    # Read the snapshot back: greatest ID of today and all IDs of today.
    %tmpHsh = getNvd($nvdfile_today);
    my $maxNvd_today = $tmpHsh{'maxnvd'};
    my @nvdsToday = @{$tmpHsh{'nvds'}};

    %tmpHsh = ();

    # Formatted details of the records that are new since yesterday.
    my @newNvdRds = getNewNvdRds($maxNvd_lastday, [@nvdsToday], [@content]);

    # Send the mail only when there is something new to report.
    my $count = @newNvdRds;
    if( $count ){
        $theSmtpBody .= arr2str(@newNvdRds);
        $theSmtpBody .= '<br><br>'.$TO_DAY.'<br><br>';

        # new() returns undef when the connection fails; stop with a clear
        # message instead of calling methods on an undefined value.
        my $theSmtp = Net::SMTP->new($theSmtpServer, Timeout => 10)
            or die "Cannot connect to SMTP server '$theSmtpServer': $@\n";

        # A failed login is reported but not fatal: delivery is still
        # attempted, as before.
        $theSmtp->auth($theSmtpUser, $theSmtpPasswd)
            or warn "SMTP authentication failed: " . $theSmtp->message;

        if ( $theSmtp->mail($theSmtpSend) && $theSmtp->to(@theSmtpTo) && $theSmtp->data() ) {
            # Header block; multiple recipients are separated by commas.
            $theSmtp->datasend("From: $theSmtpSend\n");
            $theSmtp->datasend("To: " . join(', ', @theSmtpTo) . "\n");
            $theSmtp->datasend("MIME-Version: 1.0\n");
            $theSmtp->datasend("Content-Type:text/html;charset=UTF-8\n");
            # The subject contains non-ASCII text, so it is sent as a
            # base64 encoded-word.
            $theSmtp->datasend("Subject:=?UTF-8?B?".encode_base64($theSmtpSubject, '')."?=\n");
            # An empty line separates the headers from the body.
            $theSmtp->datasend("\n");
            $theSmtp->datasend($theSmtpBody);
            $theSmtp->dataend()
                or warn "SMTP server did not accept the message: " . $theSmtp->message;
        }
        else {
            warn "SMTP server rejected sender or recipients: " . $theSmtp->message;
        }
        $theSmtp->quit;
    }
  • 相关阅读:
    ATL接口返回类型&&ATL接口返回字符串BSTR*
    不允许使用抽象类类型
    error C2039: 'SetDefaultDllDirectories'错误解决办法<转>
    directshow 虚拟摄像头 实例 代码解读
    UML建模之时序图(Sequence Diagram)<转>
    【干货】Chrome插件(扩展)开发全攻略(不点进来看看你肯定后悔)<转>
    在VS13上编译通过的代码放在12上编译-错误:l __dtoui3 referenced in function _event_debug_map_HT_GROW
    struct 方法使用
    2014华为机试题目
    贪心-poj-2437-Muddy roads
  • 原文地址:https://www.cnblogs.com/hahp/p/4224445.html
Copyright © 2011-2022 走看看