zoukankan      html  css  js  c++  java
  • logstash 使用grok正则解析日志

    http://xiaorui.cc/2015/01/27/logstash%E4%BD%BF%E7%94%A8grok%E6%AD%A3%E5%88%99%E8%A7%A3%E6%9E%90%E6%97%A5%E5%BF%97%E9%81%87%E5%88%B0%E7%9A%84%E9%97%AE%E9%A2%98/
    http://grokdebug.herokuapp.com/
    demo:http://www.tuicool.com/articles/M7ryEv
    Logstash 最佳实践:http://udn.yyuap.com/doc/logstash-best-practice-cn/filter/grok.html
    logstash filter 语法:
    Example
    
    下面是日志的样子
    55.3.244.1 GET /index.html 15824 0.043
    
    正则的例子
    %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
    
    配置文件里是怎么写得? 
    
    input {
      file {
        path => “/var/log/http.log”
      }
    }
    filter {
      grok {
        match => [ "message", "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" ]
      }
    }
    
    解析后,是个什么样子? 
    
    client: 55.3.244.1
    method: GET
    request: /index.html
    bytes: 15824
    duration: 0.043
    
    
    自定义正则
    
    
    (?<field_name>the pattern here)
    
    (?<queue_id>[0-9A-F]{10,11})
    
    当然你也可以把众多的正则,放在一个集中文件里面。 
    # in ./patterns/postfix 
    POSTFIX_QUEUEID [0-9A-F]{10,11}
    
    filter {
      grok {
        patterns_dir => “./patterns”
        match => [ "message", "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" ]
      }
    }
    
    ############
    
    
    logstash已经自带了不少的正则,如果想偷懒的话,可以在内置正则里借用下。 
    
    USERNAME [a-zA-Z0-9._-]+
    USER %{USERNAME}
    INT (?:[+-]?(?:[0-9]+))
    BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:.[0-9]+)?)|(?:.[0-9]+)))
    NUMBER (?:%{BASE10NUM})
    BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
    BASE16FLOAT (?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:.[0-9A-Fa-f]*)?)|(?:.[0-9A-Fa-f]+)))
    
    POSINT (?:[1-9][0-9]*)
    NONNEGINT (?:[0-9]+)
    WORD w+
    NOTSPACE S+
    SPACE s*
    DATA .*?
    GREEDYDATA .*
    QUOTEDSTRING (?>(?<!\)(?>”(?>\.|[^\"]+)+”|”"|(?>’(?>\.|[^\']+)+’)|”|(?>(?>\.|[^]+)+)|`))
    UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
    
    # Networking
    MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
    CISCOMAC (?:(?:[A-Fa-f0-9]{4}.){2}[A-Fa-f0-9]{4})
    WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
    COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
    IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]d|1dd|[1-9]?d)(.(25[0-5]|2[0-4]d|1dd|[1-9]?d)){3}))|:)))(%.+)?
    IPV4 (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9])
    IP (?:%{IPV6}|%{IPV4})
    HOSTNAME (?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(.?|)
    HOST %{HOSTNAME}
    IPORHOST (?:%{HOSTNAME}|%{IP})
    HOSTPORT (?:%{IPORHOST=~/./}:%{POSINT})
    
    # paths
    PATH (?:%{UNIXPATH}|%{WINPATH})
    UNIXPATH (?>/(?>[w_%!$@:.,-]+|\.)*)+
    TTY (?:/dev/(pts|tty([pq])?)(w+)?/?(?:[0-9]+))
    WINPATH (?>[A-Za-z]+:|\)(?:
    ^\?*]*)+ 
    URIPROTO [A-Za-z]+(+[A-Za-z+]+)? 
    URIHOST %{IPORHOST}(?::%{POSINT:port})? 
    # uripath comes loosely from RFC1738, but mostly from what Firefox 
    # doesn’t turn into %XX 
    URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_-]*)+ 
    #URIPARAM ?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? 
    URIPARAM ?[A-Za-z0-9$.+!*’|(){},~@#%&/=:;_?-[
    ]*
    URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
    URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
    
    # Months: January, Feb, 3, 03, 12, December
    MONTH (?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)
    MONTHNUM (?:0?[1-9]|1[0-2])
    MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
    
    # Days: Monday, Tue, Thu, etc…
    DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
    
    # Years?
    YEAR (?>dd){1,2}
    HOUR (?:2[0123]|[01]?[0-9])
    MINUTE (?:[0-5][0-9])
    # ’60′ is a leap second in most time standards and thus is valid.
    SECOND (?:(?:[0-5][0-9]|60)(?:[:.,][0-9]+)?)
    TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
    # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
    DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
    DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
    ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
    ISO8601_SECOND (?:%{SECOND}|60)
    TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
    DATE %{DATE_US}|%{DATE_EU}
    DATESTAMP %{DATE}[- ]%{TIME}
    TZ (?:[PMCE][SD]T|UTC)
    DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
    DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
    
    # Syslog Dates: Month Day HH:MM:SS
    SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
    PROG (?:[w._/%-]+)
    SYSLOGPROG %{PROG:program}(?:
    )?
    SYSLOGHOST %{IPORHOST}
    SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
    HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
    
    # Shortcuts
    QS %{QUOTEDSTRING}
    
    # Log formats
    SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
    COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth}
    “(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})” %{NUMBER:response} (?:%{NUMBER:bytes}|-)
    COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}
    
    # Log Levels
    LOGLEVEL ([A-a]lert|ALERT|[T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
    
    Logstash 最佳实践
    http://udn.yyuap.com/doc/logstash-best-practice-cn/index.html
    https://www.gitbook.com/book/chenryn/kibana-guide-cn
    配置示例
    
    input{
        file {
            path => ["/var/log/*.log", "/var/log/message"]
            type => "system"
            start_position => "beginning"
        }
    }
    解释
    有一些比较有用的配置项,可以用来指定 FileWatch 库的行为:
    discover_interval
    logstash 每隔多久去检查一次被监听的 path 下是否有新文件。默认值是 15 秒。
    exclude
    不想被监听的文件可以排除出去,这里跟 path 一样支持 glob 展开。
    sincedb_path
    如果你不想用默认的 $HOME/.sincedb(Windows 平台上在C:WindowsSystem32configsystemprofile.sincedb),可以通过这个配置定义 sincedb 文件到其他位置。
    sincedb_write_interval
    logstash 每隔多久写一次 sincedb 文件,默认是 15 秒。
    stat_interval
    logstash 每隔多久检查一次被监听文件状态(是否有更新),默认是 1 秒。
    start_position
    logstash 从什么位置开始读取文件数据,默认是结束位置,也就是说 logstash 进程会以类似 tail -F 的形式运行。如果你是要导入原有数据,把这个设定改成 "beginning",logstash 进程就从头开始读取,有点类似cat,但是读到最后一行不会终止,而是继续变成 tail -F。
    

  • 相关阅读:
    如何使用jpegtran 压缩JPG图片
    JS获取后台返回的JSON数据
    VUE安装步骤1
    VUE安装步骤
    SVN使用教程总结
    WebStorm的下载与安装
    理解CSS3 transform中的Matrix(矩阵)
    http statusCode(状态码) 200、300、400、500序列详解
    JS如何获取屏幕、浏览器及网页高度宽度?
    html5使用local storage存储的数据在本地是以何种形式保存的
  • 原文地址:https://www.cnblogs.com/hzcya1995/p/13350345.html
Copyright © 2011-2022 走看看