ELK官网:https://www.elastic.co
一、logstash简介
Logstash是一个开源数据收集引擎,具有实时管道功能。Logstash可以动态地将来自不同数据源的数据统一起来,并将数据标准化到你所选择的目的地。
Logstash 能够动态地采集、转换和传输数据,不受格式或复杂度的影响。利用 Grok 从非结构化数据中派生出结构,从 IP 地址解码出地理坐标,匿名化或排除敏感字段,并简化整体处理过程。
Logstash有三个主要模块input输入、filter过滤器、output输出。
1.1 input输入
输入:采集各种样式、大小和来源的数据
数据往往以各种各样的形式,或分散或集中地存在于很多系统中。Logstash 支持各种输入选择 ,可以在同一时间从众多常用来源捕捉事件。能够以连续的流式传输方式,轻松地从您的日志、指标、Web 应用、数据存储以及各种 AWS 服务采集数据。
1.2 filter过滤器
过滤器:实时解析和转换数据
数据从源传输到存储库的过程中,Logstash 过滤器能够解析各个事件,识别已命名的字段以构建结构,并将它们转换成通用格式,以便更轻松、更快速地分析和实现商业价值。
- 利用 Grok 从非结构化数据中派生出结构
- 从 IP 地址破译出地理坐标
- 将 PII 数据匿名化,完全排除敏感字段
- 简化整体处理,不受数据源、格式或架构的影响
1.3 output输出
输出:选择您的存储库,导出您的数据,Elasticsearch是首选
尽管 Elasticsearch 是我们的首选输出方向,能够为我们的搜索和分析带来无限可能,但它并非唯一选择。
Logstash 提供众多输出选择,您可以将数据发送到您要指定的地方,并且能够灵活地解锁众多下游用例。
二、logstash.conf
logstasg将模块input输入、filter过滤器、output输出写入到自建的logstash.conf来生成json字段保存到Elasticsearch中。以下有几个ELK官网模板借鉴
官网模板:https://www.elastic.co/guide/en/logstash/current/logstash-config-for-filebeat-modules.html
2.1 logstash之Apache日志模板
logstash之Apache日志模板
input {
beats {
port => 5044
host => "0.0.0.0"
}
}
filter {
if [fileset][module] == "apache2" {
if [fileset][name] == "access" {
grok {
match => { "message" => ["%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} [%{HTTPDATE:[apache2][access][time]}] "%{WORD:[apache2][access][method]} %{DATA:[apache2][access][url]} HTTP/%{NUMBER:[apache2][access][http_version]}" %{NUMBER:[apache2][access][response_code]} %{NUMBER:[apache2][access][body_sent][bytes]}( "%{DATA:[apache2][access][referrer]}")?( "%{DATA:[apache2][access][agent]}")?",
"%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \[%{HTTPDATE:[apache2][access][time]}\] "-" %{NUMBER:[apache2][access][response_code]} -" ] }
remove_field => "message"
}
mutate {
add_field => { "read_timestamp" => "%{@timestamp}" }
}
date {
match => [ "[apache2][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
remove_field => "[apache2][access][time]"
}
useragent {
source => "[apache2][access][agent]"
target => "[apache2][access][user_agent]"
remove_field => "[apache2][access][agent]"
}
geoip {
source => "[apache2][access][remote_ip]"
target => "[apache2][access][geoip]"
}
}
else if [fileset][name] == "error" {
grok {
match => { "message" => ["[%{APACHE_TIME:[apache2][error][timestamp]}] [%{LOGLEVEL:[apache2][error][level]}]( [client %{IPORHOST:[apache2][error][client]}])? %{GREEDYDATA:[apache2][error][message]}",
"[%{APACHE_TIME:[apache2][error][timestamp]}] [%{DATA:[apache2][error][module]}:%{LOGLEVEL:[apache2][error][level]}] [pid %{NUMBER:[apache2][error][pid]}(:tid %{NUMBER:[apache2][error][tid]})?]( [client %{IPORHOST:[apache2][error][client]}])? %{GREEDYDATA:[apache2][error][message1]}" ] }
pattern_definitions => {
"APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}"
}
remove_field => "message"
}
mutate {
rename => { "[apache2][error][message1]" => "[apache2][error][message]" }
}
date {
match => [ "[apache2][error][timestamp]", "EEE MMM dd H:m:s YYYY", "EEE MMM dd H:m:s.SSSSSS YYYY" ]
remove_field => "[apache2][error][timestamp]"
}
}
}
}
output {
elasticsearch {
hosts => localhost
manage_template => false
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
}
2.2 logstash之nginx日志模板
logstash之nginx日志模板
input {
beats {
port => 5044
host => "0.0.0.0"
}
}
filter {
if [fileset][module] == "nginx" {
if [fileset][name] == "access" {
grok {
match => { "message" => ["%{IPORHOST:[nginx][access][remote_ip]} - %{DATA:[nginx][access][user_name]} [%{HTTPDATE:[nginx][access][time]}] "%{WORD:[nginx][access][method]} %{DATA:[nginx][access][url]} HTTP/%{NUMBER:[nginx][access][http_version]}" %{NUMBER:[nginx][access][response_code]} %{NUMBER:[nginx][access][body_sent][bytes]} "%{DATA:[nginx][access][referrer]}" "%{DATA:[nginx][access][agent]}""] }
remove_field => "message"
}
mutate {
add_field => { "read_timestamp" => "%{@timestamp}" }
}
date {
match => [ "[nginx][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
remove_field => "[nginx][access][time]"
}
useragent {
source => "[nginx][access][agent]"
target => "[nginx][access][user_agent]"
remove_field => "[nginx][access][agent]"
}
geoip {
source => "[nginx][access][remote_ip]"
target => "[nginx][access][geoip]"
}
}
else if [fileset][name] == "error" {
grok {
match => { "message" => ["%{DATA:[nginx][error][time]} [%{DATA:[nginx][error][level]}] %{NUMBER:[nginx][error][pid]}#%{NUMBER:[nginx][error][tid]}: (*%{NUMBER:[nginx][error][connection_id]} )?%{GREEDYDATA:[nginx][error][message]}"] }
remove_field => "message"
}
mutate {
rename => { "@timestamp" => "read_timestamp" }
}
date {
match => [ "[nginx][error][time]", "YYYY/MM/dd H:m:s" ]
remove_field => "[nginx][error][time]"
}
}
}
}
output {
elasticsearch {
hosts => localhost
manage_template => false
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
}
2.3 logstash之mysql日志模板
logstash之mysql日志模板
input {
beats {
port => 5044
host => "0.0.0.0"
}
}
filter {
if [fileset][module] == "mysql" {
if [fileset][name] == "error" {
grok {
match => { "message" => ["%{LOCALDATETIME:[mysql][error][timestamp]} ([%{DATA:[mysql][error][level]}] )?%{GREEDYDATA:[mysql][error][message]}",
"%{TIMESTAMP_ISO8601:[mysql][error][timestamp]} %{NUMBER:[mysql][error][thread_id]} [%{DATA:[mysql][error][level]}] %{GREEDYDATA:[mysql][error][message1]}",
"%{GREEDYDATA:[mysql][error][message2]}"] }
pattern_definitions => {
"LOCALDATETIME" => "[0-9]+ %{TIME}"
}
remove_field => "message"
}
mutate {
rename => { "[mysql][error][message1]" => "[mysql][error][message]" }
}
mutate {
rename => { "[mysql][error][message2]" => "[mysql][error][message]" }
}
date {
match => [ "[mysql][error][timestamp]", "ISO8601", "YYMMdd H:m:s" ]
remove_field => "[mysql][error][time]"
}
}
else if [fileset][name] == "slowlog" {
grok {
match => { "message" => ["^# User@Host: %{USER:[mysql][slowlog][user]}([[^]]+])? @ %{HOSTNAME:[mysql][slowlog][host]} [(IP:[mysql][slowlog][ip])?](s*Id:s* %{NUMBER:[mysql][slowlog][id]})?
# Query_time: %{NUMBER:[mysql][slowlog][query_time][sec]}s* Lock_time: %{NUMBER:[mysql][slowlog][lock_time][sec]}s* Rows_sent: %{NUMBER:[mysql][slowlog][rows_sent]}s* Rows_examined: %{NUMBER:[mysql][slowlog][rows_examined]}
(SET timestamp=%{NUMBER:[mysql][slowlog][timestamp]};
)?%{GREEDYMULTILINE:[mysql][slowlog][query]}"] }
pattern_definitions => {
"GREEDYMULTILINE" => "(.|
)*"
}
remove_field => "message"
}
date {
match => [ "[mysql][slowlog][timestamp]", "UNIX" ]
}
mutate {
gsub => ["[mysql][slowlog][query]", "
# Time: [0-9]+ [0-9][0-9]:[0-9][0-9]:[0-9][0-9](\.[0-9]+)?$", ""]
}
}
}
}
output {
elasticsearch {
hosts => localhost
manage_template => false
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
}
2.4 logstash之系统日志模板
logstash之system日志模板
input {
beats {
port => 5044
host => "0.0.0.0"
}
}
filter {
if [fileset][module] == "system" {
if [fileset][name] == "auth" {
grok {
match => { "message" => ["%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} sshd(?:[%{POSINT:[system][auth][pid]}])?: %{DATA:[system][auth][ssh][event]} %{DATA:[system][auth][ssh][method]} for (invalid user )?%{DATA:[system][auth][user]} from %{IPORHOST:[system][auth][ssh][ip]} port %{NUMBER:[system][auth][ssh][port]} ssh2(: %{GREEDYDATA:[system][auth][ssh][signature]})?",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} sshd(?:[%{POSINT:[system][auth][pid]}])?: %{DATA:[system][auth][ssh][event]} user %{DATA:[system][auth][user]} from %{IPORHOST:[system][auth][ssh][ip]}",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} sshd(?:[%{POSINT:[system][auth][pid]}])?: Did not receive identification string from %{IPORHOST:[system][auth][ssh][dropped_ip]}",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} sudo(?:[%{POSINT:[system][auth][pid]}])?: s*%{DATA:[system][auth][user]} :( %{DATA:[system][auth][sudo][error]} ;)? TTY=%{DATA:[system][auth][sudo][tty]} ; PWD=%{DATA:[system][auth][sudo][pwd]} ; USER=%{DATA:[system][auth][sudo][user]} ; COMMAND=%{GREEDYDATA:[system][auth][sudo][command]}",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} groupadd(?:[%{POSINT:[system][auth][pid]}])?: new group: name=%{DATA:system.auth.groupadd.name}, GID=%{NUMBER:system.auth.groupadd.gid}",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} useradd(?:[%{POSINT:[system][auth][pid]}])?: new user: name=%{DATA:[system][auth][useradd][name]}, UID=%{NUMBER:[system][auth][useradd][uid]}, GID=%{NUMBER:[system][auth][useradd][gid]}, home=%{DATA:[system][auth][useradd][home]}, shell=%{DATA:[system][auth][useradd][shell]}$",
"%{SYSLOGTIMESTAMP:[system][auth][timestamp]} %{SYSLOGHOST:[system][auth][hostname]} %{DATA:[system][auth][program]}(?:[%{POSINT:[system][auth][pid]}])?: %{GREEDYMULTILINE:[system][auth][message]}"] }
pattern_definitions => {
"GREEDYMULTILINE"=> "(.|
)*"
}
remove_field => "message"
}
date {
match => [ "[system][auth][timestamp]", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
}
geoip {
source => "[system][auth][ssh][ip]"
target => "[system][auth][ssh][geoip]"
}
}
else if [fileset][name] == "syslog" {
grok {
match => { "message" => ["%{SYSLOGTIMESTAMP:[system][syslog][timestamp]} %{SYSLOGHOST:[system][syslog][hostname]} %{DATA:[system][syslog][program]}(?:[%{POSINT:[system][syslog][pid]}])?: %{GREEDYMULTILINE:[system][syslog][message]}"] }
pattern_definitions => { "GREEDYMULTILINE" => "(.|
)*" }
remove_field => "message"
}
date {
match => [ "[system][syslog][timestamp]", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
}
}
}
}
output {
elasticsearch {
hosts => localhost
manage_template => false
index => "%{[@metadata][beat]}-%{[@metadata][version]}-%{+YYYY.MM.dd}"
}
}