zoukankan      html  css  js  c++  java
  • Logstash处理json格式日志文件

    假设日志文件中的每一行记录格式为json的,如:

    {"Method":"JSAPI.JSTicket","Message":"JSTicket:kgt8ON7yVITDhtdwci0qeZg4L-Dj1O5WF42Nog47n_0aGF4WPJDIF2UA9MeS8GzLe6MPjyp2WlzvsL0nlvkohw","CreateTime":"2015/10/13 9:39:59","AppGUID":"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d","_PartitionKey":"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d","_RowKey":"1444700398710_ad4d33ce-a9d9-4d11-932e-e2ccebdb726c","_UnixTS":1444700398710}

    默认配置下,logstash处理插入进elasticsearch后,查到的结果是这样的:

    {
        "_index": "logstash-2015.10.16",
        "_type": "voip_feedback",
        "_id": "sheE9eXiQASMDVtRJ0EYcg",
        "_version": 1,
        "found": true,
        "_source": {
            "message": "{\"Method\":\"JSAPI.JSTicket\",\"Message\":\"JSTicket:kgt8ON7yVITDhtdwci0qeZg4L-Dj1O5WF42Nog47n_0aGF4WPJDIF2UA9MeS8GzLe6MPjyp2WlzvsL0nlvkohw\",\"CreateTime\":\"2015/10/13 9:39:59\",\"AppGUID\":\"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d\",\"_PartitionKey\":\"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d\",\"_RowKey\":\"1444700398710_ad4d33ce-a9d9-4d11-932e-e2ccebdb726c\",\"_UnixTS\":1444700398710}",
            "@version": "1",
            "@timestamp": "2015-10-16T00:39:51.252Z",
            "type": "voip_feedback",
            "host": "ipphone",
            "path": "/usr1/data/voip_feedback.txt"
        }
    }

    即会将json记录作为一个字符串放到"message"下,但是我是想让logstash自动解析json记录,将各字段放入elasticsearch中。有三种配置方式可以实现。

    第一种,直接设置format => json

        file {
            type => "voip_feedback"
            path => ["/usr1/data/voip_feedback.txt"]  
            format => json
            sincedb_path => "/home/jfy/soft/logstash-1.4.2/voip_feedback.access"     
        }

    这种方式查询出的结果是:

    {
        "_index": "logstash-2015.10.16",
        "_type": "voip_feedback",
        "_id": "NrNX8HrxSzCvLl4ilKeyCQ",
        "_version": 1,
        "found": true,
        "_source": {
            "Method": "JSAPI.JSTicket",
            "Message": "JSTicket:kgt8ON7yVITDhtdwci0qeZg4L-Dj1O5WF42Nog47n_0aGF4WPJDIF2UA9MeS8GzLe6MPjyp2WlzvsL0nlvkohw",
            "CreateTime": "2015/10/13 9:39:59",
            "AppGUID": "cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d",
            "_PartitionKey": "cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d",
            "_RowKey": "1444700398710_ad4d33ce-a9d9-4d11-932e-e2ccebdb726c",
            "_UnixTS": 1444700398710,
            "@version": "1",
            "@timestamp": "2015-10-16T00:16:11.455Z",
            "type": "voip_feedback",
            "host": "ipphone",
            "path": "/usr1/data/voip_feedback.txt"
        }
    }

    可以看到,json记录已经被直接解析成各字段放入到了_source中,但是原始记录内容没有被保存

    第二种,使用codec => json

        file {
            type => "voip_feedback"
            path => ["/usr1/data/voip_feedback.txt"]  
            sincedb_path => "/home/jfy/soft/logstash-1.4.2/voip_feedback.access"
            codec => json {
                charset => "UTF-8"
            }       
        }

    这种方式查询出的结果与第一种一样,字段被解析,原始记录内容也没有保存

    第三种,使用filter json

    filter {
        if [type] == "voip_feedback" {
            json {
                source => "message"
                #target => "doc"
                #remove_field => ["message"]
            }        
        }
    }

    这种方式查询出的结果是这样的:

    {
        "_index": "logstash-2015.10.16",
        "_type": "voip_feedback",
        "_id": "CUtesLCETAqhX73NKXZfug",
        "_version": 1,
        "found": true,
        "_source": {
            "message": "{\"Method222\":\"JSAPI.JSTicket\",\"Message\":\"JSTicket:kgt8ON7yVITDhtdwci0qeZg4L-Dj1O5WF42Nog47n_0aGF4WPJDIF2UA9MeS8GzLe6MPjyp2WlzvsL0nlvkohw\",\"CreateTime\":\"2015/10/13 9:39:59\",\"AppGUID\":\"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d\",\"_PartitionKey\":\"cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d\",\"_RowKey\":\"1444700398710_ad4d33ce-a9d9-4d11-932e-e2ccebdb726c\",\"_UnixTS\":1444700398710}",
            "@version": "1",
            "@timestamp": "2015-10-16T00:28:20.018Z",
            "type": "voip_feedback",
            "host": "ipphone",
            "path": "/usr1/data/voip_feedback.txt",
            "Method222": "JSAPI.JSTicket",
            "Message": "JSTicket:kgt8ON7yVITDhtdwci0qeZg4L-Dj1O5WF42Nog47n_0aGF4WPJDIF2UA9MeS8GzLe6MPjyp2WlzvsL0nlvkohw",
            "CreateTime": "2015/10/13 9:39:59",
            "AppGUID": "cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d",
            "_PartitionKey": "cb54ba2d-1d38-45f2-9ed1-abff0bf7dd3d",
            "_RowKey": "1444700398710_ad4d33ce-a9d9-4d11-932e-e2ccebdb726c",
            "_UnixTS": 1444700398710,
            "tags": [
                "111",
                "222"
            ]
        }
    }

    可以看到,原始记录被保存,同时字段也被解析保存。如果确认不需要保存原始记录内容,可以加设置:remove_field => ["message"]

    比较以上三种方法,最方便直接的就是在file中设置format => json

    另外需要注意的是,logstash在向es插入数据时默认会在_source下增加type,host,path三个字段。如果json内容中本身也含有type,host,path字段,那么解析后将覆盖掉logstash默认的这三个字段。尤其是type字段,它同时也作为index/type使用,被覆盖后,插入es的index/type就是json数据记录中的内容,而不再是logstash config中配置的type值。

    这时需要设置filter.json.target(例如 target => "doc"),设置该字段后json解析出的各字段将不会直接放在_source下,而是放到所设置的"doc"下:

    {
        "_index": "logstash-2015.10.20",
        "_type": "3alogic_log",
        "_id": "xfj3ngd5S3iH2YABjyU6EA",
        "_version": 1,
        "found": true,
        "_source": {
            "@version": "1",
            "@timestamp": "2015-10-20T11:36:24.503Z",
            "type": "3alogic_log",
            "host": "server114",
            "path": "/usr1/app/log/mysql_3alogic_log.log",
            "doc": {
                "id": 633796,
                "identity": "13413602120",
                "type": "EAP_TYPE_PEAP",
                "apmac": "88-25-93-4E-1F-96",
                "usermac": "00-65-E0-31-62-5D",
                "time": "20151020-193624",
                "apmaccompany": "TP-LINK TECHNOLOGIES CO.,LTD",
                "usermaccompany": ""
            }
        }
    }

    这样就不会覆盖掉_source下的type,host,path值 
    而且在kibana中显示时字段名称为doc.type,doc.id…


    json中嵌套json:

    上传的json:

    {
        "indexName": "tv_app_default",
        "baseInfo": {
          "deviceId": "458ec202-e02e-4b82-a7ca-18e5cb4e3df1",
          "deviceModel": "PRO 7-H",
          "deviceSubModel": "",
          "devicePlatform": "Android",
          "appName": "IMetis",
          "appVersion": "1.0",
          "networkStatus": "wifi连接",
          "systemVersion": "24"
        },
        "event": {
          "id": "cd478c7b167a7a1030deaeb40036b0f9",
          "name": "TestFragment.java_com.italkbb.test.TestFragment_Bundle[{name=test1}]",
          "event_index": "tv_app_default",
          "event_level": "verbose",
          "timestamp": "2019-03-20T15:15:15.029+08:00",
          "duration": "13043968231158",
          "instant": "0",
          "line": -1
        }
      }

    filter里面这么写,会把event和baseInfo里面的json字串解析出来。

    filter {
       json {
          source => "message"
          remove_field => ["message"]
        }
    
        mutate {
          add_field => { "eventtmp" => "%{event}" }
        }
        json {
          source => "eventtmp"
          remove_field => ["eventtmp"]
        }
    
        mutate {
          add_field => { "baseInfotmp" => "%{baseInfo}" }
        }
        json {
          source => "baseInfotmp"
          remove_field => ["baseInfotmp" , "baseInfo"]   #只能加一个数组删除 remove_field => ["eventtmp"] 这句里面也加上就无法插入es了。
    #更正:其实没问题。是由于kibana按timestamp排序,而时间转换得不对,所以在kibana里看不到数据;直接用search语句可以搜索到。

    }
    }

    结果如下:

    {
      "_index": "tv_app_default-2019.05.08",
      "_type": "doc",
      "_id": "k1aVlmoBZV0IMWE2odMF",
      "_version": 1,
      "_score": null,
      "_source": {
        "timestamp": "2019-05-08T14:10:47.340",
        "deviceId": "458ec202-e02e-4b82-a7ca-18e5cb4e3df1",
        "networkStatus": "wifi连接",
        "appName": "IMetis",
        "duration": "59960912157",
        "indexName": "tv_app_default",
        "id": "f353b68c07b661f2fdd42e2260e061d9",
        "deviceSubModel": "",
        "@timestamp": "2019-05-08T15:44:14.968Z",
        "systemVersion": "24",
        "deviceModel": "PRO 7-H",
        "event_level": "verbose",
        "appVersion": "1.0",
        "devicePlatform": "Android",
        "instant": "0",
        "@version": "1",
        "event_index": "tv_app_default",
        "event": {
          "instant": "0",
          "event_index": "tv_app_default",
          "name": "BaseActivity.java_com.italkbb.test.Main2Activity",
          "line": -1,
          "timestamp": "2019-05-08T14:10:47.340",
          "id": "f353b68c07b661f2fdd42e2260e061d9",
          "event_level": "verbose",
          "duration": "59960912157"
        },
        "line": -1,
        "type": "tv_app_log",
        "name": "BaseActivity.java_com.italkbb.test.Main2Activity"
      },
      "fields": {
        "event.timestamp": [
          "2019-05-08T14:10:47.340Z"
        ]
      },
      "sort": [
        1557324647340
      ]
    }
  • 相关阅读:
    User-Agent大全
    Python yield 使用浅析
    解决Ubuntu终端里面显示路径名称太长
    百度搜索URL中的参数都是什么
    Fiddler 网页采集抓包利器__手机app抓包
    《samba服务配置的文本》
    《samba服务搭建》RHEL6
    《NFS文件共享服务的搭建》RHEL
    《shell脚本if..then..elif..then.if语句的总结》
    《shell条件测试语句,字符串测试apache是否开启》
  • 原文地址:https://www.cnblogs.com/bigben0123/p/10572034.html
Copyright © 2011-2022 走看看