zoukankan      html  css  js  c++  java
  • 处理用千牛导出的淘宝数据,供Logstash导入Elasticsearch使用。(NodeJS)

    var rf=require("fs");  
    
    // 加载编码转换模块  
    //npm install iconv-lite
    var iconv = require('iconv-lite');  
    
    // Base name (without extension) of the exported log; the input is
    // "<fileName>.txt" and the output is "<fileName><guid>.json".
    var fileName = "2017-03-01~2017-05-31";

    // Read the raw bytes. Without an encoding argument readFileSync returns a
    // Buffer directly, so there is no need to read a "binary" string and wrap
    // it with the deprecated `new Buffer(...)` constructor.
    var buf = rf.readFileSync(fileName+".txt");

    // The script treats the export as GBK-encoded text; decode it to a string.
    var str = iconv.decode(buf, 'GBK');

    var newData = handleMS(str);
    writeFile(fileName+newGuid()+".json", newData);

    // writeFile is asynchronous, so this is printed before the write finishes.
    console.log("The END");
    
    /**
     * Convert the decoded text of a chat-log export into newline-delimited JSON.
     *
     * Layout the parser relies on:
     *   - line 0 holds the customer-service account name;
     *   - lines 1-6 are skipped (parsing starts at line index 7);
     *   - empty lines and lines containing `delimiter` (a run of dashes that
     *     separates conversations) produce no record;
     *   - a message line looks like `sender(date):  text`.
     * Any other line still produces a record, with the text before the first
     * "(" as `who`, an empty `m` and a null `date`.
     *
     * @param {string} data - Decoded text of the export file (LF or CRLF).
     * @returns {string} One JSON object per line, each terminated by "\n":
     *     `who` (sender), `date` (ISO string, or null when not parsable),
     *     `m` (message text) and `isCC` (1 when the sender equals the name on
     *     line 0, otherwise 0).
     */
    function handleMS(data){

        var newData = "";

        // Parse the argument rather than the global `str`, and accept both LF
        // and CRLF so a trailing "\r" never leaks into names or messages.
        var arr = data.split(/\r?\n/);

        // Customer-service account name
        var callcenter = arr[0];

        var delimiter = '----------------------------';
        var separator = '):  ';

        for (var i = 7 ; i < arr.length; i++) {

            var item = arr[i];

            if (item === "") {
                continue;
            }

            // Conversation separator lines carry no message of their own.
            if (item.indexOf(delimiter) !== -1) {
                continue;
            }

            var cc = item.split('(')[0];
            var date = "";
            var message = "";

            // Cut at the first separator only, so message text that itself
            // contains "):  " is kept intact instead of being dropped.
            var sepAt = item.indexOf(separator);
            if (sepAt !== -1) {
                message = item.slice(sepAt + separator.length);
                date = item.split(')')[0].split('(')[1];
            }

            // An unparsable date becomes an Invalid Date, which JSON.stringify
            // serializes as null.
            newData += JSON.stringify({who:cc,date:new Date(date),m:message, isCC:cc === callcenter ? 1 : 0})+"\n";
        }
        return newData;
    }
    
    /**
     * Encode `data` as UTF-8 and write it to `file` asynchronously.
     *
     * Uses fs.writeFile, so the write targets `file` as a whole rather than
     * appending to it. The outcome is only reported on the console.
     *
     * @param {string} file - Path of the file to write.
     * @param {string} data - Text content to store.
     */
    function writeFile(file, data){
        // Turn the text into its UTF-8 byte representation.
        var encoded = iconv.encode(data, 'utf-8');

        rf.writeFile(file, encoded, function onWritten(err){
            if (!err) {
                console.log("写入文件ok");
                return;
            }
            console.log("fail " + err);
        });
    }
      
    /**
     * Build a random GUID-shaped identifier: 32 lowercase hex digits grouped
     * 8-4-4-4-12 and joined with dashes. Digits come from Math.random().
     *
     * @returns {string} A 36-character identifier.
     */
    function newGuid()
    {
        var groupSizes = [8, 4, 4, 4, 12];
        var groups = [];
        for (var g = 0; g < groupSizes.length; g++) {
            var chunk = "";
            for (var k = 0; k < groupSizes[g]; k++) {
                // One random hex digit (0-f) per draw.
                chunk += Math.floor(Math.random() * 16.0).toString(16);
            }
            groups.push(chunk);
        }
        return groups.join("-");
    }

    Logstash.conf

    # Pipeline: file input -> json / mutate filters -> Elasticsearch output.
    input {  
          file {
              # Picks up every *.json file in this directory.
              # Presumably these are the files written by the Node.js script;
              # they have to be placed here for the pipeline to see them.
              path => "D:/logstash-5.2.2/testdata/*.json"
              start_position => "beginning"
              sincedb_path => "D:/logstash-5.2.2/bin/sincedb"
              # Decode the input as UTF-8 JSON.
              codec => json {
                charset => "UTF-8"
            }       
          }
    }
    filter {  
       # Parse the "message" field as JSON.
       # NOTE(review): the input already uses the json codec, so "message"
       # may not exist at this point - confirm this filter is still needed.
       json{
       source => "message"
       }
    
       
        # Remove the listed fields from each event before it is sent on.
        mutate
        {
            remove_field => [ "message","path","@version","@timestamp","host","_id","value"]
        }
    }
    output {  
        # Index each event into "testtbmsdb3" with document type "ms".
        elasticsearch {
            action => "index"
            hosts => ["http://172.31.2.9:9200/"] 
            # NOTE(review): credentials are hard-coded here in plain text.
            user => "admin"
            password => "123456"
         
            index => "testtbmsdb3"
            document_type => "ms"
            workers => 1
        }
         # Console output, currently disabled.
         #stdout {
             #codec => rubydebug
             #codec => json_lines
         #}
    } 
  • 相关阅读:
    【自动化测试】rf+selenium中解决for计数嵌套循环问题
    【自动化测试】RF链接数据库12c遇到的问题总结
    【自动化测试】关于如何管理规范整个团队的想法(1)
    python爬虫--基本流程
    python爬虫--理论
    [工具箱]一键优化Windows
    [工具箱]禁用Windows系统更新做了哪些操作?
    有人说要节能,有人说要耗能
    让攻击挖矿耗能的攻击者闭嘴的方法是?
    POC挖矿没有前途
  • 原文地址:https://www.cnblogs.com/haoliansheng/p/6944418.html
Copyright © 2011-2022 走看看