zoukankan      html  css  js  c++  java
  • 自定义flume的hbase sink 的序列化程序

    package com.hello.hbase;
    
    import java.nio.charset.Charset;
    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Calendar;
    import java.util.Date;
    import java.util.List;
    import java.util.Locale;
    import java.util.concurrent.atomic.AtomicInteger;
    import java.util.regex.Pattern;

    import com.google.common.base.Charsets;
    import com.google.common.collect.Lists;
    import org.apache.commons.lang.RandomStringUtils;
    import org.apache.flume.Context;
    import org.apache.flume.Event;
    import org.apache.flume.FlumeException;
    import org.apache.flume.conf.ComponentConfiguration;
    import org.apache.flume.sink.hbase.HbaseEventSerializer;
    import org.apache.hadoop.hbase.client.Increment;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Row;
    
    
    public class FlumeHbaseEventSerializer implements HbaseEventSerializer {
      
        // Config vars  
        /** Regular expression used to parse groups from event data. */  
        public static final String REGEX_CONFIG = "regex";  
        public static final String REGEX_DEFAULT = " ";  
        /** Whether to ignore case when performing regex matches. */  
        public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";  
        public static final boolean INGORE_CASE_DEFAULT = false;  
        /** Comma separated list of column names to place matching groups in. */  
        public static final String COL_NAME_CONFIG = "colNames";  
        public static final String COLUMN_NAME_DEFAULT = "ip";  
        /** Index of the row key in matched regex groups */  
        public static final String ROW_KEY_INDEX_CONFIG = "rowKeyIndex";  
        /** Placeholder in colNames for row key */  
        public static final String ROW_KEY_NAME = "ROW_KEY";  
        /** Whether to deposit event headers into corresponding column qualifiers */  
        public static final String DEPOSIT_HEADERS_CONFIG = "depositHeaders";  
        public static final boolean DEPOSIT_HEADERS_DEFAULT = false;  
        /** What charset to use when serializing into HBase's byte arrays */  
        public static final String CHARSET_CONFIG = "charset";  
        public static final String CHARSET_DEFAULT = "UTF-8";  
        /* 
         * This is a nonce used in HBase row-keys, such that the same row-key never 
         * gets written more than once from within this JVM. 
         */  
        protected static final AtomicInteger nonce = new AtomicInteger(0);  
        protected static String randomKey = RandomStringUtils.randomAlphanumeric(10);  
        protected byte[] cf;  
        private byte[] payload;  
        private List<byte[]> colNames = Lists.newArrayList();  
        private boolean regexIgnoreCase;  
        private Charset charset;  
        @Override  
        public void configure(Context context) {  
            String regex = context.getString(REGEX_CONFIG, REGEX_DEFAULT);  
            regexIgnoreCase = context.getBoolean(IGNORE_CASE_CONFIG, INGORE_CASE_DEFAULT);  
            context.getBoolean(DEPOSIT_HEADERS_CONFIG, DEPOSIT_HEADERS_DEFAULT);  
            Pattern.compile(regex, Pattern.DOTALL + (regexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));  
            charset = Charset.forName(context.getString(CHARSET_CONFIG, CHARSET_DEFAULT));  
      
            String cols = new String(context.getString("columns"));  
            String colNameStr;  
            if (cols != null && !"".equals(cols)) {  
                colNameStr = cols;  
            } else {  
                colNameStr = context.getString(COL_NAME_CONFIG, COLUMN_NAME_DEFAULT);  
            }  
      
            String[] columnNames = colNameStr.split(",");  
            for (String s : columnNames) {  
                colNames.add(s.getBytes(charset));  
            }  
        }  
        
        @Override  
        public void configure(ComponentConfiguration conf) {}  
      
        @Override  
        public void initialize(Event event, byte[] columnFamily) {  
            event.getHeaders();  
            this.payload = event.getBody();  
            this.cf = columnFamily;  
        }  
        
        protected byte[] getRowKey(Calendar cal) {  
            String str = new String(payload, charset);  
            String tmp = str.replace(""", "");  
            String[] arr = tmp.split(" ");  
            String log_data = arr[4];
            String[] param_arr = log_data.split("&");
            String userid = param_arr[0];
            String itemid = param_arr[1];
            String type = param_arr[2];
            String ip_str = param_arr[3];
            
    //        String dataStr = arr[3].replace("[", "");  
    //        String rowKey = getDate2Str(dataStr) + "-" + clientIp + "-" + nonce.getAndIncrement();
            String rowKey = ip_str + "-" + nonce.getAndIncrement();
            
            return rowKey.getBytes(charset);  
        }  
      
        protected byte[] getRowKey() {  
            return getRowKey(Calendar.getInstance());  
        }  
    
        @Override  
        public List<Row> getActions() throws FlumeException {  
            List<Row> actions = Lists.newArrayList();  
            byte[] rowKey;  
      
            String body = new String(payload, charset);  
            String tmp = body.replace(""", "");  
    //        String[] arr = tmp.split(REGEX_DEFAULT); 
            String[] arr = tmp.split(" ");
            
            String log_data = arr[4];
            String[] param_arr = log_data.split("&");
            
            String userid = param_arr[0].split("=")[1];
            String itemid = param_arr[1].split("=")[1];
            String type = param_arr[2].split("=")[1];
            String ip_str = param_arr[3].split("=")[1];
                          
            System.out.println("===========");
            System.out.println("===========");
            System.out.println("===========");
            System.out.println("===========");
            System.out.println(userid);
            System.out.println(itemid);
            System.out.println(type);
            System.out.println(ip_str);
            System.out.println("===========");
            System.out.println("===========");
            System.out.println("===========");
            System.out.println("===========");
             
            try {  
                rowKey = getRowKey();
                Put put = new Put(rowKey);  
                put.add(cf, colNames.get(0), userid.getBytes(Charsets.UTF_8));  
                put.add(cf, colNames.get(1), itemid.getBytes(Charsets.UTF_8));  
                put.add(cf, colNames.get(2), type.getBytes(Charsets.UTF_8));
                put.add(cf, colNames.get(3), ip_str.getBytes(Charsets.UTF_8));
                actions.add(put);  
            } catch (Exception e) {  
                throw new FlumeException("Could not get row key!", e);  
            }  
            return actions;  
        }  
      
        @Override  
        public List<Increment> getIncrements() {  
            return Lists.newArrayList();  
        }  
      
        @Override  
        public void close() {}  
      
        public static String getDate2Str(String dataStr) {  
            SimpleDateFormat formatter = null;  
            SimpleDateFormat format = null;  
            Date date = null;  
            try {  
                formatter = new SimpleDateFormat("dd/MMM/yyyy:hh:mm:ss", Locale.ENGLISH);  
                date = formatter.parse(dataStr);  
                format = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");  
            } catch (Exception e) {  
                e.printStackTrace();  
            }  
      
            return format.format(date);  
        }  
    }
  • 相关阅读:
    C程序课题设计——基于图形界面开发的学生信息管理系统
    Linux系统的介绍(以下以Manjaro最新版为例子)
    linux环境下PS1变量配置
    C指针课题实验报告——职工工资管理系统
    vim系统配置文件,配置专属自己的环境
    git常用操作命令
    vim设置成类source insight功能,实现跳转和查找
    ext2文件系统学习札记
    【转载】解析Linux中的VFS文件系统机制
    linux中链表_队列等的基本原理以及操作以及堆栈
  • 原文地址:https://www.cnblogs.com/luozeng/p/9267276.html
Copyright © 2011-2022 走看看