zoukankan      html  css  js  c++  java
  • 自定义Hive UDAF 实现相邻去重

    内置的两个聚合函数(UDAF)

    collect_list():将多行的值聚合为一个数组(保留重复值,常配合 concat_ws 拼接为一行字符串)
    collect_set():将多行的值聚合为一个数组并去重(同样可配合 concat_ws 拼接为一行字符串)
    多行字符串拼接为一行并相邻去重UDAF:Concat()

    concat_udaf.jar

    package com.tcc.udaf;

    import java.util.regex.Pattern;

    import org.apache.hadoop.hive.ql.exec.UDAF;
    import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

    /**
     * Hive UDAF that joins the non-null values it is fed into one delimited string and
     * collapses every run of identical adjacent values into a single occurrence
     * (e.g. {@code a, a, b, a} becomes {@code "a,b,a"}).
     *
     * <p>Known limitation: the intermediate state is a single delimited string, so a value
     * that itself contains the delimiter is treated as several values when the final
     * result is built.
     */
    public class Concat extends UDAF
    {
        /**
         * Evaluator implementing the init / iterate / terminatePartial / merge / terminate
         * steps of the aggregation.
         */
        public static class ConcatUDAFEvaluator
            implements UDAFEvaluator
        {
            /** Delimiter used when the caller passes {@code null} or an empty string. */
            private static final String DEFAULT_DELIMITER = ",";

            /** Aggregation state; {@code null} until the first non-null value or partial arrives. */
            private PartialResult partial;

            /** Resets the aggregation state. */
            public void init()
            {
                this.partial = null;
            }

            /**
             * Appends one value to the running result.
             *
             * @param value value to append; {@code null} values are skipped
             * @param deli  delimiter placed between values; only the delimiter seen together
             *              with the first non-null value is kept, and {@code null} or
             *              {@code ""} falls back to {@code ","}
             * @return always {@code true}
             */
            public boolean iterate(String value, String deli)
            {
                if (value == null) {
                    return true;
                }
                if (this.partial == null) {
                    this.partial = new PartialResult();
                    this.partial.result = "";
                    this.partial.delimiter =
                        (deli == null || deli.isEmpty()) ? DEFAULT_DELIMITER : deli;
                }

                if (this.partial.result.length() > 0) {
                    this.partial.result = this.partial.result.concat(this.partial.delimiter);
                }
                this.partial.result = this.partial.result.concat(value);

                return true;
            }

            /**
             * Returns the current state so it can be merged into another evaluator.
             *
             * @return the partial state, or {@code null} if no non-null value was seen
             */
            public PartialResult terminatePartial() {
                return this.partial;
            }

            /**
             * Folds another partial state into this one, appending its joined values after
             * the values already held here.
             *
             * @param other partial state to merge; {@code null} is ignored
             * @return always {@code true}
             */
            public boolean merge(PartialResult other) {
                if (other == null) {
                    return true;
                }
                if (this.partial == null) {
                    // Copy the fields rather than aliasing the caller's object.
                    this.partial = new PartialResult();
                    this.partial.result = other.result;
                    this.partial.delimiter = other.delimiter;
                    return true;
                }
                if (this.partial.result.length() > 0) {
                    this.partial.result = this.partial.result.concat(this.partial.delimiter);
                }
                this.partial.result = this.partial.result.concat(other.result);
                return true;
            }

            /**
             * Produces the final string with runs of identical adjacent values collapsed.
             *
             * @return the de-duplicated, delimiter-joined values, or {@code null} when no
             *         non-null value was aggregated
             */
            public String terminate() {
                if (this.partial == null) {
                    // Nothing was aggregated; previously this dereferenced null.
                    return null;
                }
                String joined = this.partial.result;
                String delimiter = this.partial.delimiter;

                if (joined.indexOf(delimiter) == -1) {
                    return joined;
                }

                // Quote the delimiter: String.split takes a regex, so "|" or "." would
                // otherwise split in the wrong places.
                String[] tokens = joined.split(Pattern.quote(delimiter));

                // Single pass: emit a token only when it differs from the one before it.
                // Works for zero or one token too (split drops trailing empty tokens).
                StringBuilder sb = new StringBuilder();
                String previous = null;
                for (String token : tokens) {
                    if (previous != null) {
                        if (token.equals(previous)) {
                            continue;
                        }
                        sb.append(delimiter);
                    }
                    sb.append(token);
                    previous = token;
                }
                return sb.toString();
            }

            /** Intermediate aggregation state exchanged between evaluators. */
            public static class PartialResult
            {
                /** Values joined so far, separated by {@link #delimiter}. */
                String result;
                /** Separator placed between values. */
                String delimiter;
            }
        }
    }


    使用:

    -- Make the jar that contains the UDAF class available to the session.
    add jar concat_udaf.jar;
    -- Register the class com.tcc.udaf.Concat under the function name Concat.
    create temporary function Concat as 'com.tcc.udaf.Concat';
    -- For each value of a, aggregate column b with ',' as the delimiter.
    -- NOTE(review): this calls lowercase `concat`; presumably it resolves to the function
    -- registered above rather than a built-in of the same name — confirm on the target Hive version.
    select a,concat(b,',') from concat_test group by a;
    ————————————————
    转自:https://me.csdn.net/chuangchuangtao
    原文链接:https://blog.csdn.net/chuangchuangtao/article/details/77455675

  • 相关阅读:
    [补]2019HDU杭电多校第一场A
    [补]2019nowcoder牛客第三场F(暂且)
    [补]2019nowcoder牛客第一场E、I
    [学]从零(多项式基础与FFT)开始BM学习笔记
    [补]2019nowcoder牛客第二场E、H(upd0730)
    从一个简单的例子对win 服务程序进行讲解
    HTTP协议学习记录及总结
    Windows身份验证与forms身份验证的结合
    关于Sql server 的 几道面试题
    PlaceHolder控件的使用
  • 原文地址:https://www.cnblogs.com/db-record/p/11498897.html
Copyright © 2011-2022 走看看