GenericUDAFSum
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import java.util.HashSet;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorObject;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;

/**
 * GenericUDAFSum.
 */
@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
public class GenericUDAFSum extends AbstractGenericUDAFResolver {

  static final Logger LOG = LoggerFactory.getLogger(GenericUDAFSum.class.getName());

  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
      throws SemanticException {
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Exactly one argument is expected.");
    }

    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0,
          "Only primitive type arguments are accepted but "
              + parameters[0].getTypeName() + " is passed.");
    }
    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
      return new GenericUDAFSumLong();
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case VARCHAR:
    case CHAR:
      return new GenericUDAFSumDouble();
    case DECIMAL:
      return new GenericUDAFSumHiveDecimal();
    case BOOLEAN:
    case DATE:
    default:
      throw new UDFArgumentTypeException(0,
          "Only numeric or string type arguments are accepted but "
              + parameters[0].getTypeName() + " is passed.");
    }
  }

  @Override
  public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
      throws SemanticException {
    TypeInfo[] parameters = info.getParameters();

    GenericUDAFSumEvaluator eval = (GenericUDAFSumEvaluator) getEvaluator(parameters);
    eval.setWindowing(info.isWindowing());
    eval.setSumDistinct(info.isDistinct());

    return eval;
  }

  public static PrimitiveObjectInspector.PrimitiveCategory getReturnType(TypeInfo type) {
    if (type.getCategory() != ObjectInspector.Category.PRIMITIVE) {
      return null;
    }
    switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
      return PrimitiveObjectInspector.PrimitiveCategory.LONG;
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case VARCHAR:
    case CHAR:
      return PrimitiveObjectInspector.PrimitiveCategory.DOUBLE;
    case DECIMAL:
      return PrimitiveObjectInspector.PrimitiveCategory.DECIMAL;
    }
    return null;
  }

  /**
   * The base type for the sum operator evaluator.
   */
  public static abstract class GenericUDAFSumEvaluator<ResultType extends Writable>
      extends GenericUDAFEvaluator {

    static abstract class SumAgg<T> extends AbstractAggregationBuffer {
      boolean empty;
      T sum;
      HashSet<ObjectInspectorObject> uniqueObjects; // Unique rows.
    }

    protected PrimitiveObjectInspector inputOI;
    protected PrimitiveObjectInspector outputOI;
    protected ResultType result;
    protected boolean isWindowing;
    protected boolean sumDistinct;

    public void setWindowing(boolean isWindowing) {
      this.isWindowing = isWindowing;
    }

    public void setSumDistinct(boolean sumDistinct) {
      this.sumDistinct = sumDistinct;
    }

    protected boolean isWindowingDistinct() {
      return isWindowing && sumDistinct;
    }

    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      if (isWindowingDistinct()) {
        throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
      } else {
        return terminate(agg);
      }
    }

    /**
     * Check whether the input object is eligible to contribute to the sum:
     * if it is null, or a duplicate of a previously seen value in the case of
     * SUM(DISTINCT), it is skipped.
     *
     * @param input the input object
     * @return true if sumDistinct is false or the non-null input is different
     *         from the previous object
     */
    protected boolean isEligibleValue(SumAgg agg, Object input) {
      if (input == null) {
        return false;
      }

      if (isWindowingDistinct()) {
        HashSet<ObjectInspectorObject> uniqueObjs = agg.uniqueObjects;
        ObjectInspectorObject obj = input instanceof ObjectInspectorObject ?
            (ObjectInspectorObject) input :
            new ObjectInspectorObject(
                ObjectInspectorUtils.copyToStandardObject(input, inputOI, ObjectInspectorCopyOption.JAVA),
                outputOI);
        if (!uniqueObjs.contains(obj)) {
          uniqueObjs.add(obj);
          return true;
        }
        return false;
      }

      return true;
    }
  }

  /**
   * GenericUDAFSumHiveDecimal.
   */
  public static class GenericUDAFSumHiveDecimal extends GenericUDAFSumEvaluator<HiveDecimalWritable> {

    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);
      result = new HiveDecimalWritable(0);
      inputOI = (PrimitiveObjectInspector) parameters[0];
      // The output precision is 10 greater than the input which should cover at least
      // 10b rows. The scale is the same as the input.
      DecimalTypeInfo outputTypeInfo = null;
      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
        int precision = Math.min(HiveDecimal.MAX_PRECISION, inputOI.precision() + 10);
        outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputOI.scale());
      } else {
        outputTypeInfo = (DecimalTypeInfo) inputOI.getTypeInfo();
      }
      ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(outputTypeInfo);
      outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(
          oi, ObjectInspectorCopyOption.JAVA);
      return oi;
    }
    /** Class for storing the decimal sum value. */
    @AggregationType(estimable = false) // hard to know exactly for decimals
    static class SumHiveDecimalWritableAgg extends SumAgg<HiveDecimalWritable> {
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      SumHiveDecimalWritableAgg agg = new SumHiveDecimalWritableAgg();
      reset(agg);
      return agg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      SumAgg<HiveDecimalWritable> bdAgg = (SumAgg<HiveDecimalWritable>) agg;
      bdAgg.empty = true;
      bdAgg.sum = new HiveDecimalWritable(0);
      bdAgg.uniqueObjects = new HashSet<ObjectInspectorObject>();
    }

    boolean warned = false;

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      try {
        if (isEligibleValue((SumHiveDecimalWritableAgg) agg, parameters[0])) {
          ((SumHiveDecimalWritableAgg) agg).empty = false;
          ((SumHiveDecimalWritableAgg) agg).sum.mutateAdd(
              PrimitiveObjectInspectorUtils.getHiveDecimal(parameters[0], inputOI));
        }
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
        }
      }
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial != null) {
        SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
        if (myagg.sum == null || !myagg.sum.isSet()) {
          return;
        }

        myagg.empty = false;
        if (isWindowingDistinct()) {
          throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
        } else {
          myagg.sum.mutateAdd(PrimitiveObjectInspectorUtils.getHiveDecimal(partial, inputOI));
        }
      }
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
      if (myagg.empty || myagg.sum == null || !myagg.sum.isSet()) {
        return null;
      }
      DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) outputOI.getTypeInfo();
      myagg.sum.mutateEnforcePrecisionScale(decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
      if (!myagg.sum.isSet()) {
        LOG.warn("The sum of a column with data type HiveDecimal is out of range");
        return null;
      }

      result.set(myagg.sum);
      return result;
    }

    @Override
    public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
      // Don't use streaming for distinct cases
      if (sumDistinct) {
        return null;
      }

      return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>(
          this, wFrameDef) {

        @Override
        protected HiveDecimalWritable getNextResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>.SumAvgStreamingState ss)
            throws HiveException {
          SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) ss.wrappedBuf;
          HiveDecimal r = myagg.empty ? null : myagg.sum.getHiveDecimal();
          HiveDecimal d = ss.retrieveNextIntermediateValue();
          if (d != null) {
            r = r == null ? null : r.subtract(d);
          }

          return r == null ? null : new HiveDecimalWritable(r);
        }

        @Override
        protected HiveDecimal getCurrentIntermediateResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<HiveDecimalWritable, HiveDecimal>.SumAvgStreamingState ss)
            throws HiveException {
          SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) ss.wrappedBuf;
          return myagg.empty ? null : myagg.sum.getHiveDecimal();
        }
      };
    }
  }
  /**
   * GenericUDAFSumDouble.
   */
  public static class GenericUDAFSumDouble extends GenericUDAFSumEvaluator<DoubleWritable> {

    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);
      result = new DoubleWritable(0);
      inputOI = (PrimitiveObjectInspector) parameters[0];
      outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(inputOI,
          ObjectInspectorCopyOption.JAVA);
      return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
    }

    /** Class for storing the double sum value. */
    @AggregationType(estimable = true)
    static class SumDoubleAgg extends SumAgg<Double> {
      @Override
      public int estimate() {
        return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2;
      }
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      SumDoubleAgg result = new SumDoubleAgg();
      reset(result);
      return result;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      SumDoubleAgg myagg = (SumDoubleAgg) agg;
      myagg.empty = true;
      myagg.sum = 0.0;
      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
    }

    boolean warned = false;

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      try {
        if (isEligibleValue((SumDoubleAgg) agg, parameters[0])) {
          ((SumDoubleAgg) agg).empty = false;
          ((SumDoubleAgg) agg).sum += PrimitiveObjectInspectorUtils.getDouble(parameters[0], inputOI);
        }
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
        }
      }
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial != null) {
        SumDoubleAgg myagg = (SumDoubleAgg) agg;
        myagg.empty = false;
        if (isWindowingDistinct()) {
          throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
        } else {
          myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI);
        }
      }
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      SumDoubleAgg myagg = (SumDoubleAgg) agg;
      if (myagg.empty) {
        return null;
      }
      result.set(myagg.sum);
      return result;
    }

    @Override
    public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
      // Don't use streaming for distinct cases
      if (sumDistinct) {
        return null;
      }

      return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>(this,
          wFrameDef) {

        @Override
        protected DoubleWritable getNextResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>.SumAvgStreamingState ss)
            throws HiveException {
          SumDoubleAgg myagg = (SumDoubleAgg) ss.wrappedBuf;
          Double r = myagg.empty ? null : myagg.sum;
          Double d = ss.retrieveNextIntermediateValue();
          if (d != null) {
            r = r == null ? null : r - d;
          }

          return r == null ? null : new DoubleWritable(r);
        }

        @Override
        protected Double getCurrentIntermediateResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<DoubleWritable, Double>.SumAvgStreamingState ss)
            throws HiveException {
          SumDoubleAgg myagg = (SumDoubleAgg) ss.wrappedBuf;
          return myagg.empty ? null : new Double(myagg.sum);
        }
      };
    }
  }
  /**
   * GenericUDAFSumLong.
   */
  public static class GenericUDAFSumLong extends GenericUDAFSumEvaluator<LongWritable> {

    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);
      result = new LongWritable(0);
      inputOI = (PrimitiveObjectInspector) parameters[0];
      outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(inputOI,
          ObjectInspectorCopyOption.JAVA);
      return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    }

    /** Class for storing the long sum value. */
    @AggregationType(estimable = true)
    static class SumLongAgg extends SumAgg<Long> {
      @Override
      public int estimate() {
        return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2;
      }
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      SumLongAgg result = new SumLongAgg();
      reset(result);
      return result;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      SumLongAgg myagg = (SumLongAgg) agg;
      myagg.empty = true;
      myagg.sum = 0L;
      myagg.uniqueObjects = new HashSet<ObjectInspectorObject>();
    }

    private boolean warned = false;

    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      try {
        if (isEligibleValue((SumLongAgg) agg, parameters[0])) {
          ((SumLongAgg) agg).empty = false;
          ((SumLongAgg) agg).sum += PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
        }
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
        }
      }
    }

    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial != null) {
        SumLongAgg myagg = (SumLongAgg) agg;
        myagg.empty = false;
        if (isWindowingDistinct()) {
          throw new HiveException("Distinct windowing UDAF doesn't support merge and terminatePartial");
        } else {
          myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
        }
      }
    }

    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      SumLongAgg myagg = (SumLongAgg) agg;
      if (myagg.empty) {
        return null;
      }
      result.set(myagg.sum);
      return result;
    }

    @Override
    public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrameDef) {
      // Don't use streaming for distinct cases
      if (isWindowingDistinct()) {
        return null;
      }

      return new GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>(this, wFrameDef) {

        @Override
        protected LongWritable getNextResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>.SumAvgStreamingState ss)
            throws HiveException {
          SumLongAgg myagg = (SumLongAgg) ss.wrappedBuf;
          Long r = myagg.empty ? null : myagg.sum;
          Long d = ss.retrieveNextIntermediateValue();
          if (d != null) {
            r = r == null ? null : r - d;
          }

          return r == null ? null : new LongWritable(r);
        }

        @Override
        protected Long getCurrentIntermediateResult(
            org.apache.hadoop.hive.ql.udf.generic.GenericUDAFStreamingEvaluator.SumAvgEnhancer<LongWritable, Long>.SumAvgStreamingState ss)
            throws HiveException {
          SumLongAgg myagg = (SumLongAgg) ss.wrappedBuf;
          return myagg.empty ? null : new Long(myagg.sum);
        }
      };
    }
  }
}
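Before moving on to the evaluator base class, here is a minimal sketch of how the pieces above are exercised outside a query. The SumByHand driver class is mine, not part of Hive, and it assumes hive-exec is on the classpath: the resolver maps a BIGINT argument to GenericUDAFSumLong, init() in COMPLETE mode wires up the ObjectInspectors, and iterate()/terminate() produce the final sum, the way a map-less aggregation would.

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.LongWritable;

// Hypothetical standalone driver, for illustration only.
public class SumByHand {
  public static void main(String[] args) throws Exception {
    // Resolver: a single BIGINT argument selects GenericUDAFSumLong.
    GenericUDAFSum resolver = new GenericUDAFSum();
    GenericUDAFEvaluator eval =
        resolver.getEvaluator(new TypeInfo[] { TypeInfoFactory.longTypeInfo });

    // COMPLETE mode: original rows in, final result out (iterate + terminate).
    ObjectInspector inputOI =
        PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    eval.init(Mode.COMPLETE, new ObjectInspector[] { inputOI });

    AggregationBuffer buf = eval.getNewAggregationBuffer();
    for (long v : new long[] { 1L, 2L, 3L }) {
      eval.iterate(buf, new Object[] { new LongWritable(v) });
    }
    System.out.println(eval.terminate(buf)); // prints 6
  }
}

In a real query, Hive's GroupByOperator drives exactly these calls through aggregate() and evaluate(), choosing the mode per execution stage; the base class those methods live in is shown next.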
GenericUDAFEvaluator
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import java.io.Closeable;
import java.io.IOException;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;

import org.apache.hadoop.hive.ql.exec.MapredContext;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hive.common.util.AnnotationUtils;

/**
 * A Generic User-defined aggregation function (GenericUDAF) for use with Hive.
 *
 * New GenericUDAF classes need to inherit from this GenericUDAF class.
 *
 * GenericUDAFs are superior to normal UDAFs in the following ways:
 * 1. They can accept arguments of complex types, and return complex types.
 * 2. They can accept a variable number of arguments.
 * 3. They can accept an infinite number of function signatures -- for example,
 * it's easy to write a GenericUDAF that accepts array<int>,
 * array<array<int>> and so on (arbitrary levels of nesting).
 */
@UDFType(deterministic = true)
public abstract class GenericUDAFEvaluator implements Closeable {

  @Retention(RetentionPolicy.RUNTIME)
  public static @interface AggregationType {
    boolean estimable() default false;
  }

  public static boolean isEstimable(AggregationBuffer buffer) {
    if (buffer instanceof AbstractAggregationBuffer) {
      Class<? extends AggregationBuffer> clazz = buffer.getClass();
      AggregationType annotation = AnnotationUtils.getAnnotation(clazz, AggregationType.class);
      return annotation != null && annotation.estimable();
    }
    return false;
  }

  /**
   * Mode.
   */
  public static enum Mode {
    /**
     * PARTIAL1: from original data to partial aggregation data: iterate() and
     * terminatePartial() will be called.
     */
    PARTIAL1, // corresponds to the map phase: iterate() and terminatePartial() are called
    /**
     * PARTIAL2: from partial aggregation data to partial aggregation data:
     * merge() and terminatePartial() will be called.
     */
    PARTIAL2, // corresponds to the combiner phase: merge() and terminatePartial() are called
    /**
     * FINAL: from partial aggregation to full aggregation: merge() and
     * terminate() will be called.
     */
    FINAL, // corresponds to the reduce phase: merge() and terminate() are called
    /**
     * COMPLETE: from original data directly to full aggregation: iterate() and
     * terminate() will be called.
     */
    COMPLETE // corresponds to a map-only job with no reduce phase: iterate() and terminate() are called
  };

  Mode mode;

  /**
   * The constructor.
   */
  public GenericUDAFEvaluator() {
  }

  /**
   * Additionally setup GenericUDAFEvaluator with MapredContext before initializing.
   * This is only called in runtime of MapRedTask.
   *
   * @param mapredContext context
   */
  public void configure(MapredContext mapredContext) {
  }

  /**
   * Initialize the evaluator.
   *
   * @param m
   *          The mode of aggregation; it determines which of the four
   *          lifecycle methods are called and how the evaluator is initialized.
   * @param parameters
   *          The ObjectInspector for the parameters: In PARTIAL1 and COMPLETE
   *          mode, the parameters are original data (PARTIAL1 is the map side;
   *          COMPLETE is a reduce with no map). In PARTIAL2 and FINAL mode,
   *          the parameters are just partial aggregations, i.e. already
   *          aggregated data (in that case the array will always have a
   *          single element).
   * @return The ObjectInspector for the return value. In PARTIAL1 and PARTIAL2
   *         mode, the ObjectInspector for the return value of the
   *         terminatePartial() call; in FINAL and COMPLETE mode, the
   *         ObjectInspector for the return value of the terminate() call.
   *
   *         NOTE: We need ObjectInspector[] (in addition to the TypeInfo[] in
   *         GenericUDAFResolver) for 2 reasons: 1. ObjectInspector contains
   *         more information than TypeInfo. 2. We call
   *         GenericUDAFResolver.getEvaluator at compilation time, but
   *         GenericUDAFEvaluator.init at execution time.
   */
  public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
    // This function should be overridden in every subclass,
    // and the subclass should call super.init(m, parameters) to get the mode set.
    mode = m;
    return null;
  }

  /**
   * The interface for a class that is used to store the aggregation result
   * during the process of aggregation.
   *
   * We split this piece of data out because there can be millions of instances
   * of this Aggregation in a hash-based aggregation process, and it's very
   * important to conserve memory.
   *
   * In the future, we may completely hide this class inside the Evaluator and
   * use integer numbers to identify which aggregation we are looking at.
   *
   * @deprecated use {@link AbstractAggregationBuffer} instead
   */
  public static interface AggregationBuffer {
  };

  public static abstract class AbstractAggregationBuffer implements AggregationBuffer {
    /**
     * Estimate the size of memory occupied by the aggregation buffer.
     * Currently, Hive assumes that primitive types occupy 16 bytes each and
     * each Java object has 64 bytes of overhead. For maps, each entry also
     * has a 64-byte overhead.
     */
    public int estimate() {
      return -1;
    }
  }

  /**
   * Get a new aggregation object.
   */
  public abstract AggregationBuffer getNewAggregationBuffer() throws HiveException;

  /**
   * Reset the aggregation. This is useful if we want to reuse the same
   * aggregation.
   */
  public abstract void reset(AggregationBuffer agg) throws HiveException;

  /**
   * Close GenericUDFEvaluator.
   * This is only called in runtime of MapRedTask.
   */
  public void close() throws IOException {
  }

  /**
   * This function will be called by GroupByOperator when it sees a new input
   * row.
   *
   * @param agg
   *          The object to store the aggregation result.
   * @param parameters
   *          The row, which can be inspected by the OIs passed in init().
   */
  public void aggregate(AggregationBuffer agg, Object[] parameters) throws HiveException {
    if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
      iterate(agg, parameters);
    } else {
      assert (parameters.length == 1);
      merge(agg, parameters[0]);
    }
  }

  /**
   * This function will be called by GroupByOperator when the aggregation
   * result is needed: it returns the partial result in PARTIAL1/PARTIAL2 mode
   * and the final result in FINAL/COMPLETE mode.
   *
   * @param agg
   *          The object that stores the aggregation result.
   */
  public Object evaluate(AggregationBuffer agg) throws HiveException {
    if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
      return terminatePartial(agg);
    } else {
      return terminate(agg);
    }
  }

  /**
   * Iterate through original data.
   *
   * @param parameters
   *          The objects of parameters.
   */
  public abstract void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException;

  /**
   * Get partial aggregation result.
   *
   * @return partial aggregation result.
   */
  public abstract Object terminatePartial(AggregationBuffer agg) throws HiveException;

  /**
   * Merge with partial aggregation result. NOTE: null might be passed in case
   * there is no input data.
   *
   * @param partial
   *          The partial aggregation result.
   */
  public abstract void merge(AggregationBuffer agg, Object partial) throws HiveException;

  /**
   * Get final aggregation result.
   *
   * @return final aggregation result.
   */
  public abstract Object terminate(AggregationBuffer agg) throws HiveException;

  /**
   * When evaluating an aggregate over a fixed Window, the naive way to compute
   * results is to compute the aggregate for each row. But often there is a way
   * to compute results in a more efficient manner. This method enables the
   * basic evaluator to provide a function object that does the job in a more
   * efficient manner.
   * <p>
   * This method is called after this Evaluator is initialized. The returned
   * Function must be initialized. It is passed the 'window' of aggregation for
   * each row.
   *
   * @param wFrmDef
   *          the Window definition in play for this evaluation.
   * @return null implies that this fn cannot be processed in Streaming mode.
   *         So each row is evaluated independently.
   */
  public GenericUDAFEvaluator getWindowingEvaluator(WindowFrameDef wFrmDef) {
    return null;
  }
}
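The Mode enum is the heart of this contract: iterate()/terminatePartial() run in PARTIAL1, merge()/terminatePartial() in PARTIAL2, merge()/terminate() in FINAL, and iterate()/terminate() in COMPLETE, with aggregate() and evaluate() dispatching on the mode. To make the lifecycle concrete, here is a hypothetical minimal evaluator (a count of non-null values; the class name and logic are mine for illustration, not from the Hive source) showing which method runs in which phase:

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;

// Hypothetical COUNT-style evaluator illustrating the four-mode lifecycle.
public class GenericUDAFCountNotNullEvaluator extends GenericUDAFEvaluator {

  /** Per-group state: a single running count. */
  static class CountAgg extends AbstractAggregationBuffer {
    long count;
  }

  private final LongWritable result = new LongWritable(0);

  @Override
  public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
    super.init(m, parameters); // records the mode
    // In PARTIAL1/COMPLETE the parameter is the original column; in
    // PARTIAL2/FINAL it is the partial count. Either way the output
    // (partial or final) is a long.
    return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
  }

  @Override
  public AggregationBuffer getNewAggregationBuffer() throws HiveException {
    CountAgg agg = new CountAgg();
    reset(agg);
    return agg;
  }

  @Override
  public void reset(AggregationBuffer agg) throws HiveException {
    ((CountAgg) agg).count = 0;
  }

  @Override
  public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
    // PARTIAL1 / COMPLETE: one original row at a time.
    if (parameters[0] != null) {
      ((CountAgg) agg).count++;
    }
  }

  @Override
  public Object terminatePartial(AggregationBuffer agg) throws HiveException {
    // PARTIAL1 / PARTIAL2: emit the intermediate state.
    return terminate(agg);
  }

  @Override
  public void merge(AggregationBuffer agg, Object partial) throws HiveException {
    // PARTIAL2 / FINAL: fold another task's partial count into ours.
    if (partial != null) {
      ((CountAgg) agg).count += ((LongWritable) partial).get();
    }
  }

  @Override
  public Object terminate(AggregationBuffer agg) throws HiveException {
    // FINAL / COMPLETE: emit the full aggregation.
    result.set(((CountAgg) agg).count);
    return result;
  }
}

With map-side aggregation enabled, a GROUP BY runs such an evaluator in PARTIAL1 on the map side and FINAL on the reduce side; pairing it with a resolver, as GenericUDAFSum does above, is all a custom aggregate needs.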
Reference: http://paddy-w.iteye.com/blog/2081409