package hive.udaf;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * A simple UDAF that sums action counts for a given action type.
 *
 * It should be very easy to follow and can be used as an example for writing
 * new UDAFs.
 *
 * Note that Hive internally uses a different mechanism (called GenericUDAF) to
 * implement built-in aggregation functions, which are harder to program but
 * more efficient.
 */
public final class ActionCount extends UDAF {

    /**
     * The internal state of the aggregation.
     *
     * Note that this is only needed if the internal state cannot be represented
     * by a primitive.
     *
     * The internal state can also contain fields with types like
     * ArrayList and HashMap if needed.
     */
    public static class UDAFState {
        private long mCount;
        private long mSum;
    }

    /**
     * The actual class for doing the aggregation. Hive will automatically look
     * for all inner classes of the UDAF that implement UDAFEvaluator.
     */
    public static class UDAFExampleAvgEvaluator implements UDAFEvaluator {

        UDAFState state;

        public UDAFExampleAvgEvaluator() {
            super();
            state = new UDAFState();
            init();
        }

        /**
         * Reset the state of the aggregation.
         */
        public void init() {
            state.mSum = 0;
            state.mCount = 0;
        }

        /**
         * Iterate through one row of original data.
         *
         * The number and type of arguments need to be the same as when we call
         * this UDAF from the Hive command line (see the usage note after the class).
         *
         * This function should always return true.
         */
        public boolean iterate(String act_code, long act_times, String act_type) {
            // Called once per input row; guard against NULL act_code values.
            if (act_code != null && act_code.equals(act_type)) {
                state.mSum += act_times;
                state.mCount++;
            }
            return true;
        }

        /**
         * Terminate a partial aggregation and return the state. If the state is a
         * primitive, just return primitive Java classes like Integer or String.
         */
        public UDAFState terminatePartial() {
            // Hands the partial state on to the next stage.
            // Per the SQL standard, aggregating zero rows should yield NULL.
            return state.mCount == 0 ? null : state;
        }

        /**
         * Merge with a partial aggregation.
         *
         * This function should always have a single argument which has the same
         * type as the return value of terminatePartial().
         */
        public boolean merge(UDAFState o) {
            // Merge the partial state produced by another task.
            if (o != null) {
                state.mSum += o.mSum;
                state.mCount += o.mCount;
            }
            return true;
        }

        /**
         * Terminates the aggregation and returns the final result.
         */
        public long terminate() {
            // The return type is a primitive long, so an empty aggregation
            // returns 0 here rather than NULL.
            return state.mCount == 0 ? 0 : state.mSum;
        }
    }

    private ActionCount() {
        // prevent instantiation
    }
}
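To use the class from Hive, you would typically compile it, package it into a jar, and register it before querying, e.g. ADD JAR /path/to/your-udaf.jar; followed by CREATE TEMPORARY FUNCTION action_count AS 'hive.udaf.ActionCount';, after which something like action_count(act_code, act_times, 'click') can appear in a GROUP BY query. The jar path, function name, and column names here are only placeholders for illustration; the argument list must match iterate() exactly.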

In the end, the key is still a deep understanding of the MapReduce execution model; that is what lets you really get the most out of Hive.
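To make that model concrete, here is a minimal, hand-written sketch (not from the original post) that drives the evaluator the way Hive's map and reduce phases would: map-side evaluators consume rows with iterate() and emit partial state via terminatePartial(), and a reduce-side evaluator combines those partials with merge() before terminate() produces the final value. The sample rows are invented for illustration.

package hive.udaf;

/** Illustrative walk-through of the UDAF lifecycle; not part of the original example. */
public class ActionCountDemo {

    public static void main(String[] args) {
        // "Map side": each task aggregates its own slice of the input rows.
        ActionCount.UDAFExampleAvgEvaluator mapTask1 = new ActionCount.UDAFExampleAvgEvaluator();
        mapTask1.iterate("click", 3L, "click"); // act_code matches act_type, adds 3
        mapTask1.iterate("view", 7L, "click");  // no match, row is skipped
        ActionCount.UDAFState partial1 = mapTask1.terminatePartial();

        ActionCount.UDAFExampleAvgEvaluator mapTask2 = new ActionCount.UDAFExampleAvgEvaluator();
        mapTask2.iterate("click", 5L, "click"); // matches, adds 5
        ActionCount.UDAFState partial2 = mapTask2.terminatePartial();

        // "Reduce side": merge the partial states, then produce the final result.
        ActionCount.UDAFExampleAvgEvaluator reduceTask = new ActionCount.UDAFExampleAvgEvaluator();
        reduceTask.merge(partial1);
        reduceTask.merge(partial2);
        System.out.println(reduceTask.terminate()); // prints 8
    }
}

Hive performs exactly this orchestration for you across mappers, combiners, and reducers, which is why merge() must accept whatever terminatePartial() returns.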

