zoukankan      html  css  js  c++  java
  • hive GenericUDF1

     和UDF相比,通用UDF(GenericUDF)支持复杂类型(比如List,struct等)的输入和输出。

               下面来看一个小示例。

                Hive中whereme表中包含若干人的行程如下:  

    [plain] view plain copy
     
    1. A       2013-10-10 8:00:00      home  
    2. A       2013-10-10 10:00:00     Super Market  
    3. A       2013-10-10 12:00:00     KFC  
    4. A       2013-10-10 15:00:00     school  
    5. A       2013-10-10 20:00:00     home  
    6. A       2013-10-15 8:00:00      home  
    7. A       2013-10-15 10:00:00     park  
    8. A       2013-10-15 12:00:00     home  
    9. A       2013-10-15 15:30:00     bank  
    10. A       2013-10-15 19:00:00     home  

               通过查询我们要得到如下结果:  

    [plain] view plain copy
     
    1. A   2013-10-10  08:00:00    home    10:00:00    Super Market  
    2. A   2013-10-10  10:00:00    Super Market    12:00:00    KFC  
    3. A   2013-10-10  12:00:00    KFC 15:00:00    school  
    4. A   2013-10-10  15:00:00    school  20:00:00    home  
    5. A   2013-10-15  08:00:00    home    10:00:00    park  
    6. A   2013-10-15  10:00:00    park    12:00:00    home  
    7. A   2013-10-15  12:00:00    home    15:30:00    bank  
    8. A   2013-10-15  15:30:00    bank    19:00:00    home  

               1.编写GenericUDF. 

    [java] view plain copy
     
    1. package com.wz.udf;  
    2. import org.apache.hadoop.io.Text;  
    3. import org.apache.hadoop.io.LongWritable;  
    4. import org.apache.hadoop.io.IntWritable;  
    5. import org.apache.hadoop.io.FloatWritable;  
    6. import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;  
    7. import org.apache.hadoop.hive.ql.exec.UDFArgumentException;  
    8. import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;  
    9. import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;  
    10. import org.apache.hadoop.hive.ql.metadata.HiveException;  
    11. import org.apache.hadoop.hive.serde2.lazy.LazyString;  
    12. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;  
    13. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;  
    14. import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;  
    15. import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;  
    16. import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;  
    17. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;  
    18. import org.apache.hadoop.hive.serde2.objectinspector.StructField;  
    19. import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;  
    20. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;  
    21. import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;  
    22. import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;  
    23. import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;  
    24. import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;  
    25. import java.text.DateFormat;  
    26. import java.text.SimpleDateFormat;  
    27. import java.util.Date;   
    28. import java.util.Calendar;  
    29. import java.util.ArrayList;  
    30.    
    31. public class helloGenericUDF extends GenericUDF {  
    32.      ////输入变量定义  
    33.      private ObjectInspector peopleObj;  
    34.      private ObjectInspector timeObj;  
    35.      private ObjectInspector placeObj;  
    36.      //之前记录保存  
    37.      String strPreTime = "";  
    38.      String strPrePlace = "";   
    39.      String strPrePeople = "";  
    40.    
    41.      @Override  
    42.      //1.确认输入类型是否正确  
    43.      //2.输出类型的定义  
    44.      public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {  
    45.           peopleObj = (ObjectInspector)arguments[0];  
    46.           timeObj = (ObjectInspector)arguments[1];  
    47.           placeObj = (ObjectInspector)arguments[2];  
    48.           //输出结构体定义  
    49.           ArrayList structFieldNames = new ArrayList();  
    50.           ArrayList structFieldObjectInspectors = new ArrayList();  
    51.           structFieldNames.add("people");  
    52.       structFieldNames.add("day");  
    53.           structFieldNames.add("from_time");  
    54.           structFieldNames.add("from_place");  
    55.           structFieldNames.add("to_time");  
    56.           structFieldNames.add("to_place");  
    57.    
    58.           structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    59.           structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    60.           structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    61.           structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    62.       structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    63.       structFieldObjectInspectors.add( PrimitiveObjectInspectorFactory.writableStringObjectInspector );  
    64.   
    65.           StructObjectInspector si2;  
    66.           si2 = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);   
    67.           return si2;  
    68.      }  
    69.    
    70.      //遍历每条记录  
    71.      @Override  
    72.      public Object evaluate(DeferredObject[] arguments) throws HiveException{  
    73.       LazyString LPeople = (LazyString)(arguments[0].get());  
    74.       String strPeople = ((StringObjectInspector)peopleObj).getPrimitiveJavaObject( LPeople );  
    75.   
    76.       LazyString LTime = (LazyString)(arguments[1].get());  
    77.       String strTime = ((StringObjectInspector)timeObj).getPrimitiveJavaObject( LTime );  
    78.   
    79.       LazyString LPlace = (LazyString)(arguments[2].get());  
    80.       String strPlace = ((StringObjectInspector)placeObj).getPrimitiveJavaObject( LPlace );  
    81.       
    82.       Object[] e;     
    83.       e = new Object[6];  
    84.   
    85.           try  
    86.       {  
    87.                 //如果是同一个人,同一天  
    88.         if(strPrePeople.equals(strPeople) && IsSameDay(strTime) )  
    89.         {  
    90.                 e[0] = new Text(strPeople);  
    91.                         e[1] = new Text(GetYearMonthDay(strTime));  
    92.                 e[2] = new Text(GetTime(strPreTime));  
    93.                 e[3] = new Text(strPrePlace);  
    94.                 e[4] = new Text(GetTime(strTime));  
    95.                 e[5] = new Text(strPlace);  
    96.         }  
    97.                 else  
    98.                 {  
    99.                 e[0] = new Text(strPeople);  
    100.             e[1] = new Text(GetYearMonthDay(strTime));  
    101.                 e[2] = new Text("null");  
    102.                 e[3] = new Text("null");  
    103.                 e[4] = new Text(GetTime(strTime));  
    104.                 e[5] = new Text(strPlace);  
    105.                 }  
    106.           }  
    107.           catch(java.text.ParseException ex)  
    108.           {  
    109.           }  
    110.              
    111.       strPrePeople = new String(strPeople);  
    112.       strPreTime= new String(strTime);  
    113.       strPrePlace = new String(strPlace);  
    114.   
    115.           return e;  
    116.      }  
    117.    
    118.      @Override  
    119.      public String getDisplayString(String[] children) {  
    120.           assert( children.length>0 );  
    121.    
    122.           StringBuilder sb = new StringBuilder();  
    123.           sb.append("helloGenericUDF(");  
    124.           sb.append(children[0]);  
    125.           sb.append(")");  
    126.    
    127.           return sb.toString();  
    128.      }  
    129.   
    130.      //比较相邻两个时间段是否在同一天  
    131.      private boolean IsSameDay(String strTime) throws java.text.ParseException{     
    132.      if(strPreTime.isEmpty()){  
    133.          return false;  
    134.          }  
    135.          String curDay = GetYearMonthDay(strTime);  
    136.          String preDay = GetYearMonthDay(strPreTime);  
    137.      return curDay.equals(preDay);  
    138.      }  
    139.   
    140.      //获取年月日  
    141.      private String GetYearMonthDay(String strTime)  throws java.text.ParseException{  
    142.          DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");  
    143.          Date curDate = df.parse(strTime);  
    144.      df = new SimpleDateFormat("yyyy-MM-dd");  
    145.          return df.format(curDate);  
    146.      }  
    147.   
    148.      //获取时间  
    149.      private String GetTime(String strTime)  throws java.text.ParseException{  
    150.          DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");  
    151.          Date curDate = df.parse(strTime);  
    152.          df = new SimpleDateFormat("HH:mm:ss");  
    153.          return df.format(curDate);  
    154.      }  
    155. }  

               2.在Hive里面创建两张表,一张包含结构体的表保存执行GenericUDF查询后的结果,另外一张用于保存最终结果.

    [plain] view plain copy
     
    1. hive> create table whereresult(people string,day string,from_time string,from_place string,to_time string,to_place string);  
    2. OK  
    3. Time taken: 0.287 seconds  
    4. hive> create table tmpResult(info struct<people:string,day:string,from_time:string,from_place:string,to_time:string,to_place:string>);  
    5. OK  
    6. Time taken: 0.074 seconds  

               3.执行GenericUDF查询,得到最终结果。  

    [plain] view plain copy
     
      1. hive> insert overwrite table tmpResult select hellogenericudf(whereme.people,whereme.time,whereme.place) from whereme;  
      2. hive> insert overwrite table whereresult select info.people,info.day,info.from_time,info.from_place,info.to_time,info.to_place from tmpResult where info.from_time<>'null';  
      3. Total MapReduce jobs = 2  
      4. Launching Job 1 out of 2  
      5. Number of reduce tasks is set to 0 since there's no reduce operator  
      6. Starting Job = job_201312022129_0006, Tracking URL = http://localhost:50030/jobdetails.jsp?jobid=job_201312022129_0006  
      7. Kill Command = /home/wangzhun/hadoop/hadoop-0.20.2/bin/../bin/hadoop job  -Dmapred.job.tracker=localhost:9001 -kill job_201312022129_0006  
      8. Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0  
      9. 2013-12-02 22:48:40,733 Stage-1 map = 0%,  reduce = 0%  
      10. 2013-12-02 22:48:49,825 Stage-1 map = 100%,  reduce = 0%  
      11. 2013-12-02 22:48:52,869 Stage-1 map = 100%,  reduce = 100%  
      12. Ended Job = job_201312022129_0006  
      13. Ended Job = -383357832, job is filtered out (removed at runtime).  
      14. Moving data to: hdfs://localhost:9000/tmp/hive-root/hive_2013-12-02_22-48-24_406_2701579121398466034/-ext-10000  
      15. Loading data to table default.whereresult  
      16. Deleted hdfs://localhost:9000/user/hive/warehouse/whereresult  
      17. Table default.whereresult stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 346, raw_data_size: 0]  
      18. 8 Rows loaded to whereresult  
      19. MapReduce Jobs Launched:   
      20. Job 0: Map: 1   HDFS Read: 420 HDFS Write: 346 SUCESS  
      21. Total MapReduce CPU Time Spent: 0 msec  
      22. OK  
      23. Time taken: 29.098 seconds  
      24. hive> select * from whereresult;  
      25. OK  
      26. A   2013-10-10  08:00:00    home    10:00:00    Super Market  
      27. A   2013-10-10  10:00:00    Super Market    12:00:00    KFC  
      28. A   2013-10-10  12:00:00    KFC 15:00:00    school  
      29. A   2013-10-10  15:00:00    school  20:00:00    home  
      30. A   2013-10-15  08:00:00    home    10:00:00    park  
      31. A   2013-10-15  10:00:00    park    12:00:00    home  
      32. A   2013-10-15  12:00:00    home    15:30:00    bank  
      33. A   2013-10-15  15:30:00    bank    19:00:00    home  
      34. Time taken: 0.105 seconds  
  • 相关阅读:
    I Show
    Centos上安装Google Authenticator
    linux 上mailx通过465端口发送邮件
    Win10 RDP CredSSP报错处理
    linux 双网卡代理上网
    English trip EM3-LP2B Teacher:Taylor
    【BZOJ1984】月下“毛景树”-树链剖分
    c++ 数据对拍器
    【BZOJ2157】旅游-树链剖分
    游戏-博弈论-树形DP
  • 原文地址:https://www.cnblogs.com/cxhfuujust/p/7553002.html
Copyright © 2011-2022 走看看