zoukankan      html  css  js  c++  java
  • hive添加UDF

    hive添加UDF

    步骤如下:
    • 函数分为永久和临时函数,后者会话退出则消失,前者不会

    • 查看已有函数(创建好后也可以通过这个来查看是否创建成功)

    show functions;
    
    • 写UDF的java文件,如:
    /**
     * Licensed to the Apache Software Foundation (ASF) under one
     * or more contributor license agreements.  See the NOTICE file
     * distributed with this work for additional information
     * regarding copyright ownership.  The ASF licenses this file
     * to you under the Apache License, Version 2.0 (the
     * "License"); you may not use this file except in compliance
     * with the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    package org.apache.hadoop.hive.ql.udf;
    
    import org.apache.hadoop.hive.ql.exec.UDF;
    import org.apache.hadoop.hive.serde2.ByteStream;
    import org.apache.hadoop.hive.serde2.io.ByteWritable;
    import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    import org.apache.hadoop.hive.serde2.io.ShortWritable;
    import org.apache.hadoop.hive.serde2.io.TimestampWritable;
    import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
    import org.apache.hadoop.hive.serde2.lazy.LazyLong;
    import org.apache.hadoop.io.BooleanWritable;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.FloatWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    
    /**
     * UDFToString.
     *
     */
    /**
     * UDFToString: a Hive UDF that casts primitive writable values to {@link Text}.
     *
     * <p>The output {@link Text} object and the conversion byte buffer are shared
     * across calls (Hive's standard object-reuse pattern), so a returned value is
     * only valid until the next {@code evaluate} invocation — callers must copy it
     * if they need to keep it.
     */
    public class UDFToString extends UDF {
      // Reused output holder; overwritten by every evaluate() call.
      private final Text result = new Text();
      // Scratch buffer for integer/long-to-UTF8 digit conversion.
      private final ByteStream.Output buffer = new ByteStream.Output();

      // Boolean values render upper-case, matching Hive CAST semantics.
      private final byte[] trueBytes = {'T', 'R', 'U', 'E'};
      private final byte[] falseBytes = {'F', 'A', 'L', 'S', 'E'};

      public UDFToString() {
      }

      /** NULL in, NULL out. */
      public Text evaluate(NullWritable i) {
        return null;
      }

      /** Boolean becomes the literal text "TRUE" or "FALSE". */
      public Text evaluate(BooleanWritable i) {
        if (i == null) {
          return null;
        }
        result.clear();
        result.set(i.get() ? trueBytes : falseBytes);
        return result;
      }

      // Renders an int into the shared buffer as UTF-8 digits and points
      // the shared Text at it.
      private Text textFromInt(int value) {
        buffer.reset();
        LazyInteger.writeUTF8NoException(buffer, value);
        result.set(buffer.getData(), 0, buffer.getCount());
        return result;
      }

      // Same as textFromInt, for long values.
      private Text textFromLong(long value) {
        buffer.reset();
        LazyLong.writeUTF8NoException(buffer, value);
        result.set(buffer.getData(), 0, buffer.getCount());
        return result;
      }

      public Text evaluate(ByteWritable i) {
        return i == null ? null : textFromInt(i.get());
      }

      public Text evaluate(ShortWritable i) {
        return i == null ? null : textFromInt(i.get());
      }

      public Text evaluate(IntWritable i) {
        return i == null ? null : textFromInt(i.get());
      }

      public Text evaluate(LongWritable i) {
        return i == null ? null : textFromLong(i.get());
      }

      /** Delegates to FloatWritable.toString() for the decimal rendering. */
      public Text evaluate(FloatWritable i) {
        if (i == null) {
          return null;
        }
        result.set(i.toString());
        return result;
      }

      /** Delegates to DoubleWritable.toString() for the decimal rendering. */
      public Text evaluate(DoubleWritable i) {
        if (i == null) {
          return null;
        }
        result.set(i.toString());
        return result;
      }

      /**
       * Round-trips the input's bytes through String and back. NOTE(review):
       * this mutates and returns the INPUT object (not the shared buffer) —
       * presumably to normalize lazily-materialized / invalid UTF-8 bytes.
       */
      public Text evaluate(Text i) {
        if (i == null) {
          return null;
        }
        i.set(i.toString());
        return i;
      }

      public Text evaluate(TimestampWritable i) {
        if (i == null) {
          return null;
        }
        result.set(i.toString());
        return result;
      }

      /** Wraps the raw bytes as text without any charset validation. */
      public Text evaluate(BytesWritable bw) {
        if (bw == null) {
          return null;
        }
        result.set(bw.getBytes(), 0, bw.getLength());
        return result;
      }
    }
    
    • 先用javac编译,再将编译出的class文件打包成jar(注意是大写的-C,且打包的是.class文件而非.java源文件):
    jar cvf UDFUpper.jar -C bin .
    
    • 进入hive,添加jar文件
    hive> add jar UDFToString.jar;
    Added [UDFToString.jar] to class path
    Added resources: [UDFToString.jar]
    
    • 添加临时函数(会话结束后函数消失)

      • 进入hive,添加jar文件

        hive> add jar UDFToString.jar;
        Added [UDFToString.jar] to class path
        Added resources: [UDFToString.jar]
        
      • 添加函数(注意class所在包)

        语法为:
        CREATE TEMPORARY FUNCTION function_name AS class_name;
        
        hive> create temporary function mytest as 'org.apache.hadoop.hive.ql.udf.UDFToString';
        OK
        Time taken: 0.009 seconds
        
        路径出错会提示:
        FAILED: Class default.udf.Upper not found
        FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.FunctionTask
        
        具体类路径为UDF java文件里的package+'.'+java文件名
        
      • 删除临时函数

        DROP TEMPORARY FUNCTION [IF EXISTS] function_name;
        
    • 添加永久函数

      • 添加

        语法:
        CREATE FUNCTION [db_name.]function_name AS class_name
          [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
          
        注意:
        如果hive以非本地模式运行,则file_uri应为非本地文件系统的URI(如hdfs路径),否则会报错;
        
        例子:
        hive> create function default.hah as "org.apache.hadoop.hive.ql.udf.UDFToString" using jar "UDFToString.jar";
        FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.FunctionTask. Hive warehouse is non-local, but UDFToString.jar specifies file on local filesystem. Resources on non-local warehouse should specify a non-local scheme/path
        
        hive> create function default.hah as "org.apache.hadoop.hive.ql.udf.UDFToString" using jar "hdfs:///hdfs_home/UDFToString.jar";
        converting to local hdfs:///hdfs_home/UDFToString.jar
        Added [/tmp/fda83e6d-e1af-4005-affa-9f9c4ee226a6_resources/UDFToString.jar] to class path
        Added resources: [hdfs:///hdfs_home/UDFToString.jar]
        OK
        Time taken: 0.521 seconds
        
      • 删除

        DROP FUNCTION [IF EXISTS] function_name;
        
    引用第三方包的情况

    假如在你的UDF文件里引用了第三方包,那么只需要在生成jar文件的时候改变一下命令就可以了,如下:

    javac -classpath hive-0.4.1.jar:commons-io-2.5.jar:bcprov-jdk15on-158.jar  com/example/hive/udf/UDFDecrypt.java
    
    jar -cvf UDFDecrypt.jar ./com/example/hive/udf/UDFDecrypt.class
    
    上面javac命令中classpath跟的是用到的第三方包名,使用:做间隔,后面跟的是java文件路径
    

    此处参考了这里

    参考
  • 相关阅读:
    逆向初级-win32(四)
    逆向初级-C++(三)
    逆向初级-C语言(二)
    逆向初级-汇编(一)
    Kimabll数仓架构下如何确定模型落地哪些表
    浅谈数据仓库设计
    (转)Go语言的%d,%p,%v等占位符的使用
    (转)深入MySQL源码 学习方法 何登成专家
    (转)浅析MySQL二段锁
    (转)MySQL:为什么无法KILL在processlist中的语句
  • 原文地址:https://www.cnblogs.com/wswang/p/7718083.html
Copyright © 2011-2022 走看看