zoukankan      html  css  js  c++  java
  • hive数据文件简单合并

    MR代码:

    package merge;
    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    
    public class merge
    {
        public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>
        {
            private Text word=new Text("");
            public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                    throws IOException
            {
                output.collect(value,word);
            }
        }
    
    
        public static void main(String[] args) throws Exception
        {
            JobConf conf = new JobConf(merge.class);
            conf.setJobName("wordcount");
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);
            conf.setMapperClass(Map.class);
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);
            FileInputFormat.setInputPaths(conf, new Path(args[0]));
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));
            JobClient.runJob(conf);
        }
    }

    Eclipse自动生成.class文件,打包命令:

    jar打包:在项目的bin目录下执行以下命令
    Dev-Fac:bin ce-pc$ jar -cvf hive-merge.jar -C  ../ .

    合并命令:

    hadoop jar /tmp/hive-merge.jar merge.merge /user/hive/warehouse/table1 /user/hive/warehouse/table1/out
    
    #merge.merge 表示 merge 包下的 merge 类(即主类的全限定名)
  • 相关阅读:
    javascript 实现 TreeView全选(实现子节点全选,中父节点自动全选)
    关于健康档案的基本架构与数据标准
    SQLite内建语法表
    狼群中的男人(A Man Among Wolves)
    教你瞬间赢得别人信任的 “冷读术”
    SymbianOS精要
    为幼龄儿童设计 iPad 软件介面的四条心得
    OpenGL ES
    如何变得更加优秀
    创业的八大能力
  • 原文地址:https://www.cnblogs.com/ggzone/p/5094492.html
Copyright © 2011-2022 走看看