zoukankan      html  css  js  c++  java
  • hive数据文件简单合并

    MR代码:

    package merge;
    import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;
    
    public class merge
    {
        public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text>
        {
            private Text word=new Text("");
            public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
                    throws IOException
            {
                output.collect(value,word);
            }
        }
    
    
        public static void main(String[] args) throws Exception
        {
            JobConf conf = new JobConf(merge.class);
            conf.setJobName("wordcount");
            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(Text.class);
            conf.setMapperClass(Map.class);
            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);
            FileInputFormat.setInputPaths(conf, new Path(args[0]));
            FileOutputFormat.setOutputPath(conf, new Path(args[1]));
            JobClient.runJob(conf);
        }
    }

    Eclipse自动生成.class文件,打包命令:

    jar打包:在项目的bin目录下
    Dev-Fac:bin ce-pc$ jar -cvf hive-merge.jar -C  ../ .

    合并命令:

    hadoop jar /tmp/hive-merge.jar merge.merge /user/hive/warehouse/table1 /user/hive/warehouse/table1/out
    
    #merge.merge 表示merge包下的merge
  • 相关阅读:
    PostgreSQL configure: error: readline library not found
    JS实现的模态框弹窗并自动消失
    MySQL安装详细教程
    C++与C#中枚举的区别
    句柄
    RESTFul API
    四、使用ADB命令清除缓存
    C#枚举案例
    Pycharm安装教程
    七、Monkey实战
  • 原文地址:https://www.cnblogs.com/ggzone/p/5094492.html
Copyright © 2011-2022 走看看