  • Hadoop: Batch importing data into HBase with MapReduce

    Please credit the source when reposting: http://blog.csdn.net/l1028386804/article/details/46463889
    

    Without further ado, here is the code. The mapper reads tab-separated log lines from HDFS and builds a row key from the msisdn plus a formatted timestamp; the reducer then writes each record into the wlan_log table through TableOutputFormat.

    package hbase;
    
    import java.text.SimpleDateFormat;
    import java.util.Date;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    /**
     * Batch import into HBase with MapReduce
     * @author liuyazhuang
     */
    public class BatchImport {
    	static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text>{
    		SimpleDateFormat dateformat1=new SimpleDateFormat("yyyyMMddHHmmss");
    		Text v2 = new Text();
    		
    		@Override
    		protected void map(LongWritable key, Text value, Context context) throws java.io.IOException, InterruptedException {
    			// Input lines are tab-separated; field 0 is an epoch timestamp, field 1 is the msisdn.
    			final String[] splited = value.toString().split("\t");
    			try {
    				final Date date = new Date(Long.parseLong(splited[0].trim()));
    				final String dateFormat = dateformat1.format(date);
    				// Row key: msisdn + ":" + formatted timestamp, prepended to the original line.
    				String rowKey = splited[1] + ":" + dateFormat;
    				v2.set(rowKey + "\t" + value.toString());
    				context.write(key, v2);
    			} catch (NumberFormatException e) {
    				// Count malformed records in a counter instead of failing the job.
    				final Counter counter = context.getCounter("BatchImport", "ErrorFormat");
    				counter.increment(1L);
    				System.out.println("Malformed record: " + splited[0] + " " + e.getMessage());
    			}
    		}
    	}
    	
    	static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable>{
    		@Override
    		protected void reduce(LongWritable key, java.lang.Iterable<Text> values, Context context) throws java.io.IOException, InterruptedException {
    			for (Text text : values) {
    				// splited[0] is the row key built by the mapper; the remaining fields are the original record.
    				final String[] splited = text.toString().split("\t");
    				
    				final Put put = new Put(Bytes.toBytes(splited[0]));
    				put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"), Bytes.toBytes(splited[1]));
    				put.add(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"), Bytes.toBytes(splited[2]));
    				// Other fields omitted; call put.add(...) for each additional column.
    				context.write(NullWritable.get(), put);
    			}
    		}
    	}
    	
    	public static void main(String[] args) throws Exception {
    		final Configuration configuration = new Configuration();
    		// Set the ZooKeeper quorum for the HBase cluster.
    		configuration.set("hbase.zookeeper.quorum", "hadoop0");
    		// Set the target HBase table name for TableOutputFormat.
    		configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
    		// Raise the DFS socket timeout so HBase does not time out and abort during the import.
    		configuration.set("dfs.socket.timeout", "180000");
    		
    		final Job job = new Job(configuration, "HBaseBatchImport");
    		
    		job.setMapperClass(BatchImportMapper.class);
    		job.setReducerClass(BatchImportReducer.class);
    		// Set the map output types; the reduce output types are not set, since TableOutputFormat handles them.
    		job.setMapOutputKeyClass(LongWritable.class);
    		job.setMapOutputValueClass(Text.class);
    		
    		job.setInputFormatClass(TextInputFormat.class);
    		// No output path is set; instead the output format class is set so results go straight to HBase.
    		job.setOutputFormatClass(TableOutputFormat.class);
    		
    		FileInputFormat.setInputPaths(job, "hdfs://hadoop0:9000/input");
    		
    		job.waitForCompletion(true);
    	}
    }
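
    The job writes into an existing table, so wlan_log with column family cf must be created first. Below is a minimal sketch using the same pre-1.0 HBase client API as the code above; the class name CreateWlanLogTable is only illustrative, while the table name, column family, and ZooKeeper quorum are taken from BatchImport.

    package hbase;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.client.HBaseAdmin;
    
    /**
     * Creates the target table for the import job if it does not exist yet.
     */
    public class CreateWlanLogTable {
    	public static void main(String[] args) throws Exception {
    		Configuration conf = HBaseConfiguration.create();
    		conf.set("hbase.zookeeper.quorum", "hadoop0");
    		HBaseAdmin admin = new HBaseAdmin(conf);
    		if (!admin.tableExists("wlan_log")) {
    			HTableDescriptor desc = new HTableDescriptor("wlan_log");
    			desc.addFamily(new HColumnDescriptor("cf"));
    			admin.createTable(desc);
    		}
    		admin.close();
    	}
    }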
    

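    After the job finishes, one imported row can be spot-checked with a Get against the table. Another sketch with the same old client API; the class name CheckImportedRow is illustrative, and the row key passed on the command line must be a real msisdn:yyyyMMddHHmmss key produced by the mapper.

    package hbase;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;
    
    /**
     * Reads back one row to verify the output of the import job.
     */
    public class CheckImportedRow {
    	public static void main(String[] args) throws Exception {
    		Configuration conf = HBaseConfiguration.create();
    		conf.set("hbase.zookeeper.quorum", "hadoop0");
    		HTable table = new HTable(conf, "wlan_log");
    		// args[0] is an existing row key, e.g. "<msisdn>:20150610120000".
    		Get get = new Get(Bytes.toBytes(args[0]));
    		Result result = table.get(get);
    		byte[] msisdn = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"));
    		byte[] date = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("date"));
    		System.out.println("msisdn=" + Bytes.toString(msisdn) + ", date=" + Bytes.toString(date));
    		table.close();
    	}
    }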
