zoukankan      html  css  js  c++  java
  • hadoop 倒排索引-分布式作业二

    import java.io.IOException;
    import java.util.Map;
    import java.util.StringTokenizer;
    import java.util.TreeMap;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.FileSplit;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * MapReduce job that builds an inverted index: for every whitespace-separated
     * token found in the input files it writes one output record of the form
     * {@code token <TAB> file:count ,file:count ,...}.
     *
     * <p>Data flow: the mapper emits {@code (token, "file:1")}; the combiner and the
     * reducer both sum the counts per file for a token. The combiner keeps the key
     * and the value format unchanged, so the result is the same no matter how many
     * times (including zero) the framework chooses to run it.
     */
    public class InvertedIndex {
    
    	/** Separates the file name from its occurrence count inside a posting value. */
    	private static final char POSTING_SEPARATOR = ':';
    
    	/** Written after every posting in the final output line. */
    	private static final String POSTING_TERMINATOR = " ,";
    
    	/** Input directory used when no command-line argument is given. */
    	private static final String DEFAULT_INPUT_PATH = "input";
    
    	/** Output directory used when fewer than two command-line arguments are given. */
    	private static final String DEFAULT_OUTPUT_PATH = "output";
    
    	/**
    	 * Sums the counts of a group of {@code file:count} postings per file.
    	 *
    	 * @param values postings belonging to one token
    	 * @return total count per file name, ordered by file name
    	 * @throws IOException if a posting is not of the form {@code file:count}
    	 */
    	private static Map<String, Long> sumPostings(Iterable<Text> values) throws IOException {
    		Map<String, Long> totals = new TreeMap<String, Long>();
    		for (Text value : values) {
    			// Copy to a String right away instead of holding on to the Text instance.
    			String posting = value.toString();
    			// The count is always the part after the LAST separator, so a file
    			// name that itself contains ':' is still parsed correctly.
    			int sep = posting.lastIndexOf(POSTING_SEPARATOR);
    			if (sep < 0) {
    				throw new IOException("Malformed posting (expected file:count): " + posting);
    			}
    			String file = posting.substring(0, sep);
    			long count;
    			try {
    				count = Long.parseLong(posting.substring(sep + 1));
    			} catch (NumberFormatException e) {
    				throw new IOException("Malformed posting (expected file:count): " + posting, e);
    			}
    			Long previous = totals.get(file);
    			totals.put(file, Long.valueOf(previous == null ? count : previous.longValue() + count));
    		}
    		return totals;
    	}
    
    	/**
    	 * Emits {@code (token, "file:1")} for every whitespace-separated token of an
    	 * input line, where {@code file} is the last path component of the split's file.
    	 */
    	public static class InversedIndexMapper extends Mapper<Object, Text, Text, Text> {
    		
    		private final Text outKey = new Text();
    		private final Text outVal = new Text();
    		
    		/**
    		 * Tokenizes one input record and writes one posting per token.
    		 *
    		 * @param key     input key (not used)
    		 * @param value   one record of input text
    		 * @param context task context; its input split is cast to {@link FileSplit}
    		 * @throws IOException          if writing to the context fails
    		 * @throws InterruptedException if the task is interrupted while writing
    		 */
    		@Override
    		public void map (Object key,Text value,Context context) throws IOException, InterruptedException {
    			FileSplit split = (FileSplit) context.getInputSplit();
    			// Same value for every token of this record: "<file name>:1".
    			outVal.set(split.getPath().getName() + POSTING_SEPARATOR + "1");
    			StringTokenizer tokens = new StringTokenizer(value.toString());
    			while(tokens.hasMoreTokens()) {
    				// The token alone is the key, so tokens containing ':' stay intact.
    				outKey.set(tokens.nextToken());
    				context.write(outKey, outVal);
    			}
    		}
    		
    		/** Prints the completion message once per task instead of once per record. */
    		@Override
    		protected void cleanup(Context context) {
    			System.out.println("Map phase finished ...");
    		}
    	}
    	
    	/**
    	 * Pre-aggregates postings: sums the counts per file for a token and re-emits
    	 * them under the same key and in the same {@code file:count} format it
    	 * consumes.
    	 */
    	public static class InversedIndexCombiner extends Reducer<Text, Text, Text, Text> {
    		
    		private final Text outVal = new Text();
    		
    		/**
    		 * Writes one {@code (token, "file:sum")} record per distinct file.
    		 *
    		 * @param key     the token
    		 * @param values  {@code file:count} postings for the token
    		 * @param context task context
    		 * @throws IOException          if a posting is malformed or writing fails
    		 * @throws InterruptedException if the task is interrupted while writing
    		 */
    		@Override
    		public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
    			for (Map.Entry<String, Long> posting : sumPostings(values).entrySet()) {
    				outVal.set(posting.getKey() + POSTING_SEPARATOR + posting.getValue());
    				context.write(key, outVal);
    			}
    		}
    		
    		/** Prints the completion message once per combiner run instead of once per key. */
    		@Override
    		protected void cleanup(Context context) {
    			System.out.println("Combine phase finished ...");
    		}
    		
    	}
    	
    	/**
    	 * Produces the final index line for a token: every file's total count as
    	 * {@code file:count}, each followed by {@code " ,"}.
    	 */
    	public static class InversedIndexReducer extends Reducer<Text, Text, Text, Text> {
    		
    		/**
    		 * Merges all postings of a token and writes a single output record.
    		 *
    		 * @param key     the token
    		 * @param values  {@code file:count} postings for the token
    		 * @param context task context
    		 * @throws IOException          if a posting is malformed or writing fails
    		 * @throws InterruptedException if the task is interrupted while writing
    		 */
    		@Override
    		public void reduce (Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
    			StringBuilder line = new StringBuilder();
    			// Summing here as well merges postings of one file that arrive from
    			// different map tasks.
    			for (Map.Entry<String, Long> posting : sumPostings(values).entrySet()) {
    				line.append(posting.getKey())
    					.append(POSTING_SEPARATOR)
    					.append(posting.getValue())
    					.append(POSTING_TERMINATOR);
    			}
    			context.write(key, new Text(line.toString()));
    		}
    		
    		/** Prints the completion message once per task instead of once per key. */
    		@Override
    		protected void cleanup(Context context) {
    			System.out.println("Reduce phase finished ...");
    		}
    	}
    	
    	/**
    	 * Configures and runs the job, then exits with status 0 on success and 1 on
    	 * failure.
    	 *
    	 * @param args optional: {@code args[0]} is the input path (default
    	 *             {@code "input"}), {@code args[1]} the output path (default
    	 *             {@code "output"})
    	 * @throws IOException            if the job cannot be set up or submitted
    	 * @throws InterruptedException   if waiting for the job is interrupted
    	 * @throws ClassNotFoundException if a job class cannot be resolved
    	 */
    	public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    		String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
    		String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;
    		
    		Configuration conf = new Configuration();
    		Job job = Job.getInstance(conf,"index inverted");
    		
    		job.setJarByClass(InvertedIndex.class);
    		job.setMapperClass(InversedIndexMapper.class);
    		job.setCombinerClass(InversedIndexCombiner.class);
    		job.setReducerClass(InversedIndexReducer.class);
    		job.setMapOutputKeyClass(Text.class);
    		job.setMapOutputValueClass(Text.class);
    		job.setOutputKeyClass(Text.class);
    		job.setOutputValueClass(Text.class);
    
    		job.setNumReduceTasks(1);
    		
    		FileInputFormat.addInputPath(job, new Path(inputPath));
    		FileOutputFormat.setOutputPath(job, new Path(outputPath));
    		
    		boolean succeeded = job.waitForCompletion(true);
    		if(succeeded)
    		{
    			System.out.println("All job finished ...");
    		}
    		// Report failure through the exit status so calling scripts can detect it.
    		System.exit(succeeded ? 0 : 1);
    	}
    
    }
    

      

  • 相关阅读:
    [CSP-S模拟测试]:attack(支配树+LCA+bitset)
    [杂题]:C/c(二分答案)
    [杂题]:B/b(二分答案)
    二维莫队(离线)
    [CSP-S模拟测试]:联盟(搜索+树的直径)
    [CSP-S模拟测试]:蔬菜(二维莫队)
    [CSP-S模拟测试]:施工(DP+单调栈+前缀和)
    [CSP-S模拟测试]:画作(BFS+数学)
    [CSP-S模拟测试]:折射(DP)
    [CSP-S模拟测试]:养花(分块)
  • 原文地址:https://www.cnblogs.com/shenbingyu/p/4940676.html
Copyright © 2011-2022 走看看