zoukankan      html  css  js  c++  java
  • IntelliJ IDEA 创建 MapReduce 工程

    1、创建一个maven工程

    2、POM文件

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.sogou</groupId>
    <artifactId>teemo-dc-etl</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>

    <name>teemo-dc-etl</name>
    <url>http://maven.apache.org</url>

    <properties>
    <!-- Standard Maven property that plugins use as the source file encoding. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- NOTE(review): the mahout.* and spring.version properties below are not referenced
    by any dependency or plugin declared in this POM; presumably left over from another
    project. Confirm before removing. -->
    <mahout.version>0.5</mahout.version>
    <mahout.groupid>org.apache.mahout</mahout.groupid>
    <spring.version>3.0.6.RELEASE</spring.version>
    </properties>

    <!--
    Extra repository needed to resolve the com.hadoop.gplcompression:hadoop-lzo dependency.
    NOTE: despite the id "maven-ali", this URL is Twitter's public Maven repository. The id
    is kept unchanged so that any settings.xml mirror/credential entries keyed on it still match.
    -->
    <repositories>
    <repository>
    <id>maven-ali</id>
    <!-- Use HTTPS: Maven 3.8.1 and later block plain-http repositories by default. -->
    <url>https://maven.twttr.com/</url>
    <releases>
    <enabled>true</enabled>
    </releases>
    <snapshots>
    <enabled>true</enabled>
    <!-- Re-check for newer snapshots on every build and fail on checksum mismatch. -->
    <updatePolicy>always</updatePolicy>
    <checksumPolicy>fail</checksumPolicy>
    </snapshots>
    </repository>
    </repositories>

    <dependencies>
    <!-- Unit testing only; not packaged into the job jar. -->
    <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>3.8.1</version>
    <scope>test</scope>
    </dependency>

    <!--
    Hadoop modules. All org.apache.hadoop artifacts are pinned to the SAME release (2.5.2):
    the modules are built and tested together, and mixing patch levels (previously 2.5.0,
    2.5.1 and 2.5.2) puts mismatched Hadoop classes on one classpath.
    When changing the version, change it in all four declarations below.
    -->
    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.5.2</version>
    </dependency>

    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.5.2</version>
    </dependency>

    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.5.2</version>
    </dependency>

    <!-- LZO compression support; resolved from the extra repository declared in <repositories>. -->
    <dependency>
    <groupId>com.hadoop.gplcompression</groupId>
    <artifactId>hadoop-lzo</artifactId>
    <version>0.4.19</version>
    </dependency>

    <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-yarn-common</artifactId>
    <version>2.5.2</version>
    </dependency>

    <!-- NOTE(review): fastjson 1.2.4 is a very old release; later releases fix known
    deserialization vulnerabilities. Consider upgrading after checking API compatibility. -->
    <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.4</version>
    </dependency>

    </dependencies>


    <build>
    <plugins>
    <!--
    Bind the maven-assembly-plugin "single" goal to the package phase.
    With the predefined jar-with-dependencies descriptor this produces an additional jar
    that bundles the project's classes together with its dependencies, so a single file
    can be submitted to the cluster.
    -->
    <plugin>
    <artifactId>maven-assembly-plugin</artifactId>
    <configuration>
    <archive>
    <manifest>
    <!-- Left empty here. Fill in the fully qualified job driver class
    (e.g. com.sogou.teemo.test.WordCount) if the jar should declare a Main-Class. -->
    <mainClass></mainClass>
    </manifest>
    </archive>
    <descriptorRefs>
    <descriptorRef>jar-with-dependencies</descriptorRef>
    </descriptorRefs>
    </configuration>
    <executions>
    <execution>
    <id>make-assembly</id>
    <phase>package</phase> <!-- packaging phase -->
    <goals>
    <goal>single</goal>
    </goals>
    </execution>
    </executions>
    </plugin>
    <!-- Compile at Java 1.6 source/target level, reading sources as UTF-8. -->
    <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-compiler-plugin</artifactId>
    <configuration>
    <source>1.6</source>
    <target>1.6</target>
    <encoding>UTF-8</encoding>
    </configuration>
    </plugin>

    <!-- Run tests with a 2 GB maximum heap (argLine is passed to the forked test JVM). -->
    <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-surefire-plugin</artifactId>
    <version>2.14.1</version>
    <configuration>
    <argLine>-Xmx2048m</argLine>
    </configuration>
    </plugin>
    </plugins>
    </build>
    </project>
    这里依赖了 hadoop-lzo 包,需要增加 Twitter 的 Maven 仓库(即上面 repositories 中的配置)
    3、mapreduce文件写法
    package com.sogou.teemo.test;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;
    import java.util.StringTokenizer;

    /**
     * Word-count MapReduce job.
     *
     * <p>The mapper splits every input line into whitespace-separated tokens and emits
     * {@code (token, 1)}; the reducer sums the counts per token.
     *
     * <p>Usage: {@code WordCount [inputPath [outputPath]]}. Missing arguments fall back to
     * {@code "input"} and {@code "output"}.
     */
    public class WordCount {

        /** Input path used when no first argument is given. */
        private static final String DEFAULT_INPUT = "input";

        /** Output path used when no second argument is given. */
        private static final String DEFAULT_OUTPUT = "output";

        /** Local Hadoop installation used only when none is configured in the environment. */
        private static final String FALLBACK_HADOOP_HOME = "D:/hadoop-2.6.5";

        /** Mapper: emits {@code (token, 1)} for every whitespace-separated token of a line. */
        public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
            private static final IntWritable ONE = new IntWritable(1);

            // Reused for every token to avoid allocating a new Text per output record.
            private final Text word = new Text();

            /**
             * Tokenizes one input record and writes one {@code (token, 1)} pair per token.
             *
             * @param key     input key (unused)
             * @param value   the text of the input record
             * @param context job context the output pairs are written to
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    context.write(word, ONE);
                }
            }
        }

        /** Reducer: sums all counts received for one token. */
        public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
            // Reused for every key to avoid allocating a new IntWritable per output record.
            private final IntWritable result = new IntWritable();

            /**
             * Writes {@code (key, sum of values)}.
             *
             * @param key     the token
             * @param values  the counts emitted for this token
             * @param context job context the result is written to
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                result.set(sum);
                context.write(key, result);
            }
        }

        /**
         * Configures and launches the MapReduce job, then exits with status 0 on success
         * and 1 on failure.
         *
         * @param args optional {@code [inputPath [outputPath]]}
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception {
            // Do not clobber an installation the user already configured; only fall back to
            // the hard-coded local path when neither the property nor HADOOP_HOME is set.
            if (System.getProperty("hadoop.home.dir") == null
                    && System.getenv("HADOOP_HOME") == null) {
                System.setProperty("hadoop.home.dir", FALLBACK_HADOOP_HOME);
            }

            String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
            String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

            Configuration conf = new Configuration();
            // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(WordCount.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }


  • 相关阅读:
    HDU 2196 Computer
    HDU 1520 Anniversary party
    POJ 1217 FOUR QUARTERS
    POJ 2184 Cow Exhibition
    HDU 2639 Bone Collector II
    POJ 3181 Dollar Dayz
    POJ 1787 Charlie's Change
    POJ 2063 Investment
    HDU 1114 Piggy-Bank
    Lca hdu 2874 Connections between cities
  • 原文地址:https://www.cnblogs.com/shenguo/p/10483161.html
Copyright © 2011-2022 走看看