  • Getting started with Hadoop

    Set environment variables

    vim ~/.profile
    
    export HADOOP_HOME=/home/mmc/hadoop
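
    Optionally, add the Hadoop bin and sbin directories to PATH in the same file and reload the profile, so the hadoop/hdfs commands below can also be run without the ./bin prefix:

    export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    source ~/.profile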
    

    Hadoop configuration

    vim etc/hadoop/hadoop-env.sh

    export JAVA_HOME=/opt/java/jdk1.8.0_151
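
    A quick sanity check that the configured JDK path is valid:

    /opt/java/jdk1.8.0_151/bin/java -version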
    

    vim etc/hadoop/core-site.xml

    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://0.0.0.0:9000</value>
            <description>HDFS access address</description>
        </property>
        <property>
            <name>dfs.permissions</name>
            <value>false</value>
        </property>
        <property>
            <name>hadoop.tmp.dir</name>
            <value>file:/home/mmc/hadoop/tmp</value>
            <description>Hadoop data storage directory</description>
        </property>
    </configuration>
    

    vim etc/hadoop/hdfs-site.xml

    <configuration>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:/home/mmc/hadoop/hdfs/name</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:/home/mmc/hadoop/hdfs/data</value>
        </property>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
    </configuration>
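
    Hadoop normally creates these directories on format/startup, but creating them up front (paths taken from the two config files above) rules out permission problems:

    mkdir -p /home/mmc/hadoop/tmp
    mkdir -p /home/mmc/hadoop/hdfs/name /home/mmc/hadoop/hdfs/data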
    

    Passwordless SSH to localhost

    ssh-keygen
    cd ~/.ssh
    touch authorized_keys
    chmod 600 authorized_keys
    cat id_rsa.pub >> authorized_keys
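
    A quick way to confirm that key-based login works: ssh into localhost, which should not prompt for a password (accept the host key if asked), then exit:

    ssh localhost
    exit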
    

    Format the NameNode

    ./bin/hdfs namenode -format
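
    If the format succeeds, the NameNode directory configured earlier should contain a freshly written current/VERSION file; checking it is optional:

    cat /home/mmc/hadoop/hdfs/name/current/VERSION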
    

    Print server-side logs to the console

    export HADOOP_ROOT_LOGGER=DEBUG,console
    ./sbin/start-all.sh
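
    Once start-all.sh returns, jps (shipped with the JDK) should list at least NameNode, DataNode and SecondaryNameNode (start-all.sh also brings up the YARN daemons); the NameNode web UI listens on port 9870 by default in Hadoop 3.x:

    jps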
    

    Verify

    ./bin/hadoop fs -ls /
    ./bin/hadoop fs -mkdir -p /user/hadoop/input
    ./bin/hadoop fs -ls /user/hadoop/input
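
    The WordCount job below expects tab-separated text in the input directory; one way to provide some sample data (the file name is just an example) is:

    printf 'hello\tworld\thello\thadoop\n' > /tmp/words.txt
    ./bin/hadoop fs -put /tmp/words.txt /user/hadoop/input/
    ./bin/hadoop fs -cat /user/hadoop/input/words.txt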
    

    pom.xml

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <version>3.3.0</version>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>3.3.0</version>
          <scope>test</scope>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>3.3.0</version>
        </dependency>
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-common</artifactId>
          <version>3.3.0</version>
        </dependency>
    
    
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-api</artifactId>
          <version>3.3.0</version>
        </dependency>
    

    Log configuration

    src/main/resources/log4j.properties

    # Set root logger level to DEBUG and its only appender to A1.
    log4j.rootLogger=DEBUG, A1
    
    # A1 is set to be a ConsoleAppender.
    log4j.appender.A1=org.apache.log4j.ConsoleAppender
    
    # A1 uses PatternLayout.
    log4j.appender.A1.layout=org.apache.log4j.PatternLayout
    log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
    

    Java code

    WordCount.java

    package org.example;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class WordCount {
        static class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
    
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                //Get one line of input and convert the serialized value to a String
                String line = value.toString();
                //Split the line on the tab delimiter
                String[] words = line.split("\t");
                //Iterate over the words and emit <word, 1>
                for(String word:words){
                    //Write out using Hadoop's writable (serializable) types
                    context.write(new Text(word),new IntWritable(1));
                }
            }
        }
        static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
            //The reduce method handles one group of input at a time: a key together with all of its values (k: v1, v2, v3)
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
                //Define a counter
                int count = 0;
                //Iterate over the group and add up the occurrences of the key into count
                for(IntWritable value : values){
                    count += value.get();
                }
                context.write(key,new IntWritable(count));
    
            }
        }
    }
    

    App.java

    package org.example;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class App
    {
        public static void main( String[] args ) throws Exception{
            String jobName = "word count";
    //        String inputPath = "hdfs://192.168.56.200:9000/user/hadoop/input/";
            String inputPath = "hdfs://192.168.0.24:9000/user/hadoop/input/";
    //        String inputPath = "/user/hadoop/input/";
    
    //        String outputPath = "hdfs://192.168.56.200:9000/user/hadoop/output/";
            String outputPath = "/home/mmc/downloads/hadoop/output";
    
            Configuration conf = new Configuration();
    
    //        conf.set("fs.defaultFS", "hdfs://192.168.56.200:9000");
            conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
            conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
            conf.set("dfs.client.use.datanode.hostname", "true");
    
            Job job = Job.getInstance(conf);
            job.setJobName(jobName);
    
            job.setJarByClass(WordCount.class);
    
            job.setMapperClass(WordCount.WordCountMapper.class);
            job.setReducerClass(WordCount.WordCountReducer.class);
    
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            FileInputFormat.setInputPaths(job,new Path(inputPath));
            FileOutputFormat.setOutputPath(job,new Path(outputPath));
            System.exit(job.waitForCompletion(true)?0:1);
        }
    }
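
    One way to build and run the job, assuming hadoop is on PATH and using a placeholder for whatever jar name mvn package produces for this project, then reading the local output directory configured in App.java:

    mvn clean package
    hadoop jar target/<project-jar>.jar org.example.App
    cat /home/mmc/downloads/hadoop/output/part-r-00000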
    