zoukankan      html  css  js  c++  java
  • 统计apachelog各访问状态个数(使用MapReduce)

    统计日志文件中各访问状态的个数.

    1.将日志数据上传到hdfs

    路径 /mapreduce/data/apachelog/in 中

    内容如下

    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET / HTTP/1.1" 200 11452
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /tomcat.css HTTP/1.1" 200 5926
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /tomcat.png HTTP/1.1" 200 5103
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /bg-nav.png HTTP/1.1" 200 1401
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /asf-logo.png HTTP/1.1" 200 17811
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /bg-upper.png HTTP/1.1" 200 3103
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /bg-button.png HTTP/1.1" 200 713
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:24:16 +0800] "GET /bg-middle.png HTTP/1.1" 200 1918
    127.0.0.1 - - [15/Feb/2017:16:25:53 +0800] "GET / HTTP/1.1" 404 994
    127.0.0.1 - - [15/Feb/2017:16:25:53 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:25:53 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:28:39 +0800] "GET / HTTP/1.1" 404 994
    127.0.0.1 - - [15/Feb/2017:16:30:32 +0800] "GET / HTTP/1.1" 404 994
    127.0.0.1 - - [15/Feb/2017:16:30:32 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:30:33 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:33:52 +0800] "GET / HTTP/1.1" 404 994
    127.0.0.1 - - [15/Feb/2017:16:40:54 +0800] "GET / HTTP/1.1" 404 994
    127.0.0.1 - - [15/Feb/2017:16:40:54 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:40:59 +0800] "GET /sentiment_ms/login HTTP/1.1" 404 1030
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:41:07 +0800] "GET / HTTP/1.1" 404 994
    0:0:0:0:0:0:0:1 - - [15/Feb/2017:16:41:08 +0800] "GET / HTTP/1.1" 404 994

    2.代码

    package com.zhen.apachelog;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    
    public class ApacheLog {
    
    public static class apacheMapper extends Mapper<Object, Text, Text, IntWritable>{
    
    @Override
    protected void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
    String valueStr = value.toString();
    String[] strings = valueStr.split("" ");
    String status = strings[1].split(" ")[0];
    context.write(new Text(status), new IntWritable(1));
    }
    
    }
    
    public static class apacheReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
    
    @Override
    protected void reduce(Text key, Iterable<IntWritable> value,
    Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
    int count = 0;
    for (IntWritable intWritable : value) {
    count+=intWritable.get();
    }
    context.write(key, new IntWritable(count));
    }
    
    }
    
    
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    
    Configuration conf = new Configuration(); 
    String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); 
    
    Job job = new Job(conf,"ApacheLog");
    job.setJarByClass(ApacheLog.class);
    
    job.setMapperClass(apacheMapper.class);
    job.setReducerClass(apacheReduce.class);
    
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
    System.exit(job.waitForCompletion(true)?0:1);
    }
    
    }

    3.将代码生成jar包

    4.调用

    EFdeMacBook-Pro:hadoop-2.8.0 FengZhen$ hadoop jar /Users/FengZhen/Desktop/ApacheLog.jar com.zhen.apachelog.ApacheLog /mapreduce/data/apachelog/in /mapreduce/data/apachelog/out
    17/09/13 15:32:22 INFO client.RMProxy: Connecting to ResourceManager at localhost/127.0.0.1:8032
    17/09/13 15:32:23 INFO input.FileInputFormat: Total input files to process : 1
    17/09/13 15:32:23 INFO mapreduce.JobSubmitter: number of splits:1
    17/09/13 15:32:23 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1505268150495_0017
    17/09/13 15:32:23 INFO impl.YarnClientImpl: Submitted application application_1505268150495_0017
    17/09/13 15:32:23 INFO mapreduce.Job: The url to track the job: http://192.168.1.64:8088/proxy/application_1505268150495_0017/
    17/09/13 15:32:23 INFO mapreduce.Job: Running job: job_1505268150495_0017
    17/09/13 15:32:32 INFO mapreduce.Job: Job job_1505268150495_0017 running in uber mode : false
    17/09/13 15:32:32 INFO mapreduce.Job: map 0% reduce 0%
    17/09/13 15:32:37 INFO mapreduce.Job: map 100% reduce 0%
    17/09/13 15:32:43 INFO mapreduce.Job: map 100% reduce 100%
    17/09/13 15:32:43 INFO mapreduce.Job: Job job_1505268150495_0017 completed successfully
    17/09/13 15:32:43 INFO mapreduce.Job: Counters: 49
    File System Counters
    FILE: Number of bytes read=216
    FILE: Number of bytes written=272795
    FILE: Number of read operations=0
    FILE: Number of large read operations=0
    FILE: Number of write operations=0
    HDFS: Number of bytes read=1776
    HDFS: Number of bytes written=13
    HDFS: Number of read operations=6
    HDFS: Number of large read operations=0
    HDFS: Number of write operations=2
    Job Counters
    Launched map tasks=1
    Launched reduce tasks=1
    Data-local map tasks=1
    Total time spent by all maps in occupied slots (ms)=3160
    Total time spent by all reduces in occupied slots (ms)=3167
    Total time spent by all map tasks (ms)=3160
    Total time spent by all reduce tasks (ms)=3167
    Total vcore-milliseconds taken by all map tasks=3160
    Total vcore-milliseconds taken by all reduce tasks=3167
    Total megabyte-milliseconds taken by all map tasks=3235840
    Total megabyte-milliseconds taken by all reduce tasks=3243008
    Map-Reduce Framework
    Map input records=21
    Map output records=21
    Map output bytes=168
    Map output materialized bytes=216
    Input split bytes=150
    Combine input records=0
    Combine output records=0
    Reduce input groups=2
    Reduce shuffle bytes=216
    Reduce input records=21
    Reduce output records=2
    Spilled Records=42
    Shuffled Maps =1
    Failed Shuffles=0
    Merged Map outputs=1
    GC time elapsed (ms)=54
    CPU time spent (ms)=0
    Physical memory (bytes) snapshot=0
    Virtual memory (bytes) snapshot=0
    Total committed heap usage (bytes)=358612992
    Shuffle Errors
    BAD_ID=0
    CONNECTION=0
    IO_ERROR=0
    WRONG_LENGTH=0
    WRONG_MAP=0
    WRONG_REDUCE=0
    File Input Format Counters
    Bytes Read=1626
    File Output Format Counters
    Bytes Written=13

    5.查看结果

    EFdeMacBook-Pro:lib FengZhen$ hadoop fs -ls /mapreduce/data/apachelog/out
    Found 2 items
    -rw-r--r-- 1 FengZhen supergroup 0 2017-09-13 15:32 /mapreduce/data/apachelog/out/_SUCCESS
    -rw-r--r-- 1 FengZhen supergroup 13 2017-09-13 15:32 /mapreduce/data/apachelog/out/part-r-00000
    EFdeMacBook-Pro:lib FengZhen$ hadoop fs -text /mapreduce/data/apachelog/out/part-r-00000
    200 8
    404 13

  • 相关阅读:
    Python基础之基本数据类型
    Python基础之变量
    mysql数据库
    进程与线程
    并发编程
    网络编程
    内置函数(魔法方法)
    组合,封装,访问限制机制,property装饰器
    面向对象之继承
    Web开发中最致命的8个小错误
  • 原文地址:https://www.cnblogs.com/EnzoDin/p/7515299.html
Copyright © 2011-2022 走看看