  • Big Data Class Experiment

    Today I started by setting up the environment.

    I adapted code I found online and split it into two classes. LogBean wraps each log record so it can be serialized and passed between the map and reduce stages:

    package org.apache.hadoop.examples;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableComparable;
    
    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    
    public class LogBean implements WritableComparable<LogBean> {
        private String id;
        private String time;
        private String traffic;
        private String book;
        private String voide;
    
        public LogBean() {
            super();
        }
    
        public LogBean(String id, String time, String traffic, String book, String voide) {
            this.id = id;
            this.time = time;
            this.traffic = traffic;
            this.book = book;
            this.voide = voide;
        }
    
        @Override
        public String toString() {
            return "LogBean{" +
                    "id='" + id + ' ' +
                    ", time='" + time+ ' ' +
                    ", traffic='" + traffic + ' ' +
                      ", book='" + book + ' ' +
                        ", voide='" + voide + ' ' +
                    '}';
        }
    
        public String getTime() {
            return time;
        }
    
        public void setTime(String time) {
            this.time = time;
        }
    
        public String getId() {
            return id;
        }
    
        public void setId(String id) {
            this.id = id;
        }
    
        public String getBook() {
            return book;
        }
    
        public void setBook(String book) {
            this.book = book;
        }
        public String getVoide() {
            return voide;
        }
    
        public void setVoide(String voide) {
            this.voide = voide;
        }
    
        @Override
        public int compareTo(LogBean o) {
            // ordering is not needed for this job, so all beans compare as equal
            return 0;
        }
    
        @Override
        public void write(DataOutput out) throws IOException {
            
            out.writeUTF(id);
            out.writeUTF(time);
            out.writeUTF(traffic);
            out.writeUTF(book);
            out.writeUTF(voide);
    
        }
    
        @Override
        public void readFields(DataInput in) throws IOException {
          
            id = in.readUTF();
            time = in.readUTF();
            traffic =in.readUTF();
            book =in.readUTF();
            voide =in.readUTF();
        }
    }
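
    Since LogBean implements Hadoop's WritableComparable, the framework serializes it through write() and restores it through readFields(). The small stand-alone check below is my own addition (the class name and sample values are made up) and only verifies that the two methods handle the fields in the same order:

    package org.apache.hadoop.examples;
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class LogBeanRoundTrip {
        public static void main(String[] args) throws IOException {
            // sample values, only for this check
            LogBean original = new LogBean("u001", "2019-11-13", "1024", "book01", "video01");

            // serialize the way Hadoop would: id, time, traffic, book, voide
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            original.write(new DataOutputStream(buffer));

            // read the bytes back into a fresh bean and print it
            LogBean restored = new LogBean();
            restored.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
            System.out.println(restored);
        }
    }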
    

     BaiduLog.java

    This class parses the raw log lines and filters out the records of interest:

    package org.apache.hadoop.examples;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import java.io.IOException;
    public class BaiduLog {
        public static class BaiduLogMapper extends Mapper<LongWritable,Text, Text, LogBean> {
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    //            super.map(key, value, context);
                String log = value.toString();
                String str = "(cn.baidu.core.inteceptor.LogInteceptor:55)";
                if (log.indexOf(str)!=-1){
                    String[] log_arr = log.split(str);
                    String time = log_arr[0].substring(1, 10);
                    String[] log_arr2 = log_arr[1].split(" ");
                    String id = log_arr2[1];
                    String traffic=log_arr2[2];
                    String book = log_arr2[3];
                    String voide =log_arr2[4];
                    if (id.equals("null")){
                        id = log_arr2[1];
                    }
                    LogBean logbean = new LogBean(id,time,traffic,book,voide);
                    context.write(new Text(id), logbean);
                }
            }
        }
        public static class BaiduLogReducer extends Reducer<Text,LogBean,IntWritable,Text>{
            @Override
            protected void reduce(Text key, Iterable<LogBean> values, Context context) throws IOException, InterruptedException {
    //            super.reduce(key, values, context);
                int sum = 0;
                StringBuffer str = new StringBuffer();
                int flag = 0;
                for (LogBean logbean:values){
                    sum++;
                    if (flag==0){
                        str.append(logbean.toString());
                        flag = 1;
                    }
                }
                context.write(new IntWritable(sum),new Text(str.toString()));
            }
        }
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "baidu log");
            job.setJarByClass(BaiduLog.class);
            job.setMapperClass(BaiduLog.BaiduLogMapper.class);
            job.setReducerClass(BaiduLog.BaiduLogReducer.class);
    //        job.setCombinerClass(BaiduLog.BaiduLogReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LogBean.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job,new Path(args[0]));
            FileOutputFormat.setOutputPath(job,new Path(args[1]));
            System.exit(job.waitForCompletion(true)?0:1);
        }
    }
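
    The mapper keys each record on the user id, and the reducer counts how many records each id has while keeping the first record's details. To try the split logic outside the cluster, a stand-alone check like the one below can help; the log line in it is only my guess at the format (a timestamp, the interceptor marker, then id, traffic, book and video fields separated by spaces), so the indices may need adjusting for the real data:

    public class ParseCheck {
        public static void main(String[] args) {
            String marker = "(cn.baidu.core.inteceptor.LogInteceptor:55)";
            // made-up sample line; replace with a real line from the dataset
            String log = "[2019-11-13] " + marker + " u001 1024 book01 video01";
            if (log.indexOf(marker) != -1) {
                // split() treats the marker as a regex, so its parentheses act as a group
                // and a stray ")" ends up as element 0 of the second split
                String[] halves = log.split(marker);
                String[] fields = halves[1].split(" ");
                for (int i = 0; i < fields.length; i++) {
                    System.out.println(i + " -> " + fields[i]);
                }
            }
        }
    }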
     However, the Hive database never got installed properly in the end, which held back the follow-up work.