zoukankan      html  css  js  c++  java
  • Scala实现Mapreduce程序2-----Top5

    输入n个数,返回TOP5的数字

    scala实现:以各个数字为key,value取空字符串"",按照key降序排序后,取出前5个

    /**
     * Top-5 via Spark: read lines of the form "a,b,n,c", take the third
     * comma-separated field as an Int, sort descending, print the largest five.
     */
    object Top5 {
      def main(args: Array[String]): Unit = {
        // NOTE(review): setMaster("") is a placeholder from the original post —
        // supply a real master URL (e.g. "local[*]") before running.
        val conf = new SparkConf().setMaster("")
        val sc = new SparkContext(conf)
        val lines = sc.textFile("/spark/test")
        lines
          .map(_.split(","))                                   // split each line once, not twice
          .filter(fields => fields.length == 4)                // keep only well-formed 4-field rows
          .map(fields => (fields(2).trim.toInt, ""))           // key = the number, value unused
          .sortByKey(ascending = false)                        // descending: largest keys first
          .map(_._1)
          .take(5)                                             // driver-side Array of the top 5
          .zipWithIndex                                        // replaces the original mutable `var index`
          .foreach { case (value, i) =>
            println("top index:" + (i + 1) + " " + value)
          }
      }
    }

    Mapreduce实现,(key,"") =>(index+"",key)

    MapReduce中的IntWritable默认是按照升序排列的,要实现降序排序(取Top5需要最大的在前),自己实现MyIntWritable
    /**
     * Writable key type that sorts integers in DESCENDING order, so that in the
     * shuffle/sort phase the largest values reach the reducer first (Top-N pattern).
     * Hadoop's stock IntWritable sorts ascending, hence this custom key.
     */
    public class MyIntWritable implements WritableComparable<MyIntWritable> {
        private Integer num;

        public MyIntWritable(Integer num) {
            this.num = num;
        }

        // Hadoop requires a public no-arg constructor for deserialization.
        public MyIntWritable() {}

        public void write(DataOutput output) throws IOException {
            output.writeInt(num);
        }

        public void readFields(DataInput input) throws IOException {
            this.num = input.readInt();
        }

        /**
         * Reversed natural order: larger numbers compare as "smaller" so they sort first.
         * Uses Integer.compare instead of the original subtraction trick
         * (this.num - o.num), which can overflow for operands of opposite sign.
         */
        public int compareTo(MyIntWritable o) {
            return Integer.compare(o.num, this.num);
        }

        @Override
        public int hashCode() {
            return this.num.hashCode();
        }

        // Fixed typo: original declared "toSting", which never overrode Object.toString().
        @Override
        public String toString() {
            return this.num + "";
        }

        @Override
        public boolean equals(Object obj) {
            // Fixed inverted check: the original returned false when obj WAS a
            // MyIntWritable, then unconditionally cast — guaranteeing a
            // ClassCastException for any other type.
            if (!(obj instanceof MyIntWritable)) {
                return false;
            }
            MyIntWritable other = (MyIntWritable) obj;
            // Use equals(), not ==: Integer reference comparison is only reliable
            // inside the JVM's small-integer cache range.
            return this.num.equals(other.num);
        }
    }
    package HadoopvsSpark;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    /**
    * Created by Administrator on 2017/5/26.
    */
    public class TopN {
    public static class TopNMapper extends Mapper<LongWritable,Text,MyIntWritable,Text>{
    public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException {
    String line=value.toString();
    if(line.trim().length()>0){
    String str[]=line.split( "," );
    if(str.length==4){
    context.write( new MyIntWritable( Integer.parseInt( str[2] ) ),new Text( "" ) );
    }
    }
    }
    }

    public static class TopNReducer extends Reducer<MyIntWritable,Text,Text,MyIntWritable>{
    private int index=0;
    public void reduce(MyIntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
    index++;
    if(index<=5){
    context.write( new Text( index+" " ),key );
    }
    }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {


    org.apache.hadoop.conf.Configuration conf=new org.apache.hadoop.conf.Configuration();
    Job job=new Job(conf,"topn");
    job.setJarByClass( TopN.class );

    job.setMapperClass( TopNMapper.class );
    job.setMapOutputKeyClass( MyIntWritable.class );
    job.setMapOutputValueClass( Text.class );

    job.setReducerClass( TopNReducer.class );
    job.setOutputKeyClass( Text.class);
    job.setOutputValueClass( MyIntWritable.class );

    FileInputFormat.addInputPath( job,new Path( args[0] ) );
    Path outputdir=new Path( args[1] );
    FileSystem fs=FileSystem.get( conf ); //判断输出目录是否存在
    if(fs.exists( outputdir )){
    fs.delete( outputdir,true );
    }
    FileOutputFormat.setOutputPath( job,outputdir ) ;
    System.out.println(job.waitForCompletion( true )?1:0);
    }
    }
  • 相关阅读:
    一步步学习SPD2010--第八章节--理解工作流(3)--使用操作和条件
    《python深度学习》笔记---9.1、深度学习知识回顾
    《python深度学习》笔记---8.5、生成式对抗网络简介
    《python深度学习》笔记---8.4、用变分自编码器生成图像
    《python深度学习》笔记---8.3、神经风格迁移
    Git 里面的 origin 到底代表啥意思?
    《python深度学习》笔记---8.2、DeepDream
    《python深度学习》笔记---8.1、使用LSTM生成文本
    《python深度学习》笔记---7.3.3、模型集成
    2021年1月份中国浏览器份额
  • 原文地址:https://www.cnblogs.com/sunt9/p/6936383.html
Copyright © 2011-2022 走看看