zoukankan      html  css  js  c++  java
  • 大数据学习之Spark实现自定义排序 49

    SparkRDD实现自定义排序,需实现Ordered接口,

    原始方法:元组输出

    部分代码如下:

    方法一:自定义一个类, 实现Ordered自定义的排序

    代码如下:

    package day04
    
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * @author Dawn
      * @version 1.0, 2019年6月23日20:39:03
      *
      *          实现自定义的排序
      *          按照年龄进行排序
      */
    object MySort1 {
      def main(args: Array[String]): Unit = {
        // 1. Spark entry point: local mode with two worker threads.
        val sparkConf: SparkConf = new SparkConf().setAppName("MySort1").setMaster("local[2]")
        val sc: SparkContext = new SparkContext(sparkConf)

        // 2. Sample records, each in "name,age,weight" form.
        val girl: Array[String] = Array("dawn,20,65","yaya,20,55","susu,25,50","liuyang,18,45")

        // 3. Distribute the records as an RDD.
        val rawRecords: RDD[String] = sc.parallelize(girl)

        // 4. Parse each CSV record into a Girl, which carries its own
        //    Ordered[Girl] comparison (age ascending, weight descending on ties).
        val girls: RDD[Girl] = rawRecords.map { record =>
          val parts: Array[String] = record.split(",")
          new Girl(parts(0), parts(1).toInt, parts(2).toInt)
        }

        // 5. Sort by the element itself; the implicit Ordering is derived
        //    from Girl's Ordered implementation.
        val sorted = girls.sortBy(identity)
        println(sorted.collect().toBuffer)

        // 6. Release the SparkContext.
        sc.stop()
      }
    }
    
    // Sort key class: implements scala's Ordered so instances compare directly.
    // Rule: younger first; on equal ages, the heavier one sorts first.
    class Girl(val name: String, val age: Int, val weight: Int) extends Ordered[Girl] with Serializable {

      // Negative result => `this` sorts before `that`.
      override def compare(that: Girl): Int =
        if (age != that.age) age - that.age   // ascending by age
        else that.weight - this.weight        // tie: descending by weight

      override def toString: String = s"名字:$name,年龄:$age,体重:$weight"
    }
    

      

    方法二:模式匹配方式进行排序

    代码如下:

    package day04
    
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * @author Dawn
      * @version 1.0, 2019年6月23日21:03:30
      *          模式匹配方式进行排序
      */
    object MySort2 {
      def main(args: Array[String]): Unit = {
        // 1. Spark entry point running locally on two threads.
        val conf: SparkConf = new SparkConf().setAppName("MySort2").setMaster("local[2]")
        val sc: SparkContext = new SparkContext(conf)

        // 2. Sample records, each in "name,age,weight" form.
        val girl: Array[String] = Array("dawn,20,65","yaya,20,55","susu,25,50","liuyang,18,45")

        // 3/4. Distribute and parse each record into a (name, age, weight) tuple.
        val tuples = sc.parallelize(girl).map { line =>
          val cols: Array[String] = line.split(",")
          (cols(0), cols(1).toInt, cols(2).toInt)
        }

        // 5. Sort by a Girl1 built per record; Girl1's Ordered implementation
        //    supplies the comparison used by sortBy.
        val sorted = tuples.sortBy { case (name, age, weight) => Girl1(name, age, weight) }
        println(sorted.collect().toBuffer)
        sc.stop()
      }
    }
    
    /**
     * Sort key used by MySort2 via pattern-matched sortBy.
     * Rule: younger first; on equal ages, the heavier one sorts first.
     */
    case class Girl1(name: String, age: Int, weight: Int) extends Ordered[Girl1] {

      override def compare(that: Girl1): Int = {
        // BUG FIX: the original tested `this.age == this.age`, which is always
        // true, so the age field was ignored and records sorted by weight only.
        // Compare against `that` so the age rule actually applies.
        if (this.age == that.age) {
          // Equal ages: heavier first (descending weight).
          -(this.weight - that.weight)
        } else {
          // Ascending age.
          this.age - that.age
        }
      }

      override def toString: String = s"名字:$name,年龄:$age,体重:$weight"
    }
    

      

    方法三:专门定义一个隐式类来排序

    建议写成隐式类,因为可以将你需要的隐式转换全写在一个隐式类中,直接导入就行了!!

     

    编写隐式类:

    package day04
    
    // Central holder for implicit orderings: callers import only what they need.
    object ImplicitRules {
      // Ordering[Girl2]: ascending age; on a tie, descending weight.
      implicit object OrderingGirl extends Ordering[Girl2] {
        override def compare(x: Girl2, y: Girl2): Int =
          if (x.age != y.age) x.age - y.age   // younger first
          else y.weight - x.weight            // heavier first on equal age
      }
    }
    

      

    编写主程序:

    package day04
    
    import org.apache.spark.{SparkConf, SparkContext}
    
    /**
      * @author Hunter
      * @version 1.0, 20:55 2019/1/16
      *
      * Sorting with an externally defined implicit Ordering.
      * Keeping orderings in a dedicated holder (ImplicitRules) means a caller
      * needs only a single import to pick up the rule it wants.
      */

    object MySort3 {
      def main(args: Array[String]): Unit = {
        // 1. Spark entry point running locally on two threads.
        val conf: SparkConf = new SparkConf().setAppName("MySort3").setMaster("local[2]")
        val sc: SparkContext = new SparkContext(conf)

        // 2. Sample records, each in "name,age,weight" form.
        val girl: Array[String] = Array("dawn,20,65","yaya,20,55","susu,25,50","liuyang,18,45")

        // 3. Distribute the records as an RDD.
        val grdd1 = sc.parallelize(girl)

        // 4. Parse each record into a (name, age, weight) tuple.
        val grdd2 = grdd1.map(line => {
          val fields = line.split(",")
          (fields(0), fields(1).toInt, fields(2).toInt)
        })

        // 5. Bring Ordering[Girl2] into implicit scope so sortBy can order
        //    by the Girl2 key built from each tuple.
        import ImplicitRules.OrderingGirl
        val sortedRdd = grdd2.sortBy(s => Girl2(s._1, s._2, s._3))
        val r = sortedRdd.collect()
        println(r.toBuffer)

        // BUG FIX: the original never released the SparkContext; stop it so
        // the application shuts down cleanly like MySort1/MySort2 do.
        sc.stop()
      }
    }
    
    // Plain data holder; its ordering is supplied externally by ImplicitRules.OrderingGirl.
    case class Girl2(name: String, age: Int, weight: Int)
    

      

  • 相关阅读:
    nginx 转发请求头信息
    canny算法实现
    单字符旋转校正
    图像匹配
    Edge Extraction (subpixel Precise)
    Edge Extraction 边缘提取
    圆形环绕字 从直角坐标到极坐标的转换
    Image acquisition
    Contour Processing
    Matching
  • 原文地址:https://www.cnblogs.com/hidamowang/p/11144189.html
Copyright © 2011-2022 走看看