zoukankan      html  css  js  c++  java
  • Scala 常用方法

    1、take

     获取前n条记录

    scala> val t = sc.parallelize(1  to 10)
    t: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[0] at parallelize at <console>:27
    
    scala> t.take(3)
    res0: Array[Int] = Array(1, 2, 3)

    2、reverse

    反转列表

    scala> val a  = Seq(1,2,3,6,4)
    a: Seq[Int] = List(1, 2, 3, 6, 4)
    
    scala> a.reverse
    res6: Seq[Int] = List(4, 6, 3, 2, 1)

    3、tail

    获取除第一个元素之外的其余元素(即去掉列表头)

    scala> a.tail
    res7: Seq[Int] = List(2, 3, 6, 4)

    4、filter

    过滤:保留满足条件的元素。下例先用 flatMap 丢弃计算时抛出异常的元素(f=1 时除数为 0),再用 filter 保留大于 1 的结果

    scala> val a  = Seq(1,2,3,6,4)
    a: Seq[Int] = List(1, 2, 3, 6, 4)
    
    scala>     val d = a.flatMap(f=>{
         |       try{
         |         Some(f/(f-1))
         |       }catch{
         |         case e:Exception=>None
         |       }
         |     }).filter(_>1)
    d: Seq[Int] = List(2)

    5、init

    返回除最后一个元素之外的所有元素(原集合不变)

    scala> val t = sc.parallelize(1  to 10)
    t: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[380] at parallelize at <console>:40
    
    scala> val c = t.take(5)
    c: Array[Int] = Array(1, 2, 3, 4, 5)
    
    scala> val d = c.init
    d: Array[Int] = Array(1, 2, 3, 4)

    6、last

    获取最后一个元素

    scala> val t = sc.parallelize(1  to 10).take(3)
    t: Array[Int] = Array(1, 2, 3)
    
    scala> t.last
    res55: Int = 3

    7、randomSplit

    按给定权重随机切分 RDD。下例中的 c 是一个 RDD[Array[Double]](不是第 5 节中的数组 c),按 0.8/0.1/0.1 的权重切分为训练集、验证集和测试集

    scala> c.take(10)
    res64: Array[Array[Double]] = Array(Array(9.0, 21.0, 3.0, 4.0, 5.0, 1.0), Array(21.0, 3.0, 4.0, 21.0, 5.0, 2.0), Array(6.0, 12.0, 45.0, 32.0, 32.0, 3.0))
    
    scala> val Array(trainData, cvData, testData) = c.randomSplit(Array(0.8, 0.1, 0.1))
    trainData: org.apache.spark.rdd.RDD[Array[Double]] = MapPartitionsRDD[391] at randomSplit at <console>:50
    cvData: org.apache.spark.rdd.RDD[Array[Double]] = MapPartitionsRDD[392] at randomSplit at <console>:50
    testData: org.apache.spark.rdd.RDD[Array[Double]] = MapPartitionsRDD[393] at randomSplit at <console>:50

    8、yield 生成集合

    scala> for(i <- 1 to 10) yield i%3
    res7: scala.collection.immutable.IndexedSeq[Int] = Vector(1, 2, 0, 1, 2, 0, 1, 2, 0, 1)
  • 相关阅读:
    linux C总结篇(进程)
    进程与线程的区分
    递归的两种思路
    Linux下git与github的一般使用
    文件读写和文件指针的移动
    文件的创建,打开与关闭
    一个简单脚本
    linux 三剑客命令(grep,sed ,awk)
    常用正则表达式
    PAT:1002. A+B for Polynomials (25) 部分错误
  • 原文地址:https://www.cnblogs.com/huanhuanang/p/7228092.html
Copyright © 2011-2022 走看看