在下面的练习中,需要自行将 Spark 的 jar 包添加到项目依赖中。
1. Spark MLlib 底层的向量、矩阵运算使用了 Breeze 库。
ScalaNLP 是一套机器学习和数值计算的库。它主要面向科学计算(SC)、机器学习(ML)和自然语言处理(NLP)。它包括三个库:Breeze、Epic 和 Puck。
Breeze:是机器学习和数值计算库,它是 Spark MLlib 的核心,包括线性代数、数值计算和优化,是一种通用、功能强大、高效的机器学习工具库。
Epic:是一个高性能的统计解析器和结构化预测库。
Puck :是一个快速GPU加速解析器
在使用Breeze 库时,需要导入相关包:
import breeze.linalg._
import breeze.numerics._
具体练习如下:
package leaning

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand

/**
 * Created by dy9776 on 2017/12/5.
 *
 * A runnable tour of the Breeze API: construction, element access, element
 * manipulation, numeric functions, sums, boolean functions, linear algebra and
 * rounding functions.
 *
 * Comments after a statement show the output recorded by the original author
 * whenever that output is consistent with the code.
 */
object Practise_breeze {

  /**
   * Runs every example in order and prints the results to standard output.
   *
   * Several values (`a`, `m`, `a1`) are mutated in place by the `:=`, `:+=`
   * and `:*=` examples and then reused, so the statement order matters.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // ------------------------------------------------------------------
    // Creation
    // ------------------------------------------------------------------

    // All-zero 3x2 matrix
    val matrix: DenseMatrix[Double] = DenseMatrix.zeros[Double](3, 2)
    println(matrix)
    /*
    0.0  0.0
    0.0  0.0
    0.0  0.0
    */

    // All-zero vector
    val testVector: DenseVector[Double] = DenseVector.zeros[Double](2)
    println(testVector)

    // All-one vector
    val allOneVector = DenseVector.ones[Double](2)
    println(allOneVector)

    // Vector of length 3 filled with a constant value
    val filledVector = DenseVector.fill[Double](3, 2.0)
    println(filledVector)

    // Range vectors (start, exclusive end, step) -- these are NOT random
    val rangeInt = DenseVector.range(1, 10, 2)     // DenseVector(1, 3, 5, 7, 9)
    val rangeDouble = DenseVector.rangeD(1, 9, 2)  // DenseVector(1.0, 3.0, 5.0, 7.0)
    val rangeFloat = DenseVector.rangeF(1, 7, 2)   // DenseVector(1.0, 3.0, 5.0)
    println(rangeInt)
    println(rangeDouble)
    println(rangeFloat)

    // Identity matrix
    val unitMatrix = DenseMatrix.eye[Double](4)
    // println(unitMatrix)
    /*
    1.0  0.0  0.0  0.0
    0.0  1.0  0.0  0.0
    0.0  0.0  1.0  0.0
    0.0  0.0  0.0  1.0
    */

    // Diagonal matrix built from a vector
    val diagonalMatrix = diag(DenseVector(3.0, 4.0, 5.0))
    // println(diagonalMatrix)
    /*
    3.0  0.0  0.0
    0.0  4.0  0.0
    0.0  0.0  5.0
    */

    // Matrix built row by row (one tuple per row)
    val byRowCreateMatrix = DenseMatrix((4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    // println(byRowCreateMatrix)
    /*
    4.0  5.0  6.0
    7.0  8.0  9.0
    */

    // Vector built from its elements.
    // The original wrapped the values in an extra pair of parentheses, which
    // produced a one-element vector holding a Tuple6 instead of six doubles.
    val denseCreateVector = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
    // println(denseCreateVector) // DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0)

    // Vector transpose (same extra-parentheses fix as above)
    val vectorTransposition = DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0).t
    println(vectorTransposition) // Transpose(DenseVector(4.0, 5.0, 6.0, 7.0, 8.0, 9.0))

    // Vector built from a function of the index
    val funCreateVector = DenseVector.tabulate(5)(i => i * i)
    println(funCreateVector) // DenseVector(0, 1, 4, 9, 16)
    val funCreateVector2 = DenseVector.tabulate(0 to 5)(i => i * i)
    println(funCreateVector2) // DenseVector(0, 1, 4, 9, 16, 25)

    // Matrix built from a function of (row, column)
    val createFuncMatrix = DenseMatrix.tabulate(3, 4) { case (i, j) => i * i + j * j }
    // println(createFuncMatrix)
    /*
    0  1  4  9
    1  2  5  10
    4  5  8  13
    */

    // Matrix built from a flat array; the data is laid out column by column
    val createFunctionMatrix =
      new DenseMatrix[Double](3, 2, Array(1.0, 4.0, 7.0, 3.0, 6.0, 9.0))
    // println(createFunctionMatrix)
    /*
    1.0  3.0
    4.0  6.0
    7.0  9.0
    */

    // Random vectors drawn from Rand.uniform (values differ on every run)
    val uniformRandomVector = DenseVector.rand(9, Rand.uniform)
    println(uniformRandomVector)
    val uniformRandomVector2 = DenseVector.rand(9, Rand.uniform)
    println(uniformRandomVector2)

    // Random 3x2 matrix drawn from Rand.uniform (values differ on every run)
    val uniformRandomMatrix = DenseMatrix.rand(3, 2, Rand.uniform)
    println(uniformRandomMatrix)

    // Random 3x2 matrix drawn from Rand.gaussian, so entries may be negative
    val gaussianRandomMatrix = DenseMatrix.rand(3, 2, Rand.gaussian)
    println(gaussianRandomMatrix)

    // ------------------------------------------------------------------
    // Element access
    // ------------------------------------------------------------------

    val a = new DenseVector[Int](Array(1 to 20: _*))
    println(a) // DenseVector(1, 2, 3, ..., 20)

    // Single position
    println(a(0)) // 1

    // Sub-vectors
    println(a(1 to 4))    // DenseVector(2, 3, 4, 5)
    println(a(1 until 4)) // DenseVector(2, 3, 4)

    // From a start position to the end (-1 addresses the last element)
    println(a(1 to -1)) // DenseVector(2, 3, 4, ..., 20)

    // Sub-vector with an explicit step; a negative step walks backwards
    println(a(5 to 0 by -1)) // DenseVector(6, 5, 4, 3, 2, 1)

    // Last element
    println(a(-1)) // 20

    val m = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    println(m)
    /*
    1.0  2.0  3.0
    4.0  5.0  6.0
    */

    // Single position (row, column)
    println(m(0, 1)) // 2.0

    // A whole column
    println(m(::, 1)) // DenseVector(2.0, 5.0)

    // ------------------------------------------------------------------
    // Element manipulation
    // ------------------------------------------------------------------

    // Reshape 2x3 -> 3x2; elements are taken in column order
    val reshapedMatrix = m.reshape(3, 2)
    println(reshapedMatrix)
    /*
    1.0  5.0
    4.0  3.0
    2.0  6.0
    */

    // Flatten the matrix into a vector (column order)
    val toVector = m.toDenseVector
    println(toVector)               // DenseVector(1.0, 4.0, 2.0, 5.0, 3.0, 6.0)
    println(toVector.toDenseMatrix) // 1.0  4.0  2.0  5.0  3.0  6.0

    // Copy of the lower triangle
    println(lowerTriangular(m))
    /*
    1.0  0.0
    4.0  5.0
    */

    // Copy of the upper triangle
    println(upperTriangular(m))
    /*
    1.0  2.0
    0.0  5.0
    */

    // Matrix copy; m itself is still the unmodified 2x3 matrix here
    println(m.copy)
    /*
    1.0  2.0  3.0
    4.0  5.0  6.0
    */

    // Diagonal elements. The recorded outputs above show that the triangular
    // helpers return a square 2x2 matrix for the 2x3 input, which is why the
    // diagonal is (1.0, 5.0) rather than (1.0, 6.0).
    println(diag(upperTriangular(m))) // DenseVector(1.0, 5.0)

    // Assign a scalar to a slice; `:=` yields the slice that was assigned
    println(a(1 to 4) := 5) // DenseVector(5, 5, 5, 5)

    // Assign a vector to a slice
    println(a(1 to 4) := DenseVector(1, 2, 3, 4)) // DenseVector(1, 2, 3, 4)
    println(m)

    // Assign a scalar to a sub-matrix.
    // println(m(1 to 2, 1 to 2) := 0.0)
    // ^ throws java.lang.IndexOutOfBoundsException: Row slice of Range(1, 2)
    //   was bigger than matrix rows of 2
    println("-==========m1================-")
    println(m(0 to 1, 1 to 2) := 0.0)
    println("-==========m================-")
    println(m)
    println("-==========m end================-")
    // The slice covers rows 0-1 and columns 1-2, so those two columns of m are
    // zeroed while column 0 keeps 1.0 and 4.0. The output recorded in the
    // original notes showed columns 0-1 zeroed instead, which does not match
    // this statement.

    // Assign a scalar to a whole column
    val re = (m(::, 2) := 5.0)
    println(re.toDenseMatrix) // 5.0  5.0

    val a1 = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0))
    val a2 = DenseMatrix((7.0, 8.0, 9.0), (10.0, 11.0, 12.0))

    // Stack matrices vertically
    val verticalConcat = DenseMatrix.vertcat(a1, a2)
    println(verticalConcat)
    println("-==========================-")
    /*
    1.0   2.0   3.0
    4.0   5.0   6.0
    7.0   8.0   9.0
    10.0  11.0  12.0
    */

    // Join matrices horizontally
    val horizontalConcat = DenseMatrix.horzcat(a1, a2)
    println(horizontalConcat)
    println("-==========================-")
    /*
    1.0  2.0  3.0  7.0   8.0   9.0
    4.0  5.0  6.0  10.0  11.0  12.0
    */

    // Vector concatenation: vertcat yields one longer vector, horzcat a matrix
    val connectVector1 = DenseVector.vertcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    val connectVector2 = DenseVector.horzcat(DenseVector(20, 21, 22), DenseVector(23, 24, 25))
    println(connectVector1) // DenseVector(20, 21, 22, 23, 24, 25)
    println(connectVector2)
    /*
    20  23
    21  24
    22  25
    */

    // ------------------------------------------------------------------
    // Numeric functions
    // NOTE(review): newer Breeze releases appear to spell the element-wise
    // operators as *:*, /:/, <:<, :==, &:& and |:| -- confirm against the
    // Breeze version on the classpath before upgrading.
    // ------------------------------------------------------------------

    // Element-wise addition
    println(a1 + a2)
    /*
    8.0   10.0  12.0
    14.0  16.0  18.0
    */

    // Element-wise multiplication
    println(a1 :* a2)
    /*
    7.0   16.0  27.0
    40.0  55.0  72.0
    */

    // Element-wise division
    println(a1 :/ a2)
    /*
    0.14285714285714285  0.25                 0.3333333333333333
    0.4                  0.45454545454545453  0.5
    */

    // Element-wise less-than comparison
    println(a1 :< a2)
    /*
    true  true  true
    true  true  true
    */

    // Element-wise equality
    println(a1 :== a2)
    /*
    false  false  false
    false  false  false
    */

    // In-place addition of a scalar (mutates a1)
    println(a1 :+= 2.0)
    /*
    3.0  4.0  5.0
    6.0  7.0  8.0
    */

    // In-place multiplication by a scalar (mutates a1 again)
    println(a1 :*= 2.0)
    /*
    6.0   8.0   10.0
    12.0  14.0  16.0
    */

    // Dot product
    val vectorDot = DenseVector(1, 2, 3, 4) dot DenseVector(1, 1, 1, 1)
    println(vectorDot) // 10

    // Largest / smallest element and their positions
    println(max(a1))    // 16.0
    println(min(a1))    // 6.0
    println(argmax(a1)) // (1,2)
    println(argmin(a1)) // (0,0)

    // ------------------------------------------------------------------
    // Sum functions
    // ------------------------------------------------------------------

    val m1 = DenseMatrix(
      (1.0, 2.0, 3.0, 4.0),
      (5.0, 6.0, 7.0, 8.0),
      (9.0, 10.0, 11.0, 12.0))
    println(m1)
    /*
    1.0  2.0   3.0   4.0
    5.0  6.0   7.0   8.0
    9.0  10.0  11.0  12.0
    */
    println("-==========================-")

    // Sum of all elements
    println(sum(m1)) // 78.0

    // Sum of each column
    println(sum(m1, Axis._0)) // 15.0  18.0  21.0  24.0

    // Sum of each row
    println(sum(m1, Axis._1)) // DenseVector(10.0, 26.0, 42.0)

    // Sum of the diagonal elements
    println(trace(lowerTriangular(m1))) // 18.0

    // Running (cumulative) sum
    val a3 = new DenseVector[Int](Array(10 to 20: _*))
    println(accumulate(a3)) // DenseVector(10, 21, 33, 46, 60, 75, 91, 108, 126, 145, 165)

    // ------------------------------------------------------------------
    // Boolean functions
    // ------------------------------------------------------------------

    val c = DenseVector(true, false, true)
    val d = DenseVector(false, true, true)

    // Element-wise AND
    println(c :& d) // DenseVector(false, false, true)

    // Element-wise OR
    println(c :| d) // DenseVector(true, true, true)

    // Element-wise NOT
    println(!c) // DenseVector(false, true, false)

    val e = DenseVector[Int](-3, 0, 2)

    // True when at least one element is non-zero
    println(any(e)) // true

    // True only when every element is non-zero
    println(all(e)) // false

    // ------------------------------------------------------------------
    // Linear algebra functions
    // ------------------------------------------------------------------

    val f = DenseMatrix((1.0, 2.0, 3.0), (4.0, 5.0, 6.0), (7.0, 8.0, 9.0))
    val g = DenseMatrix((1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0))

    // Linear solve: find X such that f * X = g.
    // The original text read `f g`; the backslash operator had been lost,
    // which does not compile.
    println(f \ g)
    /*
    -2.5  -2.5  -2.5
    4.0   4.0   4.0
    -1.5  -1.5  -1.5
    */

    // Transpose
    println(f.t)
    /*
    1.0  4.0  7.0
    2.0  5.0  8.0
    3.0  6.0  9.0
    */

    // Determinant (the original comment mislabelled this as "eigenvalues")
    println(det(f)) // 6.661338147750939E-16

    // Inverse. The determinant above is ~0, so f is numerically singular and
    // the recorded entries are on the order of 1e15-1e16, i.e. not meaningful.
    println(inv(f))

    // Pseudo-inverse (recorded entries are on the order of 1e14-1e15)
    println(pinv(f))

    // Eigenvalues and eigenvectors
    println(eig(f))
    /*
    Eig(DenseVector(16.116843969807043, -1.1168439698070427, -1.3036777264747022E-15),
        DenseVector(0.0, 0.0, 0.0),
        -0.23197068724628617  -0.7858302387420671   0.40824829046386363
        -0.5253220933012336   -0.08675133925662833  -0.816496580927726
        -0.8186734993561815   0.61232756022881      0.4082482904638625)
    */

    // Singular value decomposition.
    // NOTE(review): the third component is presumably already transposed
    // (Vt) -- confirm against the Breeze SVD documentation.
    val svd.SVD(u, s, v) = svd(g)
    println(u)
    println("==============================")
    println(s) // DenseVector(3.0000000000000004, 0.0, 0.0)
    println("==============================")
    println(v)

    // Rank of the matrix
    println(rank(f)) // 2

    // Total number of elements
    println(f.size) // 9

    // Number of rows
    println(f.rows) // 3

    // Number of columns (the original evaluated f.cols without printing it)
    println(f.cols) // 3

    // ------------------------------------------------------------------
    // Rounding functions
    // ------------------------------------------------------------------

    val h = DenseVector(-1.2, 0.7, 2.3)

    // Round to nearest
    println(round(h)) // DenseVector(-1, 1, 2)

    // Smallest integer not less than each element
    println(ceil(h)) // DenseVector(-1.0, 1.0, 3.0)

    // Largest integer not greater than each element
    println(floor(h)) // DenseVector(-2.0, 0.0, 2.0)

    // Sign of each element
    println(signum(h)) // DenseVector(-1.0, 1.0, 1.0)

    // Absolute value of each element
    println(abs(h)) // DenseVector(1.2, 0.7, 2.3)
  }
}