zoukankan      html  css  js  c++  java
  • spark MLlib DataType ML中的数据类型

    package ML.DataType;
    
    
    import org.apache.spark.SparkConf;
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.mllib.linalg.*;
    import org.apache.spark.mllib.linalg.distributed.*;
    import org.apache.spark.mllib.regression.LabeledPoint;
    import org.apache.spark.mllib.util.MLUtils;
    
    import java.util.Arrays;
    
    /**
     * TODO
     *
     * @ClassName: DataType
     * @author: DingH
     * @since: 2019/4/3 10:06
     */
    public class DataType {
        public static void main(String[] args) {
    
            SparkConf conf = new SparkConf().setMaster("local").setAppName("Datatype");
            JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
    
            /**
             * @Title: vectors.dense方法生成向量,sparse生成稀疏向量。第一个3是向量的大小,第二个列表是不为0的下表,第三个是对应的value.
             */
            Vector dense = Vectors.dense(1.0, 0.0, 3.0);
            Vector sparse = Vectors.sparse(3, new int[]{0, 2}, new double[]{1.0, 3.0});
    
            /**
             * @Title: 对向量进行标记,1.0为正,0.0为负
             */
            LabeledPoint labeledPoint = new LabeledPoint(1.0, dense);
            LabeledPoint labeledPoint1 = new LabeledPoint(0.0, sparse);
    
            /**
             * @Title: libSVM文件: lable1  index1:value1  index2:value2
             */
            JavaRDD<LabeledPoint> labeledPointJavaRDD = MLUtils.loadLibSVMFile(javaSparkContext.sc(), "/data...").toJavaRDD();
    
            /**
             * @Title: matricex.dense生成矩阵。3*2的矩阵  列式优先
             * [1.0 2.0
             * 3.0 4.0
             * 5.0 6.0]
             */
            Matrix dense1 = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
    
            /**
             * @Title: matricex.sparse生成稀疏矩阵。3*2的矩阵。第三个参数和第四个参数对应为不为0的元素。
             * [9 0
             * 0 6
             * 0 8]     第三个参数: 1-0=1,3-1=2,每列不为0的元素分别是1个和2个。   第四个参数,从头开始遍历行,不为0的行。
             */
            Matrix sparse1 = Matrices.sparse(3, 2, new int[]{0, 1, 3}, new int[]{0, 2, 1}, new double[]{9, 6, 8});
    
            /**
             * @Title: Rowmatrix
             */
            JavaRDD<Vector> parallelize = javaSparkContext.parallelize(Arrays.asList(
                    Vectors.dense(1, 2, 3),
                    Vectors.dense(2, 3, 4),
                    Vectors.dense(3, 4, 5)
            ));
            RowMatrix rowMatrix = new RowMatrix(parallelize.rdd());
            long l = rowMatrix.numRows();
            long l1 = rowMatrix.numCols();
            QRDecomposition<RowMatrix, Matrix> rowMatrixMatrixQRDecomposition = rowMatrix.tallSkinnyQR(true);
    
            /**
             * @Title: IndexedRowMatrix
             */
            JavaRDD<IndexedRow> parallelize1 = javaSparkContext.parallelize(Arrays.asList(
                    new IndexedRow(1, dense),
                    new IndexedRow(2, dense),
                    new IndexedRow(3, dense)
            ));
            IndexedRowMatrix indexedRowMatrix = new IndexedRowMatrix(parallelize1.rdd());
            long l2 = indexedRowMatrix.numCols();
            long l3 = indexedRowMatrix.numRows();
            RowMatrix rowMatrix1 = indexedRowMatrix.toRowMatrix();
    
            /**
             * @Title: CoordinateMatrix
             */
            JavaRDD<MatrixEntry> parallelize2 = javaSparkContext.parallelize(Arrays.asList(
                    new MatrixEntry(0, 1, 3),
                    new MatrixEntry(1, 3, 1),
                    new MatrixEntry(2, 1, 1)
            ));
            CoordinateMatrix coordinateMatrix = new CoordinateMatrix(parallelize2.rdd());
            long l4 = coordinateMatrix.numCols();
            long l5 = coordinateMatrix.numRows();
            IndexedRowMatrix indexedRowMatrix1 = coordinateMatrix.toIndexedRowMatrix();
    
            /**
             * @Title: BlocakMatrix 。   toBlockMatrix可以设置参数,规定row,col的大小,默认1024*1024
             */
            BlockMatrix cache = indexedRowMatrix.toBlockMatrix().cache();
            BlockMatrix cache1 = coordinateMatrix.toBlockMatrix().cache();
            cache.validate();
            BlockMatrix multiply = cache.transpose().multiply(cache);
        }
    }
  • 相关阅读:
    centos7安装mysql8 ERROR! The server quit without updating PID file
    linux桌面系统开启windows远程访问
    intellij ide 激活(转发)
    intellij ide调用一个对象所有的set方法
    linux服务器磁盘挂载
    互联网公司研发团队服务器开发工具清单
    intellij ide 集成cmder
    maven 私服上有jar包但是却下载不下来
    java开发人员win10配置
    996 icu我能为你做什么?
  • 原文地址:https://www.cnblogs.com/dhName/p/10655057.html
Copyright © 2011-2022 走看看