zoukankan      html  css  js  c++  java
  • Spark LR线性回归中RDD转DF中VectorUDT设置

      // Tell Hadoop where its local installation lives. Backslashes have to be escaped in an
      // ordinary string literal: an unescaped "\h" is an invalid escape sequence and does not compile.
      System.setProperty("hadoop.home.dir", "C:\\hadoop-2.7.2")

      // Local session using every available core; the warehouse dir is pinned to a file URI.
      val spark = SparkSession.builder()
        .config(new SparkConf().setAppName("LR").setMaster("local[*]"))
        .config("spark.sql.warehouse.dir", "file:///")
        .getOrCreate()

      val sc = spark.sparkContext

      // Input is read as comma-separated text lines (same escaping rule as above for the path).
      val rdd = sc.textFile("C:\\Users\\Daxin\\Documents\\GitHub\\OptimizedRF\\sql_data\\LRDATA")

      // Use SQLDataTypes.VectorType for the vector column: it is the public replacement for
      // org.apache.spark.ml.linalg.VectorUDT, which is a Spark package-private type.
      val fields = Array(
        StructField("label", DoubleType, nullable = true),
        StructField("features", org.apache.spark.ml.linalg.SQLDataTypes.VectorType, nullable = true)
      )
      val schema = StructType(fields)

      // Each line becomes Row(label, features): column 1 is the label and column 0 is the
      // single feature value. The line is split once and both columns are read from the result.
      val rowRdd = rdd.map { line =>
        val cols = line.split(",")
        Row(cols(1).toDouble, Vectors.dense(Array[Double](cols(0).toDouble)))
      }

      // 60/40 random split into training and test DataFrames.
      val Array(train, test) = spark.createDataFrame(rowRdd, schema).randomSplit(Array[Double](0.6, 0.4))

      val lr = new LinearRegression()
        .setMaxIter(100)
        .setRegParam(0.3)
        .setElasticNetParam(0.8) // .setTol(0.01) would set the convergence tolerance

      val lrModel = lr.fit(train)

      // Build the scored DataFrame once and reuse it for both outputs below.
      val predictions = lrModel.transform(test)

      println(predictions.columns.toBuffer)

      predictions.select("label", "prediction").show()

      println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
  • 相关阅读:
    [翻译] Blocks and Variables
    UIView独占响应事件
    iOS中block类型大全
    Erlang入门(二)—并发编程
    Erlang入门(一)
    学习Erlang--1、入门
    一位Erlang程序员的自白
    安装ejabberd2并配置MySQL为其数据库
    JDBC 与ODBC的区别
    ejabberd的多域名(domain)设置
  • 原文地址:https://www.cnblogs.com/leodaxin/p/7862450.html
Copyright © 2011-2022 走看看