System.setProperty("hadoop.home.dir", "C:\hadoop-2.7.2"); val spark = SparkSession.builder().config(new SparkConf().setAppName("LR").setMaster("local[*]")).config("spark.sql.warehouse.dir", "file:///").getOrCreate() val sc = spark.sparkContext val rdd = sc.textFile("C:\Users\Daxin\Documents\GitHub\OptimizedRF\sql_data\LRDATA") val schemaString = "label features" // val fields = schemaString.split(" ").map(StructField(_, StringType, true)) // org.apache.spark.ml.linalg.SQLDataTypes.VectorType替换org.apache.spark.ml.linalg.VectorUDT(一个spark包私有的类型) val fields = Array(StructField("label", DoubleType, true), StructField("features", org.apache.spark.ml.linalg.SQLDataTypes.VectorType, true)) val rowRdd = rdd.map { x => Row(x.split(",")(1).toDouble, Vectors.dense(Array[Double](x.split(",")(0).toDouble))) } val schema = StructType(fields) val Array(train, test) = spark.createDataFrame(rowRdd, schema).randomSplit(Array[Double](0.6, 0.4)) val lr = new LinearRegression() .setMaxIter(100) .setRegParam(0.3) .setElasticNetParam(0.8) //.setTol(0.01) // 收敛阈值 val lrModel = lr.fit(train) println(lrModel.transform(test).columns.toBuffer) lrModel.transform(test).select("label", "prediction").show() println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")