数据集
house.csv
数据集概览
代码
package org.apache.spark.examples.examplesforml

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.{IsotonicRegression, LinearRegression}
import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}

import scala.util.Random

/*
 * Date:        2018.10.15
 * Description: 7-14 isotonic regression algorithm, used for house price prediction
 * Dataset:     house.csv
 */

/**
 * Example program that fits a Spark ML `IsotonicRegression` model predicting the
 * `price` column from the `square` column of `house.csv`, then prints the model's
 * output on a held-out test split.
 */
object IstonicRegression {

  /**
   * Entry point: loads the CSV, shuffles the rows, assembles the feature vector,
   * splits the data 80/20, fits the model on the training part and shows the
   * transformed test part.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("linear")
      .setMaster("local")

    // The builder creates the underlying SparkContext from `conf` when none exists,
    // so no separate SparkContext needs to be constructed here.
    val spark = SparkSession
      .builder()
      .config(conf)
      .getOrCreate()

    try {
      // Backslashes in the Windows path must be escaped: a bare `\h` is not a valid
      // Scala escape sequence and `\7` would be read as an octal escape.
      val file = spark.read
        .format("csv")
        .option("sep", ";")
        .option("header", "true")
        .load("D:\\7-6线性回归-预测房价\\house.csv")

      import spark.implicits._

      // Shuffle the rows: attach a random key to every row and sort by it.
      // The two selected columns are read as strings and converted to Double here.
      val rand = new Random()
      val data = file
        .select("square", "price")
        .map { row =>
          (row.getString(0).toDouble, row.getString(1).toDouble, rand.nextDouble())
        }
        .toDF("square", "price", "rand")
        .sort("rand")

      // Wrap the single numeric feature column into the vector column the estimator reads.
      val ass = new VectorAssembler()
        .setInputCols(Array("square"))
        .setOutputCol("features")
      val dataset = ass.transform(data)

      // Split into training (80%) and test (20%) sets; no seed is supplied.
      val Array(train, test) = dataset.randomSplit(Array(0.8, 0.2))

      val isotonic = new IsotonicRegression()
        .setFeaturesCol("features")
        .setLabelCol("price")

      val model = isotonic.fit(train)
      model.transform(test).show()
    } finally {
      // Always release the Spark session, even if loading or fitting fails.
      spark.stop()
    }
  }
}
输出结果