zoukankan      html  css  js  c++  java
  • 特征变换--->索引到标签的转换(IndexToString)

    package Spark_MLlib
    
    import org.apache.spark.ml.feature.{IndexToString, StringIndexer}
    import org.apache.spark.sql.SparkSession
    
    /**
      * Demonstrates a StringIndexer -> IndexToString round trip on a small
      * in-memory DataFrame.
      *
      * The string column `label` is encoded into the numeric column
      * `label_index`, and that numeric column is then decoded back into strings
      * in the column `original_index`. Each intermediate result is printed to
      * stdout with `show()`.
      */
    object 特征变换_IndexToString {
      /** Local Spark session shared by the demo; stopped at the end of `main`. */
      val spark: SparkSession =
        SparkSession.builder().master("local").appName("IndexToString").getOrCreate()
      import spark.implicits._

      /**
        * Runs the demo end to end and prints three tables.
        *
        * @param args command-line arguments (not used)
        */
      def main(args: Array[String]): Unit = {
        try {
          val df = spark.createDataFrame(Seq(
            (0, "log"),
            (1, "text"),
            (2, "text"),
            (3, "soyo"),
            (4, "text"),
            (5, "log"),
            (6, "log"),
            (7, "log")
          )).toDF("id", "label")

          // Fit the indexer on the sample data and append the numeric index column.
          val model = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("label_index")
            .fit(df)
          val indexed = model.transform(df)

          // Register the indexed data as a temp view so it can be queried with SQL.
          indexed.createOrReplaceTempView("soyo")
          spark.sql("select * from soyo ").show()
          // Distinct (label, label_index) pairs, i.e. the mapping with duplicates removed.
          spark.sql("select distinct label,label_index from soyo ").show()

          // Map the label-index column back to the original string labels.
          // No labels are passed to the converter explicitly; presumably it reads
          // them from the metadata StringIndexer attached to `label_index`
          // (see the Spark ML feature documentation).
          val converter = new IndexToString()
            .setInputCol("label_index")
            .setOutputCol("original_index")
          val converted = converter.transform(indexed)
          converted.show()
        } finally {
          // Always stop the session so the local Spark context is shut down,
          // even if one of the steps above throws.
          spark.stop()
        }
      }
    }

    结果:

    +---+-----+-----------+
    | id|label|label_index|
    +---+-----+-----------+
    |  0|  log|        0.0|
    |  1| text|        1.0|
    |  2| text|        1.0|
    |  3| soyo|        2.0|
    |  4| text|        1.0|
    |  5|  log|        0.0|
    |  6|  log|        0.0|
    |  7|  log|        0.0|
    +---+-----+-----------+

    +-----+-----------+
    |label|label_index|
    +-----+-----------+
    | soyo|        2.0|
    | text|        1.0|
    |  log|        0.0|
    +-----+-----------+

    +---+-----+-----------+--------------+
    | id|label|label_index|original_index|
    +---+-----+-----------+--------------+
    |  0|  log|        0.0|           log|
    |  1| text|        1.0|          text|
    |  2| text|        1.0|          text|
    |  3| soyo|        2.0|          soyo|
    |  4| text|        1.0|          text|
    |  5|  log|        0.0|           log|
    |  6|  log|        0.0|           log|
    |  7|  log|        0.0|           log|
    +---+-----+-----------+--------------+


  • 相关阅读:
    hdu5289 RMQ+二分
    poj1459 最大流Dinic
    poj2391 最大流+拆点
    poj1087&&hdu1526 最大流
    NOI2004 郁闷的出纳员
    Treap入门(转自NOCOW)
    poj 2892
    vijos 1512
    对拍程序
    poj 3264
  • 原文地址:https://www.cnblogs.com/soyo/p/7763172.html
Copyright © 2011-2022 走看看