zoukankan      html  css  js  c++  java
  • 特征变换--->标签到索引的转换(StringIndexer)

    package Spark_MLlib
    
    import org.apache.spark.ml.feature.StringIndexer
    import org.apache.spark.sql.SparkSession
    
    
    /**
     * Minimal example of Spark ML's `StringIndexer`: a string column ("type") is
     * mapped to a numeric index column ("type_index").
     *
     * With the indexer's default settings the labels are ordered by descending
     * frequency, so for the sample data below the mapping is
     * log -> 0.0, text -> 1.0, soyo -> 2.0.
     */
    object 特征变换_StringIndexer {

      /** Local (2 threads) Spark session used by the example. */
      val spark: SparkSession =
        SparkSession.builder().master("local[2]").appName("标签和索引的转换").getOrCreate()

      /**
       * Builds a small (id, type) DataFrame, fits a `StringIndexer` on the "type"
       * column, prints the learned labels and shows the indexed DataFrame.
       *
       * @param args command-line arguments (unused)
       */
      def main(args: Array[String]): Unit = {
        try {
          val df = spark.createDataFrame(Seq(
            (0, "log"),
            (1, "text"),
            (2, "text"),
            (3, "soyo"),
            (4, "text"),
            (5, "log"),
            (6, "log"),
            (7, "log")
          )).toDF("id", "type")

          val indexer = new StringIndexer()
            .setInputCol("type")
            .setOutputCol("type_index")
          val model = indexer.fit(df)

          // Labels in index order, i.e. most frequent first: log, text, soyo.
          model.labels.foreach(println)

          // The most frequent label gets the lowest index, so "log" becomes 0.0.
          val indexed = model.transform(df)
          indexed.show(false)
        } finally {
          // Release the local Spark context (threads, UI port) even if the job fails.
          spark.stop()
        }
      }
    }

    结果:

    log
    text
    soyo
    +---+----+----------+
    |id |type|type_index|
    +---+----+----------+
    |0  |log |0.0       |
    |1  |text|1.0       |
    |2  |text|1.0       |
    |3  |soyo|2.0       |
    |4  |text|1.0       |
    |5  |log |0.0       |
    |6  |log |0.0       |
    |7  |log |0.0       |
    +---+----+----------+

  • 相关阅读:
    音频,视频简单运用
    转载:Linux Used内存到底到哪里去了?
    shell awk统计重复个数
    Java中的单例模式
    Grub启动配置文件
    C语言实现全排列
    C语言缓冲区清空
    c语言内存对齐(1)
    防盗链原理
    C语言内存对齐(2)
  • 原文地址:https://www.cnblogs.com/soyo/p/7760500.html
Copyright © 2011-2022 走看看