zoukankan      html  css  js  c++  java
  • 验证一下 Spark Row getAs 类型以及空值问题

    package com.javartisan.demo
    
    import org.apache.spark.sql.SparkSession
    
    /**
      * Minimal local Spark experiment: builds two single-row DataFrames, full-outer-joins
      * them, and prints the runtime class of the rows that come back from `full.rdd`.
      *
      * The two inputs have different join keys (`a = 1` vs `a1 = 2`), so the full outer
      * join has no matching pair and each output row carries nulls for the side that did
      * not match. That makes it a convenient fixture for inspecting how `Row` exposes
      * types and null values.
      */
    object SparkLocal {

      /**
        * Runs the experiment on a local master (`local[*]`) and prints the schemas, the
        * row class, the join result and the row counts to stdout.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {

        val spark = SparkSession.builder().master("local[*]").appName("spark test").getOrCreate()
        import spark.implicits._

        // Stop the session in `finally` so it is released even when one of the
        // Spark actions below throws.
        try {
          val sc = spark.sparkContext

          // One row per side; the first column is the join key and differs (1 vs 2).
          val arr1 = Array[(Int, Int, String, String)]((1, 1, "1", "1"))
          val arr2 = Array[(Int, Int, String, String)]((2, 1, "1", "1"))
          val rdd1 = sc.parallelize[(Int, Int, String, String)](arr1)
          val rdd2 = sc.parallelize[(Int, Int, String, String)](arr2)
          val df1 = rdd1.toDF("a", "b", "c", "d")
          val df2 = rdd2.toDF("a1", "b1", "c1", "d1")
          df1.printSchema()
          df2.printSchema()

          val full = df1.join(df2, $"a" === $"a1", "full")
          val newFull = full.rdd.map { row =>
            // Original author's note: the class observed here is GenericRowWithSchema.
            // The println runs inside the map task; with a local master its output
            // lands on this process's stdout.
            println(s"row class ${row.getClass}")
            row
          }
          // `map` is lazy: this count is the action that triggers the println above.
          println(newFull.count())
          full.show(false)
          println(df1.count())
          println(df2.count())
        } finally {
          spark.stop()
        }
      }
    }
    

      

  • 相关阅读:
    python爬取代理IP地址
    神经网络训练的过程
    机器学习中用到的数学概念
    Navicat连接Mysql错误代码1251
    mysql安装
    mysql运行找不到MSVCP140.dll
    tomcat 日志乱码
    扁平化 Flat
    常见的WEB安全及防护
    CentOS ceph 集群搭建(单节点)
  • 原文地址:https://www.cnblogs.com/leodaxin/p/10948959.html
Copyright © 2011-2022 走看看