package com.javartisan.demo

import org.apache.spark.sql.SparkSession

object SparkLocal {

  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("spark test")
      .getOrCreate()

    // Needed for toDF() and the $"col" column syntax used below.
    import spark.implicits._

    val sc = spark.sparkContext

    val a: Int = 1
    val b: Int = 1
    val c: String = "1"
    val d: String = "1"

    // Two single-row datasets whose join keys differ (1 vs 2),
    // so the full outer join below finds no matching keys.
    val arr1 = Array[(Int, Int, String, String)]((a, b, c, d))
    val arr2 = Array[(Int, Int, String, String)]((2, b, c, d))

    val rdd1 = sc.parallelize[(Int, Int, String, String)](arr1)
    val rdd2 = sc.parallelize[(Int, Int, String, String)](arr2)

    val df1 = rdd1.toDF("a", "b", "c", "d")
    val df2 = rdd2.toDF("a1", "b1", "c1", "d1")

    df1.printSchema()
    df2.printSchema()

    // Full outer join: unmatched rows from either side are kept, padded with nulls.
    val full = df1.join(df2, $"a" === $"a1", "full")

    val newFull = full.rdd.map(row => {
      // Each row materializes as GenericRowWithSchema; the println runs
      // lazily when count() below triggers the job.
      println("row class " + row.getClass)
      row
    })

    println(newFull.count())
    full.show(false)
    println(df1.count())
    println(df2.count())

    spark.stop()
  }
}
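
// ------------------------------------------------------------------
// Not part of the original file: a minimal follow-up sketch (the object
// and helper names here are illustrative assumptions). Since the full
// outer join above has no matching keys, every output Row is null-padded
// on one side; because the rows are GenericRowWithSchema, fields can be
// looked up by name and null-checked before extraction.
// ------------------------------------------------------------------
object RowAccessSketch {

  import org.apache.spark.sql.Row

  // Null-safe read of an Int column by name; returns None on the padded side.
  def intField(row: Row, name: String): Option[Int] = {
    val i = row.fieldIndex(name) // works because the Row carries its schema
    if (row.isNullAt(i)) None else Some(row.getInt(i))
  }

  // Example use against the joined DataFrame from SparkLocal:
  //   full.rdd.map(row => (intField(row, "a"), intField(row, "a1"))).collect()
  //   // => e.g. Array((Some(1), None), (None, Some(2)))  (row order may vary)
}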