Normally one client targets a single cluster, but sometimes the same client has to talk to several clusters. In that case the configuration files need to be swapped in dynamically instead of relying on a fixed classpath.
// The configuration file paths can be passed in as program arguments or read from a database
package com.cslc

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession

import scala.collection.JavaConverters._

object Day01 {
  def main(args: Array[String]): Unit = {
    val sparkBuilder = SparkSession.builder

    // Load the target cluster's site files into a Hadoop Configuration.
    // Note: backslashes in Windows paths must be escaped in Scala string literals.
    val conf = new Configuration()
    val core   = new Path("F:\\IdeaWorkspace\\lzm\\Resource\\core-site.xml")
    val hdfs   = new Path("F:\\IdeaWorkspace\\lzm\\Resource\\hdfs-site.xml")
    val hive   = new Path("F:\\IdeaWorkspace\\lzm\\Resource\\hive-site.xml")
    val yarn   = new Path("F:\\IdeaWorkspace\\lzm\\Resource\\yarn-site.xml")
    val mapred = new Path("F:\\IdeaWorkspace\\lzm\\Resource\\mapred-site.xml")
    conf.addResource(hive)
    conf.addResource(core)
    conf.addResource(hdfs)
    conf.addResource(mapred)
    conf.addResource(yarn)

    // Copy every key/value pair from the Hadoop Configuration into the
    // SparkSession builder, so the session is built against that cluster.
    for (c <- conf.iterator().asScala) {
      sparkBuilder.config(c.getKey, c.getValue)
    }

    val spark: SparkSession = sparkBuilder.master("local[2]").getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")

    // Read from the cluster's HDFS to verify the configuration took effect.
    val data = spark.read.parquet("hdfs://cslcdip/home/dip/lzm/sparkdata/users.parquet")
    println(data.schema)
    data.show()
  }
}
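Since the comment above notes the paths can come from program arguments, here is a minimal sketch of that variant. It assumes each cluster's site files live in their own directory passed as args(0); the object name MultiClusterDay01, the directory layout, and both command-line arguments are hypothetical, not from the original code.

package com.cslc

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession

import scala.collection.JavaConverters._

// Hypothetical entry point: args(0) is a directory holding one cluster's
// site files (e.g. conf/clusterA or conf/clusterB), so the same jar can
// target either cluster without recompiling.
object MultiClusterDay01 {
  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "usage: MultiClusterDay01 <conf-dir> <parquet-path>")
    val confDir = new File(args(0))
    require(confDir.isDirectory, s"not a directory: ${args(0)}")

    // Add every *-site.xml found in the directory to the Configuration.
    val conf = new Configuration()
    confDir.listFiles()
      .filter(_.getName.endsWith("-site.xml"))
      .foreach(f => conf.addResource(new Path(f.getAbsolutePath)))

    // Same trick as above: push all resolved keys into the session builder.
    val builder = SparkSession.builder
    for (c <- conf.iterator().asScala) {
      builder.config(c.getKey, c.getValue)
    }

    val spark = builder.master("local[2]").getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    spark.read.parquet(args(1)).show() // args(1): a path on the chosen cluster
  }
}

Switching clusters then only means pointing args(0) at a different configuration directory; the database-backed variant mentioned above would work the same way, just sourcing the key/value pairs from a table instead of XML files.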