pom.xml
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.47</version>
</dependency>
<dependency>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-xml_2.11</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
Scala 代码
// Entry point: build (or reuse) a local SparkSession with 6 worker threads.
// Fix: use the idiomatic `SparkSession.builder()` factory instead of
// `new SparkSession.Builder()`, which bypasses the intended public API.
val spark = SparkSession.builder()
  .master("local[6]")
  .appName("kuduhomework")
  .getOrCreate()

// Reduce log noise: only WARN and above are printed.
spark.sparkContext.setLogLevel("WARN")

// Brings in toDF/$-syntax etc.; kept for downstream use of this snippet.
import spark.implicits._

// Load the MySQL table `person` into a DataFrame via the JDBC data source.
// NOTE(review): credentials and the host are hard-coded — in real code move
// them to configuration instead of the source file.
val df = spark.read
  .format("jdbc")
  .option("url", "jdbc:mysql://192.168.100.100:3306/test")
  .option("dbtable", "person")
  .option("user", "root")
  .option("password", "123456")
  .load()

// Register the DataFrame as a temporary view so it can be queried with SQL.
df.createOrReplaceTempView("person")

// Run a SQL query against the view and print the result to stdout.
spark.sql("select * from person").show()
Spark SQL 是 Spark 提供的支持 SQL 操作的 API。
它用于处理结构化数据:可以获取数据的 schema 信息,并像操作数据库表一样用 SQL 语句查询数据。