import org.apache.spark.{SparkConf, SparkContext}
import scala.util.parsing.json.JSON
/**
* Created with IntelliJ IDEA.
* User: @别慌
* Date: 2019-11-24
* Time: 18:04
* Description: parse a local JSON file with Spark and print each record
*/
object hello {
  def main(args: Array[String]): Unit = {
    // Create the Spark context (local mode for this example)
    val conf = new SparkConf()
      .setAppName("WordFreq_Spark")
      .setMaster("local")
    val sc = new SparkContext(conf)
    // Backslashes in a Windows path must be escaped inside a Scala string literal
    val lines = sc.textFile("D:\\杂七杂八\\瞎画\\test.json")
    // JSON.parseFull returns Some(value) on success and None on a parse failure
    val parsed = lines.map(JSON.parseFull)
    println(parsed.collect().mkString(" "))
    parsed.foreach {
      case Some(record) => println(record)
      case None         => println("invalid JSON line")
    }
    sc.stop()
    // val rdd = sc.textFile("hdfs://192.168.199.120:9000/words.txt")
    // val wc = rdd.flatMap(_.split(" "))
    //   .map(s => (s, 1))
    //   .reduceByKey((a, b) => a + b)
    //   .sortBy(_._2, true)
    //
    // // wc.saveAsTextFile("D:\\")
    //
    // for (arg <- wc.collect())
    //   print(arg + " ")
    // println()
    // wc.saveAsTextFile("hdfs://192.168.199.120:9000/tai")
    // sc.stop()
  }
}
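Printing the raw Option values only shows whether each line parsed. JSON.parseFull returns Option[Any], so reading individual fields means pattern-matching the result down to a Map[String, Any]. A minimal standalone sketch, assuming a hypothetical record with "name" and "age" fields (not the actual contents of test.json):

import scala.util.parsing.json.JSON

object ParseField {
  def main(args: Array[String]): Unit = {
    // Hypothetical one-line JSON record, for illustration only
    val line = """{"name":"tom","age":12}"""
    JSON.parseFull(line) match {
      // A JSON object parses to a Map[String, Any]
      case Some(record: Map[String, Any] @unchecked) =>
        println(record.get("name")) // Some(tom)
        println(record.get("age"))  // Some(12.0): parseFull reads numbers as Double
      case _ =>
        println("invalid JSON line")
    }
  }
}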

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.syllabus</groupId>
    <artifactId>chapter-3</artifactId>
    <version>1.0-SNAPSHOT</version>
    <!-- Extra repository from which JAR dependencies can be downloaded -->
    <repositories>
        <repository>
            <id>1</id>
            <name>MAVEN-CENTRE</name>
            <url>http://central.maven.org/maven2/</url>
        </repository>
    </repositories>
    <!-- Add the required dependencies -->
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.4</version>
            <!-- Exclude the Hadoop and Scala versions bundled with Spark so our own versions can be used -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-client</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.scala-lang</groupId>
                    <artifactId>scala-library</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Add our own Hadoop version -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
    </dependencies>
    <!-- Plugin that compiles the Scala code -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <includes>
                                <include>**/*.scala</include>
                            </includes>
                        </configuration>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
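With the pom above, the application jar is produced by the standard Maven build (the maven-scala-plugin compiles the Scala sources during the compile phase):

mvn clean package

By default the jar lands in target/ as chapter-3-1.0-SNAPSHOT.jar; the submit command below assumes it has been renamed or copied to comspark.jar.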
When submitting the job to the cluster, the shell command is:
./bin/spark-submit --class hello --master spark://192.168.199.120:7077 --executor-memory 1G --total-executor-cores 3 comspark.jar
Here the Spark master port is 7077
and the HDFS port is 9000.
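Note that the code above hard-codes setMaster("local"), which takes precedence over the --master flag passed to spark-submit. A sketch of a cluster-friendly variant (master left to spark-submit, input read from HDFS on port 9000, following the commented-out word-count code above):

import org.apache.spark.{SparkConf, SparkContext}

object hello {
  def main(args: Array[String]): Unit = {
    // Leave the master unset so spark-submit's --master flag takes effect
    val conf = new SparkConf().setAppName("WordFreq_Spark")
    val sc = new SparkContext(conf)
    // Read from HDFS rather than a local Windows path
    val rdd = sc.textFile("hdfs://192.168.199.120:9000/words.txt")
    val wc = rdd.flatMap(_.split(" "))
      .map(s => (s, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = true)
    wc.saveAsTextFile("hdfs://192.168.199.120:9000/tai")
    sc.stop()
  }
}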