计算圆周率
# bin/spark-submit --class org.apache.spark.examples.SparkPi --executor-memory 1G --total-executor-cores 2 ./examples/jars/spark-examples_2.11-2.3.3.jar 100
wordcount
sc.textFile("input").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).collect
输出
res3: Array[(String, Int)] = Array((scala,2), ("",1), (object,1), (is,1), (python,3), (hello,4), (java,3), (go,1), (and,1))
spark 运行在yarn
mv spark-env.sh.template spark-env.sh # 编辑 vim spark-env.sh #修改 YARN_CONF_DIR=/itcatst/hadoop2.8/etc/hadoop #执行命令 测试 bin/spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode client ./examples/jars/spark-examples_2.11-2.3.3.jar 100
启动
bin/spark-shell --master yarn
local模式
bin/spark-submit --class org.wondersgroup.bigdata.spark.WordCount WordCount-jar-with-dependencies.jar
独立的部署模式
mv slaves.template slaves #编辑 vim slaves hadoop01 hadoop02 hadoop03 vim spark-env.sh export JAVA_HOME=/usr/local/jdk1.8.0_221 SPARK_MASTER_HOST=hadoop01 SPARK_MASTER_PORT=7077
计算
bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://hadoop01:7077 --executor-memory 1G --total-executor-cores 2 ./examples/jars/spark-examples_2.11-2.3.3.jar 100
启动spark集群
sbin/start-all.sh