1. Spark on YARN
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

/**
 * Word-count job: splits every line of a text input on single spaces,
 * counts the occurrences of each token and writes the (token, count)
 * pairs to an output directory.
 *
 * No master or application name is configured here, so both have to be
 * supplied by whatever launches the job.
 *
 * Created by zzy on 8/27/15.
 */
object SaprkOnYarn {

  /**
   * Entry point.
   *
   * @param args exactly two entries: `args(0)` is the input path handed to
   *             `textFile`, `args(1)` is the output path handed to
   *             `saveAsTextFile`
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // Report on stderr and exit non-zero so the launcher sees the failure.
      System.err.println("Usage: SaprkOnYarn <input path> <output path>")
      System.exit(1)
    }
    val Array(inputPath, outputPath) = args

    // The no-argument constructor takes its settings from system properties.
    val sc = new SparkContext()
    try {
      // The RDD is consumed by a single action, so it is not cached.
      sc.textFile(inputPath)
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .saveAsTextFile(outputPath)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
2. Spark on standalone
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

/**
 * Word-count job that configures its own master URL, Spark home,
 * application name and executor memory before running.
 *
 * Every line of the text input is split on single spaces, the occurrences
 * of each token are counted and the (token, count) pairs are written to an
 * output directory.
 *
 * Created by zzy on 8/27/15.
 */
object SsdTest {

  /**
   * Entry point.
   *
   * @param args exactly two entries: `args(0)` is the input path handed to
   *             `textFile`, `args(1)` is the output path handed to
   *             `saveAsTextFile`
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // Report on stderr and exit non-zero so the launcher sees the failure.
      System.err.println("Usage: SsdTest <input path> <output path>")
      System.exit(1)
    }
    val Array(inputPath, outputPath) = args

    val conf = new SparkConf()
      .setMaster("spark://192.168.122.213:7077")
      .setSparkHome("/usr/local/spark/spark-1.4.1-bin-hadoop2.6")
      .setAppName("StandaloneSparktest")
      // SPARK_EXECUTOR_MEMORY is an environment variable name; the
      // configuration property for executor memory is spark.executor.memory.
      .set("spark.executor.memory", "1g")

    val sc = new SparkContext(conf)
    try {
      // The RDD is consumed by a single action, so it is not cached.
      sc.textFile(inputPath)
        .flatMap(_.split(" "))
        .map(word => (word, 1))
        .reduceByKey(_ + _)
        .saveAsTextFile(outputPath)
    } finally {
      // Release the context even when the job fails.
      sc.stop()
    }
  }
}
3. pom.xml 文件配置
<!-- Remote repositories. All URLs use https: Maven 3.8.1 and later block
     plain-http repositories by default. -->
<repositories>
  <repository>
    <id>Akka repository</id>
    <url>https://repo.akka.io/releases</url>
  </repository>
  <repository>
    <id>cloudera</id>
    <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
  </repository>
  <repository>
    <id>jboss</id>
    <url>https://repository.jboss.org/nexus/content/groups/public-jboss</url>
  </repository>
  <repository>
    <id>Sonatype snapshots</id>
    <url>https://oss.sonatype.org/content/repositories/snapshots/</url>
  </repository>
</repositories>

<build>
  <!-- NOTE(review): main and test sources share the same directory. -->
  <sourceDirectory>src/</sourceDirectory>
  <testSourceDirectory>src/</testSourceDirectory>
  <plugins>
    <!-- Compiles the Scala sources. The plugin version is pinned so the
         build does not depend on whichever release Maven resolves. -->
    <plugin>
      <groupId>org.scala-tools</groupId>
      <artifactId>maven-scala-plugin</artifactId>
      <version>2.15.2</version>
      <executions>
        <execution>
          <goals>
            <goal>compile</goal>
            <goal>testCompile</goal>
          </goals>
        </execution>
      </executions>
      <configuration>
        <scalaVersion>2.10.3</scalaVersion>
      </configuration>
    </plugin>
    <!-- Builds a single shaded jar during the package phase. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
      <version>2.2</version>
      <executions>
        <execution>
          <phase>package</phase>
          <goals>
            <goal>shade</goal>
          </goals>
          <configuration>
            <filters>
              <!-- Leave the signature files of the bundled jars out of the
                   shaded jar. -->
              <filter>
                <artifact>*:*</artifact>
                <excludes>
                  <exclude>META-INF/*.SF</exclude>
                  <exclude>META-INF/*.DSA</exclude>
                  <exclude>META-INF/*.RSA</exclude>
                </excludes>
              </filter>
            </filters>
            <transformers>
              <!-- Concatenate every reference.conf instead of keeping only
                   one of them. -->
              <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                <resource>reference.conf</resource>
              </transformer>
              <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
              </transformer>
            </transformers>
          </configuration>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>

<!-- NOTE(review): the Spark artifacts below mix four versions
     (1.1.0-cdh5.2.0-SNAPSHOT, 1.2.0-cdh5.3.2, 1.2.0-cdh5.3.3 and
     1.3.0-cdh5.4.0). Consider aligning them on one release after
     confirming that every artifact is published for it. -->
<dependencies>
  <!--spark-->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.5.0-cdh5.3.0</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-tools_2.10</artifactId>
    <version>1.1.0-cdh5.2.0-SNAPSHOT</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-assembly_2.10</artifactId>
    <version>1.2.0-cdh5.3.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-repl_2.10</artifactId>
    <version>1.2.0-cdh5.3.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-catalyst_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-network-common_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>

  <!--spark on yarn-->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-yarn_2.10</artifactId>
    <version>1.2.0-cdh5.3.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-network-yarn_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>

  <!--spark-sql-->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive-thriftserver_2.10</artifactId>
    <version>1.2.0-cdh5.3.3</version>
  </dependency>

  <!--spark-streaming-->
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-flume_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-flume-sink_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka_2.10</artifactId>
    <version>1.2.0-cdh5.3.2</version>
  </dependency>
  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-assembly_2.10</artifactId>
    <version>1.3.0-cdh5.4.0</version>
  </dependency>
</dependencies>
仅此献给努力的你我!