zoukankan      html  css  js  c++  java
  • spark简单入门

    查看spark版本  spark-submit --version

    查看hadoop版本  hadoop version

    下载spark 对应版本 spark-2.0.2

    下载 IDEA 的 Scala 插件: https://plugins.jetbrains.com/plugin/1347-scala

    package com.bj58

    import org.apache.spark.SparkConf
    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._

    /**
     * Minimal Spark word-count job.
     *
     * Reads the text file named by the first argument, splits every line on a
     * single space, counts how often each token occurs, prints the
     * (token, count) pairs on the driver and saves them to the path named by
     * the second argument.
     */
    //object App extends Application {
    object App {

      /**
       * Entry point.
       *
       * @param args `args(0)` is the input path, `args(1)` is the output path.
       *             With fewer than two arguments a usage message is written to
       *             stderr and the JVM exits with status 1.
       */
      def main(args: Array[String]): Unit = {
        println("Hello World!")

        // Earlier experiment, kept for reference: count the lines that contain
        // "a" and the lines that contain "b" in Spark's README.
        // (logFile should point into your Spark installation directory.)
        //
        // val logFile = "/usr/local/spark/spark-1.3.1-bin-hadoop2.6/README.md"
        // val conf = new SparkConf().setAppName("App")
        // val sc = new SparkContext(conf)
        // val logData = sc.textFile(logFile, 2).cache()
        // val numAs = logData.filter(line => line.contains("a")).count()
        // val numBs = logData.filter(line => line.contains("b")).count()
        // println("Lines with a: %s,Lines with b: %s".format(numAs, numBs))

        if (args.length < 2) {
          System.err.println("Usage: <infile> <outfile>")
          System.exit(1)
        }

        val conf = new SparkConf().setAppName("App")
        val sc = new SparkContext(conf)
        try {
          val line = sc.textFile(args(0))
          val counts = line.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
          // collect() brings every pair back to the driver before printing.
          counts.collect().foreach(println)
          counts.saveAsTextFile(args(1))
        } finally {
          // Stop the context even when the job above throws.
          sc.stop()
        }
      }
    }

    打包并启动jar

    # Submit the packaged job to YARN in cluster mode.
    # Alternative queue:  --queue root.online.hdp_teu_dia
    # --class must be the fully-qualified name of the object that defines main
    # (package com.bj58, object App). Each option line ends with a backslash so
    # the shell reads the whole invocation as one command.
    $sparkbin --class "com.bj58.App" \
      --master yarn \
      --deploy-mode cluster \
      --queue root.offline.normal \
      --name Test \
      --executor-memory 10G \
      --num-executors 20 \
      --executor-cores 2 \
      --driver-memory 10g \
      ./spark.jar ${inputpath} ${Outpath}

    终端输出:

    17/05/09 16:53:37 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: RUNNING)
    17/05/09 16:53:38 INFO yarn.Client main: Application report for application_1491903146022_2119985 (state: FINISHED)
    17/05/09 16:53:38 INFO yarn.Client main:
             client token: N/A
             diagnostics: N/A
             ApplicationMaster host: 10.126.14.136
             ApplicationMaster RPC port: 0
             queue: root.offline.normal
             start time: 1494319993385
             final status: SUCCEEDED
             tracking URL: http://tjtx-81-187.org:9088/proxy/application_1491903146022_2119985/history/application_1491903146022_2119985/1
             user: hdp_teu_dia
    17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Shutdown hook called
    17/05/09 16:53:38 INFO util.ShutdownHookManager Thread-3: Deleting directory /tmp/spark-79598c10-7db4-4ead-9a44-3ce7681c2cee
    done:20170509 16:53:38
    http://tjtx-81-187.org:9088/cluster/apps

  • 相关阅读:
    boost之实用工具
    boost之内存池
    boost之智能指针
    boost之日期date_time
    boost之时间timer
    boost之网络通信
    boost之定时器和io_service
    【Linux 线程】线程同步《一》
    【Linux 线程】常用线程函数复习《四》
    【Linux 线程】常用线程函数复习《三》
  • 原文地址:https://www.cnblogs.com/energy1010/p/6812967.html
Copyright © 2011-2022 走看看