zoukankan      html  css  js  c++  java
  • Spark操作

    ### scala源码
    /* SimpleApp.scala */
    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._
    import org.apache.spark.SparkConf
    import org.apache.log4j.Logger
    import org.apache.log4j.Level
    
    /**
     * Minimal Spark application: counts the lines of a text file that contain
     * the letter "a" and the lines that contain the letter "b", then prints
     * both counts to standard output.
     */
    object SimpleApp {
      /**
       * Application entry point.
       *
       * @param args command-line arguments (not used)
       */
      def main(args: Array[String]): Unit = {
        // stop spark console messaging
        Logger.getLogger("org").setLevel(Level.OFF)
        Logger.getLogger("akka").setLevel(Level.OFF)

        val logFile = "/data/spark/README.md" // Should be some file on your system
        val conf = new SparkConf().setAppName("Simple Application")
        val sc = new SparkContext(conf)
        try {
          // Cached because the same RDD is scanned twice below (once per filter).
          val logData = sc.textFile(logFile, 2).cache()
          val numAs = logData.filter(_.contains("a")).count()
          val numBs = logData.filter(_.contains("b")).count()
          println(s"Lines with a: $numAs, Lines with b: $numBs")
        } finally {
          // Always shut the context down, even when a job fails, so the
          // application releases its resources and exits cleanly.
          sc.stop()
        }
      }
    }
    
    ### sbt打包
    sbt package
    
    ### 将文件put到hdfs
    su - hdfs
    hdfs dfs -mkdir -p /data/spark/
    hdfs dfs -chmod -R 777 /data/spark/
    exit
    hdfs dfs -put /usr/hdp/2.6.0.3-8/spark/README.md /data/spark/README.md
    
    ### 提交spark任务
    spark-submit \
        --class "SimpleApp" \
        --master local[4] \
         /root/_learn/scala/target/scala-2.10/simple-project_2.10-1.0.jar
  • 相关阅读:
    CentOS7.4安装Docker
    责任链模式
    策略模式
    状态模式
    解释器模式
    备忘录模式
    中介者模式
    观察者模式
    迭代器模式
    private、default、protected和public的作用域
  • 原文地址:https://www.cnblogs.com/chenzechao/p/7668998.html
Copyright © 2011-2022 走看看