zoukankan      html  css  js  c++  java
  • spark集群安装部署

    1.在官网下载spark安装包

    # wget https://archive.apache.org/dist/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
    

    2.解压

    # tar -zxvf spark-2.4.8-bin-hadoop2.7.tgz -C /home/hadoop/app
    

    3.修改配置

    # cd  /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/conf/
    # cp spark-env.sh.template spark-env.sh
    # cp slaves.template slaves
    # cp spark-defaults.conf.template spark-defaults.conf
    # vim spark-env.sh
    
    添加
    export HADOOP_CONF_DIR=/home/hadoop/app/hadoop-2.7.5/etc/hadoop
    export HADOOP_HOME=/home/hadoop/app/hadoop-2.7.5
    export JAVA_HOME=/opt/jdk1.8.0_202
    export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    export SCALA_HOME=/home/hadoop/app/scala-2.11.8
    export SPARK_LOG_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs
    export SPARK_PID_DIR=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7/logs/pid
    
    修改spark-defaults.conf
    # vim spark-defaults.conf
    
    添加
    spark.eventLog.enabled                             true
    spark.eventLog.dir                                 hdfs://ns1/spark/eventLog
    spark.rdd.compress                                 true
    spark.driver.memory                                4G
    spark.yarn.historyServer.address                   dba-01:18080
    spark.history.ui.port                              18080
    spark.history.fs.logDirectory                      hdfs://ns1/spark/eventLog
    spark.yarn.maxAppAttempts                          4
    spark.yarn.stagingDir                              hdfs://ns1/spark/stagingDir
    
    spark.yarn.singleContainerPerNode                  false
    spark.yarn.allocator.waitTime                      60s
    spark.logConf                                      true
    spark.ui.killEnabled                               false
    spark.streaming.backpressure.initialRate           1000
    spark.streaming.kafka.maxRatePerPartition         10000
    spark.streaming.blockInterval                     1000
    spark.streaming.backpressure.enabled              true
    spark.streaming.receiver.maxRate                  10000
    spark.streaming.kafka.maxRetries                  10
    spark.default.parallelism                         64
    spark.streaming.dynamicAllocation.enabled         false
    spark.streaming.dynamicAllocation.minExecutors    1
    spark.streaming.dynamicAllocation.maxExecutors    50
    spark.shuffle.service.enabled             true
    spark.dynamicAllocation.enabled           true
    spark.dynamicAllocation.minExecutors      1
    spark.dynamicAllocation.maxExecutors      20
    spark.driver.maxResultSize  4g
    
    修改slaves
    # vim slaves
    添加
    dba-01
    dba-02
    dba-03
    

    4.创建目录

    # cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    # mkdir -p logs/pid
    # hdfs dfs -mkdir -p /spark/stagingDir
    # hdfs dfs -mkdir -p /spark/eventLog
    

    5.传输到其他节点

    # cd /home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-02:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-03:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-04:/home/hadoop/app
    # scp -r spark-2.4.8-bin-hadoop2.7 hadoop@dba-05:/home/hadoop/app
    

    6.添加spark环境变量

    # vim /etc/profile
    export SPARK_HOME=/home/hadoop/app/spark-2.4.8-bin-hadoop2.7
    export PATH=$SPARK_HOME/bin:$PATH
    
    # source /etc/profile
    

    7.任意一个节点启动spark集群

    # cd /home/hadoop/app/spark-2.4.8-bin-hadoop2.7/sbin
    # ./start-all.sh
    
  • 相关阅读:
    《区块链100问》第51集:区块链资产有匿名性
    《区块链100问》第52集:区块链资产能去中心化记账
    《区块链100问》第53集:区块链资产不可复制
    day 5 模块导入、常用模块os shutil sys commands subprocess hashlib json pickle zipfile traceback random datetime pathlib
    apache中配置php支持模块模式、cgi模式和fastcgi模式的实验
    day3--集合、文件操作、字符编码与转换、函数(递归,lambda,filter,map)、字典排序
    练习题目 :if for while else range、xrange、zip
    解决mySQL占用内存超大问题
    my.cnf重要配置参数说明
    chattr与lsattr命令详解
  • 原文地址:https://www.cnblogs.com/slqdba/p/15716631.html
Copyright © 2011-2022 走看看