  1. Environment Setup

    I. Basic Linux Environment Preparation

    OS: CentOS 6.5, three machines
    1. Install the operating system

    2. Disable the firewall and SELinux

    3. Set the hostname on each machine (spark1, spark2, spark3) and update the hosts file

    4. Configure passwordless SSH trust between the nodes

    5. Install JDK 1.7


    II. Hadoop Installation

    1. Extract and install

    Version: hadoop-2.4.1
    
    [root@s1 src]# pwd
    /usr/local/src
    
    [root@s1 src]# ls
    apache-hive-0.13.1-bin.tar.gz  hadoop-2.4.1.tar.gz        kafka_2.9.2-0.8.1.tgz  slf4j-1.7.6.zip                zookeeper-3.4.5.tar.gz
    CentOS6-Base-163.repo          jdk-7u80-linux-x64.tar.gz  scala-2.11.4.tgz       spark-1.3.0-bin-hadoop2.4.tgz
    
    [root@s1 src]# tar zxf hadoop-2.4.1.tar.gz -C /usr/local/
    
    [root@s1 local]# mv hadoop-2.4.1/ hadoop
    
    
    ##Add environment variables
    vim /etc/profile
    export JAVA_HOME=/usr/local/jdk1.7
    export HADOOP_HOME=/usr/local/hadoop
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    
    [root@s1 local]# source /etc/profile
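
    ##A quick sanity check that the new variables took effect (a minimal sketch; the exact version output will vary with your build):
    [root@s1 local]# java -version
    [root@s1 local]# hadoop version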


    2. Edit the configuration files (under /usr/local/hadoop/etc/hadoop/)

    ##core-site.xml
    <property>
      <name>fs.default.name</name>
      <value>hdfs://spark1:9000</value>
    </property>
    
    
    ##hdfs-site.xml
    <property>
      <name>dfs.name.dir</name>
      <value>/usr/local/data/namenode</value>
    </property>
    <property>
      <name>dfs.data.dir</name>
      <value>/usr/local/data/datanode</value>
    </property>
    <property>
      <name>dfs.tmp.dir</name>
      <value>/usr/local/data/tmp</value>
    </property>
    <property>
      <name>dfs.replication</name>
      <value>3</value>
    </property>
    
    [root@spark1 ~]# mkdir /usr/local/data
    [root@spark2 ~]# mkdir /usr/local/data
    [root@spark3 ~]# mkdir /usr/local/data
    
    
    ##mapred-site.xml
    <property>  
      <name>mapreduce.framework.name</name>  
      <value>yarn</value>
    </property>
    
    
    ##yarn-site.xml
    <property>
      <name>yarn.resourcemanager.hostname</name>
      <value>spark1</value>
    </property>
    <property>
      <name>yarn.nodemanager.aux-services</name>
      <value>mapreduce_shuffle</value>
    </property>
    
    
    ##slaves
    spark1
    spark2
    spark3

    It is also a good idea to set JAVA_HOME explicitly in hadoop-env.sh, mapred-env.sh, and yarn-env.sh; a sketch follows below.
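
    ##For example (a sketch; the JDK path follows the JAVA_HOME exported in /etc/profile above):
    vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
    export JAVA_HOME=/usr/local/jdk1.7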


    3. Distribute hadoop

    [root@spark1 local]# scp -r hadoop spark2:/usr/local/
    
    [root@spark1 local]# scp -r hadoop spark3:/usr/local/
    
    
    
    ##Distribute /etc/profile
    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
    
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/


    4. Start the cluster
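
    ##On a brand-new cluster the NameNode has to be formatted once before HDFS is started for the first time (a step not shown in the original notes; run only on spark1, and only once):
    [root@spark1 ~]# hdfs namenode -format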

    ##Start HDFS
    [root@spark1 ~]# start-dfs.sh 
    
    
    spark1: NameNode, DataNode, SecondaryNameNode
    spark2: DataNode
    spark3: DataNode
    
    
    ##Open port 50070 in a browser
    
    
    
    ##Start YARN
    [root@spark1 hadoop]# start-yarn.sh
    
    spark1: ResourceManager, NodeManager
    spark2: NodeManager
    spark3: NodeManager
    
    ##Open port 8088 in a browser
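
    ##Optionally run the bundled example job as a YARN smoke test (a sketch; the jar path assumes the stock hadoop-2.4.1 binary layout):
    [root@spark1 ~]# hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.4.1.jar pi 2 10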


    III. Hive Setup

    1. Install

    [root@spark1 src]# tar zxf apache-hive-0.13.1-bin.tar.gz -C /usr/local/
    
    [root@spark1 local]# mv apache-hive-0.13.1-bin/ hive
    
    
    ##Update the environment variables
    [root@spark1 local]# vim /etc/profile
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
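
    ##Reload the profile so the new HIVE_HOME takes effect in the current shell:
    [root@spark1 local]# source /etc/profile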


    2. Install mysql

    ##mysql stores hive's metadata

    ##Install mysql on spark1
    [root@spark1 ~]# yum install -y mysql-server
    
    [root@spark1 ~]# service mysqld start
    
    [root@spark1 ~]# chkconfig mysqld on
    
    
    ##Install the mysql connector
    yum install -y mysql-connector-java
    
    ##Copy the mysql connector jar into hive's lib directory
    [root@spark1 ~]# cp /usr/share/java/mysql-connector-java-5.1.17.jar /usr/local/hive/lib/
    
    
    ##Create the hive metadata database in mysql and grant privileges to the hive user
    [root@spark1 ~]# mysql 
    Welcome to the MySQL monitor.  Commands end with ; or \g.
    Your MySQL connection id is 2
    Server version: 5.1.73 Source distribution
    
    Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
    
    Oracle is a registered trademark of Oracle Corporation and/or its
    affiliates. Other names may be trademarks of their respective
    owners.
    
    Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
    
    mysql> create database if not exists hive_metadata;
    Query OK, 1 row affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'%' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'localhost' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> grant all privileges on hive_metadata.* to 'hive'@'spark1' identified by 'hive';
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> flush privileges;
    Query OK, 0 rows affected (0.00 sec)
    
    mysql> use hive_metadata;
    Database changed
    mysql> show tables;
    Empty set (0.00 sec)


    3. Configure hive

    ##hive-site.xml
    <configuration>
    
    <property>
      <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://spark1:3306/hive_metadata?createDatabaseIfNotExist=true</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
    </property>
    <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>hive</value>
    </property>
    <property>
      <name>hive.metastore.warehouse.dir</name>
      <value>/user/hive/warehouse</value>
    </property>
    
    </configuration>
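
    ##If conf/ does not yet contain a hive-site.xml, create it next to the other config files and fill it with just the properties above (a sketch; the hive-default.xml.template shipped with 0.13.1 can serve as a reference):
    [root@spark1 ~]# cd /usr/local/hive/conf/
    [root@spark1 conf]# vi hive-site.xml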
    
    
    
    ##hive-env.sh
    mv hive-env.sh.template hive-env.sh
    
    
    
    ##hive-config.sh
    vi /usr/local/hive/bin/hive-config.sh
    export JAVA_HOME=/usr/java/latest
    export HIVE_HOME=/usr/local/hive
    export HADOOP_HOME=/usr/local/hadoop
    
    
    
    ##Verify
    [root@spark1 bin]# hive
    
    Logging initialized using configuration in jar:file:/usr/local/hive/lib/hive-common-0.13.1.jar!/hive-log4j.properties
    hive> create table t1(id int);
    OK
    Time taken: 0.645 seconds
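
    ##Optionally confirm that the metadata landed in MySQL (a sketch; TBLS/TBL_NAME are part of the standard metastore schema):
    mysql> use hive_metadata;
    mysql> select TBL_NAME from TBLS;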


    IV. ZooKeeper Setup

    1. Set up

    [root@spark1 src]# tar zxf zookeeper-3.4.5.tar.gz -C /usr/local/
    
    [root@spark1 local]# mv zookeeper-3.4.5/ zk
    
    
    #Configure environment variables
    vim /etc/profile
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
    
    
    #Create the zk data directory
    [root@spark1 conf]# mkdir /usr/local/zk/data
    
    
    #Edit the configuration file
    [root@spark1 conf]# mv zoo_sample.cfg zoo.cfg
    [root@spark1 conf]# vim zoo.cfg 
    dataDir=/usr/local/zk/data
    server.0=spark1:2888:3888       
    server.1=spark2:2888:3888
    server.2=spark3:2888:3888
    
    
    
    ##Set the zk node id (myid)
    [root@spark1 conf]# cd /usr/local/zk/data/
    
    [root@spark1 data]# vim myid
    0


    2. Distribute

    [root@spark1 local]# scp -r /usr/local/zk spark2:/usr/local/
    
    [root@spark1 local]# scp -r /usr/local/zk spark3:/usr/local/
    
    
    After distribution, the only difference is that the node ids on spark2 and spark3 must be set to 1 and 2 respectively (#vim /usr/local/zk/data/myid), for example as in the sketch below.
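
    ##For example, from spark1 (a sketch; relies on the SSH trust configured earlier):
    [root@spark1 local]# ssh spark2 "echo 1 > /usr/local/zk/data/myid"
    [root@spark1 local]# ssh spark3 "echo 2 > /usr/local/zk/data/myid"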

    ##Distribute /etc/profile

    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
      
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/

    source /etc/profile        #on spark2 and spark3


    3. Start

    1. On each of the three machines, run: zkServer.sh start

    2. Check the ZooKeeper status on each: zkServer.sh status (one node should report leader, the other two follower)
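
    ##Optionally connect with the bundled CLI to confirm the ensemble is serving requests (a sketch; run ls / at the prompt once connected):
    [root@spark1 ~]# zkCli.sh -server spark1:2181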


    V. Kafka Setup

    1. Install scala

    ##Install scala
    [root@spark1 src]# tar -zxvf scala-2.11.4.tgz -C /usr/local/
    
    [root@spark1 local]# mv scala-2.11.4 scala
    
    
    
    ##Update the environment variables
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export SCALA_HOME=/usr/local/scala
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
    
    
    
    ##Distribute scala
    scp -r scala spark2:/usr/local/
    scp -r scala spark3:/usr/local/
    
    [root@spark1 local]# scp -r /etc/profile spark2:/etc/
    [root@spark1 local]# scp -r /etc/profile spark3:/etc/
    
    On all three nodes: source /etc/profile
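
    ##A quick check that scala is on the PATH everywhere (a sketch):
    scala -version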


    2. Set up kafka

    [root@spark1 src]# tar zxf kafka_2.9.2-0.8.1.tgz -C /usr/local/
    
    [root@spark1 local]# mv kafka_2.9.2-0.8.1 kafka
    
    
    ##Configure kafka
    vi /usr/local/kafka/config/server.properties
    broker.id            #must be unique and increase per broker (0, 1, 2, 3, 4)

    zookeeper.connect=192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181            #point to the zookeeper ensemble
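
    ##For example, on spark1 the relevant lines end up as follows (a sketch; the IPs are the ones used throughout these notes):
    broker.id=0
    zookeeper.connect=192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181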
    
    
    
    ##Install slf4j
    unzip slf4j-1.7.6.zip
    
    [root@spark1 src]# cp slf4j-1.7.6/slf4j-nop-1.7.6.jar /usr/local/kafka/libs/
    
    
    
    
    ##Distribute kafka
    Copy kafka to spark2 and spark3 with scp;
    the only difference is broker.id in server.properties, which must be set to 1 and 2 respectively (see the sketch below).
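
    ##For example (a sketch; the sed pattern assumes broker.id=0 on spark1 as above):
    [root@spark1 local]# scp -r /usr/local/kafka spark2:/usr/local/
    [root@spark1 local]# scp -r /usr/local/kafka spark3:/usr/local/
    [root@spark1 local]# ssh spark2 "sed -i 's/^broker.id=0/broker.id=1/' /usr/local/kafka/config/server.properties"
    [root@spark1 local]# ssh spark3 "sed -i 's/^broker.id=0/broker.id=2/' /usr/local/kafka/config/server.properties"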


    3. Start kafka

    ###To fix the kafka "Unrecognized VM option 'UseCompressedOops'" error, remove -XX:+UseCompressedOops from the JVM options:
    vi bin/kafka-run-class.sh
    if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
      KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true"
    fi
    
    
    ##Distribute kafka-run-class.sh
    [root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark2:/usr/local/kafka/bin/
    
    [root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark3:/usr/local/kafka/bin/
    
    
    
    ##Start kafka; run this on all three nodes
    cd /usr/local/kafka/        #required, start from the kafka directory
    
    [root@spark3 kafka]# nohup bin/kafka-server-start.sh config/server.properties &
    
    jps
    
    
    
    
    ##Test the kafka cluster from spark1; open two terminal windows
    //window 1
    cd /usr/local/kafka/        #required
    
    [root@spark1 kafka]# bin/kafka-topics.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --replication-factor 1 --partitions 1 --create
    Created topic "TestTopic".
    
    [root@spark1 kafka]# bin/kafka-console-producer.sh --broker-list 192.168.1.135:9092,192.168.1.136:9092,192.168.1.137:9092 --topic TestTopic
    hello kafka        #send a message here
    
    
    //window 2
    cd /usr/local/kafka/        #required
    
    [root@spark1 kafka]# bin/kafka-console-consumer.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --from-beginning
    hello kafka        #the message arrives here


    VI. Spark Setup

    1. Install

    [root@spark1 src]# tar zxf spark-1.3.0-bin-hadoop2.4.tgz -C /usr/local/
    
    [root@spark1 local]# mv spark-1.3.0-bin-hadoop2.4 spark
    
    
    ##Set the spark environment variables
    [root@spark1 local]# vim /etc/profile
    ##path
    export JAVA_HOME=/usr/java/latest
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export ZOOKEEPER_HOME=/usr/local/zk
    export SCALA_HOME=/usr/local/scala
    export SPARK_HOME=/usr/local/spark
    export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
    export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin:$SPARK_HOME/bin


    2. Configure

    ##Edit spark-env.sh
    [root@spark1 ~]# cd /usr/local/spark/conf/
    
    [root@spark1 conf]# cp spark-env.sh.template spark-env.sh
    
    vim  spark-env.sh
    export JAVA_HOME=/usr/java/latest
    export SCALA_HOME=/usr/local/scala
    export SPARK_MASTER_IP=192.168.1.135
    export SPARK_WORKER_MEMORY=1g
    export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
    
    
    
    ##slaves
    [root@spark1 conf]# mv slaves.template slaves
    
    vim slaves
    #spark1        #not running a worker on spark1 here
    spark2
    spark3


    3. Distribute

    ##Distribute spark
    Copy spark and /etc/profile to spark2 and spark3 with scp, then source the profile on each node:
    
    scp -r spark spark2:/usr/local/
    scp -r spark spark3:/usr/local/
    
    scp -r /etc/profile spark2:/etc/
    scp -r /etc/profile spark3:/etc/
    source /etc/profile


    4. Start

    ##spark1
    [root@spark1 local]# cd /usr/local/spark/sbin/
    
    [root@spark1 sbin]# ./start-all.sh
    
    
    
    ##jps
    [root@spark1 sbin]# jps
    4615 Jps
    3718 QuorumPeerMain
    1664 SecondaryNameNode
    4052 Kafka
    4450 Master
    1397 NameNode
    1879 ResourceManager
    1976 NodeManager
    1514 DataNode
    
    
    [root@spark2 kafka]# jps
    1374 NodeManager
    2249 Jps
    1988 Kafka
    2130 Worker
    1263 DataNode
    1774 QuorumPeerMain
    
    
    [root@spark3 kafka]# jps
    1265 DataNode
    2014 Kafka
    1377 NodeManager
    2155 Worker
    1791 QuorumPeerMain
    2274 Jps
    
    
    ##Open ip:8080 in a browser
    
    
    ##Enter the spark shell
    [root@spark1 sbin]# spark-shell
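
    ##A quick smoke test at the scala> prompt (a sketch; 1 to 1000 sums to 500500):
    scala> sc.parallelize(1 to 1000).reduce(_ + _)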