I. Basic Linux Environment Setup
OS: CentOS 6.5, three machines
1. Install the OS
2. Disable the firewall and SELinux
3. Set the hostname on each node and update /etc/hosts
4. Configure passwordless SSH trust between the nodes
5. Install JDK 1.7
(a minimal sketch of steps 2-4 is shown after this list)
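A minimal sketch of steps 2-4 on one node, assuming the spark1/spark2/spark3 hostnames and the 192.168.1.135-137 addresses used later in this guide; adjust to your own network:
service iptables stop && chkconfig iptables off          # firewall off now and at boot
setenforce 0                                             # SELinux permissive for the current boot
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
cat >> /etc/hosts <<EOF
192.168.1.135 spark1
192.168.1.136 spark2
192.168.1.137 spark3
EOF
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa                 # then copy the public key to every node
ssh-copy-id root@spark2 && ssh-copy-id root@spark3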
II. Hadoop Installation
1. Extract and install
Version: Hadoop 2.4.1
[root@s1 src]# pwd
/usr/local/src
[root@s1 src]# ls
apache-hive-0.13.1-bin.tar.gz hadoop-2.4.1.tar.gz kafka_2.9.2-0.8.1.tgz slf4j-1.7.6.zip zookeeper-3.4.5.tar.gz
CentOS6-Base-163.repo jdk-7u80-linux-x64.tar.gz scala-2.11.4.tgz spark-1.3.0-bin-hadoop2.4.tgz
[root@s1 src]# tar zxf hadoop-2.4.1.tar.gz -C /usr/local/
[root@s1 local]# mv hadoop-2.4.1/ hadoop
## Add environment variables
vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.7
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@s1 local]# source /etc/profile
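A quick sanity check that the new variables took effect (the reported versions should match the tarballs above):
java -version      # java version "1.7.0_80"
hadoop version     # Hadoop 2.4.1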
2. Edit the configuration files
## core-site.xml
<property>
  <name>fs.default.name</name>
  <value>hdfs://spark1:9000</value>
</property>
## hdfs-site.xml
<property>
  <name>dfs.name.dir</name>
  <value>/usr/local/data/namenode</value>
</property>
<property>
  <name>dfs.data.dir</name>
  <value>/usr/local/data/datanode</value>
</property>
<property>
  <name>dfs.tmp.dir</name>
  <value>/usr/local/data/tmp</value>
</property>
<property>
  <name>dfs.replication</name>
  <value>3</value>
</property>
## create the data directory on all three nodes
[root@spark1 ~]# mkdir /usr/local/data
[root@spark2 ~]# mkdir /usr/local/data
[root@spark3 ~]# mkdir /usr/local/data
## mapred-site.xml
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
## yarn-site.xml
<property>
  <name>yarn.resourcemanager.hostname</name>
  <value>spark1</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
## slaves
spark1
spark2
spark3
It is also a good idea to set JAVA_HOME explicitly in hadoop-env.sh, mapred-env.sh and yarn-env.sh.
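A minimal sketch of that edit, reusing the JAVA_HOME value from /etc/profile above (the scripts live under $HADOOP_HOME/etc/hadoop):
cd /usr/local/hadoop/etc/hadoop
# in hadoop-env.sh, mapred-env.sh and yarn-env.sh, set:
export JAVA_HOME=/usr/local/jdk1.7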
3. Distribute hadoop
[root@spark1 local]# scp -r hadoop spark2:/usr/local/
[root@spark1 local]# scp -r hadoop spark3:/usr/local/
## Distribute /etc/profile
[root@spark1 local]# scp -r /etc/profile spark2:/etc/
[root@spark1 local]# scp -r /etc/profile spark3:/etc/
4. Start the cluster
## Start HDFS
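On a brand-new cluster the NameNode normally has to be formatted once before the first start; a minimal sketch (run on spark1 only, the first time only):
hdfs namenode -format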
[root@spark1 ~]# start-dfs.sh
spark1: NameNode, DataNode, SecondaryNameNode
spark2: DataNode
spark3: DataNode
## Browse to port 50070 to check the NameNode web UI
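A command-line cross-check of the same thing (a sketch; exact report wording varies by version):
hdfs dfsadmin -report    # should list three live DataNodes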
## Start YARN
[root@spark1 hadoop]# start-yarn.sh
spark1: ResourceManager, NodeManager
spark2: NodeManager
spark3: NodeManager
## Browse to port 8088 to check the ResourceManager web UI
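The same check from the shell (a sketch):
yarn node -list    # should show three NodeManagers in RUNNING state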
III. Hive Setup
1. Install
[root@spark1 src]# tar zxf apache-hive-0.13.1-bin.tar.gz -C /usr/local/
[root@spark1 local]# mv apache-hive-0.13.1-bin/ hive
## Update environment variables
[root@spark1 local]# vim /etc/profile
export JAVA_HOME=/usr/java/latest
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin
2. Install MySQL
## MySQL stores Hive's metadata
## Install MySQL on spark1
[root@spark1 ~]# yum install -y mysql-server
[root@spark1 ~]# service mysqld start
[root@spark1 ~]# chkconfig mysqld on
## Install the MySQL connector
yum install -y mysql-connector-java
## Copy the MySQL connector into Hive's lib directory
[root@spark1 ~]# cp /usr/share/java/mysql-connector-java-5.1.17.jar /usr/local/hive/lib/
## Create the Hive metadata database in MySQL and grant privileges to the hive user
[root@spark1 ~]# mysql
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 2
Server version: 5.1.73 Source distribution
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> create database if not exists hive_metadata;
Query OK, 1 row affected (0.00 sec)
mysql> grant all privileges on hive_metadata.* to 'hive'@'%' identified by 'hive';
Query OK, 0 rows affected (0.00 sec)
mysql> grant all privileges on hive_metadata.* to 'hive'@'localhost' identified by 'hive';
Query OK, 0 rows affected (0.00 sec)
mysql> grant all privileges on hive_metadata.* to 'hive'@'spark1' identified by 'hive';
Query OK, 0 rows affected (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
mysql> use hive_metadata;
Database changed
mysql> show tables;
Empty set (0.00 sec)
3. Configure Hive
## hive-site.xml
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://spark1:3306/hive_metadata?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
</configuration>
## Rename the env template
mv hive-env.sh.template hive-env.sh
## Edit hive-config.sh
vi /usr/local/hive/bin/hive-config.sh
export JAVA_HOME=/usr/java/latest
export HIVE_HOME=/usr/local/hive
export HADOOP_HOME=/usr/local/hadoop
## Verify
[root@spark1 bin]# hive
Logging initialized using configuration in jar:file:/usr/local/hive/lib/hive-common-0.13.1.jar!/hive-log4j.properties
hive> create table t1(id int);
OK
Time taken: 0.645 seconds
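To confirm that the metadata really landed in MySQL, the new table should now be visible in Hive's internal metastore tables (a sketch; TBLS and TBL_NAME are Hive's own metastore schema names):
mysql> use hive_metadata;
mysql> select TBL_NAME from TBLS;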
IV. ZooKeeper Setup
1. Install
[root@spark1 src]# tar zxf zookeeper-3.4.5.tar.gz -C /usr/local/
[root@spark1 local]# mv zookeeper-3.4.5/ zk
# Configure environment variables
vim /etc/profile
##path
export JAVA_HOME=/usr/java/latest
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin
# Create the zk data directory
[root@spark1 conf]# mkdir /usr/local/zk/data
# Edit the configuration file
[root@spark1 conf]# mv zoo_sample.cfg zoo.cfg
[root@spark1 conf]# vim zoo.cfg
dataDir=/usr/local/zk/data
server.0=spark1:2888:3888
server.1=spark2:2888:3888
server.2=spark3:2888:3888
## Set the zk node id (myid)
[root@spark1 conf]# cd /usr/local/zk/data/
[root@spark1 data]# vim myid
0
2. Distribute
[root@spark1 local]# scp -r /usr/local/zk spark2:/usr/local/
[root@spark1 local]# scp -r /usr/local/zk spark3:/usr/local/
After distribution, the only difference is that the node ids on spark2 and spark3 must be set to 1 and 2 respectively, by editing /usr/local/zk/data/myid on each node (see the sketch below).
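A minimal sketch of setting those ids remotely from spark1:
ssh spark2 'echo 1 > /usr/local/zk/data/myid'
ssh spark3 'echo 2 > /usr/local/zk/data/myid'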
## Distribute /etc/profile
[root@spark1 local]# scp -r /etc/profile spark2:/etc/
[root@spark1 local]# scp -r /etc/profile spark3:/etc/
source /etc/profile
3. Start
1. On each of the three machines, run: zkServer.sh start
2. Check the ZooKeeper status: zkServer.sh status
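An optional extra health check using ZooKeeper's four-letter commands (a sketch; requires nc to be installed):
echo ruok | nc spark1 2181    # a healthy server answers imok
echo stat | nc spark1 2181    # shows whether this node is the leader or a follower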
V. Kafka Setup
1. Install Scala
## Install Scala
[root@spark1 src]# tar -zxvf scala-2.11.4.tgz -C /usr/local/
[root@spark1 local]# mv scala-2.11.4 scala
## Update environment variables
##path
export JAVA_HOME=/usr/java/latest
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin
## Distribute Scala
scp -r scala spark2:/usr/local/
scp -r scala spark3:/usr/local/
[root@spark1 local]# scp -r /etc/profile spark2:/etc/
[root@spark1 local]# scp -r /etc/profile spark3:/etc/
On every node: source /etc/profile
2. Set up Kafka
[root@spark1 src]# tar zxf kafka_2.9.2-0.8.1.tgz -C /usr/local/
[root@spark1 local]# mv kafka_2.9.2-0.8.1 kafka
## Configure Kafka
vi /usr/local/kafka/config/server.properties
broker.id    # must be unique, increasing per broker (0, 1, 2, ...)
zookeeper.connect=192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181    # point at the ZooKeeper ensemble
## Install slf4j
unzip slf4j-1.7.6.zip
[root@spark1 src]# cp slf4j-1.7.6/slf4j-nop-1.7.6.jar /usr/local/kafka/libs/
## Distribute Kafka
Use scp to copy the kafka directory to spark2 and spark3.
The only difference is broker.id in server.properties, which must be set to 1 and 2 respectively (a sketch follows).
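A minimal sketch of the copy plus the per-node broker.id edit (the sed commands are illustrative):
scp -r /usr/local/kafka spark2:/usr/local/
scp -r /usr/local/kafka spark3:/usr/local/
ssh spark2 "sed -i 's/^broker.id=.*/broker.id=1/' /usr/local/kafka/config/server.properties"
ssh spark3 "sed -i 's/^broker.id=.*/broker.id=2/' /usr/local/kafka/config/server.properties"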
3. Start Kafka
### Fix the Kafka "Unrecognized VM option 'UseCompressedOops'" error by removing -XX:+UseCompressedOops from kafka-run-class.sh
vi bin/kafka-run-class.sh
# after deleting -XX:+UseCompressedOops, the block should read:
if [ -z "$KAFKA_JVM_PERFORMANCE_OPTS" ]; then
  KAFKA_JVM_PERFORMANCE_OPTS="-server -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSScavengeBeforeRemark -XX:+DisableExplicitGC -Djava.awt.headless=true"
fi
## Distribute kafka-run-class.sh
[root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark2:/usr/local/kafka/bin/
[root@spark1 kafka]# scp -r bin/kafka-run-class.sh spark3:/usr/local/kafka/bin/
## Start the broker; run this on all three machines
cd /usr/local/kafka/    # required, run from the kafka directory
[root@spark3 kafka]# nohup bin/kafka-server-start.sh config/server.properties &
jps
## Test the Kafka cluster from spark1; open two terminal windows
// Window 1
cd /usr/local/kafka/    # required
[root@spark1 kafka]# bin/kafka-topics.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --replication-factor 1 --partitions 1 --create
Created topic "TestTopic".
[root@spark1 kafka]# bin/kafka-console-producer.sh --broker-list 192.168.1.135:9092,192.168.1.136:9092,192.168.1.137:9092 --topic TestTopic
hello kafka    # type the message here
// Window 2
cd /usr/local/kafka/    # required
[root@spark1 kafka]# bin/kafka-console-consumer.sh --zookeeper 192.168.1.135:2181,192.168.1.136:2181,192.168.1.137:2181 --topic TestTopic --from-beginning
hello kafka    # the message arrives here
VI. Spark Setup
1. Install
[root@spark1 src]# tar zxf spark-1.3.0-bin-hadoop2.4.tgz -C /usr/local/
[root@spark1 local]# mv spark-1.3.0-bin-hadoop2.4 spark
## Set Spark environment variables
[root@spark1 local]# vim /etc/profile
##path
export JAVA_HOME=/usr/java/latest
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export ZOOKEEPER_HOME=/usr/local/zk
export SCALA_HOME=/usr/local/scala
export SPARK_HOME=/usr/local/spark
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME/bin:$ZOOKEEPER_HOME/bin:$SCALA_HOME/bin:$SPARK_HOME/bin
2. Configure
## Edit spark-env.sh
[root@spark1 ~]# cd /usr/local/spark/conf/
[root@spark1 conf]# cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export JAVA_HOME=/usr/java/latest
export SCALA_HOME=/usr/local/scala
export SPARK_MASTER_IP=192.168.1.135
export SPARK_WORKER_MEMORY=1g
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
##slaves
[root@spark1 conf]# mv slaves.template slaves
vim slaves
#spark1    # spark1 is left out here; it will not run a Worker
spark2
spark3
3. Distribute
## Distribute Spark
Use scp to copy the spark directory and /etc/profile to spark2 and spark3:
scp -r spark spark2:/usr/local/
scp -r spark spark3:/usr/local/
scp -r /etc/profile spark2:/etc/
scp -r /etc/profile spark3:/etc/
source /etc/profile
4. Start
##spark1
[root@spark1 local]# cd /usr/local/spark/sbin/
[root@spark1 sbin]# ./start-all.sh
##jps
[root@spark1 sbin]# jps
4615 Jps
3718 QuorumPeerMain
1664 SecondaryNameNode
4052 Kafka
4450 Master
1397 NameNode
1879 ResourceManager
1976 NodeManager
1514 DataNode
[root@spark2 kafka]# jps
1374 NodeManager
2249 Jps
1988 Kafka
2130 Worker
1263 DataNode
1774 QuorumPeerMain
[root@spark3 kafka]# jps
1265 DataNode
2014 Kafka
1377 NodeManager
2155 Worker
1791 QuorumPeerMain
2274 Jps
## Open the browser at <master ip>:8080 for the Spark web UI
## Enter the Spark shell
[root@spark1 sbin]# spark-shell
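A minimal smoke test once the scala> prompt appears (a sketch; any small job will do):
scala> sc.parallelize(1 to 100).sum()    // should return 5050.0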