Linux preparation
Prepare three CentOS 7 virtual machines in VMware Pro; MobaXterm is used as the SSH client.
Perform the following steps on each of the three machines:
1. yum update (answer y to every prompt)
2. yum install net-tools.x86_64 (provides ifconfig)
3. yum install -y java-1.8.0-openjdk.x86_64 (installs JDK 1.8; the JDK ends up under /usr/lib/jvm/)
4. On Windows, download the ZooKeeper and Hadoop tar.gz archives. I used zookeeper-3.4.14 and hadoop-2.7.7.
5. mkdir /zookeeper && tar -zxvf zookeeper-3.4.14.tar.gz -C /zookeeper/
6. mkdir /Hadoop && tar -zxvf hadoop-2.7.7.tar.gz -C /Hadoop/
7. vim /etc/profile, append the following at the end, then run source /etc/profile (a quick verification snippet follows this list):
export HADOOP_HOME=/Hadoop/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export ZOOKEEPER_HOME=/zookeeper/zookeeper-3.4.14
export PATH=$PATH:$ZOOKEEPER_HOME/bin
8. vim /etc/hosts (add the IPs of all three machines):
192.168.101.132 master
192.168.101.133 slaver1
192.168.101.134 slaver2
#127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
#::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
9. vim /etc/hostname and set it to master, slaver1, and slaver2 on the respective machines.
10. yum install java-1.8.0-openjdk-devel.x86_64 (provides jps, so you won't get "jps: command not found" later)
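A quick sanity check of steps 3 and 7 (a minimal sketch, assuming the paths and versions above):
source /etc/profile
java -version          # should report openjdk version "1.8.0_..."
echo $HADOOP_HOME      # /Hadoop/hadoop-2.7.7
hadoop version         # should print Hadoop 2.7.7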
Passwordless SSH setup
On the master machine:
# press Enter at every prompt
ssh-keygen -t rsa
# for each of the following three commands, enter the password when prompted
ssh-copy-id master
ssh-copy-id slaver1
ssh-copy-id slaver2
# verify: if each of the following logs in without asking for a password, it worked
ssh master
ssh slaver1
ssh slaver2
# log out
exit
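The hdfs-site.xml further down uses sshfence with /root/.ssh/id_rsa, which means the standby NameNode (slaver1) must also be able to SSH into master without a password when it fences a failed active NameNode. A minimal sketch for that, run on slaver1 (assuming everything runs as root, as above):
# on slaver1
ssh-keygen -t rsa
ssh-copy-id master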
ZooKeeper configuration
1. In the conf directory:
mv zoo_sample.cfg zoo.cfg
vim zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/zookeeper/zookeeper-3.4.14/data
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
maxClientCnxns=60
dataLogDir=/zookeeper/zookeeper-3.4.14/logs
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
autopurge.purgeInterval=1
server.0=master:2888:3888
server.1=slaver1:2888:3888
server.2=slaver2:2888:3888
2. In the zookeeper-3.4.14 directory, create the data and log directories and the myid file:
mkdir logs
mkdir data
cd data
vim myid
## myid is 0 on master, 1 on slaver1, 2 on slaver2 (matching the server.N entries in zoo.cfg above)
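A non-interactive version of step 2 (a sketch, assuming the layout above; the echoed value must differ per node):
mkdir -p /zookeeper/zookeeper-3.4.14/data /zookeeper/zookeeper-3.4.14/logs
echo 0 > /zookeeper/zookeeper-3.4.14/data/myid   # 0 on master, 1 on slaver1, 2 on slaver2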
Hadoop configuration
Go to $HADOOP_HOME/etc/hadoop (i.e. /Hadoop/hadoop-2.7.7/etc/hadoop). Each group of <property> entries below goes inside the <configuration> root element of its file.
1. vim hdfs-site.xml
<!-- set the HDFS nameservice to ns; must match core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>ns</value>
</property>
<!-- the ns nameservice has two NameNodes: nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.ns</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.ns.nn1</name>
<value>master:9000</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.ns.nn1</name>
<value>master:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.ns.nn2</name>
<value>slaver1:9000</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.ns.nn2</name>
<value>slaver1:50070</value>
</property>
<!-- where the NameNode shared edits (metadata) are stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slaver1:8485;slaver2:8485/ns</value>
</property>
<!-- where each JournalNode stores its data on local disk -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/Hadoop/journal</value>
</property>
<!-- enable automatic failover when the active NameNode goes down -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- failover proxy provider: how clients locate the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.ns</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- fencing method -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- sshfence requires passwordless SSH between the NameNodes -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/Hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/Hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- enable WebHDFS (REST API) on the NameNode and DataNodes; optional -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!-- disable HDFS permission checking -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
2. vim core-site.xml
<!-- set the default filesystem to the ns nameservice -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns</value>
</property>
<!-- Hadoop data (tmp) directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/Hadoop/hadoop-2.7.7/tmp</value>
</property>
<!-- file I/O buffer size -->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<!-- ZooKeeper quorum used for HA -->
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slaver1:2181,slaver2:2181</value>
</property>
<property>
<name>ipc.client.connect.max.retries</name>
<value>100</value>
<description>Indicates the number of retries a client will make to establish a server connection.
</description>
</property>
<property>
<name>ipc.client.connect.retry.interval</name>
<value>10000</value>
<description>Indicates the number of milliseconds a client will wait for
before retrying to establish a server connection.
</description>
</property>
3. vim mapred-site.xml (if the file does not exist yet, copy mapred-site.xml.template to mapred-site.xml first)
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
4. vim yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>slaver2</value>
</property>
5. vim hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b08-0.el7_7.x86_64/
6. vim slaves (lists the DataNode hosts)
master
slaver1
slaver2
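To quickly confirm that the edited XML files are still well-formed (a sketch; assumes xmllint from libxml2 is installed, which it usually is on CentOS 7):
cd /Hadoop/hadoop-2.7.7/etc/hadoop
xmllint --noout core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml && echo "XML OK"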
Copy ZooKeeper and Hadoop to slaver1 and slaver2
scp -r /Hadoop/ root@slaver1:/
scp -r /Hadoop/ root@slaver2:/
scp -r /zookeeper/ root@slaver1:/
scp -r /zookeeper/ root@slaver2:/
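Note that scp -r copies master's myid (0) onto the slaves, so set each slave's myid back to its own value afterwards (a sketch, assuming the paths above):
ssh slaver1 "echo 1 > /zookeeper/zookeeper-3.4.14/data/myid"
ssh slaver2 "echo 2 > /zookeeper/zookeeper-3.4.14/data/myid"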
Start ZooKeeper (on all three machines)
# go to ZooKeeper's bin directory
cd /zookeeper/zookeeper-3.4.14/bin/
# start ZooKeeper
./zkServer.sh start
# check ZooKeeper's status
./zkServer.sh status
# check with jps
2385 QuorumPeerMain
2511 Jps
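Once ZooKeeper is running on all three nodes, ./zkServer.sh status should report one leader and two followers, roughly like this (expected output, may differ slightly):
ZooKeeper JMX enabled by default
Using config: /zookeeper/zookeeper-3.4.14/bin/../conf/zoo.cfg
Mode: follower   (Mode: leader on exactly one of the three nodes)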
Start Hadoop
Initial startup
1. Start a JournalNode on each of master, slaver1, and slaver2:
# go to the sbin directory
cd /Hadoop/hadoop-2.7.7/sbin/
# start the JournalNode
./hadoop-daemon.sh start journalnode
# check with jps
2385 QuorumPeerMain
2462 JournalNode
2511 Jps
2. On master, format ZKFC and HDFS (needed only for the first startup):
hdfs zkfc -formatZK
hadoop namenode -format
3. On master, start the (active) NameNode:
./hadoop-daemon.sh start namenode
4. On slaver1, bootstrap and start the standby NameNode:
hdfs namenode -bootstrapStandby
./hadoop-daemon.sh start namenode
5. On master, start the DataNodes (hadoop-daemons.sh starts one on every host listed in slaves):
./hadoop-daemons.sh start datanode
6. On slaver2, start YARN:
./start-yarn.sh
7. On master, start ZKFC:
./hadoop-daemons.sh start zkfc
8. Check with jps:
#master
jps
3776 DataNode
3953 JournalNode
8433 Jps
4306 NodeManager
1603 QuorumPeerMain
4087 DFSZKFailoverController
3672 NameNode
#slaver1
jps
3057 NameNode
1619 QuorumPeerMain
3203 JournalNode
5173 Jps
3127 DataNode
3369 NodeManager
3278 DFSZKFailoverController
#slaver2
jps
10003 DataNode
13702 Jps
9354 QuorumPeerMain
10174 NodeManager
10079 JournalNode
2808 ResourceManager
9. Starting and stopping everything at once:
# start everything
./start-all.sh
# stop everything
./stop-all.sh
10. Open master(ip):50070 or slaver1:50070 in a browser to see the NameNode web UI.
master:8042 or slaver1:8042 shows the NodeManager web UI.
In detail:
ResourceManager UI: http://ip:8088
NameNode UI: http://ip:50070
NodeManager UI: http://ip:8042
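To confirm HA is actually working, query the state of the two NameNodes defined in hdfs-site.xml (nn1 on master, nn2 on slaver1); one should report active and the other standby:
hdfs haadmin -getServiceState nn1    # e.g. active
hdfs haadmin -getServiceState nn2    # e.g. standby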
#### To be continued