伪分布式只需要一台服务器就可以完成,搭建集群之前需要关闭selinux和防火墙
1.安装java并配置环境变量
[root@node1 ~]# tar -xf jdk-8u144-linux-x64.gz -C /usr/
[root@node1 ~]# ln -sv /usr/jdk1.8.0_144/ /usr/java
"/usr/java" -> "/usr/jdk1.8.0_144/"
[root@node1 ~]# cat /etc/profile.d/java.sh
export JAVA_HOME=/usr/java
export PATH=$PATH:$JAVA_HOME/bin
[root@node1 ~]# source /etc/profile.d/java.sh
[root@node1 ~]# java -version
java version "1.8.0_144"
Java(TM) SE Runtime Environment (build 1.8.0_144-b01)
Java HotSpot(TM) 64-Bit Server VM (build 25.144-b01, mixed mode)
2.安装hadoop程序并配置环境变量
[root@node1 ~]# tar xf hadoop-2.9.2.tar.gz -C /usr
[root@node1 ~]# ln -sv /usr/hadoop-2.9.2/ /usr/hadoop
"/usr/hadoop" -> "/usr/hadoop-2.9.2/"
[root@node1 ~]# cat /etc/profile.d/hadoop.sh
export HADOOP_HOME=/usr/hadoop-2.9.2
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

更改hadoop程序包内 hadoop-env.sh,mapred-env.sh,yarn-env.sh中的JAVA_HOME环境变量

[root@node1 ~]# grep 'export JAVA_HOME' /usr/hadoop/etc/hadoop/{hadoop-env.sh,mapred-env.sh,yarn-env.sh}
/usr/hadoop/etc/hadoop/hadoop-env.sh:export JAVA_HOME=/usr/java
/usr/hadoop/etc/hadoop/mapred-env.sh:export JAVA_HOME=/usr/java
/usr/hadoop/etc/hadoop/yarn-env.sh:export JAVA_HOME=/usr/java
3.配置主机名和hosts文件
[root@localhost ~]# hostnamectl set-hostname node1
[root@localhost ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.159.129 node1
4.core-site.xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/data/hadoop-local</value>
    </property>
</configuration>
5.hdfs-site.xml
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node1:50090</value>
    </property>
</configuration>
6.slaves
node1
7.mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
8.yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>node1</value>
    </property>
</configuration>
9.创建hadoop数据存储目录
mkdir -p /usr/data/hadoop-local
10.格式化hdfs集群
/usr/hadoop-w/bin/hdfs namenode -format
11.启动各个组件
[root@node1 hadoop-w]# /usr/hadoop-w/sbin/start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [node1]
node1: starting namenode, logging to /usr/hadoop-w/logs/hadoop-root-namenode-node1.out
node1: starting datanode, logging to /usr/hadoop-w/logs/hadoop-root-datanode-node1.out
Starting secondary namenodes [node1]
node1: starting secondarynamenode, logging to /usr/hadoop-w/logs/hadoop-root-secondarynamenode-node1.out
starting yarn daemons
starting resourcemanager, logging to /usr/hadoop-w/logs/yarn-root-resourcemanager-node1.out
node1: starting nodemanager, logging to /usr/hadoop-w/logs/yarn-root-nodemanager-node1.out
12.查看各个组件启动情况
[root@node1 hadoop-w]# jps
3840 Jps
3430 ResourceManager
2264 JobHistoryServer
2985 NameNode
3116 DataNode
3532 NodeManager
3277 SecondaryNameNode
- hadoop所有存储路径,如果不指定,都会在core-site.xml中hadoop.tmp.dir指定的目录下创建
[root@node1 hadoop-w]# tree /usr/data/hadoop-local/
/usr/data/hadoop-local/
├── dfs
│   ├── data
│   │   ├── current
│   │   │   ├── BP-1191695345-192.168.159.129-1582271980457
│   │   │   │   ├── current
│   │   │   │   │   ├── dfsUsed
│   │   │   │   │   ├── finalized
│   │   │   │   │   ├── rbw
│   │   │   │   │   └── VERSION
│   │   │   │   ├── scanner.cursor
│   │   │   │   └── tmp
│   │   │   └── VERSION
│   │   └── in_use.lock
│   ├── name
│   │   ├── current
│   │   │   ├── edits_0000000000000000001-0000000000000000008
│   │   │   ├── edits_inprogress_0000000000000000009
│   │   │   ├── fsimage_0000000000000000000
│   │   │   ├── fsimage_0000000000000000000.md5
│   │   │   ├── seen_txid
│   │   │   └── VERSION
│   │   └── in_use.lock
│   └── namesecondary
│       └── in_use.lock
└── nm-local-dir
    ├── filecache
    ├── nmPrivate
    └── usercache