1. Prepare a server
192.168.5.106
2. Install the JDK beforehand
3. Hadoop services to run (all on one node)
NameNode           192.168.5.106
SecondaryNameNode  192.168.5.106
DataNode           192.168.5.106
ResourceManager    192.168.5.106
NodeManager        192.168.5.106
4. Download and extract Hadoop
http://archive.apache.org/dist/hadoop/common/hadoop-2.7.5/hadoop-2.7.5.tar.gz
mkdir -p /export/servers/ and extract Hadoop into that directory (all config paths below assume /export/servers)
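A minimal sketch of the download and extraction, assuming wget and tar are available:
cd /export/servers/
wget http://archive.apache.org/dist/hadoop/common/hadoop-2.7.5/hadoop-2.7.5.tar.gz
tar -zxvf hadoop-2.7.5.tar.gz
cd hadoop-2.7.5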
5. Edit the configuration files
5.1 vim hadoop-2.7.5/etc/hadoop/core-site.xml
<configuration>
    <!-- default filesystem URI; fs.defaultFS is the current, non-deprecated name for this property -->
    <property>
        <name>fs.default.name</name>
        <value>hdfs://192.168.5.106:8020</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/export/servers/hadoop-2.7.5/hadoopDatas/tempDatas</value>
    </property>
    <!-- I/O buffer size; in production, tune this to the server's capabilities -->
    <property>
        <name>io.file.buffer.size</name>
        <value>4096</value>
    </property>
    <!-- enable the HDFS trash mechanism so deleted data can be recovered; value is in minutes -->
    <property>
        <name>fs.trash.interval</name>
        <value>10080</value>
    </property>
</configuration>
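To double-check that a value is actually being read from the edited file, hdfs getconf can print any key (run from the hadoop-2.7.5 directory); fs.defaultFS is the key that the deprecated fs.default.name maps to:
bin/hdfs getconf -confKey fs.defaultFS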
5.2 vim hadoop-2.7.5/etc/hadoop/hdfs-site.xml
<configuration>
    <!-- Paths where the NameNode stores metadata. In production, first decide the disk
         mount points, then separate multiple directories with commas. -->
    <!-- dynamic commissioning/decommissioning of cluster nodes
    <property>
        <name>dfs.hosts</name>
        <value>/export/servers/hadoop-2.7.5/etc/hadoop/accept_host</value>
    </property>
    <property>
        <name>dfs.hosts.exclude</name>
        <value>/export/servers/hadoop-2.7.5/etc/hadoop/deny_host</value>
    </property>
    -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>192.168.5.106:50090</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>192.168.5.106:50070</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2</value>
    </property>
    <!-- Paths where DataNodes store block data. In production, first decide the disk
         mount points, then separate multiple directories with commas. -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2</value>
    </property>
    <property>
        <name>dfs.namenode.edits.dir</name>
        <value>file:///export/servers/hadoop-2.7.5/hadoopDatas/nn/edits</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///export/servers/hadoop-2.7.5/hadoopDatas/snn/name</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.edits.dir</name>
        <value>file:///export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits</value>
    </property>
    <!-- replication factor; with a single DataNode, only one replica can actually be placed -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <!-- block size in bytes (128 MB) -->
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
    </property>
</configuration>
5.3 vim hadoop-2.7.5/etc/hadoop/hadoop-env.sh
export JAVA_HOME=<JDK installation path>
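For example, assuming the JDK was unpacked under /export/servers/jdk1.8.0_241 (a hypothetical path; use your actual one):
export JAVA_HOME=/export/servers/jdk1.8.0_241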
5.4 vim hadoop-2.7.5/etc/hadoop/mapred-site.xml (this file ships only as mapred-site.xml.template; copy the template to mapred-site.xml first)
<configuration>
    <!-- run MapReduce jobs on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- allow small jobs to run in "uber" mode inside the ApplicationMaster's JVM -->
    <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>192.168.5.106:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>192.168.5.106:19888</value>
    </property>
</configuration>
5.5 vim hadoop-2.7.5/etc/hadoop/yarn-site.xml
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>192.168.5.106</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- aggregate container logs and keep them for 7 days (604800 seconds) -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
</configuration>
5.6 vim hadoop-2.7.5/etc/hadoop/mapred-env.sh
export JAVA_HOME=<JDK installation path>
5.7 vim hadoop-2.7.5/etc/hadoop/slaves
192.168.5.106
6. Start the services
Create the data directories:
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/tempDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/nn/edits
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/snn/name
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits
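Equivalently, assuming a bash shell, the eight mkdir calls collapse into one with brace expansion:
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/{tempDatas,namenodeDatas,namenodeDatas2,datanodeDatas,datanodeDatas2,nn/edits,snn/name,dfs/snn/edits}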
Configure the Hadoop environment variables:
vim /etc/profile
export HADOOP_HOME=/export/servers/hadoop-2.7.5
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
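Reload the profile so the new variables take effect, and check that the hadoop command resolves:
source /etc/profile
hadoop version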
When starting HDFS for the first time, it must be formatted (run this only once; reformatting wipes the NameNode metadata):
bin/hdfs namenode -format
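Note that start-dfs.sh logs in over SSH to every host listed in the slaves file, so passwordless SSH to the machine itself is needed even on a single node. A minimal sketch, assuming no key pair exists yet:
ssh-keygen -t rsa
ssh-copy-id 192.168.5.106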
Start the services:
sbin/start-dfs.sh
sbin/start-yarn.sh
sbin/mr-jobhistory-daemon.sh start historyserver
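The matching commands to shut the cluster down later are:
sbin/mr-jobhistory-daemon.sh stop historyserver
sbin/stop-yarn.sh
sbin/stop-dfs.sh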
Disable the firewall (CentOS 6 commands):
service iptables stop
chkconfig iptables off
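On CentOS 7 and later, iptables was replaced by firewalld, so the equivalent would be:
systemctl stop firewalld
systemctl disable firewalld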
Check that all daemons started:
[root@localhost hadoop-2.7.5]# jps
2547 DataNode
3397 Jps
3285 JobHistoryServer
2421 NameNode
2870 ResourceManager
2713 SecondaryNameNode
3148 NodeManager
Open the web UIs in a browser:
hdfs : http://192.168.5.106:50070
yarn : http://192.168.5.106:8088
jobhistory: http://192.168.5.106:19888
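As a final smoke test, the examples jar bundled with the release can run a small MapReduce job, e.g. a pi estimation with 2 map tasks and 10 samples each (run from the hadoop-2.7.5 directory):
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.5.jar pi 2 10
The completed job should then show up in the YARN and JobHistory web UIs above.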