一、实验说明
1、本实验将使用DNS而不是hosts文件解析主机名;
2、使用NFS共享密钥文件,而不是逐个手工拷贝添加密钥;
3、复制Hadoop时使用批量拷贝脚本而不是逐台复制。
测试环境:
Hostname | IP | Hadoop版本 | Hadoop | 功能 | 系统 |
hadoop1 | 192.168.1.161 | 0.20.2 | namenode | nfs服务器端 | rhel5.4 x86 |
hadoop2 | 192.168.1.162 | 0.20.2 | datanode | dns+nfs客户端 | rhel5.4 x86 |
hadoop3 | 192.168.1.163 | 0.20.2 | datanode | nfs客户端 | rhel5.4 x86 |
二、DNS的安装与配置
1、上传dns目录:
[root@hadoop2 dns]# ls
dnsmasq.conf dnsmasq.hosts dnsmasq.resolv.conf pid start.sh stop.sh
2、修改dns目录中的文件:
----dnsmasq.conf为dnsmasq的配置文件----
[root@hadoop2 dns]# cat dnsmasq.conf cache-size=50000 dns-forward-max=1000 resolv-file=/dns/dnsmasq.resolv.conf addn-hosts=/dns/dnsmasq.hosts ----dnsmasq缓存下来的域名,不使用/etc/hosts----
[root@hadoop2 dns]# cat dnsmasq.hosts 192.168.1.161 hadoop1 192.168.1.162 hadoop2 192.168.1.163 hadoop3 ----在dnsmasq.resolv.conf添加上游dns的地址----
[root@hadoop2 dns]# cat dnsmasq.resolv.conf ### /etc/resolv.conf file autogenerated by netconfig! # # Before you change this file manually, consider to define the # static DNS configuration using the following variables in the # /etc/sysconfig/network/config file: # NETCONFIG_DNS_STATIC_SEARCHLIST # NETCONFIG_DNS_STATIC_SERVERS # NETCONFIG_DNS_FORWARDER # or disable DNS configuration updates via netconfig by setting: # NETCONFIG_DNS_POLICY='' # # See also the netconfig(8) manual page and other documentation. # # Note: Manual change of this file disables netconfig too, but # may get lost when this file contains comments or empty lines # only, the netconfig settings are same with settings in this # file and in case of a "netconfig update -f" call. # nameserver 218.108.248.228
nameserver 218.108.248.200
[root@hadoop2 dns]# cat start.sh #!/bin/sh killall dnsmasq dnsmasq --port=53 --pid-file=/dns/pid --conf-file=/dns/dnsmasq.conf [root@hadoop2 dns]# cat stop.sh #!/bin/sh killall dnsmasq
3、启动dns,并在hadoop2上进行测试:
[root@hadoop2 dns]# dig @hadoop2 www.qq.com ; <<>> DiG 9.3.6-P1-RedHat-9.3.6-4.P1.el5 <<>> @hadoop2 www.qq.com ; (1 server found) ;; global options: printcmd ;; Got answer: ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 41272 ;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 3, ADDITIONAL: 7 ;; QUESTION SECTION: ;www.qq.com. IN A ;; ANSWER SECTION: www.qq.com. 5 IN A 182.254.8.146 ;; AUTHORITY SECTION: www.qq.com. 5 IN NS ns-cmn1.qq.com. www.qq.com. 5 IN NS ns-cnc1.qq.com. www.qq.com. 5 IN NS ns-os1.qq.com. ;; ADDITIONAL SECTION: ns-os1.qq.com. 5 IN A 184.105.66.196 ns-os1.qq.com. 5 IN A 202.55.2.226 ns-os1.qq.com. 5 IN A 202.55.2.230 ns-os1.qq.com. 5 IN A 114.134.85.106 ns-cmn1.qq.com. 5 IN A 120.204.202.200 ns-cnc1.qq.com. 5 IN A 125.39.127.27 ns-cnc1.qq.com. 5 IN A 61.135.167.182 ;; Query time: 33 msec ;; SERVER: 192.168.1.162#53(192.168.1.162) ;; WHEN: Sat Aug 24 19:40:25 2013 ;; MSG SIZE rcvd: 221
4、配置hadoop1和hadoop3的/etc/resolv.conf
----dns客户端需要在/etc/resolv.conf文件中注释掉search localdomain,否则无法解析dnsmasq缓存下来的域名----
[hadoop@hadoop3 ~]$ cat /etc/resolv.conf ; generated by /sbin/dhclient-script #search localdomain #nameserver 192.168.11.2
nameserver 192.168.1.162 [root@hadoop3 ~]# dig @hadoop2 www.qq.com ; <<>> DiG 9.3.6-P1-RedHat-9.3.6-4.P1.el5 <<>> @hadoop2 www.qq.com ; (1 server found) ;; global options: printcmd ;; Got answer: ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 2061 ;; flags: qr rd ra; QUERY: 1, ANSWER: 1, AUTHORITY: 3, ADDITIONAL: 7 ;; QUESTION SECTION: ;www.qq.com. IN A ;; ANSWER SECTION: www.qq.com. 5 IN A 182.254.8.146 ;; AUTHORITY SECTION: www.qq.com. 5 IN NS ns-cnc1.qq.com. www.qq.com. 5 IN NS ns-cmn1.qq.com. www.qq.com. 5 IN NS ns-os1.qq.com. ;; ADDITIONAL SECTION: ns-os1.qq.com. 5 IN A 202.55.2.226 ns-os1.qq.com. 5 IN A 202.55.2.230 ns-os1.qq.com. 5 IN A 114.134.85.106 ns-os1.qq.com. 5 IN A 184.105.66.196 ns-cmn1.qq.com. 5 IN A 120.204.202.200 ns-cnc1.qq.com. 5 IN A 61.135.167.182 ns-cnc1.qq.com. 5 IN A 125.39.127.27 ;; Query time: 24 msec ;; SERVER: 192.168.1.162#53(192.168.1.162) ;; WHEN: Sat Aug 24 19:44:43 2013 ;; MSG SIZE rcvd: 221
三、配置NFS
1、查看nfs是否已经安装
[root@hadoop1 ~]# rpm -qa |grep nfs nfs-utils-1.0.9-42.el5 nfs-utils-lib-1.0.8-7.6.el5
2、编辑/etc/exports
[root@hadoop1 ~]# cat /etc/exports /home/hadoop/.ssh/ *(rw,sync,no_root_squash)
3、创建hadoop用户
[root@hadoop1 ~]# useradd hadoop [root@hadoop1 ~]# passwd hadoop Changing password for user hadoop. New UNIX password: BAD PASSWORD: it is based on a dictionary word Retype new UNIX password: passwd: all authentication tokens updated successfully.
4、生成ssh密钥
[hadoop@hadoop1 ~]$ ssh-keygen -t rsa Generating public/private rsa key pair. Enter file in which to save the key (/home/hadoop/.ssh/id_rsa): Created directory '/home/hadoop/.ssh'. Enter passphrase (empty for no passphrase): Enter same passphrase again: Your identification has been saved in /home/hadoop/.ssh/id_rsa. Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub. The key fingerprint is: 11:a6:28:73:db:0b:c2:47:fc:c9:8d:1c:0c:b4:6e:00 hadoop@hadoop1
5、修改挂载点的属性
[root@hadoop1 ~]# chmod 777 /home/hadoop/.ssh/
6、重启nfs
[root@hadoop1 ~]# service nfs restart Shutting down NFS mountd: [FAILED] Shutting down NFS daemon: [FAILED] Shutting down NFS quotas: [FAILED] Shutting down NFS services: [FAILED] Starting NFS services: [ OK ] Starting NFS quotas: [ OK ] Starting NFS daemon: [ OK ] Starting NFS mountd: [ OK ]
7、在本机挂载测试
[root@hadoop1 ~]# mount 192.168.1.161:/home/hadoop/.ssh /mnt [root@hadoop1 ~]# mount /dev/sda1 on / type ext3 (rw) proc on /proc type proc (rw) sysfs on /sys type sysfs (rw) devpts on /dev/pts type devpts (rw,gid=5,mode=620) tmpfs on /dev/shm type tmpfs (rw) none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw) sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw) nfsd on /proc/fs/nfsd type nfsd (rw) 192.168.1.161:/home/hadoop/.ssh on /mnt type nfs (rw,addr=192.168.1.161) [root@hadoop1 ~]# ll /home/hadoop/.ssh/ total 8 -rw------- 1 hadoop hadoop 1675 Aug 25 10:59 id_rsa -rw-r--r-- 1 hadoop hadoop 396 Aug 25 10:59 id_rsa.pub [root@hadoop1 ~]# ll /mnt total 8 -rw------- 1 hadoop hadoop 1675 Aug 25 10:59 id_rsa -rw-r--r-- 1 hadoop hadoop 396 Aug 25 10:59 id_rsa.pub
四、nfs整合ssh密钥
1、先将id_rsa.pub拷贝成authorized_keys
[hadoop@hadoop1 ~]$ cp .ssh/id_rsa.pub .ssh/authorized_keys
2、再登录hadoop2和hadoop3,创建hadoop用户并用hadoop用户登录,然后在每台机器上生成ssh的rsa密钥
----hadoop2和hadoop3操作一样----
[root@hadoop2 dns]# useradd hadoop [root@hadoop2 dns]# passwd hadoop Changing password for user hadoop. New UNIX password: BAD PASSWORD: it is based on a dictionary word Retype new UNIX password: passwd: all authentication tokens updated successfully. [root@hadoop2 dns]# su - hadoop [hadoop@hadoop2 ~]$ ssh-keygen -t rsa Generating public/private rsa key pair. Enter file in which to save the key (/home/hadoop/.ssh/id_rsa): Created directory '/home/hadoop/.ssh'. Enter passphrase (empty for no passphrase): Enter same passphrase again: Your identification has been saved in /home/hadoop/.ssh/id_rsa. Your public key has been saved in /home/hadoop/.ssh/id_rsa.pub. The key fingerprint is: 3c:9d:07:2a:7d:3d:e3:d3:22:0c:0e:8b:5d:96:93:e1 hadoop@hadoop2
3、在hadoop2和hadoop3上挂载nfs
[root@hadoop2 dns]# mount 192.168.1.161:/home/hadoop/.ssh /mnt [root@hadoop2 dns]# ll /mnt total 12 -rw-r--r-- 1 hadoop hadoop 396 Aug 25 11:04 authorized_keys -rw------- 1 hadoop hadoop 1675 Aug 25 10:59 id_rsa -rw-r--r-- 1 hadoop hadoop 396 Aug 25 10:59 id_rsa.pub [root@hadoop2 dns]# mount /dev/sda1 on / type ext3 (rw) proc on /proc type proc (rw) sysfs on /sys type sysfs (rw) devpts on /dev/pts type devpts (rw,gid=5,mode=620) tmpfs on /dev/shm type tmpfs (rw) none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw) sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw) 192.168.1.161:/home/hadoop/.ssh on /mnt type nfs (rw,addr=192.168.1.161)
4、把hadoop2和hadoop3的公钥id_rsa.pub都添加到/mnt/authorized_keys里
[root@hadoop2 dns]# cat /home/hadoop/.ssh/id_rsa.pub >> /mnt/authorized_keys [root@hadoop3 ~]# cat /home/hadoop/.ssh/id_rsa.pub >> /mnt/authorized_keys
5、查看authorized_keys内容
[hadoop@hadoop1 ~]$ cat /mnt/authorized_keys ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA32vNwXv/23k0yF7QeITb61J5uccudHB3gQBtqCnB7wsOtIhdUsVIfcxGmPnWp6S9V+Ob+b73Vrl2xsxP4i0N8Cu1l2ZcU9jevc+o37yX4nW2oTBFVEP31y9E9fXkYf3cKiF0UrvunL59qgNnVUbq8qRtFr5QPAx6lGY0TYZiPaPr+POwNKF1IZvToqABsOnNimv0DNmAhbd3QyM7GaR/ZRQKOCMF8NYljo6exoDk9xPq/wCHC/rBnAU3gUlwi7Kn/tk2dirwvYZuqP3VO+w5zd6sYxscD8+UNK99XdOARzTlc8/iEPHy+JSBa6sQI2hOAOCAuHBtTymoJFUDH9YqXQ== hadoop@hadoop1 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEA4lTx6JTZlhoLI4Yyo0a6YeDmIgz60pYwYKwVL+p4wfp9OWB2/sEyf9iCsK8i94mnWMfNsRehqAG2ucPmWz1s/Kufxu/6uc8hJjDlOOMUOE7ENyN0Zre5MHj8jauDRhY4y37Rh3Crx86wzq79isDqJOWnKyjPQDjUH45780Hvtk87ckwNNSFhwuRgTFKhz0bQloJuHazU1/W924wmicqeEUSGhUFEkXUeJu7FqQjJcPjoRNqyTEuCHiYVh9HjOrUPdosfYqmQfuZ/x2gmsGRUdfTl32rkoZW43ay8CFV/MKqAFucEOiiHW7xttmm3zJgcyLptGhjo7NtvAQwKkPfG6w== hadoop@hadoop2 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAs7fkzQMR6yVqLBVAnJqTxFPO9NNngrmYDNZMbWXDz6V8J4Z7zC46odUERe3CNjC+v3X8rwvUWlALYtvMNonQwhnpvqe2s0CpDithSFkOt5fQarRYP5JtAjHvF5b22NqcyltF+ywLT4zKAg4tjgGV5nLafI2hsNjgljUOXkRjpwSSUpLmLayWnepLIwioCPPGIkM40balUOEWEASzaI4DaPoywmoVUrByou71i1F1VizXpbhIWW+LE2cANAy1xmP0zYBa+/O4mvpgZjWLtLpKFR/1nRZPh1emy+OB6RcoJl3Awmhcsyyjd4Q8jfOYsH78PKpnwJfyhtUEIENrzUV63w== hadoop@hadoop3
6、做软链接(此步骤不需要在nfs服务器端做,只在客户端做)
[hadoop@hadoop2 ~]$ ln -s /mnt/authorized_keys /home/hadoop/.ssh/authorized_keys [hadoop@hadoop2 ~]$ ll /home/hadoop/.ssh/authorized_keys lrwxrwxrwx 1 hadoop hadoop 20 Aug 25 11:14 /home/hadoop/.ssh/authorized_keys -> /mnt/authorized_keys [hadoop@hadoop3 ~]$ ln -s /mnt/authorized_keys /home/hadoop/.ssh/authorized_keys [hadoop@hadoop3 ~]$ ll /home/hadoop/.ssh/authorized_keys lrwxrwxrwx 1 hadoop hadoop 20 Aug 25 11:15 /home/hadoop/.ssh/authorized_keys -> /mnt/authorized_keys
7、修改权限
[hadoop@hadoop1 ~]$ chmod 700 /home/hadoop/.ssh/
备注:sshd默认启用StrictModes,要求.ssh目录不能被属主以外的用户写入;如果不把前面设置的777权限改回700,登录时仍然会提示输入密码。
8、测试是否实现无密码登录
[hadoop@hadoop1 ~]$ ssh hadoop2 The authenticity of host 'hadoop2 (192.168.1.162)' can't be established. RSA key fingerprint is ca:9a:7e:19:ee:a1:35:44:7e:9d:d4:09:5c:fc:c5:0a. Are you sure you want to continue connecting (yes/no)? yes Warning: Permanently added 'hadoop2,192.168.1.162' (RSA) to the list of known hosts. [hadoop@hadoop2 ~]$ ssh hadoop3 The authenticity of host 'hadoop3 (192.168.1.163)' can't be established. RSA key fingerprint is ca:9a:7e:19:ee:a1:35:44:7e:9d:d4:09:5c:fc:c5:0a. Are you sure you want to continue connecting (yes/no)? yes Warning: Permanently added 'hadoop3,192.168.1.163' (RSA) to the list of known hosts. [hadoop@hadoop3 ~]$
五、批量安装Hadoop
1、先在hadoop1上把namenode安装完成,安装hadoop分布式可以参考:Hadoop集群安装
[hadoop@hadoop1 ~]$ cat hadoop-0.20.2/conf/slaves | awk '{print "scp -rp hadoop-0.20.2 hadoop@"$1":/home/hadoop/"}' > scp.sh [hadoop@hadoop1 ~]$ cat hadoop-0.20.2/conf/slaves | awk '{print "scp -rp temp hadoop@"$1":/home/hadoop/"}' >> scp.sh [hadoop@hadoop1 ~]$ cat hadoop-0.20.2/conf/slaves | awk '{print "scp -rp user hadoop@"$1":/home/hadoop/"}' >> scp.sh [hadoop@hadoop1 ~]$ cat hadoop-0.20.2/conf/slaves | awk '{print "scp -rp jdk1.7 hadoop@"$1":/home/hadoop/"}' >> scp.sh [hadoop@hadoop1 ~]$ ls hadoop-0.20.2 jdk1.7 scp.sh temp user [hadoop@hadoop1 ~]$ cat scp.sh scp -rp hadoop-0.20.2 hadoop@192.168.1.162:/home/hadoop/ scp -rp hadoop-0.20.2 hadoop@192.168.1.163:/home/hadoop/ scp -rp temp hadoop@192.168.1.162:/home/hadoop/ scp -rp temp hadoop@192.168.1.163:/home/hadoop/ scp -rp user hadoop@192.168.1.162:/home/hadoop/ scp -rp user hadoop@192.168.1.163:/home/hadoop/ scp -rp jdk1.7 hadoop@192.168.1.162:/home/hadoop/ scp -rp jdk1.7 hadoop@192.168.1.163:/home/hadoop/
2、格式化namenode
[hadoop@hadoop1 ~]$ hadoop-0.20.2/bin/hadoop namenode -format 13/08/25 11:52:39 INFO namenode.NameNode: STARTUP_MSG: /************************************************************ STARTUP_MSG: Starting NameNode STARTUP_MSG: host = hadoop1/192.168.1.161 STARTUP_MSG: args = [-format] STARTUP_MSG: version = 0.20.2 STARTUP_MSG: build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20 -r 911707; compiled by 'chrisdo' on Fri Feb 19 08:07:34 UTC 2010 ************************************************************/ Re-format filesystem in /home/hadoop/user/name ? (Y or N) Y 13/08/25 11:52:46 INFO namenode.FSNamesystem: fsOwner=hadoop,hadoop 13/08/25 11:52:46 INFO namenode.FSNamesystem: supergroup=supergroup 13/08/25 11:52:46 INFO namenode.FSNamesystem: isPermissionEnabled=true 13/08/25 11:52:47 INFO common.Storage: Image file of size 96 saved in 0 seconds. 13/08/25 11:52:48 INFO common.Storage: Storage directory /home/hadoop/user/name has been successfully formatted. 13/08/25 11:52:48 INFO namenode.NameNode: SHUTDOWN_MSG: /************************************************************ SHUTDOWN_MSG: Shutting down NameNode at hadoop1/192.168.1.161 ************************************************************/
3、启动hadoop
[hadoop@hadoop1 ~]$ hadoop-0.20.2/bin/start-all.sh starting namenode, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-namenode-hadoop1.out 192.168.1.163: starting datanode, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-datanode-hadoop3.out 192.168.1.162: starting datanode, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-datanode-hadoop2.out The authenticity of host '192.168.1.161 (192.168.1.161)' can't be established. RSA key fingerprint is ca:9a:7e:19:ee:a1:35:44:7e:9d:d4:09:5c:fc:c5:0a. Are you sure you want to continue connecting (yes/no)? yes 192.168.1.161: Warning: Permanently added '192.168.1.161' (RSA) to the list of known hosts. 192.168.1.161: starting secondarynamenode, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-secondarynamenode-hadoop1.out starting jobtracker, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-jobtracker-hadoop1.out 192.168.1.162: starting tasktracker, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-tasktracker-hadoop2.out 192.168.1.163: starting tasktracker, logging to /home/hadoop/hadoop-0.20.2/bin/../logs/hadoop-hadoop-tasktracker-hadoop3.out
4、查看各个节点
[hadoop@hadoop1 ~]$ jdk1.7/bin/jps 4416 Jps 4344 JobTracker 4306 SecondaryNameNode 4157 NameNode [hadoop@hadoop2 ~]$ jdk1.7/bin/jps 3699 TaskTracker 3636 DataNode 3752 Jps [hadoop@hadoop3 ~]$ jdk1.7/bin/jps 4763 TaskTracker 4834 Jps 4653 DataNode
六、重点说明
1、如果重启以后无法自动挂载nfs,可以在/etc/rc.d/rc.local文件中添加以下命令(mount -a只会挂载/etc/fstab中已配置的文件系统,因此需要先在/etc/fstab中添加该nfs挂载项):
/bin/mount -a
2、如果IP是自动获取的,请在DNS主机的/etc/rc.d/rc.local文件添加:
/bin/cat /app/resolv.conf > /etc/resolv.conf
[root@node1 ~]# cat /app/resolv.conf ; generated by /sbin/dhclient-script #search localdomain #nameserver 192.168.1.151
其它主机的/etc/rc.d/rc.local添加:
/bin/cat /app/resolv.conf > /etc/resolv.conf
[root@node2 ~]# cat /app/resolv.conf ; generated by /sbin/dhclient-script #search localdomain nameserver 192.168.1.151