zoukankan      html  css  js  c++  java
  • 大数据架构:搭建CDH5.5.1分布式集群环境

    yum install -y ntp gcc make lrzsz wget vim sysstat.x86_64 xinetd screen expect rsync bind-utils iotop dstat nethogs openssl-devel openssh-clients

    1) 配置 /etc/hosts
      10.17.172.230 master
      10.17.172.231 node1
      10.17.172.232 node2
      10.17.172.233 node3
      10.17.172.234 node4
      10.17.172.235 node5
      10.17.172.236 node6
      10.17.172.237 node7


    2) HOSTNAME
      vi /etc/sysconfig/network
        HOSTNAME=master
      hostname master
      service network restart

    host一次配好,不要去改;如果非要改,相关服务都要重启, authorized_keys要处理, known_hosts要删除

    3) 无密码SSH
    在主节点上执行 ssh-keygen -t rsa 一路回车,生成无密码的密钥对。
    将所有节点的公钥添加到认证文件中:
      cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    scp文件到所有datanode节点:
      scp ~/.ssh/authorized_keys root@node1:~/.ssh/
      scp ~/.ssh/authorized_keys root@node2:~/.ssh/
      scp ~/.ssh/authorized_keys root@node3:~/.ssh/
      scp ~/.ssh/authorized_keys root@node4:~/.ssh/
      scp ~/.ssh/authorized_keys root@node5:~/.ssh/
      scp ~/.ssh/authorized_keys root@node6:~/.ssh/
    设置authorized_keys的访问权限:
      chmod 600 ~/.ssh/authorized_keys

    4) 修改 history命令 的记录数量
      sed -i 's/^HISTSIZE=1000/HISTSIZE=10000/' /etc/profile
      source /etc/profile

    5) 安装 JAVA
    rpm -qa | grep java # 检查是否安装 jdk
    rpm -ivh jdk-7u80-linux-x64.rpm # 安装jdk

    vi /etc/profile
      export JAVA_HOME=/usr/java/jdk1.7.0_80
      export PATH=$JAVA_HOME/bin:$PATH
      export JRE_HOME=${JAVA_HOME}/jre
      export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    source /etc/profile

    6) 配置 防火墙
      service iptables stop # 暂时关闭

    7) 关闭SELINUX
    临时生效:
      setenforce 0
    重启后永久生效:
      cat /etc/selinux/config | grep "SELINUX="

      vi /etc/selinux/config
        SELINUX=disabled

    8) 时区
      vi /etc/sysconfig/clock
        ZONE=Asia/Shanghai
      rm /etc/localtime
      ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

    9) 配置 NTP 节点
      yum -y install ntp ntpdate

      chkconfig ntpd on # 开机启动
      chkconfig --list ntpd # 2-5为on状态就代表成功

    master机器:
    ntpdate -u 103.226.213.30 # 同步一下时间
    vi /etc/ntp.conf
      driftfile /var/lib/ntp/drift

      restrict 127.0.0.1
      restrict -6 ::1

      restrict default nomodify notrap
      restrict -6 default nomodify notrap

      server 103.226.213.30 prefer
      server 0.rhel.pool.ntp.org iburst
      server 1.rhel.pool.ntp.org iburst
      server 2.rhel.pool.ntp.org iburst
      server 3.rhel.pool.ntp.org iburst
      server 210.72.145.44 iburst
      server pool.ntp.org iburst
      server time.windows.com iburst
      server ntp.sjtu.edu.cn iburst
      server time.asia.apple.com iburst
      server clock.via.net iburst
      server ntp.nasa.gov iburst

      includefile /etc/ntp/crypto/pw
      keys /etc/ntp/keys

    service ntpd start # 启动
    ntpstat # 命令查看同步状态

    所有子节点:
    vi /etc/ntp.conf
      driftfile /var/lib/ntp/drift

      restrict 127.0.0.1
      restrict -6 ::1

      restrict default kod nomodify notrap nopeer noquery
      restrict -6 default kod nomodify notrap nopeer noquery

      server master

      includefile /etc/ntp/crypto/pw
      keys /etc/ntp/keys

    ntpdate -u master
    service ntpd start

    10) MySQL
      rpm -qa | grep -i mysql
      rpm --nodeps -e mysql-libs-5.1.71-1.el6.x86_64

      cd /usr/local
      tar -zxvf mysql.tar.gz

      cp /data/install_tools/init_server_env/mysql/my.cnf /usr/local/mysql/
      cp /data/install_tools/init_server_env/mysql/shutdown_mysql.sh /usr/local/mysql/
      cp /data/install_tools/init_server_env/mysql/start_mysql.sh /usr/local/mysql/

    # 创建数据库相应目录和设置目录权限
      mkdir -p /data/mysql-3306/{tmp,data}
      groupadd mysql
      useradd -r -g mysql mysql -s /sbin/nologin
      chmod 755 /usr/local/mysql/*.sh
      chown -R mysql:mysql /data/mysql-3306
      chown -R mysql:mysql /usr/local/mysql

    ##初始化mysql
      cd /usr/local/mysql
      scripts/mysql_install_db --user=mysql --defaults-file=/usr/local/mysql/my.cnf
      /usr/local/mysql/start_mysql.sh

      netstat -ant|grep ':3306'|grep 'LISTEN' # 确认 3306 端口可用

      /usr/local/mysql/bin/mysql -uroot -S /data/mysql-3306/mysqld.sock
      use mysql;
      delete from user where user='';
      grant all privileges on *.* to root@'%' identified by 'qweasdzxc';
      update user set password=password('qweasdzxc') where user='root';
      grant select,insert,update,EXECUTE,lock tables on *.* to loguser@'%' identified by 'nagioscheck';
      grant usage on *.* to 'zabbixmysql'@'127.0.0.1' identified by 'zabbix123';
      flush privileges;
      select host,user,password from user;

    首先要看本机MySQL的socket套接字文件在哪里:
      mysqld --verbose --help | grep socket

    vi /etc/profile
      export PATH=$PATH:/usr/local/mysql/bin
    source /etc/profile

     

    11)安装CM==================================================================================================================
    1) 主节点解压安装
      解压 cloudera-manager-el6-cm5.5.1_x86_64.tar.gz 到 /opt/目录

    2) 建立数据库
      cp /data/install_tools/mysql-connector-java-5.1.42.jar /opt/cm-5.5.1/share/cmf/lib/

      # 这个权限,应该要
      grant all privileges on *.* to scm@'%' identified by 'scm';
      flush privileges;

      数据库类型 数据库名称 -h数据库主机名 -u数据库用户名 -p数据库密码 --scm-host cmserver主机名 scm scm scm
      /opt/cm-5.5.1/share/cmf/schema/scm_prepare_database.sh mysql cm -h127.0.0.1 -uroot -pqweasdzxc -P3306 --scm-host master scm scm scm


    3) Agent配置
      vi /opt/cm-5.5.1/etc/cloudera-scm-agent/config.ini
      server_host=master

      同步Agent到其他节点(在每个子节点上执行,从 master 拉取)
      scp -r root@master:/opt/cm-5.5.1/ /opt/

      在所有节点创建cloudera-scm用户
      userdel cloudera-scm
      useradd --system --home=/opt/cm-5.5.1/run/cloudera-scm-server/ --no-create-home --shell=/bin/false --comment "Cloudera SCM User" cloudera-scm

      准备Parcels,用以安装CDH5(主节点)
      cp /data/install_tools/CDH/* /opt/cloudera/parcel-repo/
      mv /opt/cloudera/parcel-repo/CDH-5.5.1-1.cdh5.5.1.p0.11-el6.parcel.sha1 /opt/cloudera/parcel-repo/CDH-5.5.1-1.cdh5.5.1.p0.11-el6.parcel.sha

    启动:
      echo > /opt/cm-5.5.1/log/cloudera-scm-server/cloudera-scm-server.log
      echo > /opt/cm-5.5.1/log/cloudera-scm-agent/cloudera-scm-agent.log

    主节点:
      /opt/cm-5.5.1/etc/init.d/cloudera-scm-server start
      tail -f /opt/cm-5.5.1/log/cloudera-scm-server/cloudera-scm-server.log
        2017-06-28 11:02:29,570 INFO WebServerImpl:org.mortbay.log: Started SelectChannelConnector@0.0.0.0:7180
        2017-06-28 11:02:29,570 INFO WebServerImpl:com.cloudera.server.cmf.WebServerImpl: Started Jetty server.
      标志执行成功

    所有节点(包括主节点): 最好先启动子节点,最后启动主节点
      /opt/cm-5.5.1/etc/init.d/cloudera-scm-agent start
      tail -n20 /opt/cm-5.5.1/log/cloudera-scm-agent/cloudera-scm-agent.log # log 里面有错应该是正常的
      DnsResolutionMonitor throttling_logger INFO Using java location: '/usr/java/jdk1.7.0_80/bin/java'.
    应该是标志执行成功,不确认

    4) CDH5的安装配置
    http://master:7180/cmf/

    初始化:
    MySQL 建库:
      create database hive;
      create database oozie;
      create database amon;
      create database hue;

    性能相关:
      echo 0 > /proc/sys/vm/swappiness
      echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag

      echo "" >> /etc/sysctl.conf
      echo "vm.swappiness=0" >> /etc/sysctl.conf
      sysctl -p

      echo "" >> /etc/rc.local
      echo "echo never > /sys/kernel/mm/redhat_transparent_hugepage/defrag" >> /etc/rc.local

    【群集设置 - 审核更改】页面时:
    # 使用了MySql作为hive的元数据存储,hive默认没有带mysql的驱动
      cp /opt/cm-5.5.1/share/cmf/lib/mysql-connector-java-5.1.42.jar /opt/cloudera/parcels/CDH-5.5.1-1.cdh5.5.1.p0.11/lib/hive/lib/

    【群集设置 - 首次运行 命令】页面时:
      cp /opt/cm-5.5.1/share/cmf/lib/mysql-connector-java-5.1.42.jar /var/lib/oozie/

    ========================================================================================================================
    错误:
    1) Heartbeating to master:7182 failed
    由于在主节点上启动了Agent后,又将Agent scp到了其他节点上导致的,首次启动Agent,它会生成一个uuid。

    mv /usr/bin/host /usr/bin/host.bak
    rm -rf /opt/cm-5.5.1/lib/cloudera-scm-agent/uuid


    ========================================================================================================================
    iptables 配置:
    master:
      *filter
      :INPUT ACCEPT [0:0]
      :FORWARD ACCEPT [0:0]
      :OUTPUT ACCEPT [114502:11447075]
      -A INPUT -p tcp -m tcp --dport 8088 -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 50070 -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 7180 -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 19888 -j ACCEPT
      -A INPUT -s 10.17.172.0/24 -p tcp -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 1022 -j ACCEPT
      -A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
      -A INPUT -p icmp -j ACCEPT
      -A INPUT -i lo -j ACCEPT
      -A INPUT -p tcp -m state --state NEW -m tcp --dport 22 -j ACCEPT
      -A INPUT -j REJECT --reject-with icmp-host-prohibited
      -A FORWARD -j REJECT --reject-with icmp-host-prohibited
    COMMIT
    node[1-7]:
      *filter
      :INPUT ACCEPT [0:0]
      :FORWARD ACCEPT [0:0]
      :OUTPUT ACCEPT [249:74801]
      -A INPUT -p tcp -m tcp --dport 10000 -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 8042 -j ACCEPT
      -A INPUT -s 10.17.172.0/24 -p tcp -j ACCEPT
      -A INPUT -p tcp -m tcp --dport 1022 -j ACCEPT
      -A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
      -A INPUT -p icmp -j ACCEPT
      -A INPUT -i lo -j ACCEPT
      -A INPUT -p tcp -m state --state NEW -m tcp --dport 22 -j ACCEPT
      -A INPUT -j REJECT --reject-with icmp-host-prohibited
      -A FORWARD -j REJECT --reject-with icmp-host-prohibited
    COMMIT

     


    ========================================================================================================================
    卸载:
    umount cm_processes

    rpm -qa | grep cloudera
    rpm -e cloudera-manager-daemons-5.5.1-1.cm551.p0.8.el6.x86_64

    ps -ef | grep cmf | grep -v "grep" | awk '{print "kill -9 " $2}' | sh
    ps -ef | grep java | grep -v "grep" | awk '{print "kill -9 " $2}' | sh

    清理数据库数据

    rm -rf /tmp/.scm_prepare_node.lock
    rm -rf /tmp/scm_prepare_node
    rm -rf /etc/cloudera*
    rm -rf /usr/share/cmf /var/lib/cloudera* /var/cache/yum/x86_64/6/cloudera* /var/log/cloudera*
    rm -rf /var/run/hadoop* /var/run/flume-ng /var/run/cloudera* /var/run/oozie/ /var/run/sqoop2
    rm -rf /var/run/zookeeper /var/run/hbase /var/run/impala /var/run/hive /var/run/hdfs-sockets
    rm -rf /var/lib/flume-ng /var/lib/hadoop* /var/lib/hue /var/lib/oozie /var/lib/solr /var/lib/sqoop*
    rm -rf /dfs /mapred /yarn
    rm -rf /data/dfs
    rm -rf /usr/lib/hadoop /usr/lib/hadoop*
    rm -rf /usr/lib/hive /usr/lib/hbase /usr/lib/oozie
    rm -rf /usr/lib/sqoop* /usr/lib/zookeeper /usr/lib/bigtop*
    rm -rf /usr/lib/flume-ng /usr/lib/hcatalog
    rm -rf /usr/bin/hadoop* /usr/bin/zookeeper*
    rm -rf /usr/bin/hbase* /usr/bin/hive*
    rm -rf /usr/bin/hdfs /usr/bin/mapred /usr/bin/yarn /usr/bin/sqoop* /usr/bin/oozie
    rm -rf /var/cache/yum/cloudera* /var/lib/zookeeper
    rm -rf /opt/cm-5.5.1
    rm -rf /opt/cloudera

     

  • 相关阅读:
    UML图箭头关系
    使用 Python 编写 vim 插件
    linux grep命令
    gevent For the Working Python Developer
    坐标系旋转变换公式图解
    欲哭无泪的p-value = 0.051 | 做几次重复能得到较低的p-value
    RNA-seq要做几次生物学重复?找出来的100%都是真正的应答基因
    Strand Specific mRNA sequencing 之重要性与分析
    为什么二代测序的原始数据中会出现Read重复现象?
    DNA甲基化研究概述
  • 原文地址:https://www.cnblogs.com/ITtangtang/p/7856958.html
Copyright © 2011-2022 走看看