HDFS startup scripts
-
start-dfs.sh
# My Hadoop install lives at /opt/hadoop-2.7.7
# Location of Hadoop's sbin, e.g. /opt/hadoop-2.7.7/sbin ($HADOOP_HOME/sbin)
bin=`dirname "${BASH_SOURCE-$0}"`
# Normalize it to an absolute path
bin=`cd "$bin"; pwd`
# Directory holding the environment scripts, e.g. /opt/hadoop-2.7.7/sbin/../libexec
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
# /opt/hadoop-2.7.7/sbin/../libexec, unless HADOOP_LIBEXEC_DIR is already set
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Source hdfs-config.sh from /opt/hadoop-2.7.7/sbin/../libexec
. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
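The two expansions above do all the work here: `dirname "${BASH_SOURCE-$0}"` finds the directory the script lives in, and `${VAR:-default}` falls back to the default only when the variable is unset or empty. A minimal sketch you can run by hand (the paths are examples, not necessarily your install):

# Resolve the directory this snippet lives in, the same way start-dfs.sh does
bin=$(dirname "${BASH_SOURCE-$0}")   # relative or absolute, depending on how it was invoked
bin=$(cd "$bin"; pwd)                # normalize to an absolute path
echo "script dir: $bin"

# ${VAR:-default}: use the default only when HADOOP_LIBEXEC_DIR is unset or empty
unset HADOOP_LIBEXEC_DIR
echo "${HADOOP_LIBEXEC_DIR:-$bin/../libexec}"   # -> <script dir>/../libexec
HADOOP_LIBEXEC_DIR=/custom/libexec
echo "${HADOOP_LIBEXEC_DIR:-$bin/../libexec}"   # -> /custom/libexec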
-
hdfs-config.sh
# $0 resolves to /opt/hadoop-2.7.7/sbin/start-dfs.sh
bin=`which "$0"`
# /opt/hadoop-2.7.7/sbin
bin=`dirname "${bin}"`
# cd into /opt/hadoop-2.7.7/sbin and normalize
bin=`cd "$bin"; pwd`
# /opt/hadoop-2.7.7/sbin/../libexec
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
# /opt/hadoop-2.7.7/sbin/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Check whether the file (or directory) exists
if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
  # Source hadoop-config.sh (the remaining branches keep looking for it)
  . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
  . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
  . "$HADOOP_HOME"/libexec/hadoop-config.sh
else
  # hadoop-config.sh was not found anywhere: abort the startup
  echo "Hadoop common not found."
  exit
fi
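The if/elif chain is just a lookup order: libexec next to the calling script first, then $HADOOP_COMMON_HOME, then $HADOOP_HOME. A condensed sketch of the same idea, assuming the same variables, that only reports which file would be sourced:

for candidate in \
    "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" \
    "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" \
    "${HADOOP_HOME}/libexec/hadoop-config.sh"; do
  if [ -e "$candidate" ]; then
    echo "would source: $candidate"   # hdfs-config.sh sources it here instead of echoing
    exit 0
  fi
done
echo "Hadoop common not found." >&2   # same failure message as the real script
exit 1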
-
hadoop-config.sh
# This file resolves to /opt/hadoop-2.7.7/sbin/../libexec/hadoop-config.sh
this="${BASH_SOURCE-$0}"
# /opt/hadoop-2.7.7/libexec
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
# Script name: hadoop-config.sh
script="$(basename -- "$this")"
# Absolute path of the script: /opt/hadoop-2.7.7/libexec/hadoop-config.sh
this="$common_bin/$script"
# If hadoop-layout.sh exists as a regular file, source it (not present in my install; to revisit later)
[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"

# The rest of the script (not pasted here) roughly does the following:
# 1.  Set the paths of various Hadoop libraries
# 2.  Determine the Hadoop install root, HADOOP_PREFIX
# 3.  Validate arguments
# 4.  Set the log level (INFO by default)
# 5.  Allow an alternative conf directory
# 6.  Allow the host to be passed as an argument
# 7.  Allow the hostname to be passed as an argument
# 8.  Detect whether this is a Cygwin environment
# 9.  Make sure the JVM is not IPv6-only
# 10. Try to set JAVA_HOME if it is not set already
# 11. Assemble the classpath and the conf directories
# In short, this script pins down the runtime environment.
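Since hadoop-config.sh is what pins down JAVA_HOME, HADOOP_CONF_DIR and the classpath, a quick sanity check on a node is to echo those variables and ask Hadoop for its assembled classpath (`hadoop classpath` exists in 2.x; the variables may show up empty in an interactive shell unless they are exported, e.g. via hadoop-env.sh or your profile):

echo "JAVA_HOME       = ${JAVA_HOME:-<unset>}"
echo "HADOOP_PREFIX   = ${HADOOP_PREFIX:-<unset>}"
echo "HADOOP_CONF_DIR = ${HADOOP_CONF_DIR:-<unset>}"
hadoop classpath | tr ':' '\n' | head   # the classpath hadoop-config.sh assembles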
# Back to start-dfs.sh.
# Get arguments: besides a plain start, start-dfs.sh also accepts -upgrade and -rollback.
if [[ $# -ge 1 ]]; then
  startOpt="$1"
  shift
  case "$startOpt" in
    -upgrade)
      nameStartOpt="$startOpt"
    ;;
    -rollback)
      dataStartOpt="$startOpt"
    ;;
    *)
      echo $usage
      exit 1
    ;;
  esac
fi

# Add other possible options. For a plain start, nameStartOpt stays empty (no extra arguments).
nameStartOpt="$nameStartOpt $@"

# Start the NameNodes
#---------------------------------------------------------
# namenodes

NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)

echo "Starting namenodes on [$NAMENODES]"

"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
  --config "$HADOOP_CONF_DIR" \
  --hostnames "$NAMENODES" \
  --script "$bin/hdfs" start namenode $nameStartOpt

# Start the DataNodes
#---------------------------------------------------------
# datanodes (using default slaves file)

if [ -n "$HADOOP_SECURE_DN_USER" ]; then
  echo \
    "Attempting to start secure cluster, skipping datanodes. " \
    "Run start-secure-dns.sh as root to complete startup."
else
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --script "$bin/hdfs" start datanode $dataStartOpt
fi

# Start the secondary NameNodes
#---------------------------------------------------------
# secondary namenodes (if any)

SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)

if [ -n "$SECONDARY_NAMENODES" ]; then
  echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"

  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$SECONDARY_NAMENODES" \
    --script "$bin/hdfs" start secondarynamenode
fi

# Start the JournalNodes (used by HA clusters to share the NameNode edit log)
#---------------------------------------------------------
# quorumjournal nodes (if any)

SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)

case "$SHARED_EDITS_DIR" in
qjournal://*)
  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
  echo "Starting journal nodes [$JOURNAL_NODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$JOURNAL_NODES" \
    --script "$bin/hdfs" start journalnode
;;
esac

# Start the ZKFCs (used for automatic failover in HA clusters)
#---------------------------------------------------------
# ZK Failover controllers, if auto-HA is enabled

AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
  echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
    --config "$HADOOP_CONF_DIR" \
    --hostnames "$NAMENODES" \
    --script "$bin/hdfs" start zkfc
fi
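Everything start-dfs.sh needs to know about the cluster topology comes from `hdfs getconf`. These are real queries you can run yourself; the answers below are from a hypothetical HA cluster and will differ on yours:

$ hdfs getconf -namenodes
nn1 nn2
$ hdfs getconf -secondarynamenodes 2>/dev/null
$ hdfs getconf -confKey dfs.namenode.shared.edits.dir
qjournal://jn1:8485;jn2:8485;jn3:8485/mycluster
$ hdfs getconf -confKey dfs.ha.automatic-failover.enabled
true

The sed pipeline in the script then strips the qjournal:// prefix, the port numbers and the trailing path from that URI, leaving a plain host list such as "jn1 jn2 jn3" to pass to --hostnames.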
Notice that every daemon is ultimately started through hadoop-daemons.sh, which receives three kinds of arguments: --config (the path to the configuration directory), --hostnames (which hosts to start on), and --script (the script that actually launches the process). Next, let's look at the hadoop-daemons.sh and hdfs scripts.
-
-
hadoop-daemons.sh
usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."

# if no args specified, show usage
if [ $# -le 1 ]; then
  echo $usage
  exit 1
fi

bin=`dirname "${BASH_SOURCE-$0}"`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Source hadoop-config.sh again to refresh the environment
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

# Run slaves.sh, which in turn runs hadoop-daemon.sh (note: daemon, singular) on each host
exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
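The escaped `\;` is the subtle part: it keeps the local shell from splitting the command at the semicolon, so the whole `cd ... ; hadoop-daemon.sh ...` string reaches the remote shell on each host. A minimal, self-contained sketch of the same pattern (host `worker1` and the paths are hypothetical):

# Locally this is ONE command: ssh receives "cd", "/opt/hadoop-2.7.7", ";", "sbin/..." as
# separate arguments, joins them with spaces, and the REMOTE shell interprets the ";".
ssh worker1 cd /opt/hadoop-2.7.7 \; sbin/hadoop-daemon.sh --config etc/hadoop start datanode

# Without the backslash, the LOCAL shell would split at ";" and try to run
# hadoop-daemon.sh on this machine instead of on worker1.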
-
slaves.sh
# Only the key part is excerpted: iterate over the slave nodes and ssh in to run hadoop-daemon.sh.
# Where to start the script, see hadoop-config.sh
# (it set up the variables based on command line options)
if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
  SLAVE_NAMES=$HADOOP_SLAVE_NAMES
else
  SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
  SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed 's/#.*$//;/^$/d')
fi

# start the daemons
for slave in $SLAVE_NAMES ; do
  ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }"
done
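The `sed 's/#.*$//;/^$/d'` bit simply strips comments and blank lines from the slaves file. A throwaway example (file contents made up) shows what SLAVE_NAMES ends up holding:

cat > /tmp/slaves.example <<'EOF'
# HDFS worker hosts
worker1

worker2
EOF
sed 's/#.*$//;/^$/d' /tmp/slaves.example
# prints:
# worker1
# worker2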
-
hadoop-daemon.sh
# Part 1: work out which daemon to operate on; when starting a NameNode, command ends up as "namenode"
hadoopScript="$HADOOP_PREFIX"/bin/hadoop
if [ "--script" = "$1" ]
  then
    shift
    hadoopScript=$1
    shift
fi
startStop=$1
shift
command=$1
shift

# The middle part (omitted) sets up the log path, the pid-file path, and so on.

# Part 3
case $startStop in

  # start the process
  (start)

    # make sure the pid-file directory is writable
    [ -w "$HADOOP_PID_DIR" ] || mkdir -p "$HADOOP_PID_DIR"

    if [ -f $pid ]; then
      if kill -0 `cat $pid` > /dev/null 2>&1; then
        echo $command running as process `cat $pid`. Stop it first.
        exit 1
      fi
    fi

    if [ "$HADOOP_MASTER" != "" ]; then
      echo rsync from $HADOOP_MASTER
      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
    fi

    hadoop_rotate_log $log
    echo starting $command, logging to $log
    cd "$HADOOP_PREFIX"
    case $command in
      namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
        if [ -z "$HADOOP_HDFS_HOME" ]; then
          hdfsScript="$HADOOP_PREFIX"/bin/hdfs
        else
          hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
        fi
        #### Lower the scheduling priority with nice and run the hdfs command.
        #### When starting a NameNode, $command = namenode and $@ is empty,
        #### so this is effectively running "hdfs namenode".
        nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
      ;;
      (*)
        nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
      ;;
    esac
    echo $! > $pid
    sleep 1
    head "$log"
    # capture the ulimit output
    if [ "true" = "$starting_secure_dn" ]; then
      echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
      # capture the ulimit info for the appropriate user
      su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
    elif [ "true" = "$starting_privileged_nfs" ]; then
      echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
      su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
    else
      echo "ulimit -a for user $USER" >> $log
      ulimit -a >> $log 2>&1
    fi
    sleep 3;
    if ! ps -p $! > /dev/null ; then
      exit 1
    fi
    ;;

  # stop the process
  (stop)

    if [ -f $pid ]; then
      TARGET_PID=`cat $pid`
      if kill -0 $TARGET_PID > /dev/null 2>&1; then
        echo stopping $command
        kill $TARGET_PID
        sleep $HADOOP_STOP_TIMEOUT
        if kill -0 $TARGET_PID > /dev/null 2>&1; then
          echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
          kill -9 $TARGET_PID
        fi
      else
        echo no $command to stop
      fi
      rm -f $pid
    else
      echo no $command to stop
    fi
    ;;

  (*)
    echo $usage
    exit 1
    ;;

esac
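Stripped of the Hadoop specifics, the start and stop branches are the classic pid-file pattern: `kill -0` probes whether a pid is still alive, `echo $! > $pid` records the background child, and stop escalates from `kill` to `kill -9` after a timeout. A generic sketch of that pattern (not Hadoop's exact code; `sleep 300` stands in for the real daemon):

pidfile=/tmp/mydaemon.pid

# start: refuse to start twice, otherwise launch and record the pid
if [ -f "$pidfile" ] && kill -0 "$(cat "$pidfile")" 2>/dev/null; then
  echo "already running as pid $(cat "$pidfile")"
else
  nohup sleep 300 >/dev/null 2>&1 &
  echo $! > "$pidfile"
fi

# stop: ask politely, wait, then force, then clean up the pid file
if [ -f "$pidfile" ] && kill -0 "$(cat "$pidfile")" 2>/dev/null; then
  kill "$(cat "$pidfile")"
  sleep 5
  kill -0 "$(cat "$pidfile")" 2>/dev/null && kill -9 "$(cat "$pidfile")"
fi
rm -f "$pidfile"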
-
-
hdfs
# Excerpt: determine the main Java class for each subcommand
if [ "$COMMAND" = "namenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
elif [ "$COMMAND" = "zkfc" ] ; then
  CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
elif [ "$COMMAND" = "secondarynamenode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
elif [ "$COMMAND" = "datanode" ] ; then
  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
  if [ "$starting_secure_dn" = "true" ]; then
    HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
  else
    HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
  fi
elif [ "$COMMAND" = "journalnode" ] ; then
  CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
# branches for the remaining subcommands are omitted from this excerpt
fi

# run it -- the entry point that launches the Java class
exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"

#### In summary:
# Class run when starting a namenode:
#   org.apache.hadoop.hdfs.server.namenode.NameNode
# Class run when starting a datanode:
#   org.apache.hadoop.hdfs.server.datanode.DataNode
# Class run when starting a secondarynamenode:
#   org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
# Class used for "hdfs version":
#   org.apache.hadoop.util.VersionInfo
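For COMMAND=namenode the exec line therefore boils down to a plain java invocation roughly like the one below. This is illustrative only: -Xmx1000m is the 2.x default heap from hadoop-config.sh unless overridden, and $HADOOP_OPTS already contains $HADOOP_NAMENODE_OPTS at this point. You can confirm which daemons are up with the JDK's `jps` tool:

# roughly what "hdfs namenode" execs (exact options depend on hadoop-env.sh)
$JAVA_HOME/bin/java -Dproc_namenode -Xmx1000m $HADOOP_OPTS \
    org.apache.hadoop.hdfs.server.namenode.NameNode

# list the running Hadoop JVMs on this node (NameNode, DataNode, SecondaryNameNode, ...)
jps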
Summary
HDFS startup begins with start-dfs.sh, which first sources the environment-setup scripts hdfs-config.sh and hadoop-config.sh, and then calls hadoop-daemons.sh to do the actual starting. hadoop-daemons.sh invokes slaves.sh, which ssh-es into each node and runs hadoop-daemon.sh there. hadoop-daemon.sh takes care of the process-level details (pid file, log file), then calls the hdfs script, which picks the concrete operation and its Java main class and finally execs it. The next step is to look at what the NameNode class itself does.