zoukankan      html  css  js  c++  java
  • Linux记录-shell获取hdfs used使用

    #!/bin/bash
    # Environment for the Java and Hadoop command-line tools invoked by this
    # script. The "xxx" values look like placeholders -- TODO confirm and replace
    # them with the real installation paths before running.
    
    # Used further down to locate jps ($JAVA_HOME/bin/jps).
    export JAVA_HOME=xxx
    # Used further down to locate the hdfs launcher ($HADOOP_HOME/bin/hdfs).
    export HADOOP_HOME=xxx
    # Not referenced directly by this script; presumably read by the hdfs
    # launcher to find the cluster configuration -- verify.
    export HADOOP_CONF_DIR=xxx
    
    # Work inside the script's data directory. Abort if it cannot be entered:
    # everything below moves and deletes files by relative name, so continuing
    # in some other directory could remove unrelated files.
    cd /home/hdfs/xxx || { echo "cannot cd to /home/hdfs/xxx" >&2; exit 1; }

    # Archive the previous run's host lists as host/<name>_<YYYY-MM-DD>.
    # The date is taken once so both archives carry the same stamp even when the
    # script runs across midnight, and the archive directory is created if needed.
    today=$(date +"%Y-%m-%d")
    mkdir -p host
    for previous in host.txt hostdfs.txt; do
      if [[ -e "$previous" ]]; then
        mv -- "$previous" "host/${previous}_${today}"
      fi
    done

    # Remove the intermediate files of the previous run; later steps append to
    # several of them, so stale content would be mixed into the new results.
    rm -f -- ip.txt allhostname.txt hostname.txt iphostname.txt dfused.txt \
      minhost.txt maxhost.txt host.txt nohup.out dfsreport.txt
    
    # Print the IP address of every datanode entry in the dfsadmin report $1.
    # Entries are the lines of the form "Name: <ip>:<port> (<hostname>)".
    # Matching on the "Name:" prefix instead of the port number keeps byte
    # counters that happen to contain the port digits out of the list and works
    # for any datanode port.
    report_datanode_ips() {
      awk -F'[: ]+' '/^Name:/ { print $2 }' "$1"
    }

    # Print the value of every "Hostname: <name>" line in report $1.
    report_datanode_hostnames() {
      awk -F': ' 'tolower($0) ~ /^hostname:/ { print $2 }' "$1"
    }

    # Print the per-datanode "DFS Used%" numbers (without the % sign) from
    # report $1. The first "DFS Used%" line is dropped, as the original script
    # did (it is expected to be the cluster-wide summary).
    report_node_used_percent() {
      awk -F': ' '/DFS Used%/ { if (seen++) { sub(/%.*/, "", $2); print $2 } }' "$1"
    }

    # Print N from the first "Live datanodes (N):" line of report $1
    # (prints nothing when there is no such line).
    report_live_count() {
      sed -n 's/^Live datanodes (\([0-9][0-9]*\)).*/\1/p' "$1" | head -n 1
    }

    echo "start checking hdfs used rate"
    if ! "$HADOOP_HOME/bin/hdfs" dfsadmin -report > dfsreport.txt; then
      echo "warning: hdfs dfsadmin -report failed; the report may be incomplete" >&2
    fi

    livesum=$(report_live_count dfsreport.txt)
    echo "$livesum"
    # A missing or malformed count is treated as "no live datanodes".
    [[ "$livesum" =~ ^[0-9]+$ ]] || livesum=0

    report_datanode_hostnames dfsreport.txt > allhostname.txt
    # Keep only the first $livesum entries: the live datanodes are listed first,
    # anything after them (e.g. dead nodes) is ignored.
    report_datanode_ips dfsreport.txt | head -n "$livesum" > ip.txt
    report_node_used_percent dfsreport.txt | head -n "$livesum" > dfused.txt

    linesum=$(wc -l < ip.txt)
    echo "$linesum"
    # harr[i] is the IP and darr[i] the DFS Used% of the i-th live datanode.
    mapfile -t harr < <(awk 'NF { print $1 }' ip.txt)
    mapfile -t darr < <(awk 'NF { print $1 }' dfused.txt)
    # Print one "<ip> : <used%>" line for each index present in both harr (IPs)
    # and darr (DFS Used% values). A single pass over the shorter array replaces
    # the former nested loop that compared every index pair.
    emit_host_usage_pairs() {
      local total=${#harr[@]} idx
      if (( ${#darr[@]} < total )); then
        total=${#darr[@]}
      fi
      for (( idx = 0; idx < total; idx++ )); do
        printf '%s : %s\n' "${harr[idx]}" "${darr[idx]}"
      done
    }

    # linesum is the number of lines in ip.txt; an empty or non-numeric value is
    # treated like zero instead of producing a test(1) syntax error.
    if [[ "${linesum:-0}" =~ ^[0-9]+$ ]] && (( linesum > 0 )); then
      emit_host_usage_pairs >> hostdfs.txt
    else
      echo "Not Live Datanodes"
    fi
    # Print hosts chosen by DFS usage from a "<host> : <used%>" file.
    #   $1 - input file (host in field 1, usage in field 3)
    #   $2 - "max" for the highest usage values, "min" for the lowest
    #   $3 - how many distinct usage values to keep
    # Output is one host per line, ordered from highest to lowest usage.
    # As before, only the first host listed for each distinct usage value is
    # reported. NOTE(review): hosts sharing an identical value with an earlier
    # host are therefore skipped -- confirm this is intended.
    # Usage values are compared numerically (a plain string sort ranks 9.50
    # above 100.00) and matched exactly on the usage column, never against the
    # host column.
    select_hosts_by_usage() {
      local file=$1 end=$2 limit=$3
      local picker=head
      if [[ "$end" == min ]]; then
        picker=tail
      fi
      awk 'NF >= 3 && !seen[$3]++ { print $3, $1 }' "$file" \
        | LC_ALL=C sort -k1,1nr \
        | "$picker" -n "$limit" \
        | awk '{ print $2 }'
    }

    if [[ -r hostdfs.txt ]]; then
      select_hosts_by_usage hostdfs.txt min 60 >> minhost.txt
      select_hosts_by_usage hostdfs.txt max 100 >> maxhost.txt
    fi

    # host.txt lists the high-usage hosts first, then the low-usage hosts.
    for list in maxhost.txt minhost.txt; do
      if [[ -r "$list" ]]; then
        awk '{ print $1 }' "$list"
      fi
    done >> host.txt
    
    # Print one "<ip> : <hostname>" line for each index present in both harr
    # (IPs) and narr (hostnames). A single pass over the shorter array replaces
    # the former nested loop that compared every index pair.
    emit_ip_hostname_pairs() {
      local total=${#harr[@]} idx
      if (( ${#narr[@]} < total )); then
        total=${#narr[@]}
      fi
      for (( idx = 0; idx < total; idx++ )); do
        printf '%s : %s\n' "${harr[idx]}" "${narr[idx]}"
      done
    }

    # narr[i] is the first field of the i-th non-empty line of allhostname.txt.
    narr=()
    if [[ -r allhostname.txt ]]; then
      mapfile -t narr < <(awk 'NF { print $1 }' allhostname.txt)
    fi
    if (( ${#harr[@]} > 0 && ${#narr[@]} > 0 )); then
      emit_ip_hostname_pairs >> iphostname.txt
    fi
    
    # Translate hosts to hostnames.
    #   $1 - map file with lines "<ip> : <hostname>" (ip in field 1, name in field 3)
    #   $2 - file whose first field on each line is an ip to look up
    # Prints, in the order of $2, the hostname of every ip found in the map; ips
    # without an entry are skipped and the first map entry wins on duplicates.
    # The ip is compared exactly against the map's first column, so 10.0.0.1 no
    # longer also matches 10.0.0.10 the way an unanchored grep did.
    resolve_hostnames() {
      local map_file=$1 hosts_file=$2
      awk '
        FILENAME == ARGV[1] { if (NF >= 3 && !($1 in name)) name[$1] = $3; next }
        NF && ($1 in name)  { print name[$1] }
      ' "$map_file" "$hosts_file"
    }

    if [[ -r iphostname.txt && -r host.txt ]]; then
      resolve_hostnames iphostname.txt host.txt >> hostname.txt
    fi
    
    # Print the largest value in file $1 (one number per line), truncated to an
    # integer; prints 0 for a missing or empty file. Values are compared
    # numerically -- a string sort would rank 9.50 above 100.00.
    compute_max_rate() {
      if [[ ! -r "$1" ]]; then
        echo 0
        return
      fi
      awk 'NF { v = $1 + 0; if (!n++ || v > max) max = v } END { print int(max) }' "$1"
    }

    # Print the mean of the values in file $1, truncated to an integer; prints 0
    # for a missing or empty file instead of dividing by zero.
    compute_avg_rate() {
      if [[ ! -r "$1" ]]; then
        echo 0
        return
      fi
      awk 'NF { sum += $1; n++ } END { print (n ? int(sum / n) : 0) }' "$1"
    }

    max_rate=$(compute_max_rate dfused.txt)
    avg_used_rate=$(compute_avg_rate dfused.txt)
    max_avg_diff=$(( max_rate - avg_used_rate ))

    # Start a balancer run only when the fullest datanode is more than 5
    # percentage points above the average usage.
    if (( max_avg_diff > 5 )); then
      # Look the balancer up with the same jps binary for both the check and the
      # kill; the check previously relied on a bare "jps" being on PATH.
      balancer_pid=$("$JAVA_HOME/bin/jps" | awk 'tolower($0) ~ /balancer/ { print $1; exit }')
      if [[ -n "$balancer_pid" ]]; then
        # NOTE(review): SIGKILL is kept from the original script; consider
        # sending SIGTERM first if the balancer should shut down cleanly.
        kill -9 "$balancer_pid"
      fi
      # Move the existing balancer id file aside under a timestamped name
      # before launching the new run.
      "$HADOOP_HOME/bin/hdfs" dfs -mv /system/balancer.id \
        "/system/balancer.id_$(date +"%Y-%m-%d-%H-%M")"
      nohup "$HADOOP_HOME/bin/hdfs" balancer -policy datanode -threshold 5 \
        -include -f host.txt > /home/hdfs/balancer/rebalancer.log 2>&1 &
    else
      echo "Nothing to do"
    fi

    $ nohup hdfs balancer \
        -Ddfs.datanode.balance.max.concurrent.moves=10 \
        -Ddfs.balancer.dispatcherThreads=1024 \
        -Ddfs.datanode.balance.bandwidthPerSec=1073741824


    #此配置用于限制允许Datanode平衡群集的最大并发块移动数
    dfs.datanode.balance.max.concurrent.moves, default is 5
    #带宽
    dfs.datanode.balance.bandwidthPerSec, default is 1048576 (=1MB/s)
    hdfs dfsadmin -setBalancerBandwidth <bandwidth in bytes per second>
    #mover线程数
    dfs.balancer.moverThreads, default is 1000
    #datanode传输的最大线程数
    dfs.datanode.max.transfer.threads
    修改dfs.datanode.max.transfer.threads=4096 (如果运行HBase的话建议为16384),
    指定用于在DataNode间传输block数据的最大线程数,老版本的对应参数为dfs.datanode.max.xcievers。
    #平衡策略,默认为datanode
    [-policy <policy>]
    blockpool: Cluster is balanced if each pool in each node is balanced.
    datanode: Cluster is balanced if each datanode is balanced.
    #阈值
    [-threshold <threshold>] [1.0, 100.0]
    #包含列表
    [-include [-f <hosts-file> | <comma-separated list of hosts>]]
    #排除列表
    [-exclude [-f <hosts-file> | <comma-separated list of hosts>]]
    #最大移动数据大小
    dfs.balancer.max-size-to-move, default is 10737418240 (=10GB)

  • 相关阅读:
    .NET 去除一段文本中的HTML标记
    C#实现控件拖动窗口
    使用window.showModalDialog弹出窗口返回值(兼容IE、FF、chrome)
    IE浏览器报错出现stack overflow at line 0的解决办法
    用Python作GIS:原料篇
    winform 自定义控件:半透明Loading控件
    WPF 跟随鼠标动画 by wgscd
    C# 多线程 HTTP request
    VS2015 安装XAN
    C# 用QQ企业邮箱发邮件
  • 原文地址:https://www.cnblogs.com/xinfang520/p/10316526.html
Copyright © 2011-2022 走看看