  • Linux backup: pruning files by date
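
The script below is the housekeeping job: it builds a list of month-end dates, uploads the newest local bloom-filter directory to HDFS, then prunes local and HDFS copies against a 7-day and a 30-day cutoff while keeping the month-end snapshots.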

    #!/usr/bin/env bash


    source /etc/profile


    echo " *************** start filter ***************  "

    # Old scalar version (kept commented out): get the last day of each of the previous six months

    #m0=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m0}

    #m1=$(date -d "$(date -d '0 month' +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m1}

    #m2=$(date -d "$(date -d last-month +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m2}

    #m3=$(date -d "$(date -d ${m2} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m3}

    #m4=$(date -d "$(date -d ${m3} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m4}

    #m5=$(date -d "$(date -d ${m4} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m5}

    #m6=$(date -d "$(date -d ${m5} +%Y%m01) -1 day" +%Y%m%d)

    #echo ${m6}


    # Get the last day of the current month; ${#m[*]} or ${#m[@]} gives the array length

    m[0]=$(date -d "$(date -d 'month' +%Y%m01) -1 day" +%Y%m%d)

    echo m0 : ${m[0]} ' month : ' ${#m[@]}

    for n in $(seq 0 11); do

        m[$n+1]=$(date -d "$(date -d ${m[$n]} +%Y%m01) -1 day" +%Y%m%d)

        echo m$((n+1)) : ${m[$n+1]} ' month : ' ${#m[*]}

    done
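
    # Note: each iteration feeds the previous month-end back into date(1).
    # Anchoring on the 01 of that month avoids the GNU date "-1 month"
    # pitfall (e.g. "20190331 -1 month" rolls over to 20190303).
    # A quick sanity check, assuming GNU date:
    #   date -d "$(date -d 20190331 +%Y%m01) -1 day" +%Y%m%d   # -> 20190228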


    echo " ****** time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "


    max_date=0

    # get the latest file and copy to hdfs

    cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

    for dir in $(ls -l ./ | awk '/^d/{print $NF}')

    do

       if [[ -d $dir && $dir == *\_* ]]; then

          f_d=$(echo $dir | cut -d \_ -f 3 | cut -d . -f 1)

          if [[ $max_date < $f_d ]]; then

            max_date=$f_d

            max_filter=$dir

          fi

       fi

    done
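
    # The < comparison above is a plain string test, which is safe here:
    # zero-padded YYYYMMDD strings sort lexicographically in date order.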

    echo " max date is : "$max_date

    echo " max filter is : "$max_filter

    pwd

    # Copy the most recent filter directory to HDFS if it is not there yet

    hadoop fs -test -e /data/datacenter/run_center_spark_stream/bloom_filters/$max_filter

    if [[ $? == 0 ]]; then

        echo " filter is already exist : "$max_filter

    else

        echo " start hdfs copy "

        echo " ****** start time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "

        hadoop fs -put $max_filter /data/datacenter/run_center_spark_stream/bloom_filters

        echo " ****** end time : " $(date '+%Y-%m-%d %H:%M:%S') " ****** "

    fi
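
    # Equivalent one-liner for the test-then-put above (hadoop fs -test -e
    # exits 0 when the path exists); $dst here is just shorthand for the
    # bloom_filters path, not a variable defined in this script:
    #   hadoop fs -test -e $dst/$max_filter || hadoop fs -put $max_filter $dst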


    remove_week=$(date -d "$max_date 7 days ago" +%Y%m%d)

    echo " 删除本地序列化文件的日期界限:"$remove_week

    remove_date=$(date -d "$max_date 30 days ago" +%Y%m%d)

    echo " 删除文件 和 Hadoop filter 的日期界限:"$remove_date


    echo " *************** start remove filter ***************  "

    for r_dir in $(ls -l ./ | awk '/^d/{print $NF}')

    do

       if [[ -d $r_dir && $r_dir == *\_* ]]; then

          r_d=$(echo $r_dir | cut -d \_ -f 3 | cut -d . -f 1)

          if [[ $r_d < $remove_date ]]; then

              if [[ ${m[*]} == *$r_d* ]]; then

                  cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir

                  pwd

                  for f_dir in *

                  do

                     if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then

                        echo " ------ keep mau_filter is: " $f_dir;

                     else

                        echo " remove file is: " $f_dir;

                        rm -r $f_dir

                     fi

                  done

                  cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

                  pwd

              else

                  echo " remove filter_dir is: "$r_dir

                  rm -r $r_dir

              fi

          elif [[ $r_d < $remove_week ]]; then

              if [[ $r_d == ${m[0]} || $r_d == ${m[1]} || $r_d == ${m[2]} ]]; then

                  cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter/$r_dir

                  pwd

                  for f_dir in *

                  do

                     if [[ "$f_dir" == "mau_device_all.FILTER.SER" ]]; then

                        echo " ------ week keep mau_filter is: " $f_dir;

                     else

                        if [[ "$f_dir" == *.FILTER.SER ]]; then

                            echo " - last day of month - week remove file is: " $f_dir;

                            rm -r $f_dir

                        fi

                     fi

                  done

                  cd /home/hadoop/streaming_run_center/tmp/checkpoint/filter

                  pwd

              else

                  echo "week remove filter is: "$r_dir

                  rm -r $r_dir/*.FILTER.SER

              fi

          fi

       fi

    done
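
    # Safety sketch (not part of the original script): while testing, the
    # rm -r calls above could be routed through a dry-run guard, e.g.
    #   DRY_RUN=1   # set to 0 to actually delete
    #   maybe_rm() { if [[ $DRY_RUN == 1 ]]; then echo "would remove: $*"; else rm -r "$@"; fi; }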


    echo " =============== start remove hdfs filter ===============  "

    # Remove bloom filters on HDFS that fall outside the retention dates

    for h_filter in $(hadoop fs -ls /data/datacenter/run_center_spark_stream/bloom_filters | awk '{print $8}')

    do

        if [[ $h_filter == *\_* ]]; then

            h_date=$(echo $h_filter | cut -d / -f 6 | cut -d \_ -f 3 | cut -d . -f 1)

    #        echo " hdfs date : "$h_date

    #        echo " hdfs filter : "$h_filter

            if [[ ${m[*]} == *$h_date* ]]; then

                echo " remain hdfs filter is : "$h_filter

            elif [[ $h_date < $remove_date ]]; then

                echo "not remain date is : "$h_date

                echo "remove hdfs filter is : "$h_filter

            hadoop fs -rm -r $h_filter

            fi

        fi

    done
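
    # hadoop fs -rm -r moves paths to the HDFS trash when trash is enabled;
    # pass -skipTrash to reclaim the space immediately.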


    echo " -------------- start tdid ---------------  "

    # Remove local tdid files older than the 30-day cutoff

    cd /home/hadoop/streaming_run_center/tmp/checkpoint/tdidinfo

    for tdid in *

    do

        if [[ $tdid == *\_* ]]; then

            t_d=$(echo $tdid | cut -d \_ -f 2 | cut -d . -f 1)

            if [[ $t_d == $max_date || $t_d > $max_date ]]; then

                echo " need copy date : "$t_d

                echo " need copy tdid : "$tdid

            # check whether the tdid already exists on HDFS

    #            hadoop fs -test -e jiaojiao/tdid/$tdid

    #            if [[ $? == 0 ]]; then

    #                echo " tdid is already exist,remove it first "

    #                hadoop fs -rm jiaojiao/tdid/$tdid

    #                hadoop fs -put $tdid jiaojiao/tdid

    #            else

    #                echo " start copy "

    #                hadoop fs -put $tdid jiaojiao/tdid

    #            fi

            elif [[ $t_d < $remove_date ]]; then

                echo " remove tdid : "$tdid

                rm $tdid

            fi

        fi

    done


    #echo " =============== start remove hdfs tdid ===============  "

    #for h_tdid in $(hadoop fs -ls jiaojiao/tdid | awk '{print $8}')

    #do

    #    if [[ $h_tdid == *\_* ]]; then

    #        h_date=$(echo $h_tdid | cut -d \_ -f 2 | cut -d . -f 1)

    #        echo $h_date

    #        echo $h_tdid

    #    fi

    #done
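
To run this as a nightly job, a cron entry along these lines would do; the script path and log file below are placeholders, not paths from the original post:

    # m h dom mon dow  command
    0 2 * * * /home/hadoop/scripts/clean_filter.sh >> /home/hadoop/logs/clean_filter.log 2>&1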

  • Original post: https://www.cnblogs.com/anitinaj/p/10025195.html