zoukankan      html  css  js  c++  java
  • hadoop 数据抽取

    #!/bin/bash
    #
    # Determine the data window [start_time, end_time) to process.
    #
    # Usage: pass the window as $1 (start) and $2 (end), both formatted as
    # YYYYmmddHHMM. When $2 is missing or empty, the window defaults to the
    # quarter-hour slot that contains "15 minutes ago", ending at the
    # quarter-hour boundary at or before the current time.
    #
    # Sets: start_time, end_time (12-digit YYYYmmddHHMM strings).

    # Round a YYYYmmddHHMM timestamp down to the previous quarter-hour boundary.
    # Arguments: $1 - 12-digit timestamp (YYYYmmddHHMM)
    # Outputs:   the rounded timestamp on stdout
    floor_to_quarter_hour() {
        local stamp=$1
        # Force base 10 so minutes such as "08" or "09" are not parsed as octal.
        local minute=$(( 10#${stamp:10:2} / 15 * 15 ))
        # %02d keeps the two-digit form for minute 0 ("00").
        printf '%s%02d\n' "${stamp:0:10}" "$minute"
    }

    if [ -n "$2" ]; then
        start_time=$1
        end_time=$2
    else
        # Take the clock once so both ends of the window derive from the same
        # instant; two independent `date` calls could straddle a boundary.
        now_epoch=$(date +%s)
        # Start of window: 15 minutes ago, rounded down.
        start_time=$(floor_to_quarter_hour "$(date -d "@$(( now_epoch - 900 ))" +%Y%m%d%H%M)")
        # End of window: now, rounded down.
        end_time=$(floor_to_quarter_hour "$(date -d "@${now_epoch}" +%Y%m%d%H%M)")
    fi
    echo "数据时间为: $start_time--$end_time"
    # Stage the AVL data files belonging to [start_time, end_time), upload them
    # to HDFS and start the Oozie job for that window.
    # Reads: start_time, end_time (YYYYmmddHHMM), set earlier in the script.

    # Create the HDFS target directory for this window.
    hadoop fs -mkdir "cache/O_RE_ST_XDR_PS_GN_HTTP/$start_time"

    # Source directory is named YYYY-mm-dd after the window start.
    src_dir="/opt7/ftp/PS_Gn_HTTP_Event/${start_time:0:4}-${start_time:4:2}-${start_time:6:2}"
    stage_dir=/opt8/gz_data_temp/http

    # Copy the data files of the window into the staging directory.
    # Candidates are the .CHK marker files whose name contains the start
    # date and hour (YYYYmmddHH); a window spanning several hours therefore
    # only picks up files from its first hour.
    for file in "$src_dir"/*"${start_time:0:10}"*.CHK
    do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$file" ] || continue
        # Example file names:
        #   351_TM07_PSGnHTTPEvent201410021235_3720202.CHK
        #   351_TM07_PSGnHTTPEvent201410021235_3720202.AVL
        basefile=${file##*/}
        # The 12-digit timestamp sits at a fixed offset in the file name.
        file_time=${basefile:22:12}
        # Ignore names that do not carry a timestamp at that offset.
        [[ $file_time =~ ^[0-9]{12}$ ]] || continue
        if [ "$file_time" -ge "$start_time" ] && [ "$file_time" -lt "$end_time" ]; then
            # The data file has the same name with an .AVL suffix.
            avlfile=${file/%CHK/AVL}
            echo "$avlfile"
            cp -- "$avlfile" "$stage_dir"/
            # Disabled alternative: upload each file directly.
            #hadoop fs -put $avlfile cache/O_RE_ST_XDR_PS_GN_HTTP/${start_time}/
        fi
    done
    # Disabled alternative: merge the staged files and split into 512 MB parts.
    #cat /opt8/gz_data_temp/http/*_*.AVL>/opt8/gz_data_temp/http/$start_time.AVL
    #split -b 512m /opt8/gz_data_temp/http/$start_time.AVL /opt8/gz_data_temp/http/$start_time.AVL.
    #echo `ls /opt8/gz_data_temp/http/$start_time.AVL`
    #rm -f /opt8/gz_data_temp/http/*.AVL

    # Upload everything staged. Run the command directly: wrapping it in a
    # command substitution would execute whatever it prints to stdout.
    if ! hadoop fs -put "$stage_dir"/*.AVL "cache/O_RE_ST_XDR_PS_GN_HTTP/${start_time}/"; then
        echo "hadoop fs -put failed for window ${start_time}" >&2
    fi
    # Always empty the staging directory so leftovers are not uploaded into
    # the next window's directory.
    rm -f "$stage_dir"/*.AVL

    echo "oozie--job-http-start!"
    # The classpath below is relative, so a failed cd must stop the script.
    cd /home/boco/program || exit 1
    # Quote the classpath so the JVM, not the shell, expands the wildcard.
    # The script exits with the status of this command.
    java -cp '.:../oozie/libserver/*' com.boco.BSSystem.schedule.OozieRunner job_ods_a_xdr_ps_gn_http "${start_time}"
  • 相关阅读:
    01.html5+phonegap跨平台移动应用开发
    10个CSS简写/优化技巧
    JS高级学习历程-17
    JS高级学习历程-16
    算法详解之Tarjan
    分层图详解
    洛谷 题解 P1196 【[NOI2002]银河英雄传说】
    洛谷 题解 P1220 【关路灯 】
    洛谷 题解 P1352 【没有上司的舞会】
    二维前缀和详解
  • 原文地址:https://www.cnblogs.com/jack-Star/p/4221584.html
Copyright © 2011-2022 走看看