zoukankan      html  css  js  c++  java
  • python 写hive循环脚本

    需要一个月的数据,每次只跑一周的数据;先建表,再用插入数据(insert into)的方法逐周写入:

     1 import os, sys
     2 reload(sys); sys.setdefaultencoding( "utf-8" )
     3 flag_server = int(os.popen('ifconfig | grep "inet addr:172" | wc -l').read().strip())
     4 dir_scripts = '/app/home/zhangb/' if flag_server else '/Users/zhangb/Desktop/'
     5 dir_data = '/app/home/zhangbo/' if flag_server else '/Users/zhangb/Desktop/'
     6 dir_server = '/app/home/'
     7 sys.path.append(dir_scripts)
     8 
     9 import datetime
    10 import time
    11 db_name = 'zhangb'
    12 
    13 def hive_day_cid(create_date,type11_duration):
    14         #原始表geohash表关联,找到cid
    15         # ------------------   建立 geohash 表   -------------------- #
    16         print "# ---------------------------------------------------------------------------------- #"
    17         
    18         start_date_str = (create_date - datetime.timedelta(days=type11_duration-1)).strftime("%Y%m%d")
    19         end_date_str   = create_date.strftime("%Y%m%d")
    20         
    21         hive_command = ( '''
    22 hive -e " use %s;
    23     create table if not exists hive_day_cid_provider(provider string,day int, cnt_cid bigint,dist_cid bigint );
    24 
    25     insert into hive_day_cid_provider
    26     select a.provider,a.day,count(a.cid) as cnt_cid,count(distinct(a.cid)) as dist_cid from 
    27     (select day,provider,token_md5 as cid  from  report_ods_mdp.upload_bi_type11
    28     where day >=%s and day <= %s  and length(token_md5)>0 and provider in ('gps','network','passive','none') ) a 
    29     group by a.provider,a.day
    30     
    31 ;"
    32     ''' % ( db_name,start_date_str, end_date_str) )
    33 
    34         print hive_command
    35         if flag_server:
    36             os.system(hive_command)
    37         print "
    "
    38         
    39         
    40 
    41 
    42 if __name__ == '__main__':
    43     
    44     start = time.time()
    45     business_name = 'brand48'
    46     # ----------------------------------------
    47     #7号是这个周期中的最后一天,是周期结束日期
    48     for (i,j) in [ (11,7),(11,14),(11,28),(12,5),(12,12),(12,19),(12,26)]:      
    49     # for (i, j) in [(2, 7), (2, 14), (2, 21), (2, 28)]:
    50         create_date = datetime.date(2016,i,j)
    51         type11_duration=7
    52         hive_day_cid(create_date,type11_duration)
    53         print "
    "
    54         print '# Time: ', str(datetime.timedelta(seconds=(time.time() - start)))
    55         print '# the end'
    56         print '
    '
    57     
    58     #hive_imei_time_list(create_date,type11_duration)
    59     print '# Time: ', str(datetime.timedelta(seconds=(time.time() - start)))    
    60         
    61      # Beintoo_day.hive_output(create_date, cnt_duration=7)
    62 '''    
    63 for i in range(1,30):
    64     a=datetime.date(2016, 2, 23)
    65     b=a+ datetime.timedelta(7*i)
    66     print b
    67 '''
    68 #===跨年的时候处理方法1
    69 '''
    70 date_begin = datetime.date(2016,12,1)
    71     # date_end = date_begin
    72     date_end = datetime.date(2017,1,10)
    73     for i in range(0,(date_end - date_begin).days+1,7):
    74         create_date = date_begin + datetime.timedelta(days=i)
    75 
    76         print create_date
    77 
    78 #方法2
    79 date_begin = datetime.date(2016,12,1)
    80     # date_end = date_begin
    81     date_end = datetime.date(2017,1,10)
    82 
    83  while date_begin <= date_end:
    84         print date_begin
    85         date_begin = date_begin + datetime.timedelta(days=7)
    86 '''
  • 相关阅读:
    CSS常见兼容性问题
    Ubuntu系统下创建python数据挖掘虚拟环境
    Django 模板中引用静态资源(js,css等)
    Django auth 登陆后页面跳转至/account/profile,修改跳转至其他页面
    Ubuntu14.04安装配置SVN及Trac
    禁止Chrome浏览器缓存的方法
    windows下安装配置Xampp
    Linux系统下用C语言获取MAC地址
    使用axios+formdata+vue上传图片遇到后台接受不到图片的值的问题
    使用vee-validate表单插件是如何设置中文提示?
  • 原文地址:https://www.cnblogs.com/zhangbojiangfeng/p/6382555.html
Copyright © 2011-2022 走看看