# 需要一个月的数据,每次只跑一周的数据;建表后用插入数据(insert into)的方式逐周写入:
import datetime
import os
import sys
import time

# Python 2 idiom: sys.setdefaultencoding is only callable after reload(sys).
# It switches the implicit str <-> unicode conversion codec to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")

# Number of `ifconfig` lines containing "inet addr:172"; non-zero is treated
# as "running on the server", zero as "running on the local desktop".
flag_server = int(os.popen('ifconfig | grep "inet addr:172" | wc -l').read().strip())

dir_scripts = '/app/home/zhangb/' if flag_server else '/Users/zhangb/Desktop/'
# NOTE(review): the server path here is 'zhangbo' while dir_scripts uses
# 'zhangb' -- confirm whether this difference is intentional.
dir_data = '/app/home/zhangbo/' if flag_server else '/Users/zhangb/Desktop/'
dir_server = '/app/home/'

# Make modules stored in the scripts directory importable.
sys.path.append(dir_scripts)

# Hive database selected with `use` before running queries.
db_name = 'zhangb'
12
def hive_day_cid(create_date, type11_duration):
    """Append per-provider, per-day cid counts to ``hive_day_cid_provider``.

    Builds a ``hive -e`` shell command that

    * switches to the database named by the module-level ``db_name``,
    * creates ``hive_day_cid_provider`` if it does not exist, and
    * inserts, for every (provider, day) in the date window, ``count(cid)``
      and ``count(distinct cid)`` read from
      ``report_ods_mdp.upload_bi_type11`` (``token_md5`` is used as the cid;
      rows with an empty ``token_md5`` or a provider outside
      gps/network/passive/none are excluded).

    The command is always printed. It is executed only when the module-level
    ``flag_server`` is truthy.

    NOTE(review): the original comments described a join with a geohash
    table; the query below performs no such join.
    NOTE(review): the statement uses ``insert into`` (append), so running the
    same window twice presumably stores duplicate rows -- confirm.

    Args:
        create_date: Last day of the window, inclusive. Must support
            ``strftime`` and subtraction of a ``timedelta`` (for example a
            ``datetime.date``).
        type11_duration: Window length in days, counting ``create_date``.

    Returns:
        The value returned by ``os.system`` when the command was executed,
        otherwise ``None``.
    """
    print("# ---------------------------------------------------------------------------------- #")

    # The window is [create_date - (duration - 1) days, create_date].
    window_start = create_date - datetime.timedelta(days=type11_duration - 1)
    start_date_str = window_start.strftime("%Y%m%d")
    end_date_str = create_date.strftime("%Y%m%d")

    hive_command = ('''
hive -e " use %s;
create table if not exists hive_day_cid_provider(provider string,day int, cnt_cid bigint,dist_cid bigint );

insert into hive_day_cid_provider
select a.provider,a.day,count(a.cid) as cnt_cid,count(distinct(a.cid)) as dist_cid from
(select day,provider,token_md5 as cid from report_ods_mdp.upload_bi_type11
where day >=%s and day <= %s and length(token_md5)>0 and provider in ('gps','network','passive','none') ) a
group by a.provider,a.day

;"
''' % (db_name, start_date_str, end_date_str))

    print(hive_command)

    status = None
    if flag_server:
        status = os.system(hive_command)
        # Surface failures instead of silently continuing to the next window.
        if status != 0:
            print("# WARNING: hive command returned non-zero status %s" % status)
    print("\n")
    return status
38
39
40
41
if __name__ == '__main__':
    # Driver: run hive_day_cid once per 7-day window and report elapsed time.
    # NOTE(review): the pasted source had lost all indentation; the loop body
    # below was reconstructed on the assumption that every step up to the
    # commented-out Beintoo_day call runs once per window -- confirm.

    start = time.time()
    business_name = 'brand48'  # not referenced anywhere in the visible code
    # ----------------------------------------
    # Each (month, day) pair is the LAST day of a window, i.e. the period end
    # date (e.g. the 7th closes the window that ends on the 7th).
    # NOTE(review): (11, 21) is absent from the weekly sequence, so the window
    # ending 2016-11-21 is never processed -- confirm this is intentional.
    for (i, j) in [(11, 7), (11, 14), (11, 28), (12, 5), (12, 12), (12, 19), (12, 26)]:
    # for (i, j) in [(2, 7), (2, 14), (2, 21), (2, 28)]:
        create_date = datetime.date(2016, i, j)
        type11_duration = 7
        hive_day_cid(create_date, type11_duration)
        print("\n")
        # Two spaces after the colon reproduce the output of the original
        # Python 2 statement `print '# Time: ', <value>`.
        elapsed = datetime.timedelta(seconds=(time.time() - start))
        print('# Time:  %s' % elapsed)
        print('# the end')
        print('\n')

        # hive_imei_time_list(create_date, type11_duration)
        elapsed = datetime.timedelta(seconds=(time.time() - start))
        print('# Time:  %s' % elapsed)

        # Beintoo_day.hive_output(create_date, cnt_duration=7)

    # ------------------------------------------------------------------
    # Reference snippets kept from the original file (never executed).
    #
    # Print the dates 7, 14, ... days after 2016-02-23:
    #     for i in range(1, 30):
    #         a = datetime.date(2016, 2, 23)
    #         b = a + datetime.timedelta(7 * i)
    #         print b
    #
    # Handling a range that crosses a year boundary, approach 1:
    #     date_begin = datetime.date(2016, 12, 1)
    #     # date_end = date_begin
    #     date_end = datetime.date(2017, 1, 10)
    #     for i in range(0, (date_end - date_begin).days + 1, 7):
    #         create_date = date_begin + datetime.timedelta(days=i)
    #
    #         print create_date
    #
    # Approach 2:
    #     date_begin = datetime.date(2016, 12, 1)
    #     # date_end = date_begin
    #     date_end = datetime.date(2017, 1, 10)
    #
    #     while date_begin <= date_end:
    #         print date_begin
    #         date_begin = date_begin + datetime.timedelta(days=7)
    # ------------------------------------------------------------------