通过关联订购关系这个操作做了一个关于join操作的试验。
以前采用上下行表直接关联,2个表数据量大约是2200w左右和1400w左右,并且2个表都是属于宽表,字段内容多,占用空间大,但join的时候用到的字段很少(2个左右),因此很多内存都耗在了存储不必要的字段值。每次关联操作耗时在2个小时以上。
通过细化join相关表后,首先减少了单表的数据元数目,并且只在细化表中只存储了join操作必须的字段。因此对订购关系这张表来说,数据量减少了70%以上。
由于sga得到了充分利用,因此join的效率也数10倍的得到了提升。
下面是采用细化分表后在join的详细实施过程:
减少join表的数据量后的详细操作记录:
#Elapsed: (ORDER)00:02:25.63 Elapsed: 00:00:48.45 13916602
create table test.tmp_user_info
as
select user_num,service_id,SUBCHANNEL_CODE SUB_CHANNEL_CODE,CHANNEL_CODE,REG_MODE,UNREG_MODE,ENABLE_STATUS,
FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME from mqq.t_wx_user_info2 where (last_unreg_time like '2007-02-05 %' and enable_status='N') or enable_status='Y'
#Elapsed: 00:06:38.53 22674664
create table test.tmp_oicall_info_20070206
tablespace tbs_sub
as
select t1.MISC_MSG_ID,
t1.MSG_ID,
t1.MSG_TYPE,
t1.FEE_TYPE,
t1.FEE_VALUE,
t1.SEND_ADDRESS,
t1.RECV_ADDRESS,
t1.FEE_ADDRESS,
t1.DOWN_STATION,
t1.OUTTER_ID,
t1.SERVICE_ID,
t1.OSS_RECV_CODE,
t1.MT_TYPE,
t1.MT_CONTENT,
t1.MT_TIME,
t1.CARRY_MSG,
t1.CARRY_ID,
t1.RPT_FLAG,
t1.RPT_STATE,
t1.GATEWAY_RPT_STATE,
t1.link_id,
t1.err_detail from mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070206) t1
#Elapsed: 00:05:07.04
#Elapsed: 00:04:29.64 Elapsed: 00:04:34.68 (说明前面create table的时候,不需要order by)
create table test.tmp_sms_mt_info
as
select t1.user_num,t1.service_id,SUB_CHANNEL_CODE,t1.channel_code channel_code,REG_MODE,UNREG_MODE,ENABLE_STATUS,
FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME
from test.tmp_user_info_2 t1,test.tmp_oicall_info t2 where t1.service_id=t2.service_id and t1.user_num=t2.recv_address
CREATE INDEX IDX_tmp_sms_mt_info ON test.tmp_sms_mt_info
(USER_NUM, service_id)
NOLOGGING
TABLESPACE TBS_SUB
PCTFREE 10
INITRANS 2
MAXTRANS 255
STORAGE (
INITIAL 64K
MINEXTENTS 1
MAXEXTENTS 2147483645
PCTINCREASE 0
BUFFER_POOL DEFAULT
)
NOPARALLEL;
///
select count(*) from mqqflow.t_sso_order_2007 partition(p_sso_order_200702) where stat_date=to_date('2007-02-07', 'YYYY-MM-DD') 2044 行
///
create table tmp_mt_sms
TABLESPACE TBS_STAT
PCTUSED 0
PCTFREE 10
INITRANS 1
MAXTRANS 255
STORAGE (
INITIAL 64K
MINEXTENTS 1
MAXEXTENTS 2147483645
PCTINCREASE 0
BUFFER_POOL DEFAULT
)
LOGGING
NOCOMPRESS
NOCACHE
NOPARALLEL
MONITORING
as
select
t1.MISC_MSG_ID,
t1.MSG_ID,
t1.MSG_TYPE,
t1.FEE_TYPE,
t1.FEE_VALUE,
t1.SEND_ADDRESS,
t1.RECV_ADDRESS,
t1.FEE_ADDRESS,
t1.DOWN_STATION,
t1.OUTTER_ID,
t1.SERVICE_ID,
t1.OSS_RECV_CODE,
t1.MT_TYPE,
t1.MT_CONTENT,
t1.MT_TIME,
t1.CARRY_MSG,
t1.CARRY_ID,
t2.channel_code,
t2.SUB_CHANNEL_CODE,
t2.user_num,
t2.REG_MODE,
t2.UNREG_MODE,
t2.ENABLE_STATUS,
t2.FIRST_REG_TIME,
t2.LAST_REG_TIME,
t2.LAST_UNREG_TIME,
t1.RPT_FLAG,
t1.RPT_STATE,
t1.GATEWAY_RPT_STATE,
t1.link_id,
t1.err_detail
from mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070207) t1
left join
test.tmp_sms_mt_info t2
on t2.user_num = t1.recv_address
and t1.service_id = t2.service_id
# Elapsed: 00:13:05.74
create table test. tmp_mt_sms
tablespace tbs_sub
as
select
t1.MISC_MSG_ID,
t1.MSG_ID,
t1.MSG_TYPE,
t1.FEE_TYPE,
t1.FEE_VALUE,
t1.SEND_ADDRESS,
t1.RECV_ADDRESS,
t1.FEE_ADDRESS,
t1.DOWN_STATION,
t1.OUTTER_ID,
t1.SERVICE_ID,
t1.OSS_RECV_CODE,
t1.MT_TYPE,
t1.MT_CONTENT,
t1.MT_TIME,
t1.CARRY_MSG,
t1.CARRY_ID,
decode(t3.link_id,null,t2.channel_code,'WEB') channel_code,
t2.SUB_CHANNEL_CODE,
t2.REG_MODE,
t2.UNREG_MODE,
t2.ENABLE_STATUS,
t2.FIRST_REG_TIME,
t2.LAST_REG_TIME,
t2.LAST_UNREG_TIME,
t1.RPT_FLAG,
t1.RPT_STATE,
t1.GATEWAY_RPT_STATE,
t1.link_id,
t1.err_detail
from test.tmp_oicall_info_20070206 t1
left join test.tmp_sso_mt
t3
on t3.stat_date=to_date('2007-02-06', 'YYYY-MM-DD')
and t3.link_id = t1.link_id
left join
test.tmp_user_info t2
on t2.user_num = t1.RECV_ADDRESS
and t1.service_id = t2.service_id