zoukankan      html  css  js  c++  java
  • 【原创】oracle中大数据量join操作的试验

    通过关联订购关系这个操作做了一个关于join操作的试验。

    以前采用上下行表直接关联,2个表数据量大约是2200w左右和1400w左右,并且2个表都是属于宽表,字段内容多,占用空间大,但join的时候用到的字段很少(2个左右),因此很多内存都耗在了存储不必要的字段值。每次关联操作耗时在2个小时以上。

    通过细化join相关表后,首先减少了单表的数据元数目,并且只在细化表中只存储了join操作必须的字段。因此对订购关系这张表来说,数据量减少了70%以上。

      由于sga得到了充分利用,因此join的效率也数10倍的得到了提升。

    下面是采用细化分表后在join的详细实施过程:

    减少join表的数据量后的详细操作记录:

    #Elapsed: (ORDER)00:02:25.63   Elapsed: 00:00:48.45    13916602
    create table test.tmp_user_info
    as
    select user_num,service_id,SUBCHANNEL_CODE SUB_CHANNEL_CODE,CHANNEL_CODE,REG_MODE,UNREG_MODE,ENABLE_STATUS,
      FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME from mqq.t_wx_user_info2 where (last_unreg_time like '2007-02-05 %' and enable_status='N') or enable_status='Y'


    #Elapsed: 00:06:38.53 22674664
    create table test.tmp_oicall_info_20070206
    tablespace tbs_sub
    as
    select t1.MISC_MSG_ID,
      t1.MSG_ID,
      t1.MSG_TYPE,
      t1.FEE_TYPE,
      t1.FEE_VALUE,
      t1.SEND_ADDRESS,
      t1.RECV_ADDRESS,
      t1.FEE_ADDRESS,
      t1.DOWN_STATION,
      t1.OUTTER_ID,
      t1.SERVICE_ID,
      t1.OSS_RECV_CODE,
      t1.MT_TYPE,
      t1.MT_CONTENT,
      t1.MT_TIME,
      t1.CARRY_MSG,
      t1.CARRY_ID,
      t1.RPT_FLAG,
      t1.RPT_STATE,
      t1.GATEWAY_RPT_STATE,
      t1.link_id,
      t1.err_detail from mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070206) t1
    #Elapsed: 00:05:07.04


    #Elapsed: 00:04:29.64 Elapsed: 00:04:34.68  (说明前面create table的时候,不需要order by)
    create table test.tmp_sms_mt_info
    as
    select t1.user_num,t1.service_id,SUB_CHANNEL_CODE,t1.channel_code channel_code,REG_MODE,UNREG_MODE,ENABLE_STATUS,
      FIRST_REG_TIME,LAST_REG_TIME,LAST_UNREG_TIME
      from  test.tmp_user_info_2 t1,test.tmp_oicall_info t2 where t1.service_id=t2.service_id and t1.user_num=t2.recv_address
    CREATE INDEX IDX_tmp_sms_mt_info ON test.tmp_sms_mt_info
    (USER_NUM, service_id)
    NOLOGGING
    TABLESPACE TBS_SUB
    PCTFREE    10
    INITRANS   2
    MAXTRANS   255
    STORAGE    (
                INITIAL          64K
                MINEXTENTS       1
                MAXEXTENTS       2147483645
                PCTINCREASE      0
                BUFFER_POOL      DEFAULT
               )
    NOPARALLEL;
    ///
    select count(*) from mqqflow.t_sso_order_2007 partition(p_sso_order_200702) where stat_date=to_date('2007-02-07', 'YYYY-MM-DD')  2044 行
    ///
    create table tmp_mt_sms
    TABLESPACE TBS_STAT
    PCTUSED    0
    PCTFREE    10
    INITRANS   1
    MAXTRANS   255
    STORAGE    (
                INITIAL          64K
                MINEXTENTS       1
                MAXEXTENTS       2147483645
                PCTINCREASE      0
                BUFFER_POOL      DEFAULT
               )
    LOGGING
    NOCOMPRESS
    NOCACHE
    NOPARALLEL
    MONITORING
    as
    select
      t1.MISC_MSG_ID,
      t1.MSG_ID,
      t1.MSG_TYPE,
      t1.FEE_TYPE,
      t1.FEE_VALUE,
      t1.SEND_ADDRESS,
      t1.RECV_ADDRESS,
      t1.FEE_ADDRESS,
      t1.DOWN_STATION,
      t1.OUTTER_ID,
      t1.SERVICE_ID,
      t1.OSS_RECV_CODE,
      t1.MT_TYPE,
      t1.MT_CONTENT,
      t1.MT_TIME,
      t1.CARRY_MSG,
      t1.CARRY_ID,
      t2.channel_code,
      t2.SUB_CHANNEL_CODE,
      t2.user_num,
      t2.REG_MODE,
      t2.UNREG_MODE,
      t2.ENABLE_STATUS,
      t2.FIRST_REG_TIME,
      t2.LAST_REG_TIME,
      t2.LAST_UNREG_TIME,
      t1.RPT_FLAG,
      t1.RPT_STATE,
      t1.GATEWAY_RPT_STATE,
      t1.link_id,
      t1.err_detail
      from  mqqflow.t_mt_sms_200702 partition(p_mt_sms_20070207)  t1
      left join
      test.tmp_sms_mt_info t2
      on t2.user_num = t1.recv_address
      and t1.service_id = t2.service_id

    # Elapsed: 00:13:05.74
    create table test. tmp_mt_sms
    tablespace tbs_sub
    as
      select
      t1.MISC_MSG_ID,
      t1.MSG_ID,
      t1.MSG_TYPE,
      t1.FEE_TYPE,
      t1.FEE_VALUE,
      t1.SEND_ADDRESS,
      t1.RECV_ADDRESS,
      t1.FEE_ADDRESS,
      t1.DOWN_STATION,
      t1.OUTTER_ID,
      t1.SERVICE_ID,
      t1.OSS_RECV_CODE,
      t1.MT_TYPE,
      t1.MT_CONTENT,
      t1.MT_TIME,
      t1.CARRY_MSG,
      t1.CARRY_ID,
      decode(t3.link_id,null,t2.channel_code,'WEB') channel_code,
      t2.SUB_CHANNEL_CODE,
      t2.REG_MODE,
      t2.UNREG_MODE,
      t2.ENABLE_STATUS,
      t2.FIRST_REG_TIME,
      t2.LAST_REG_TIME,
      t2.LAST_UNREG_TIME,
      t1.RPT_FLAG,
      t1.RPT_STATE,
      t1.GATEWAY_RPT_STATE,
      t1.link_id,
      t1.err_detail
      from test.tmp_oicall_info_20070206 t1
      left join test.tmp_sso_mt
      t3
      on t3.stat_date=to_date('2007-02-06', 'YYYY-MM-DD')
      and t3.link_id = t1.link_id
      left join
      test.tmp_user_info t2
      on t2.user_num = t1.RECV_ADDRESS
      and t1.service_id = t2.service_id

  • 相关阅读:
    CentOS7防火墙
    [线索二叉树] [LeetCode] 不需要栈或者别的辅助空间,完成二叉树的中序遍历。题:Recover Binary Search Tree,Binary Tree Inorder Traversal
    二叉树系列
    二叉树系列
    [LeetCode] Binary Tree Level Order Traversal 与 Binary Tree Zigzag Level Order Traversal,两种按层次遍历树的方式,分别两个队列,两个栈实现
    动态规划小结
    [LeetCode] Populating Next Right Pointers in Each Node I, II
    [LeetCode] 递推思想的美妙 Best Time to Buy and Sell Stock I, II, III O(n) 解法
    二叉树系列
    [LeetCode] 数组的最长连续数, O(n)解法
  • 原文地址:https://www.cnblogs.com/jacktu/p/1083204.html
Copyright © 2011-2022 走看看