zoukankan      html  css  js  c++  java
  • 检查数据倾斜分布


    从传统数据库迁移到 GP 时,一个重要且常被开发者忽略的概念是数据分布:如果表的分布键设计不当,会导致严重的性能问题。下面的函数可供开发者及 DBA 检测一张表的数据倾斜情况。
    -- Function: gpmg.data_skew(character varying)
     
    -- DROP FUNCTION gpmg.data_skew(character varying);
     
    CREATE OR REPLACE FUNCTION gpmg.data_skew(tablename character varying)
      RETURNS text AS
    $BODY$
    -- 2014-05-26, Gtlions: collect and report data-skew statistics for one table.
    --
    -- Parameter:
    --   tablename  name of the table to inspect (optionally schema-qualified).
    -- Returns:
    --   A one-line text summary: number of primary segment instances, the ideal
    --   per-segment share (1/segments), the table's total row count, and the
    --   host / row count / share of the fullest and emptiest segment.
    -- Raises:
    --   An error if tablename does not resolve to an existing relation.
    -- Side effects:
    --   Draws one value from gpmg.commonseq and uses gpmg.commontab as a
    --   scratch area; the rows it inserts are deleted again before returning.
    declare
      v_id bigint;                 -- scratch-row key taken from gpmg.commonseq
      v_segs integer := 0;         -- number of primary segment instances
      v_usedsegs integer := 0;     -- segments that hold at least one row
      v_totalnums bigint := 0;     -- total rows in the inspected table
      v_maxskew numeric := 0.0;
      v_minskew numeric := 0.0;
      v_maxskew_seg text;
      v_minskew_seg text;
      v_maxrows bigint := 0;
      v_minrows bigint := 0;
      v_result text;

    begin
      -- Fail early unless the argument is a plain relation name; this also keeps
      -- arbitrary SQL out of the dynamic statement built below.
      perform $1::text::regclass;

      v_id := nextval('gpmg.commonseq');
      v_result := 'GP hava ';

      select count(*) into v_segs
        from gp_segment_configuration
       where role = 'p' and content <> -1;
      v_result := v_result || v_segs || ' instances, Standard skew is ' || (1.0 / v_segs) || '. ';

      -- bg1 = segment id (NULL on the rollup grand-total row), bg2 = row count.
      execute 'insert into gpmg.commontab(seq,tabname,bg1,bg2) select ' || v_id || ','
           || quote_literal($1)
           || ',gp_segment_id,count(*) segrownums from ' || $1
           || ' group by rollup((gp_segment_id))';

      -- The rollup grand-total row carries the largest bg2, i.e. the table total.
      select coalesce(max(bg2), 0) into v_totalnums
        from gpmg.commontab
       where seq = v_id and tabname = $1;

      -- An empty table has no distribution to measure; returning here also
      -- avoids dividing by a zero total below.
      if v_totalnums = 0 then
        delete from gpmg.commontab where seq = v_id and tabname = $1;
        return v_result || 'You Table [' || $1 || '] skew info: [table_totalrows:0]';
      end if;

      -- nm1 = ideal share, nm2 = this segment's share, nm3 = |ideal - actual|.
      update gpmg.commontab
         set nm1 = 1::numeric / v_segs,
             nm2 = bg2::numeric / v_totalnums,
             nm3 = abs(1::numeric / v_segs - bg2::numeric / v_totalnums)
       where seq = v_id and tabname = $1;

      select max(nm2), min(nm2), count(*) into v_maxskew, v_minskew, v_usedsegs
        from gpmg.commontab
       where seq = v_id and tabname = $1 and bg1 is not null;

      -- Fullest segment: host name and row count (ties broken by lowest segment id).
      select hostname into v_maxskew_seg
        from gp_segment_configuration
       where role = 'p'
         and content in (select bg1 from gpmg.commontab
                          where seq = v_id and tabname = $1 and bg1 is not null and nm2 = v_maxskew
                          order by bg1 limit 1);
      select bg2 into v_maxrows
        from gpmg.commontab
       where seq = v_id and tabname = $1 and bg1 is not null and nm2 = v_maxskew
       order by bg1 limit 1;

      if v_usedsegs < v_segs then
        -- Segments holding no rows never show up in the GROUP BY output, so the
        -- true minimum is an empty segment: 0 rows, share 0.
        v_minskew := 0;
        v_minrows := 0;
        select hostname into v_minskew_seg
          from gp_segment_configuration
         where role = 'p' and content <> -1
           and content not in (select bg1 from gpmg.commontab
                                where seq = v_id and tabname = $1 and bg1 is not null)
         order by content limit 1;
      else
        select hostname into v_minskew_seg
          from gp_segment_configuration
         where role = 'p'
           and content in (select bg1 from gpmg.commontab
                            where seq = v_id and tabname = $1 and bg1 is not null and nm2 = v_minskew
                            order by bg1 limit 1);
        select bg2 into v_minrows
          from gpmg.commontab
         where seq = v_id and tabname = $1 and bg1 is not null and nm2 = v_minskew
         order by bg1 limit 1;
      end if;

      -- coalesce the host names: a single NULL would turn the whole message into NULL.
      v_result := v_result || 'You Table [' || $1 || '] skew info: [table_totalrows:' || v_totalnums
               || ', maxskew:seg-' || coalesce(v_maxskew_seg, 'unknown') || ', rows-' || v_maxrows || ' ' || v_maxskew
               || ', minskew:seg-' || coalesce(v_minskew_seg, 'unknown') || ', rows-' || v_minrows || ' ' || v_minskew || ']';

      -- Remove the scratch rows written above.
      delete from gpmg.commontab where seq = v_id and tabname = $1;
      return v_result;
    end;
    $BODY$
      LANGUAGE plpgsql VOLATILE;
    -- Ownership and execute privileges for gpmg.data_skew.
    ALTER FUNCTION gpmg.data_skew(character varying) OWNER TO gpadmin;
    GRANT EXECUTE ON FUNCTION gpmg.data_skew(character varying) TO public, gpadmin;
    
    bigdatagp=# select gpmg.data_skew('gpmg.manager_table');
                                                                                                                data_skew                                                  
                                                               
    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    -----------------------------------------------------------
     GP hava 64 instances, Standard skew is 0.01562500000000000000. You Table [gpmg.manager_table] skew info: [table_totalrows:83, maxskew:seg-sdw16, rows-3 0.036144578313
    25301205, minskew:seg-sdw2, rows-1 0.01204819277108433735]
    (1 row)
     
    bigdatagp=# select gpmg.data_skew('gpmg.func_log');
                                                                                                                 data_skew                                                 
                                                                 
    -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    -------------------------------------------------------------
     GP hava 64 instances, Standard skew is 0.01562500000000000000. You Table [gpmg.func_log] skew info: [table_totalrows:53708, maxskew:seg-sdw10, rows-907 0.016887614508
    08073285, minskew:seg-sdw7, rows-773 0.01439264169211290683]
    (1 row)
    2014-10-14 09:53:00
    


    -EOF-
  • 相关阅读:
    K2新网站(官网和BPM社区)正式上线了
    在线体验K2 BPM微信审批
    K2 BPM + SAP,实现全方面管理企业
    KStar ----BPM应用框架,K2 的新星
    SharePoint加K2,将Portal系统与BPM系统完美整合!
    迈瑞综合应用及流程管理平台项目
    深度学习教程网站
    Pytorch自定义参数层
    pytorch BCELoss和BCEWithLogitsLoss
    Some helper functions for PyTorch
  • 原文地址:https://www.cnblogs.com/wzzkaifa/p/7248785.html
Copyright © 2011-2022 走看看