[20170612]FOR ALL COLUMNS SIZE repeat(12c).txt
--//昨天看了https://jonathanlewis.wordpress.com/2017/06/01/histogram-upgrade-2/,提到了直方图的问题,
--//特别是FOR ALL COLUMNS SIZE repeat引起的问题,在一些特殊情况要注意.
1.环境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- ---------------------------------------------------------------------------- ------
IBMPC/WIN_NT64-9.1.0 12.1.0.1.0 Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production 0
create table t (id number,pad varchar2(20),flag varchar2(1));
insert into t select rownum,lpad('a',20,'a') ,'a' from dual connect by level<=2e4;
insert into t select rownum+2e4,lpad('d',20,'d') ,'d' from dual connect by level<=2;
commit ;
SCOTT@test01p> exec dbms_stats.Gather_table_stats(ownname => user, tabname => 'T',method_opt => 'FOR ALL COLUMNS ');
PL/SQL procedure successfully completed.
SCOTT@test01p> SELECT flag, COUNT(*) FROM t GROUP BY flag;
F COUNT(*)
- ----------
d 2
a 20000
select * from DBA_TAB_COL_STATISTICS where owner=user and table_name='T' and column_name='FLAG';
Record View
As of: 2017/6/12 20:38:56
OWNER: SCOTT
TABLE_NAME: T
COLUMN_NAME: FLAG
NUM_DISTINCT: 2
LOW_VALUE: 61
HIGH_VALUE: 64
DENSITY: 0.000024997500249975
NUM_NULLS: 0
NUM_BUCKETS: 2
LAST_ANALYZED: 2017/6/12 20:37:55
SAMPLE_SIZE: 20002
GLOBAL_STATS: YES
USER_STATS: NO
NOTES:
AVG_COL_LEN: 2
HISTOGRAM: FREQUENCY
SCOPE: SHARED
--//建立了直方图.bucket=2.
SCOTT@test01p> set numw 36
SCOTT@test01p> select * from USER_TAB_HISTOGRAMS where column_name='FLAG';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_A ENDPOINT_ACTUAL_VALU ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- ------------------------------------ ---------- -------------------- --------------------- -------
T FLAG 20000 503652795277878000000000000000000000 0 SHARED
T FLAG 20002 519229685853483000000000000000000000 0 SHARED
--//如果应用模式发生变化,加入了2种值.
insert into t select rownum+2e4+2,lpad('b',20,'b') ,'b' from dual connect by level<=2e4;
insert into t select rownum+4e4+2,lpad('c',20,'c') ,'c' from dual connect by level<=2;
commit ;
SCOTT@test01p> exec dbms_stats.Gather_table_stats(ownname => user, tabname => 'T',method_opt => 'FOR ALL COLUMNS size repeat',No_Invalidate => false);
PL/SQL procedure successfully completed.
--//加入No_Invalidate => false,主要使执行计划能马上使用新的统计信息.
select * from DBA_TAB_COL_STATISTICS where owner=user and table_name='T' and column_name='FLAG';
Record View
As of: 2017/6/12 20:42:29
OWNER: SCOTT
TABLE_NAME: T
COLUMN_NAME: FLAG
NUM_DISTINCT: 4
LOW_VALUE: 61
HIGH_VALUE: 64
DENSITY: 0.25
NUM_NULLS: 0
NUM_BUCKETS: 2
LAST_ANALYZED: 2017/6/12 20:42:01
SAMPLE_SIZE: 5463
GLOBAL_STATS: YES
USER_STATS: NO
NOTES:
AVG_COL_LEN: 2
HISTOGRAM: HYBRID
SCOPE: SHARED
--//因为bucket桶没有变化,直方图变成了HYBRID.
SCOTT@test01p> select * from USER_TAB_HISTOGRAMS where table_name='T' and column_name='FLAG';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_A ENDPOINT_ACTUAL_VALU ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- ------------------------------------ ---------- -------------------- --------------------- -------
T FLAG 2760 503652795277878000000000000000000000 2760 SHARED
T FLAG 5463 519229685853483000000000000000000000 1 SHARED
SCOTT@test01p> alter session set statistics_level=all;
Session altered.
SCOTT@test01p> SELECT flag, COUNT(*) FROM t GROUP BY flag order by 1;
F COUNT(*)
- ---------
a 20000
b 20000
c 2
d 2
SCOTT@test01p> select * from t where flag='c';
ID PAD F
----- -------------------- -
40003 cccccccccccccccccccc c
40004 cccccccccccccccccccc c
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
--------------------------------------
SQL_ID g3pmd0h5vnw5r, child number 0
-------------------------------------
select * from t where flag='c'
Plan hash value: 1601196873
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 68 (100)| | 2 |00:00:00.01 | 205 |
|* 1 | TABLE ACCESS FULL| T | 1 | 6598 | 180K| 68 (0)| 00:00:01 | 2 |00:00:00.01 | 205 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("FLAG"='c')
23 rows selected.
--//可以发现E-rows 与 A-rows 存在很大的差距,正是业务模式发生了变化,导致method_opt => 'FOR ALL COLUMNS size repeat'存在问题.
--//而这个问题在11G是不存在的,在使用method_opt => 'FOR ALL COLUMNS size repeat',会增加bucket的数量.
--//如果应用不存在这样的问题,估计关系不大.
--//简单的使用如下命令可以确定,"可能"有问题的直方图.
SELECT *
FROM DBA_TAB_COL_STATISTICS
WHERE owner = USER
AND table_name = 'T'
AND num_distinct < 255
AND histogram NOT LIKE '%FREQUENCY%';
Record View
As of: 2017/6/12 20:46:36
OWNER: SCOTT
TABLE_NAME: T
COLUMN_NAME: FLAG
NUM_DISTINCT: 4
LOW_VALUE: 61
HIGH_VALUE: 64
DENSITY: 0.25
NUM_NULLS: 0
NUM_BUCKETS: 2
LAST_ANALYZED: 2017/6/12 20:42:01
SAMPLE_SIZE: 5463
GLOBAL_STATS: YES
USER_STATS: NO
NOTES:
AVG_COL_LEN: 2
HISTOGRAM: HYBRID
SCOPE: SHARED
--//如果不指定repeat.
SCOTT@test01p> exec dbms_stats.Gather_table_stats(ownname => user, tabname => 'T',method_opt => 'FOR ALL COLUMNS ',No_Invalidate => false);
PL/SQL procedure successfully completed.
SCOTT@test01p> select * from USER_TAB_HISTOGRAMS where table_name='T' and column_name='FLAG';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_A ENDPOINT_ACTUAL_VALU ENDPOINT_REPEAT_COUNT SCOPE
---------- -------------------- --------------- ------------------------------------ ---------- -------------------- --------------------- -------
T FLAG 20000 503652795277878000000000000000000000 0 SHARED
T FLAG 40000 508845092136413000000000000000000000 0 SHARED
T FLAG 40002 514037388994948000000000000000000000 0 SHARED
T FLAG 40004 519229685853483000000000000000000000 0 SHARED
--//分析使用No_Invalidate => false,这样重新执行会重新分析.
select * from DBA_TAB_COL_STATISTICS where owner=user and table_name='T' and column_name='FLAG';
Record View
As of: 2017/6/12 20:49:27
OWNER: SCOTT
TABLE_NAME: T
COLUMN_NAME: FLAG
NUM_DISTINCT: 4
LOW_VALUE: 61
HIGH_VALUE: 64
DENSITY: 0.0000124987501249875
NUM_NULLS: 0
NUM_BUCKETS: 4
LAST_ANALYZED: 2017/6/12 20:47:21
SAMPLE_SIZE: 40004
GLOBAL_STATS: YES
USER_STATS: NO
NOTES:
AVG_COL_LEN: 2
HISTOGRAM: FREQUENCY
SCOPE: SHARED
--//这样建立的直方图=FREQUENCY.
SCOTT@test01p> select * from t where flag='c';
ID PAD F
----- -------------------- -
40003 cccccccccccccccccccc c
40004 cccccccccccccccccccc c
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID g3pmd0h5vnw5r, child number 0
-------------------------------------
select * from t where flag='c'
Plan hash value: 1601196873
--------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 68 (100)| | 2 |00:00:00.01 | 205 |
|* 1 | TABLE ACCESS FULL| T | 1 | 2 | 56 | 68 (0)| 00:00:01 | 2 |00:00:00.01 | 205 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("FLAG"='c')
--//这样就ok了.
--//对于这样的情况最佳的方式手工设置分析方式
BEGIN
dbms_stats.Set_table_prefs(user, 'T', 'METHOD_OPT'=>'FOR ALL COLUMNS SIZE 1, FOR COLUMNS SIZE AUTO flag');
END;
/
--//避免这个问题. 总之12c要注意分析方法METHOD_OPT'=>'FOR ALL COLUMNS SIZE repeat'带来的问题.
--//特别是一些应用模式发生变化的情况.而11g前面的测试不存在这个问题.