[20180118] Problems with tstats.txt
--// For background on using TSTATS to gather and adjust statistics, see http://blog.itpub.net/267265/viewspace-1987839/
TSTATS in a Nutshell (p. 97)
The removal of time-sensitive data from object statistics is the main idea behind TSTATS. Here is the essence of
a process that can be used on the back of that idea:
1. Gather statistics on a selected "master" test system.
2. Fabricate statistics for all global temporary tables.
3. Remove time-sensitive data from object statistics.
4. Perform initial performance testing on the master test system and make adjustments as necessary.
5. Copy the statistics to all other test systems and ultimately to production.
6. Lock the object statistics of your application schemas on all systems.
7. Drop all statistics-gathering jobs for application schemas on all your systems.
8. TSTATS only applies to application schemas, so any jobs that gather dictionary statistics are unaffected.
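--// Steps 6 and 7 come down to a couple of standard calls. A minimal sketch, assuming an application
--// schema named SCOTT and the default 11g auto-task client name (note that disabling the auto task
--// stops automatic statistics gathering for the whole database, so per-schema jobs may be preferable
--// if dictionary statistics should still be gathered automatically):
execute dbms_stats.lock_schema_stats(ownname => 'SCOTT');
BEGIN
   -- disable the automatic optimizer statistics collection task
   DBMS_AUTO_TASK_ADMIN.disable(
      client_name => 'auto optimizer stats collection'
     ,operation   => NULL
     ,window_name => NULL);
END;
/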
--// On a whim I decided to try the TSTATS idea on a small production system, and ran into a few problems. I had already noticed that histogram information cannot simply be dropped.
--// What the provided script actually does is delete each column's min/max (LOW_VALUE/HIGH_VALUE), which is not quite the same as step 3, "Remove time-sensitive data from object statistics".
--// In fact, once the min/max values are deleted the histogram becomes useless; the tests below demonstrate the problem.
1. Environment:
SCOTT@book> @ &r/ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
create table t as select rownum id ,lpad('x',32,'x') name ,'Y' flag from dual connect by level<=1e5;
update t set flag='N' where id=1e5;
commit ;
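--// (Test 1 below uses an index I_T_FLAG on FLAG; its creation is not shown in the original session.
--// Presumably something like the following was run before gathering statistics:)
create index i_t_flag on t(flag);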
execute sys.dbms_stats.gather_table_stats(OwnName => user, TabName => 't', Estimate_Percent => NULL, Method_Opt => 'FOR ALL COLUMNS SIZE 1 for columns flag size 254 ', Cascade => True, No_Invalidate => false);
2. Test 1:
SCOTT@book> select * from t where flag='N';
ID NAME F
---------- -------------------- -
100000 xxxxxxxxxxxxxxxxxxxx N
xxxxxxxxxxxx
SCOTT@book> @ &r/dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 0h7g0tqtzcvzn, child number 0
-------------------------------------
select * from t where flag='N'
Plan hash value: 120143814
-----------------------------------------------------------------------------------------
| Id | Operation | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time |
-----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 2 (100)| |
| 1 | TABLE ACCESS BY INDEX ROWID| T | 1 | 40 | 2 (0)| 00:00:01 |
|* 2 | INDEX RANGE SCAN | I_T_FLAG | 1 | | 1 (0)| 00:00:01 |
-----------------------------------------------------------------------------------------
--// You can see the frequency histogram is being used: E-Rows = 1 for FLAG='N', and the index range scan on I_T_FLAG is chosen.
3. Test 2:
--// Now delete the min/max values:
SCOTT@book> exec system.tstats.adjust_column_stats_v3( 'SCOTT','T');
PL/SQL procedure successfully completed.
SCOTT@book> select * from dba_histograms where owner=user and table_name='T';
OWNER TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_A
------ ---------- -------------------- --------------- -------------- ----------
SCOTT T FLAG 100000 4.6211E+35
SCOTT T FLAG 1 4.0500E+35
SCOTT T ID 0
SCOTT T NAME 0
SCOTT T ID 1
SCOTT T NAME 1
6 rows selected.
SCOTT@book> select COLUMN_NAME,DATA_TYPE,DATA_LENGTH,DATA_PRECISION,DATA_SCALE,NUM_DISTINCT,LOW_VALUE,HIGH_VALUE,DENSITY,NUM_NULLS,NUM_BUCKETS,HISTOGRAM from dba_tab_cols where owner=user and table_name='T';
COLUMN_NAME DATA_TYPE DATA_LENGTH DATA_PRECISION DATA_SCALE NUM_DISTINCT LOW_VALUE HIGH_VALUE DENSITY NUM_NULLS NUM_BUCKETS HISTOGRAM
-------------------- ---------- ----------- -------------- ---------- ------------ ---------- ---------- ---------- ---------- ----------- ---------------
ID NUMBER 22 100000 .00001 0 1 NONE
NAME VARCHAR2 32 1 1 0 1 NONE
FLAG CHAR 1 2 .5 0 2 FREQUENCY
--// The histogram is still there, but the min/max (LOW_VALUE/HIGH_VALUE) information has been deleted.
SCOTT@book> select * from t where flag='N';
ID NAME F
---------- -------------------- -
100000 xxxxxxxxxxxxxxxxxxxx N
xxxxxxxxxxxx
--// The execution plan:
Plan hash value: 1601196873
---------------------------------------------------------------------------
| Id | Operation | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 177 (100)| |
|* 1 | TABLE ACCESS FULL| T | 50000 | 1953K| 177 (1)| 00:00:03 |
---------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
1 - filter("FLAG"='N')
--// The execution plan has changed to a full table scan. Note E-Rows = 50000 = NUM_ROWS/NUM_DISTINCT (100000/2): once LOW_VALUE/HIGH_VALUE are gone, the frequency histogram is effectively ignored. In other words, when a histogram exists, the min/max information must not be deleted.
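--// To undo the adjustment on the test table without a full re-gather, the statistics history could
--// be restored instead. A minimal sketch, assuming the retention window still covers the original gather:
execute dbms_stats.restore_table_stats(ownname => 'SCOTT', tabname => 'T', as_of_timestamp => systimestamp - interval '1' hour, no_invalidate => false);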
4. In practice things can be even more complicated; here is a problem I ran into on a production system.
SYSTEM@192.168.xx.xx:1521/zzzzz> @ &r/sqlid 22h1xj9x22jf6
SQL_ID SQLTEXT
------------- ---------------------------------------------------------------------------------------
22h1xj9x22jf6 SELECT COUNT ( *) FROM ZY_ZYJS A WHERE A.JZRQ IS NULL AND A.CZGH =:1 AND A.JSRQ < :2
1 row selected.
SYSTEM@192.168.xx.xx:1521/zzzzz> @ bind_cap 22h1xj9x22jf6 ''
SQL_ID CHILD_NUMBER WAS NAME POSITION MAX_LENGTH LAST_CAPTURED DATATYPE_STRING VALUE_STRING
------------- ------------ --- -------------------- ---------- ---------- ------------------- --------------- -------------------------------
22h1xj9x22jf6 0 YES :1 1 32 2018-01-17 12:05:36 CHAR(32) 829
YES :2 2 7 2018-01-17 12:05:36 DATE 2018/01/01 00:00:00
--// The table has two indexes: a composite index on CZGH+JZRQ (operator ID + posting date) and a single-column index on JSRQ (settlement date).
--// Using JSRQ is clearly a poor choice, because a lot of data has JSRQ earlier than 2018/01/01. Yet the actual execution plan (after the table had been processed with the tstats package) was:
Plan hash value: 2747456857
----------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 5 (100)| | 1 |00:00:00.01 | 20 | | | |
| 1 | SORT AGGREGATE | | 1 | 1 | 21 | | | 1 |00:00:00.01 | 20 | | | |
|* 2 | VIEW | index$_join$_001 | 1 | 1 | 21 | 5 (0)| 00:00:01 | 0 |00:00:00.01 | 20 | | | |
|* 3 | HASH JOIN | | 1 | | | | | 0 |00:00:00.01 | 20 | 1368K| 1368K| 1649K (0)|
|* 4 | INDEX RANGE SCAN| IDX_ZY_ZYJS_JSRQ | 1 | 1 | 21 | 3 (34)| 00:00:01 | 5356 |00:00:00.01 | 14 | | | |
|* 5 | INDEX RANGE SCAN| I_ZY_ZYJS_CZGH_JZRQ | 1 | 1 | 21 | 4 (25)| 00:00:01 | 14 |00:00:00.01 | 6 | | | |
----------------------------------------------------------------------------------------------------------------------------------------------------------------
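--// Note the misestimate: IDX_ZY_ZYJS_JSRQ is costed at E-Rows = 1 but actually returns 5356 rows
--// (A-Rows). With LOW_VALUE/HIGH_VALUE stripped from JSRQ the optimizer can no longer relate the bind
--// value :2 to the column's value range, so the index join driven by JSRQ looks artificially cheap.
--// For reference, the two indexes described above presumably look like this (assumed DDL, not part of
--// the original session):
create index IDX_ZY_ZYJS_JSRQ on ZY_ZYJS(JSRQ);
create index I_ZY_ZYJS_CZGH_JZRQ on ZY_ZYJS(CZGH, JZRQ);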
--// Restore the statistics by re-gathering (this brings back the LOW_VALUE/HIGH_VALUE):
BEGIN
SYS.DBMS_STATS.GATHER_TABLE_STATS (
OwnName => 'XXXXX'
,TabName => 'ZY_ZYJS'
,Estimate_Percent => SYS.DBMS_STATS.AUTO_SAMPLE_SIZE
,Method_Opt => 'FOR ALL COLUMNS SIZE REPEAT '
,Degree => 4
,Cascade => TRUE
,No_Invalidate => FALSE);
END;
/
SYSTEM@192.168.xx.xx:1521/zzzzz> SELECT COLUMN_NAME, DATA_TYPE, DATA_LENGTH, DATA_PRECISION, DATA_SCALE, NUM_DISTINCT, LOW_VALUE, HIGH_VALUE,
DENSITY, NUM_NULLS, NUM_BUCKETS, HISTOGRAM
FROM dba_tab_cols
WHERE table_name = 'ZY_ZYJS'
AND column_name = 'JSRQ';
COLUMN_NAME DATA_TYPE DATA_LENGTH DATA_PRECISION DATA_SCALE NUM_DISTINCT LOW_VALUE HIGH_VALUE DENSITY NUM_NULLS NUM_BUCKETS HISTOGRAM
-------------------- ---------- ----------- -------------- ---------- ------------ ---------- ---------- ---------- ---------- ----------- ---------------
JSRQ                 DATE                 7                           5606 78740B0C10012C 78760112091017  .00017838          0           1 NONE
1 row selected.
--// The execution plan now becomes (the composite index is chosen, as expected):
Plan hash value: 3335725722
----------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
----------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 8 (100)| | 1 |00:00:00.01 | 5 |
| 1 | SORT AGGREGATE | | 1 | 1 | 21 | | | 1 |00:00:00.01 | 5 |
|* 2 | TABLE ACCESS BY INDEX ROWID| ZY_ZYJS | 1 | 16 | 336 | 8 (0)| 00:00:01 | 0 |00:00:00.01 | 5 |
|* 3 | INDEX RANGE SCAN | I_ZY_ZYJS_CZGH_JZRQ | 1 | 17 | | 1 (0)| 00:00:01 | 14 |00:00:00.01 | 2 |
----------------------------------------------------------------------------------------------------------------------------------------------
--// So there is no one-size-fits-all tuning approach; each case has to be analyzed comprehensively. Unfortunately most DBAs in China rarely put much effort into tuning SQL statements, and I am no exception....
5. Attached is the tstats source code; I have made many modifications of my own:
--// Note: I normally create the package in the SYSTEM schema. Also, for the package body to compile you need to create a dummy table (it is referenced by amend_time_based_statistics):
CREATE TABLE SAMPLE_PAYMENTS
(
PAYGRADE INTEGER,
PAYMENT_DATE DATE,
JOB_DESCRIPTION CHAR(20 BYTE)
);
--// I was too lazy to rework the code, so I simply added a procedure adjust_column_stats_v4: it keeps both the histogram and the min/max values, i.e. columns that already have a histogram are not modified at all.
--// Be careful when using this package!!
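--// Typical calls, mirroring the test above (the schema and table names are only examples):
--// adjust one table, leaving columns that have a histogram untouched:
execute system.tstats.adjust_column_stats_v4('SCOTT','T');
--// gather + adjust (via adjust_column_stats_v3) + lock in one step:
execute system.tstats.gather_table_stats('SCOTT','T');
--// fabricate and lock statistics for a global temporary table (MY_GTT_TABLE is hypothetical):
execute system.tstats.set_temp_table_stats('SCOTT','MY_GTT_TABLE');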
CREATE OR REPLACE PACKAGE tstats AUTHID CURRENT_USER
AS
PROCEDURE adjust_column_stats_v1 (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE);
PROCEDURE adjust_column_stats_v2 (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE);
PROCEDURE adjust_column_stats_v3 (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE);
PROCEDURE adjust_column_stats_v4 (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE);
PROCEDURE amend_time_based_statistics (
effective_date DATE DEFAULT SYSDATE);
PROCEDURE adjust_global_stats (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_mode VARCHAR2 DEFAULT 'PMOP');
PROCEDURE gather_table_stats (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE);
PROCEDURE set_temp_table_stats (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_numrows INTEGER DEFAULT 20000
,p_numblks INTEGER DEFAULT 1000
,p_avgrlen INTEGER DEFAULT 400);
PROCEDURE import_table_stats (
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_statown all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT (
'USERENV'
,'CURRENT_SCHEMA')
,p_stat_table all_tab_col_statistics.table_name%TYPE);
END tstats;
/
CREATE OR REPLACE PACKAGE BODY SYSTEM.tstats
AS
FUNCTION get_srec
RETURN DBMS_STATS.statrec
IS
srec DBMS_STATS.statrec;
BEGIN
/*
Workaround for issue in 12.1.0.1
that produces wrong join cardinality
when both tables have NULL for high
and low values. As a workaround this
function sets the high value very high
and the low value very low.
*/
$IF DBMS_DB_VERSION.version >= 12
$THEN
srec.epc := 2; -- Two endpoints
srec.bkvals := NULL; -- No histogram
DBMS_STATS.prepare_column_values
(
srec
,DBMS_STATS.rawarray
(
HEXTORAW
(
-- Min
'0000000000000000000000000000000000000000000000000000000000000000'
)
-- Max
,HEXTORAW
(
'ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff'
)
)
);
RETURN srec;
$ELSE
RETURN NULL;
$END
END get_srec;
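-- v1: drop the histogram and the LOW_VALUE/HIGH_VALUE of every column while keeping the
-- original NUM_DISTINCT, DENSITY, NUM_NULLS and AVG_COL_LEN (on 12c get_srec supplies
-- extreme min/max values instead of none).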
PROCEDURE adjust_column_stats_v1
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
)
AS
CURSOR c1
IS
SELECT *
FROM all_tab_col_statistics
WHERE owner = p_owner
AND table_name = p_table_name
AND last_analyzed IS NOT NULL;
BEGIN
FOR r IN c1
LOOP
DBMS_STATS.delete_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,cascade_parts => TRUE
,no_invalidate => TRUE
,force => TRUE
);
DBMS_STATS.set_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => r.num_distinct
,density => r.density
,nullcnt => r.num_nulls
,srec => get_srec -- No HIGH_VALUE/LOW_VALUE
,avgclen => r.avg_col_len
,no_invalidate => FALSE
,force => TRUE
);
END LOOP;
END adjust_column_stats_v1;
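-- v2: like v1, but DENSITY is recomputed as 1/NUM_DISTINCT, and NUM_DISTINCT is nudged to
-- 1 + 1e-14 for single-value columns so the density stays just below 1.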
PROCEDURE adjust_column_stats_v2
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
)
AS
CURSOR c1
IS
SELECT *
FROM all_tab_col_statistics
WHERE owner = p_owner
AND table_name = p_table_name
AND last_analyzed IS NOT NULL;
v_num_distinct all_tab_col_statistics.num_distinct%TYPE;
BEGIN
FOR r IN c1
LOOP
DBMS_STATS.delete_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,cascade_parts => TRUE
,no_invalidate => TRUE
,force => TRUE
);
IF r.num_distinct = 1
THEN
v_num_distinct := 1 + 1e-14;
ELSE
v_num_distinct := r.num_distinct;
END IF;
DBMS_STATS.set_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => v_num_distinct
,density => 1 / v_num_distinct
,nullcnt => r.num_nulls
,srec => get_srec -- No HIGH_VALUE/LOW_VALUE
,avgclen => r.avg_col_len
,no_invalidate => FALSE
,force => TRUE
);
END LOOP;
END adjust_column_stats_v2;
-- v3: keep the histogram buckets but clear the min/max (LOW_VALUE/HIGH_VALUE)
PROCEDURE adjust_column_stats_v3
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
)
AS
CURSOR c1
IS
SELECT *
FROM all_tab_col_statistics
WHERE owner = p_owner
AND table_name = p_table_name
AND last_analyzed IS NOT NULL;
v_num_distinct all_tab_col_statistics.num_distinct%TYPE;
z_distcnt NUMBER;
z_density NUMBER;
z_nullcnt NUMBER;
z_srec DBMS_STATS.statrec;
z_avgclen NUMBER;
BEGIN
FOR r IN c1
LOOP
DBMS_STATS.get_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => z_distcnt
,density => z_density
,nullcnt => z_nullcnt
,srec => z_srec
,avgclen => z_avgclen
);
DBMS_STATS.delete_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,cascade_parts => TRUE
,no_invalidate => TRUE
,force => TRUE
);
z_srec.minval := NULL;
z_srec.maxval := NULL;
IF r.num_distinct = 1
THEN
v_num_distinct := 1 + 1e-14;
ELSE
v_num_distinct := r.num_distinct;
END IF;
IF r.num_distinct <> 0
THEN
DBMS_STATS.set_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => v_num_distinct
,density => 1 / v_num_distinct
,nullcnt => r.num_nulls
,srec => z_srec -- No HIGH_VALUE/LOW_VALUE
,avgclen => r.avg_col_len
,no_invalidate => FALSE
,force => TRUE
);
END IF;
END LOOP;
END adjust_column_stats_v3;
-- v4: only columns with HISTOGRAM = 'NONE' are adjusted; columns that have a histogram keep everything, including their min/max values.
PROCEDURE adjust_column_stats_v4
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
)
AS
CURSOR c1
IS
SELECT *
FROM all_tab_col_statistics
WHERE owner = p_owner
AND table_name = p_table_name
AND histogram='NONE'
AND last_analyzed IS NOT NULL ;
v_num_distinct all_tab_col_statistics.num_distinct%TYPE;
z_distcnt NUMBER;
z_density NUMBER;
z_nullcnt NUMBER;
z_srec DBMS_STATS.statrec;
z_avgclen NUMBER;
BEGIN
FOR r IN c1
LOOP
DBMS_STATS.get_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => z_distcnt
,density => z_density
,nullcnt => z_nullcnt
,srec => z_srec
,avgclen => z_avgclen
);
DBMS_STATS.delete_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,cascade_parts => TRUE
,no_invalidate => TRUE
,force => TRUE
);
z_srec.minval := NULL;
z_srec.maxval := NULL;
IF r.num_distinct = 1
THEN
v_num_distinct := 1 + 1e-14;
ELSE
v_num_distinct := r.num_distinct;
END IF;
IF r.num_distinct <> 0
THEN
DBMS_STATS.set_column_stats
(
ownname => r.owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => v_num_distinct
,density => 1 / v_num_distinct
,nullcnt => r.num_nulls
,srec => z_srec -- No HIGH_VALUE/LOW_VALUE
,avgclen => r.avg_col_len
,no_invalidate => FALSE
,force => TRUE
);
END IF;
END LOOP;
END adjust_column_stats_v4;
PROCEDURE amend_time_based_statistics
(
effective_date DATE DEFAULT SYSDATE
)
IS
distcnt NUMBER;
density NUMBER;
nullcnt NUMBER;
srec DBMS_STATS.statrec;
avgclen NUMBER;
BEGIN
--
-- Step 1: Remove data from previous run
--
DELETE FROM sample_payments;
--
-- Step 2: Add data for standard pay for standard employees
--
INSERT INTO sample_payments (paygrade, payment_date, job_description)
WITH payment_dates
AS ( SELECT ADD_MONTHS
(
TRUNC (effective_date, 'MM') + 19
,1 - ROWNUM
)
standard_paydate
FROM DUAL
CONNECT BY LEVEL <= 12)
,paygrades
AS ( SELECT ROWNUM + 1 paygrade
FROM DUAL
CONNECT BY LEVEL <= 9)
,multiplier
AS ( SELECT ROWNUM rid
FROM DUAL
CONNECT BY LEVEL <= 100)
SELECT paygrade
,CASE MOD (standard_paydate - DATE '1001-01-06', 7)
WHEN 5 THEN standard_paydate - 1
WHEN 6 THEN standard_paydate - 2
ELSE standard_paydate
END
payment_date
,'AAA' job_description
FROM paygrades, payment_dates, multiplier;
--
-- Step 3: Add data for paygrade 1
--
INSERT INTO sample_payments (paygrade, payment_date, job_description)
WITH payment_dates
AS ( SELECT ADD_MONTHS
(
LAST_DAY (TRUNC (effective_date))
,1 - ROWNUM
)
standard_paydate
FROM DUAL
CONNECT BY LEVEL <= 12)
SELECT 1 paygrade
,CASE MOD (standard_paydate - DATE '1001-01-06', 7)
WHEN 5 THEN standard_paydate - 1
WHEN 6 THEN standard_paydate - 2
ELSE standard_paydate
END
payment_dates
,'zzz' job_description
FROM payment_dates;
--
-- Step 4: Add rows for exceptions.
--
INSERT INTO sample_payments (paygrade, payment_date, job_description)
WITH payment_dates
AS ( SELECT ADD_MONTHS
(
TRUNC (effective_date, 'MM') + 19
,1 - ROWNUM
)
standard_paydate
FROM DUAL
CONNECT BY LEVEL <= 12)
,paygrades
AS ( SELECT ROWNUM + 1 paygrade
FROM DUAL
CONNECT BY LEVEL <= 7)
SELECT paygrade
,CASE MOD (standard_paydate - DATE '1001-01-06', 7)
WHEN 5 THEN standard_paydate - 2 + paygrade
WHEN 6 THEN standard_paydate - 3 + paygrade
ELSE standard_paydate - 1 + paygrade
END
payment_date
,'AAA' job_description
FROM paygrades, payment_dates;
--
-- Step 5: Gather statistics for SAMPLE_PAYMENTS
--
DBMS_STATS.gather_table_stats
(
ownname => SYS_CONTEXT ('USERENV', 'CURRENT_SCHEMA')
,tabname => 'SAMPLE_PAYMENTS'
,method_opt => 'FOR COLUMNS SIZE 1 JOB_DESCRIPTION '
|| 'FOR COLUMNS SIZE 254 PAYGRADE,PAYMENT_DATE, '
|| '(PAYGRADE,PAYMENT_DATE)'
);
--
-- Step 6: Copy column statistics from SAMPLE_PAYMENTS to PAYMENTS
--
FOR r IN (SELECT column_name, histogram
FROM all_tab_cols
WHERE table_name = 'SAMPLE_PAYMENTS')
LOOP
DBMS_STATS.get_column_stats
(
ownname => SYS_CONTEXT ('USERENV', 'CURRENT_SCHEMA')
,tabname => 'SAMPLE_PAYMENTS'
,colname => r.column_name
,distcnt => distcnt
,density => density
,nullcnt => nullcnt
,srec => srec
,avgclen => avgclen
);
DBMS_STATS.set_column_stats
(
ownname => SYS_CONTEXT ('USERENV', 'CURRENT_SCHEMA')
,tabname => 'PAYMENTS'
,colname => r.column_name
,distcnt => distcnt
,density => density
,nullcnt => nullcnt
,srec => srec
,avgclen => avgclen
);
END LOOP;
END amend_time_based_statistics;
PROCEDURE adjust_global_stats
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_mode VARCHAR2 DEFAULT 'PMOP'
)
IS
-- This helper function updates the statistic for the number of blocks in the
-- table so that the average size of a partition is unaltered. We sneak
-- this value away in the unused CACHEDBLK statistic
--
numblks NUMBER;
numrows NUMBER;
avgrlen NUMBER;
cachedblk NUMBER;
cachehit NUMBER;
BEGIN
DBMS_STATS.get_table_stats
(
ownname => p_owner
,tabname => p_table_name
,numrows => numrows
,avgrlen => avgrlen
,numblks => numblks
,cachedblk => cachedblk
,cachehit => cachehit
);
IF p_mode = 'PMOP'
THEN
--
-- Resetting NUMBLKS based on CACHEDBLK
-- average segment size and current number
-- of partitions.
--
IF cachedblk IS NULL
THEN
RETURN; -- No saved value
END IF;
--
-- Recalculate the number of blocks based on
-- the current number of partitions and the
-- saved average segment size
-- Avoid reference to DBA_SEGMENTS in case
-- there is no privilege.
--
SELECT cachedblk * COUNT (*)
INTO numblks
FROM all_objects
WHERE owner = p_owner
AND object_name = p_table_name
AND object_type = 'TABLE PARTITION';
ELSIF p_mode = 'GATHER'
THEN
--
-- Save average segment size in CACHEDBLK based on NUMBLKS
-- and current number of partitions.
--
SELECT numblks / COUNT (*), TRUNC (numblks / COUNT (*)) * COUNT (*)
INTO cachedblk, numblks
FROM all_objects
WHERE owner = p_owner
AND object_name = p_table_name
AND object_type = 'TABLE PARTITION';
ELSE
RAISE PROGRAM_ERROR;
-- Only gets here if p_mode not set to PMOP or GATHER
END IF;
DBMS_STATS.set_table_stats
(
ownname => p_owner
,tabname => p_table_name
,numblks => numblks
,cachedblk => cachedblk
,force => TRUE
);
END adjust_global_stats;
PROCEDURE gather_table_stats
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
)
IS
BEGIN
DBMS_STATS.unlock_table_stats
(
ownname => p_owner
,tabname => p_table_name
);
FOR r IN (SELECT *
FROM all_tables
WHERE owner = p_owner AND table_name = p_table_name)
LOOP
DBMS_STATS.gather_table_stats
(
ownname => p_owner
,tabname => p_table_name
,granularity => CASE r.partitioned
WHEN 'YES' THEN 'GLOBAL'
ELSE 'ALL'
END
,method_opt => 'FOR ALL COLUMNS SIZE repeat'
);
adjust_column_stats_v3
(
p_owner => p_owner
,p_table_name => p_table_name
);
IF r.partitioned = 'YES'
THEN
adjust_global_stats
(
p_owner => p_owner
,p_table_name => p_table_name
,p_mode => 'GATHER'
);
END IF;
END LOOP;
DBMS_STATS.lock_table_stats (ownname => p_owner, tabname => p_table_name);
END gather_table_stats;
PROCEDURE set_temp_table_stats
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_numrows INTEGER DEFAULT 20000
,p_numblks INTEGER DEFAULT 1000
,p_avgrlen INTEGER DEFAULT 400
)
IS
distcnt NUMBER;
BEGIN
DBMS_STATS.unlock_table_stats
(
ownname => p_owner
,tabname => p_table_name
);
$IF DBMS_DB_VERSION.version >= 12
$THEN
DBMS_STATS.set_table_prefs
(
ownname => p_owner
,tabname => p_table_name
,pname => 'GLOBAL_TEMP_TABLE_STATS'
,pvalue => 'SHARED'
);
$END
DBMS_STATS.delete_table_stats
(
ownname => p_owner
,tabname => p_table_name
);
DBMS_STATS.set_table_stats
(
ownname => p_owner
,tabname => p_table_name
,numrows => p_numrows
,numblks => p_numblks
,avgrlen => p_avgrlen
,no_invalidate => FALSE
);
/*
We must now set column statistics to limit the effect of predicates on cardinality
calculations; by default cardinality is reduced by a factor of 100 for each predicate.
We use a value of 2 for the number of distinct values to reduce this factor to 2. We
do not use 1 because predicates of the type "column_1 <> 'VALUE_1'" would reduce the
cardinality to 1.
*/
distcnt := 2;
FOR r IN (SELECT *
FROM all_tab_columns
WHERE owner = p_owner AND table_name = p_table_name)
LOOP
DBMS_STATS.set_column_stats
(
ownname => p_owner
,tabname => r.table_name
,colname => r.column_name
,distcnt => distcnt
,density => 1 / distcnt
,avgclen => 5
,srec => get_srec
,no_invalidate => FALSE
);
END LOOP;
DBMS_STATS.lock_table_stats (ownname => p_owner, tabname => p_table_name);
END set_temp_table_stats;
PROCEDURE import_table_stats
(
p_owner all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_table_name all_tab_col_statistics.table_name%TYPE
,p_statown all_tab_col_statistics.owner%TYPE DEFAULT SYS_CONTEXT
(
'USERENV'
,'CURRENT_SCHEMA'
)
,p_stat_table all_tab_col_statistics.table_name%TYPE
)
IS
BEGIN
DECLARE
already_up_to_date EXCEPTION;
PRAGMA EXCEPTION_INIT (already_up_to_date, -20000);
BEGIN
DBMS_STATS.upgrade_stat_table
(
ownname => p_statown
,stattab => p_stat_table
);
EXCEPTION
WHEN already_up_to_date
THEN
NULL;
END;
DBMS_STATS.unlock_table_stats
(
ownname => p_owner
,tabname => p_table_name
);
DBMS_STATS.delete_table_stats
(
ownname => p_owner
,tabname => p_table_name
,no_invalidate => FALSE
);
DBMS_STATS.import_table_stats
(
ownname => p_owner
,tabname => p_table_name
,statown => p_statown
,stattab => p_stat_table
,no_invalidate => FALSE
);
-- For partitioned tables it may be that the number of (sub)partitions on
-- the target systems do not match those on the source system.
FOR r
IN (SELECT *
FROM all_tables
WHERE owner = p_owner
AND table_name = p_table_name
AND partitioned = 'YES')
LOOP
adjust_global_stats (p_owner, p_table_name, 'PMOP');
END LOOP;
DBMS_STATS.lock_table_stats (ownname => p_owner, tabname => p_table_name);
END import_table_stats;
END tstats;
/