[20180510]20 Indexes.txt
https://jonathanlewis.wordpress.com/2018/05/08/20-indexes/
--//重复测试:
If your system had to do a lot of distributed queries there's a limit on indexes that might affect performance: when
deriving an execution plan for a distributed query the optimizer will consider a maximum of twenty indexes on each
remote table. if you have any tables with a ridiculous number of indexes (various 3rd party accounting and CRM systems
spring to mind) and if you drop and recreate indexes on those tables in the wrong order then execution plans may change
for the simple reason that the optimizer is considering a different subset of the available indexes.
Although the limit is stated in the manuals (a few lines into a section on managing statement transparency) there is no
indication about which 20 indexes the optimizer is likely to choose – a couple of experiments, with tracing enabled and
shared pool flushes, gives a fairly strong indication that it's the last 20 indexes created (or, to be more explicit,
the ones with the 20 highest object_id values).
Here's a little code to help demonstrate the point – first just the table and index creation
1.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
CREATE PUBLIC DATABASE LINK LOOPBACK USING 'localhost:1521/book:DEDICATED';
create table t1
as
with generator as (
select --+ materialize
rownum id
from all_objects
where rownum <= 3000 -- > comment to avoid WordPress format issue
)
select
mod(rownum,trunc(5000/1)) n01,
mod(rownum,trunc(5000/2)) n02,
mod(rownum,trunc(5000/3)) n03,
mod(rownum,trunc(5000/4)) n04,
mod(rownum,trunc(5000/5)) n05,
mod(rownum,trunc(5000/6)) n06,
mod(rownum,trunc(5000/7)) n07,
mod(rownum,trunc(5000/8)) n08,
mod(rownum,trunc(5000/9)) n09,
mod(rownum,trunc(5000/10)) n10,
mod(rownum,trunc(5000/11)) n11,
mod(rownum,trunc(5000/12)) n12,
mod(rownum,trunc(5000/13)) n13,
mod(rownum,trunc(5000/14)) n14,
mod(rownum,trunc(5000/15)) n15,
mod(rownum,trunc(5000/16)) n16,
mod(rownum,trunc(5000/17)) n17,
mod(rownum,trunc(5000/18)) n18,
mod(rownum,trunc(5000/19)) n19,
mod(rownum,trunc(5000/20)) n20,
mod(rownum,trunc(5000/21)) n21,
mod(rownum,trunc(5000/22)) n22,
mod(rownum,trunc(5000/23)) n23,
mod(rownum,trunc(5000/24)) n24,
rownum id,
rpad('x',40) padding
from
generator v1,
generator v2
where
rownum <= 1e5 -- > comment to avoid WordPress format issue
;
alter table t1 add constraint t1_pk primary key(id);
create table t2
as
with generator as (
select --+ materialize
rownum id
from all_objects
where rownum <= 3000 -- > comment to avoid WordPress format issue
)
select
mod(rownum,trunc(5000/1)) n01,
mod(rownum,trunc(5000/2)) n02,
mod(rownum,trunc(5000/3)) n03,
mod(rownum,trunc(5000/4)) n04,
mod(rownum,trunc(5000/5)) n05,
mod(rownum,trunc(5000/6)) n06,
mod(rownum,trunc(5000/7)) n07,
mod(rownum,trunc(5000/8)) n08,
mod(rownum,trunc(5000/9)) n09,
mod(rownum,trunc(5000/10)) n10,
mod(rownum,trunc(5000/11)) n11,
mod(rownum,trunc(5000/12)) n12,
mod(rownum,trunc(5000/13)) n13,
mod(rownum,trunc(5000/14)) n14,
mod(rownum,trunc(5000/15)) n15,
mod(rownum,trunc(5000/16)) n16,
mod(rownum,trunc(5000/17)) n17,
mod(rownum,trunc(5000/18)) n18,
mod(rownum,trunc(5000/19)) n19,
mod(rownum,trunc(5000/20)) n20,
mod(rownum,trunc(5000/21)) n21,
mod(rownum,trunc(5000/22)) n22,
mod(rownum,trunc(5000/23)) n23,
mod(rownum,trunc(5000/24)) n24,
rownum id,
rpad('x',40) padding
from
generator v1,
generator v2
where
rownum <= 1e5 -- > comment to avoid WordPress format issue
;
create index t2_a21 on t2(n21);
create index t2_a22 on t2(n22);
create index t2_a23 on t2(n23);
create index t2_a24 on t2(n24);
create index t2_z01 on t2(n01);
create index t2_z02 on t2(n02);
create index t2_z03 on t2(n03);
create index t2_z04 on t2(n04);
create index t2_z05 on t2(n05);
create index t2_z06 on t2(n06);
create index t2_z07 on t2(n07);
create index t2_z08 on t2(n08);
create index t2_z09 on t2(n09);
create index t2_z10 on t2(n10);
create index t2_i11 on t2(n11);
create index t2_i12 on t2(n12);
create index t2_i13 on t2(n13);
create index t2_i14 on t2(n14);
create index t2_i15 on t2(n15);
create index t2_i16 on t2(n16);
create index t2_i17 on t2(n17);
create index t2_i18 on t2(n18);
create index t2_i19 on t2(n19);
create index t2_i20 on t2(n20);
alter index t2_a21 rebuild;
alter index t2_a22 rebuild;
alter index t2_a23 rebuild;
alter index t2_a24 rebuild;
begin
dbms_stats.gather_table_stats(
ownname => user,
tabname =>'t1',
method_opt => 'for all columns size 1',
cascade => true
);
dbms_stats.gather_table_stats(
ownname => user,
tabname =>'t2',
method_opt => 'for all columns size 1',
cascade => true
);
end;
/
2.测试:
I'm going to use a loopback database link to join "local" table t1 to "remote" table t2 on all 24 of the nXX columns.
I've created indexes on all the columns, messing around with index names, order of creation, and rebuilding, to cover
possible selection criteria such as alphabetical order, ordering by data_object_id (rather than object_id), even
ordering by name of indexed columns(!).
Now the code to run a test:
define m_target=loopback
alter session set events '10053 trace name context forever';
set serveroutput off
select
t1.id,
t2.id,
t2.padding
from
t1 t1,
t2@&m_target t2
where
t1.id = 99
and t2.n01 = t1.n01
and t2.n02 = t1.n02
and t2.n03 = t1.n03
and t2.n04 = t1.n04
and t2.n05 = t1.n05
and t2.n06 = t1.n06
and t2.n07 = t1.n07
and t2.n08 = t1.n08
and t2.n09 = t1.n09
and t2.n10 = t1.n10
/* */
and t2.n11 = t1.n11
and t2.n12 = t1.n12
and t2.n13 = t1.n13
and t2.n14 = t1.n14
and t2.n15 = t1.n15
and t2.n16 = t1.n16
and t2.n17 = t1.n17
and t2.n18 = t1.n18
and t2.n19 = t1.n19
and t2.n20 = t1.n20
/* */
and t2.n21 = t1.n21
and t2.n22 = t1.n22
and t2.n23 = t1.n23
and t2.n24 = t1.n24
;
select * from table(dbms_xplan.display_cursor(null,null,'outline'));
alter session set events '10053 trace name context off';
I've used a substitution variable for the name of the database link – it's a convenience I have with all my distributed
tests, a list of possible defines at the top of the script depending on which database I happen to be using at the time
– then enabled the optimizer (10053) trace, set serveroutput off so that I can pull the execution plan from memory most
easily, then executed the query.
Here's the execution plan – including the Remote section and Outline.
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 38q1883jmvutm, child number 0
-------------------------------------
select t1.id, t2.id, t2.padding from t1 t1,
t2@loopback t2 where t1.id = 99 and t2.n01 = t1.n01 and t2.n02 =
t1.n02 and t2.n03 = t1.n03 and t2.n04 = t1.n04 and t2.n05 = t1.n05 and
t2.n06 = t1.n06 and t2.n07 = t1.n07 and t2.n08 = t1.n08 and t2.n09 =
t1.n09 and t2.n10 = t1.n10 /* */ and t2.n11 = t1.n11 and
t2.n12 = t1.n12 and t2.n13 = t1.n13 and t2.n14 = t1.n14 and t2.n15 =
t1.n15 and t2.n16 = t1.n16 and t2.n17 = t1.n17 and t2.n18 = t1.n18 and
t2.n19 = t1.n19 and t2.n20 = t1.n20 /* */ and t2.n21 = t1.n21
and t2.n22 = t1.n22 and t2.n23 = t1.n23 and t2.n24 = t1.n24
Plan hash value: 582650634
------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Inst |IN-OUT|
------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 4 (100)| | | |
| 1 | NESTED LOOPS | | 1 | 243 | 4 (0)| 00:00:01 | | |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 101 | 2 (0)| 00:00:01 | | |
|* 3 | INDEX UNIQUE SCAN | T1_PK | 1 | | 1 (0)| 00:00:01 | | |
| 4 | REMOTE | T2 | 1 | 142 | 2 (0)| 00:00:01 | LOOPB~ | R->S |
------------------------------------------------------------------------------------------------------
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
DB_VERSION('11.2.0.4')
ALL_ROWS
OUTLINE_LEAF(@"SEL$1")
INDEX_RS_ASC(@"SEL$1" "T1"@"SEL$1" ("T1"."ID"))
FULL(@"SEL$1" "T2"@"SEL$1")
LEADING(@"SEL$1" "T1"@"SEL$1" "T2"@"SEL$1")
USE_NL(@"SEL$1" "T2"@"SEL$1")
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("T1"."ID"=99)
Remote SQL Information (identified by operation id):
----------------------------------------------------
4 - SELECT "N01","N02","N03","N04","N05","N06","N07","N08","N09","N10","N11","N12","N13","N
14","N15","N16","N17","N18","N19","N20","N21","N22","N23","N24","ID","PADDING" FROM "T2" "T2"
WHERE "N01"=:1 AND "N02"=:2 AND "N03"=:3 AND "N04"=:4 AND "N05"=:5 AND "N06"=:6 AND "N07"=:7
AND "N08"=:8 AND "N09"=:9 AND "N10"=:10 AND "N11"=:11 AND "N12"=:12 AND "N13"=:13 AND
"N14"=:14 AND "N15"=:15 AND "N16"=:16 AND "N17"=:17 AND "N18"=:18 AND "N19"=:19 AND "N20"=:20
AND "N21"=:21 AND "N22"=:22 AND "N23"=:23 AND "N24"=:24 (accessing 'LOOPBACK' )
There's a little oddity with the plan – specifically in the Outline: there's a "full(t2)" hint which is clearly
inappropriate and isn't consistent with the cost of 2 for the REMOTE operation reported in the body of the plan.
Fortunately the SQL forwarded to the "remote" database doesn't include this hint and (you'll have to take my word for
it) used an indexed access path into the table.
Where, though, is the indication that Oracle considered only 20 indexes? It's in the 10053 trace file under the "Base
Statistical Information" section in the subsection headed "Index Stats":
Index Stats::
Index: 0 Col#: 20 (NOT ANALYZED)
LVLS: 1 #LB: 204 #DK: 250 LB/K: 1.00 DB/K: 400.00 CLUF: 2002.00
Index: 0 Col#: 19 (NOT ANALYZED)
LVLS: 1 #LB: 204 #DK: 263 LB/K: 1.00 DB/K: 380.00 CLUF: 2002.00
Index: 0 Col#: 18 (NOT ANALYZED)
LVLS: 1 #LB: 205 #DK: 277 LB/K: 1.00 DB/K: 361.00 CLUF: 2002.00
Index: 0 Col#: 17 (NOT ANALYZED)
LVLS: 1 #LB: 205 #DK: 294 LB/K: 1.00 DB/K: 340.00 CLUF: 2002.00
Index: 0 Col#: 16 (NOT ANALYZED)
LVLS: 1 #LB: 205 #DK: 312 LB/K: 1.00 DB/K: 320.00 CLUF: 2002.00
Index: 0 Col#: 15 (NOT ANALYZED)
LVLS: 1 #LB: 205 #DK: 333 LB/K: 1.00 DB/K: 300.00 CLUF: 2002.00
Index: 0 Col#: 14 (NOT ANALYZED)
LVLS: 1 #LB: 206 #DK: 357 LB/K: 1.00 DB/K: 280.00 CLUF: 2002.00
Index: 0 Col#: 13 (NOT ANALYZED)
LVLS: 1 #LB: 206 #DK: 384 LB/K: 1.00 DB/K: 260.00 CLUF: 2002.00
Index: 0 Col#: 12 (NOT ANALYZED)
LVLS: 1 #LB: 206 #DK: 416 LB/K: 1.00 DB/K: 240.00 CLUF: 2002.00
Index: 0 Col#: 11 (NOT ANALYZED)
LVLS: 1 #LB: 206 #DK: 454 LB/K: 1.00 DB/K: 220.00 CLUF: 2002.00
Index: 0 Col#: 10 (NOT ANALYZED)
LVLS: 1 #LB: 207 #DK: 500 LB/K: 1.00 DB/K: 200.00 CLUF: 2002.00
Index: 0 Col#: 9 (NOT ANALYZED)
LVLS: 1 #LB: 207 #DK: 555 LB/K: 1.00 DB/K: 180.00 CLUF: 2002.00
Index: 0 Col#: 8 (NOT ANALYZED)
LVLS: 1 #LB: 207 #DK: 625 LB/K: 1.00 DB/K: 160.00 CLUF: 2002.00
Index: 0 Col#: 7 (NOT ANALYZED)
LVLS: 1 #LB: 208 #DK: 714 LB/K: 1.00 DB/K: 140.00 CLUF: 2002.00
Index: 0 Col#: 6 (NOT ANALYZED)
LVLS: 1 #LB: 208 #DK: 833 LB/K: 1.00 DB/K: 120.00 CLUF: 2002.00
Index: 0 Col#: 5 (NOT ANALYZED)
LVLS: 1 #LB: 208 #DK: 1000 LB/K: 1.00 DB/K: 100.00 CLUF: 2002.00
Index: 0 Col#: 4 (NOT ANALYZED)
LVLS: 1 #LB: 208 #DK: 1250 LB/K: 1.00 DB/K: 80.00 CLUF: 2002.00
Index: 0 Col#: 3 (NOT ANALYZED)
LVLS: 1 #LB: 209 #DK: 1666 LB/K: 1.00 DB/K: 60.00 CLUF: 2002.00
Index: 0 Col#: 2 (NOT ANALYZED)
LVLS: 1 #LB: 209 #DK: 2500 LB/K: 1.00 DB/K: 40.00 CLUF: 2002.00
Index: 0 Col#: 1 (NOT ANALYZED)
LVLS: 1 #LB: 209 #DK: 5000 LB/K: 1.00 DB/K: 20.00 CLUF: 2002.00
We have 20 indexes listed, and while they're all called "Index 0" (and reported as "Not Analyzed") we can see from their
column definitions that they are (in reverse order) the indexes on columns n01 through to n20 – i.e. the last 20
indexes created. The optimizer has created its plan based only on its knowledge of these indexes.
We might ask whether this matters or not – after all when the remote SQL gets to the remote database the remote
optimizer is going to (re-)optimize it anyway and do the best it can with it, so at run-time Oracle could still end up
using remote indexes that the local optimizer didn't know about. So let's get nasty and give the local optimizer a
problem:
create index t2_id on t2(id);
select
t1.id,
t2.id,
t2.padding
from
t1 t1,
t2@&m_target t2
where
t1.id = 99
and t2.n01 = t1.n01
;
I've created one more index on t2, which means the local optimizer is going to "forget" about the index that was the
previous 20th index on the most recently created list for t2. That's the index on (n01), which would have been a very
good index for this query. If this query were to run locally the optimizer would do a nested loop from t1 to t2 using
the index on (n01) – but the optimizer no longer knows about that index, so we get the following plan:
SCOTT@book> @ &r/dpc '' outline
PLAN_TABLE_OUTPUT
--------------------------------------
SQL_ID 7u9qyvv1cffhb, child number 0
-------------------------------------
select t1.id, t2.id, t2.padding from t1
t1, t2@loopback t2 where
t1.id = 99 and t2.n01 = t1.n01
Plan hash value: 582650634
-------------------------------------------------------------------------------------------------------
| Id | Operation | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time | Inst |IN-OUT|
-------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | 352 (100)| | | |
| 1 | NESTED LOOPS | | 20 | 1140 | 352 (1)| 00:00:05 | | |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 9 | 2 (0)| 00:00:01 | | |
|* 3 | INDEX UNIQUE SCAN | T1_PK | 1 | | 1 (0)| 00:00:01 | | |
| 4 | REMOTE | T2 | 20 | 960 | 350 (1)| 00:00:05 | LOOPB~ | R->S |
-------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
3 - SEL$1 / T1@SEL$1
4 - SEL$1 / T2@SEL$1
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
IGNORE_OPTIM_EMBEDDED_HINTS
OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
DB_VERSION('11.2.0.4')
ALL_ROWS
OUTLINE_LEAF(@"SEL$1")
INDEX_RS_ASC(@"SEL$1" "T1"@"SEL$1" ("T1"."ID"))
FULL(@"SEL$1" "T2"@"SEL$1")
LEADING(@"SEL$1" "T1"@"SEL$1" "T2"@"SEL$1")
USE_NL(@"SEL$1" "T2"@"SEL$1")
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("T1"."ID"=99)
Remote SQL Information (identified by operation id):
----------------------------------------------------
4 - SELECT "N01","ID","PADDING" FROM "T2" "T2" WHERE "N01"=:1 (accessing 'LOOPBACK' )
--//奇怪,我这里没有使用hash join操作.不知道为什么是版本的问题.
--//另外lz定义的m_target=orcl@loopback,有点奇怪.难道作者喜欢这样定义dblink.
Oracle is going to do a hash join and apply the join predicate late. Although the remote optimizer can sometimes rescue
us from a mistake made by the local optimizer and use indexes that the local optimizer doesn't know about, there are
times when the remote SQL generated by the local optimizer is so rigidly associated with the expected plan that there's
no way the remote optimizer can workaround the assumptions made by the local optimizer.
So when you create (or drop and recreate) an index, it's just possible that a distributed plan will have to change
because the local optimizer is no longer aware of an index that exists at the remote site.
tl;dr
Be very cautious about dropping and recreating indexes if the table in question
has more than 20 indexes
and is used at the remote end of a distributed execution plan
The optimizer will consider only 20 of the indexes on the table, choosing the ones with the highest object_ids. If you
drop and recreate an index then it gets a new (highest) object_id and a plan may change because the index that Oracle
was previously using is no longer in the top 20.
--//补充测试dblink的定义:
SCOTT@book> CREATE PUBLIC DATABASE LINK orcl@LOOPBACK USING 'localhost:1521/book:DEDICATED';
Database link created.
SCOTT@book> select sysdate from dual@orcl@loopback;
SYSDATE
-------------------
2018-05-11 09:16:08
--//确实也可以通过.