概述
近日有工作中的一个ORACLE查询,脚本如下:
select B.name as Subst_name, M.name as Branch_name,
S.*,
--主卡清单(不含自己)
CAST((SELECT CASE
WHEN COUNT(*) > 0 THEN
COUNT(*) || '::' ||
CAST(wm_concat(ACC_NBR || '=' || CASE
WHEN Open_Type_19 = '1' THEN
'OPEN'
ELSE
'CLOSE'
END) AS VARCHAR2(200))
ELSE
'N/A'
END
FROM RPT_COMM_CM_SERV X
LEFT
JOIN TB_TYKS_OPEN_LIST_INC_D_202001 Y
ON Y.SERV_ID = X.SERV_ID
WHERE X.CUST_ID = S.CUST_ID
AND X.SERV_ID <> S.SERV_ID
AND IS_FK = 0
AND (PROD_ID IN (3204, 3205))
AND STATE = 100000) as varchar(200)) ZK_LIST,
--副卡清单(不含自己)
CAST((SELECT CASE
WHEN COUNT(*) > 0 THEN
COUNT(*) || '::' ||
CAST(wm_concat(ACC_NBR
|| '=' || CASE
WHEN Open_Type_19 = '1' THEN
'OPEN'
ELSE
'CLOSE'
END) AS VARCHAR2(200))
ELSE
'N/A'
END
FROM RPT_COMM_CM_SERV X
LEFT JOIN TB_TYKS_OPEN_LIST_INC_D_202001 Y
ON Y.SERV_ID = X.SERV_ID
WHERE X.CUST_ID = S.CUST_ID
AND X.SERV_ID <> S.SERV_ID
AND IS_FK = 1
AND (PROD_ID IN (3204, 3205))
AND STATE = 100000) as varchar(200)) FK_LIST,
(select CASE
WHEN count(*) > 0 THEN
NVL(sum(IS_ACTIVE_USER), 0) || '/' || --活动数
count(IS_ACTIVE_USER) || ': ' || --总数
NVL(sum(round(NET_FLUX
/ 8 / 1024 / 1024 / 1024,
1)), --流量
0) || 'GB'
ELSE
'N/A'
END
from TB_COMM_YWL_DATA_201912 Y
where STATE = 100000
AND PROD_ID = 47
AND CUST_id = s.CUST_ID) DATA_INFO, --宽带数据
cast(case
when is_FK = 0 then
'主卡'
when (select count(*)
from rpt_comm_cm_serv_201912
where serv_id = S.Serv_Id
and is_FK = 0) > 0 then --上月是否主卡状态
'主转副'
else
'副卡'
end as varchar(6)) 主转副
from (select S.Subst_Id,
S.Branch_Id,
O.Serv_Id,
S.Acc_Nbr,
S.cust_ID,
S.Is_Fk,
case O.Open_Type_19
when 1 then
'OPEN'
else
'CLOSE'
end OPEN_STATE --是否开机
from TB_TYKS_OPEN_LIST_INC_D_202001 O
join rpt_comm_cm_serv S
ON O.Serv_Id = S.Serv_Id
and S.Create_Date >= to_date('20200101', 'yyyymmdd')
join rpt_comm_cm_msdisc D
ON D.Serv_Id = S.Serv_Id
and Prod_Offer_Code in
('YD5G01-013-1-1',
'YD5G01-013-1-2',
'YD4G01-096-1-2')) S
left join hx_name_branch B
on B.ID = S.subst_id
left join hx_name_manage M
on M.id = S.branch_id
该查询运行前已对涉及的表相关字段创建了索引:
rpt_comm_cm_msdisc:SERV_ID,PROD_OFFER_CODE
rpt_comm_cm_serv: SERV_ID, Create_Date, CUST_ID,
PROD_ID
rpt_comm_cm_serv_201912: SERV_ID
TB_TYKS_OPEN_LIST_INC_D_202001:SERV_ID
TB_COMM_YWL_DATA_201912: PROD_ID, CUST_ID
hx_name_branch: ID(记录极少)
hx_name_manage: ID(记录极少)
运行该查询依不同数据量,约需要5-10分钟。由于该查询用于直接在网页上展示数据,时间太久客户端不许可。在不预先跑、生成结果行集的情况下,对该查询进行优化。
首先对S 这一段进行优化,单独跑该段生成约3000-5000条记录,需要30-50秒。
select S.Subst_Id,
S.Branch_Id,
O.Serv_Id,
S.Acc_Nbr,
S.cust_ID,
S.Is_Fk,
case O.Open_Type_19
when 1 then
'OPEN'
else
'CLOSE'
end OPEN_STATE --是否开机
from TB_TYKS_OPEN_LIST_INC_D_202001 O
join rpt_comm_cm_serv S
ON O.Serv_Id = S.Serv_Id
and
S.Create_Date >= to_date('20200101', 'yyyymmdd')
join rpt_comm_cm_msdisc D
ON D.Serv_Id = S.Serv_Id
and
Prod_Offer_Code in
('YD5G01-013-1-1',
'YD5G01-013-1-2',
'YD4G01-096-1-2')
O表其中只有SERV_ID,有每天的开机数据,就没其它了。S表有创建日期等详细资料,D表有套餐的资料,Prod_Offer_Code是需要挑出来的套餐编号,通过JOIN的方式,以SREV_ID为准,三个表互相过滤得到最终结果。S、D表都很大,各超过500万记录。O表虽然没那么大,但10多万也是有的。查询分析:
Plan Hash
Value : 2593780587
----------------------------------------------------------------------------------------------------------------
| Id |
Operation |
Name | Rows |
Bytes | Cost |
Time |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT
STATEMENT | | 21571 | 6039880 | 47284 | 00:09:28
|
| * 1
| HASH
JOIN | | 21571 | 6039880 | 47284 | 00:09:28 |
| * 2
| HASH
JOIN | | 21171 | 2900427 | 23824 | 00:04:46
|
| 3
| TABLE ACCESS BY INDEX ROWID |
RPT_COMM_CM_SERV | 21171 | 2625204 | 21646 | 00:04:20
|
| * 4
| INDEX RANGE
SCAN |
INDEX_SERV_CREATE | 21171
| | 61 |
00:00:01 |
| 5
| TABLE ACCESS
FULL | TB_TYKS_OPEN_LIST_INC_D_202001 | 362945 | 4718285
| 1609 | 00:00:20 |
| 6
| INLIST
ITERATOR | | | | | |
| 7
| TABLE ACCESS BY INDEX ROWID |
RPT_COMM_CM_MSDISC | 52523 | 7510789 | 22925 | 00:04:36
|
| * 8
| INDEX RANGE
SCAN |
MSDISC_PROD_OFFERCODE | 27070
| | 104 | 00:00:02
|
----------------------------------------------------------------------------------------------------------------
Predicate Information
(identified by operation id):
------------------------------------------
* 1 -
access("D"."SERV_ID"="S"."SERV_ID")
* 2 -
access("O"."SERV_ID"="S"."SERV_ID")
* 4 -
access("S"."CREATE_DATE">=TO_DATE(' 2020-01-01 00:00:00',
'syyyy-mm-dd hh24:mi:ss'))
* 8 -
access("PROD_OFFER_CODE"='YD4G01-096-1-2' OR
"PROD_OFFER_CODE"='YD5G01-013-1-1' OR
"PROD_OFFER_CODE"='YD5G01-013-1-2')
看来主要问题在5,它先取出整个表进行匹配,再逐渐过滤。4、8都引用了相应的索引,没有什么问题。尝试在S或D表中先杀掉大部分数据。
以S为基表用CREATE_DATE筛选出很少的一部分出来(新客户业务),另外两个表再以此为据进行关联,工作量就少多了:
select S.Subst_Id,
S.Branch_Id,
O.Serv_Id,
S.Acc_Nbr,
S.cust_ID,
S.Is_Fk,
D.PROD_OFFER_CODE,
D.Prod_Offer_Name,
case O.Open_Type_19
when 1 then
'OPEN'
else
'CLOSE'
end
OPEN_STATE --是否开机
from rpt_comm_cm_serv S
join TB_TYKS_OPEN_LIST_INC_D_202001 O
ON O.Serv_Id = S.Serv_Id
join rpt_comm_cm_msdisc D
ON D.Serv_Id = S.Serv_Id
and
Prod_Offer_Code in
('YD5G01-013-1-1',
'YD5G01-013-1-2',
'YD4G01-096-1-2')
where S.Create_Date
> to_date('20200101', 'yyyymmdd')
这两种写法,虽然查询分析的结果几乎完全一样,但跑起来却效果完全不同,采用该方案,1秒不到就跑出来了。这是因为选用基表不同的原因。
结论:基表应该优先选用能在where里获取到最少数据那个表。
在这个基础上,对上面主查询的子查询逐个进行测试,发现主、副卡清单两段最耗时,后面的两个查询结构简单,只要做好索引,跑起来很快,没有什么优化的余地。前面的两个清单其实可看成是语法几乎相同的两个不同拷贝。所以取第一段配合大S进行测试分析。
with S as
(select
S.Subst_Id,
S.Branch_Id,
O.Serv_Id,
S.Acc_Nbr,
S.cust_ID,
S.Is_Fk,
D.PROD_OFFER_CODE,
D.Prod_Offer_Name,
case O.Open_Type_19
when 1 then
'OPEN'
else
'CLOSE'
end OPEN_STATE --是否开机
from rpt_comm_cm_serv S
join TB_TYKS_OPEN_LIST_INC_D_202001 O
ON O.Serv_Id = S.Serv_Id
join rpt_comm_cm_msdisc D
ON D.Serv_Id = S.Serv_Id
and Prod_Offer_Code in
('YD5G01-013-1-1',
'YD5G01-013-1-2',
'YD4G01-096-1-2')
where S.Create_Date > to_date('20200101',
'yyyymmdd'))
select --主卡清单(不含自己)
CAST((SELECT
CASE
WHEN COUNT(*) > 0 THEN
COUNT(*) || '::' ||
CAST(wm_concat(ACC_NBR || '=' || CASE
WHEN
Open_Type_19 = '1' THEN
'OPEN'
ELSE
'CLOSE'
END) AS VARCHAR2(200))
ELSE
'N/A'
END
FROM
RPT_COMM_CM_SERV X
LEFT JOIN TB_TYKS_OPEN_LIST_INC_D_202001 Y
ON Y.SERV_ID = X.SERV_ID
WHERE X.CUST_ID = S.CUST_ID
AND X.SERV_ID <> S.SERV_ID
AND IS_FK = 0
AND (PROD_ID IN (3204, 3205))
AND STATE = 100000) as varchar(200)) ZK_LIST
from S
看代码似乎完全没问题,很合理。完整跑完该段程序超过4分钟。检索查询分析看看:
-----------------------------------------------------------------------------------------------------------------------
| Id |
Operation |
Name | Rows |
Bytes | Cost |
Time |
-----------------------------------------------------------------------------------------------------------------------
| 0 |
SELECT
STATEMENT | | 21571 | 4055348 | 45094 | 00:09:02 |
| 1
| SORT
AGGREGATE | | 1
| 149
| | |
| * 2
| HASH JOIN
OUTER | | 2837 | 422713 | 1812 | 00:00:22 |
| * 3
| TABLE ACCESS BY INDEX
ROWID |
RPT_COMM_CM_SERV | 2837 | 385832 | 202 | 00:00:03
|
| 4
| BITMAP CONVERSION TO
ROWIDS | | | | | |
| 5
| BITMAP
AND | | | | | |
| 6
| BITMAP CONVERSION FROM ROWIDS | | | | | |
| * 7
| INDEX RANGE
SCAN |
INDEX_SERV_CUSTID | 7418
| | 3 | 00:00:01 |
| 8
| BITMAP
OR | | | | | |
| 9
| BITMAP CONVERSION FROM ROWIDS
| | | | | |
| * 10
| INDEX RANGE
SCAN |
INDEX_SERV_PRODID | 7418
| | 25 |
00:00:01 |
| 11
| BITMAP CONVERSION FROM ROWIDS
| | | | | |
| * 12
| INDEX RANGE
SCAN |
INDEX_SERV_PRODID | 7418
| | 25 |
00:00:01 |
| * 13
| TABLE ACCESS
FULL | TB_TYKS_OPEN_LIST_INC_D_202001 | 362944 | 4718272
| 1610 | 00:00:20 |
| * 14 | HASH
JOIN | | 21571 | 4055348 | 45094 | 00:09:02 |
| * 15
| HASH
JOIN | | 21171 | 952695 | 22168 |
00:04:27 |
| 16
| TABLE ACCESS BY INDEX
ROWID |
RPT_COMM_CM_SERV | 21171 | 740985 | 21645 |
00:04:20 |
| * 17
| INDEX RANGE
SCAN |
INDEX_SERV_CREATE | 21171
| | 61 |
00:00:01 |
| 18
| INDEX FAST FULL
SCAN |
TYKSOPENLIST_INC_D_202001_SERV | 362945 | 3629450
| 523 | 00:00:07 |
| 19
| INLIST
ITERATOR | | | | | |
| 20
| TABLE ACCESS BY INDEX
ROWID |
RPT_COMM_CM_MSDISC | 52523 | 7510789 | 22925 | 00:04:36 |
| * 21
| INDEX RANGE
SCAN |
MSDISC_PROD_OFFERCODE | 27070
| | 104 | 00:00:02 |
-----------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation
id):
------------------------------------------
* 2 - access("Y"."SERV_ID"(+)="X"."SERV_ID")
* 3 - filter("X"."IS_FK"=0 AND
TO_NUMBER("X"."STATE")=100000 AND
"X"."SERV_ID"<>:B1)
* 7 - access("X"."CUST_ID"=:B1)
* 10 - access("X"."PROD_ID"=3204)
* 12 - access("X"."PROD_ID"=3205)
* 13 - filter("Y"."SERV_ID"(+)<>:B1)
* 14 - access("D"."SERV_ID"="S"."SERV_ID")
* 15 - access("O"."SERV_ID"="S"."SERV_ID")
* 17 - access("S"."CREATE_DATE">TO_DATE(' 2020-01-01
00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
* 21 - access("PROD_OFFER_CODE"='YD4G01-096-1-2' OR
"PROD_OFFER_CODE"='YD5G01-013-1-1' OR
"PROD_OFFER_CODE"='YD5G01-013-1-2')
这个查询就是看跟原记录同个客户的业务清单,在排除了原记录的号码,再挑出产品ID为3204、3205,和一些杂项之后,将它们的号码联合串起来。其中还去Y中看了一起它的开机状态。
可以发现10和12似乎有点问题,对应查询语句是:PROD_ID IN (3204, 3205)。对于IN语法,看来ORACLE是先找到一个,再找另一个,然后两个做OR。虽然这两次查询都用到索引,但对于产品ID这种同值较多的,每次筛选出来结果集都较大,两个OR后再跟前面的客户ID筛选出来的做AND。
因为刚好是连续数字(整数字段),考虑修改为:
PROD_ID BETWEEN 3204
AND 3205
或者
PROD_ID >=3204 AND
PROD_ID <= 3205
这两个写法是一样的,优化器会自动转换为后面的写法。
修改后,再次分析
|
5 |
BITMAP AND
|
|
|
|
|
|
|
6 | BITMAP
CONVERSION FROM ROWIDS |
|
|
|
|
|
| *
7 |
INDEX RANGE SCAN
| INDEX_SERV_CUSTID
| 7418
|
|
3 | 00:00:01 |
|
8 |
BITMAP CONVERSION FROM ROWIDS |
|
|
|
|
|
|
9 |
SORT ORDER BY
|
|
|
|
|
|
| * 10 |
INDEX RANGE SCAN
|
INDEX_SERV_PRODID
| 7418
|
| 1438 |
00:00:18 |
| * 11 |
TABLE ACCESS FULL
| TB_TYKS_OPEN_LIST_INC_D_202001 | 362944 | 4718272 | 1610 | 00:00:20
|
已经合到一起了,并且一次索引扫描就出结果。不过总体时间优化得不多,只是略有提升。看到有个9,估计是要排ROWID,以方便与上面的客户ID的结果集中的ROWID做HASH JOIN筛选出最后结果。
再看7、10,一个是筛选出客户,一个是筛选产品,然后AND。虽两个字段均有索引,但仔细想想,这个逻辑的效率还是有问题。代码的原意是找客户,然后再把相应的业务挑出来就行了,但没想到,ORACLE居然这样操作。因为客户筛选出来的量比较少,如何让它只在这个结果集中过滤,而不引用PROD_ID索引(然后再AND)呢?
由于这个字段有索引,要让它不引用这个索引,就要让它觉得这个字段的筛选需要处理,比如:
PROD_ID + 0 BETWEEN 3204 AND 3205
对于需要处理的字段都不会援引索引进行筛选,比如
where to_char(create_date, ‘yyyymmdd’) =
‘20200101’
这样的,即使字段有索引,也不会用到,只能全表扫描。
由于引进了PROD_ID+0这个变化,前面的3204、3205如果发生变化,比如变成1、3、5也仍然可以用IN来解决了:
PROD_ID + 0 IN (1, 3, 5)
它不会走索引,只会在CUST_ID索引查找后强制过滤,因为记录数少,所以也无所谓。
考虑到count(*)也不常大于0,left join能省就省,反正是索引快速扫描,将它放到查询里面去。
其它还有一个小问题,就是STATE = 100000,这是个varchar2字段,从查询分析就能看出来筛选需要转换,所以直接用STATE = ‘100000’。
select --主卡清单(不含自己)
(SELECT CASE
WHEN COUNT(*) > 0 THEN
COUNT(*) || '::' ||
CAST(Replace(wm_concat(ACC_NBR || '=' || CASE
WHEN (SELECT Open_Type_19
FROM TB_TYKS_OPEN_LIST_INC_D_202001 Y
WHERE Y.SERV_ID = X.SERV_ID
AND ROWNUM = 1) = '1' THEN
'OPEN'
ELSE
'CLOSE'
END),
',', ', ') AS VARCHAR2(500))
ELSE
NULL
END
FROM RPT_COMM_CM_SERV X
WHERE X.CUST_ID = S.CUST_ID
AND X.SERV_ID <> S.SERV_ID
AND IS_FK = 0
AND (PROD_ID + 0 between 3204 and 3205)
AND STATE = '100000') ZK_LIST
from S
查询分析:
-
----------------------------------------------------------------------------------------------------------------
| Id |
Operation
| Name
|
Rows |
Bytes |
Cost |
Time
|
-----------------------------------------------------------------------------------------------------------------
|
0 | SELECT STATEMENT
|
| 21571 |
4055348 | 45094 | 00:09:02 |
| *
1 |
COUNT STOPKEY
|
|
|
|
|
|
|
2 |
TABLE ACCESS BY INDEX ROWID |
TB_TYKS_OPEN_LIST_INC_D_202001 |
1 |
13 |
4 | 00:00:01 |
| *
3 |
INDEX RANGE SCAN
| TYKSOPENLIST_INC_D_202001_SERV |
1 |
|
3 | 00:00:01 |
|
4 | SORT
AGGREGATE
|
|
1 |
136 |
|
|
| *
5 |
TABLE ACCESS BY INDEX ROWID |
RPT_COMM_CM_SERV
|
142 |
19312 |
7557 | 00:01:31 |
| *
6 |
INDEX RANGE SCAN
| INDEX_SERV_CUSTID
| 7418
|
|
3 | 00:00:01 |
| *
7 | HASH
JOIN
|
| 21571 |
4055348 | 45094 | 00:09:02 |
| *
8 |
HASH JOIN
|
| 21171
| 952695 |
22168 | 00:04:27 |
|
9 |
TABLE ACCESS BY INDEX ROWID | RPT_COMM_CM_SERV
| 21171
| 740985 |
21645 | 00:04:20 |
| * 10 |
INDEX RANGE SCAN
| INDEX_SERV_CREATE
| 21171
|
|
61 | 00:00:01 |
| 11
|
INDEX FAST FULL SCAN
| TYKSOPENLIST_INC_D_202001_SERV | 362945 | 3629450 | 523
| 00:00:07 |
| 12
|
INLIST ITERATOR
|
|
|
|
|
|
| 13
|
TABLE ACCESS BY INDEX ROWID | RPT_COMM_CM_MSDISC
| 52523 |
7510789 | 22925 | 00:04:36 |
| * 14 |
INDEX RANGE SCAN
| MSDISC_PROD_OFFERCODE
| 27070
|
| 104
| 00:00:02 |
-----------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation
id):
------------------------------------------
* 1 - filter(ROWNUM=1)
* 3 - access("Y"."SERV_ID"=:B1)
* 5 - filter("IS_FK"=0 AND "STATE"='100000' AND
"X"."SERV_ID"<>:B1 AND "PROD_ID"+0>=3204 AND
"PROD_ID"+0<=3205)
* 6 - access("X"."CUST_ID"=:B1)
* 7 - access("D"."SERV_ID"="S"."SERV_ID")
* 8 - access("O"."SERV_ID"="S"."SERV_ID")
* 10 - access("S"."CREATE_DATE">TO_DATE(' 2020-01-01
00:00:00', 'syyyy-mm-dd hh24:mi:ss'))
* 14 - access("PROD_OFFER_CODE"='YD4G01-096-1-2' OR
"PROD_OFFER_CODE"='YD5G01-013-1-1' OR
"PROD_OFFER_CODE"='YD5G01-013-1-2')
这样修改后,5承担了筛选产品ID和状态等杂七杂八的事,由于这时数量已很少,所以速度很快。
根据主卡清单这块脚本的方式,修订副卡清单部分的,全部脚本跑一趟大概在2-5秒。
最后
以上就是可靠钢笔为你收集整理的oracle 首次查询优化,一次ORACLE查询的优化的全部内容,希望文章能够帮你解决oracle 首次查询优化,一次ORACLE查询的优化所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复