概述
原文地址:
https://docs.oracle.com/en/database/oracle/oracle-database/12.2/tgsql/managing-extended-statistics.html#GUID-BD0F0B71-DD8B-44A0-888E-495830FC09A4
-- 为什么要使用组合列的统计信息 。文档中有说明 。
-- 查询表sh.customers中的列cust_state_province,country_id的统计信息
SH@test>COL COLUMN_NAME FORMAT a20
COL NDV FORMAT 999
SELECT COLUMN_NAME, NUM_DISTINCT AS "NDV", HISTOGRAM
FROM DBA_TAB_COL_STATISTICS
WHERE OWNER = 'SH'
AND TABLE_NAME = 'CUSTOMERS'
AND COLUMN_NAME IN ('CUST_STATE_PROVINCE', 'COUNTRY_ID');SH@test>SH@test> 2 3 4 5
COLUMN_NAME NDV HISTOGRAM
-------------------- ---- ---------------
CUST_STATE_PROVINCE 145 FREQUENCY
COUNTRY_ID 19 NONE
SH@test>
-- 查询,3341个customer在california(CA)
SH@test>SELECT COUNT(*)
FROM sh.customers
WHERE cust_state_province = 'CA' 2 3
4 ;
COUNT(*)
----------
3341
-- 查看执行计划,还是上面的执行语句,增加一个条件,并且count_id是52790
SH@test>EXPLAIN PLAN FOR
SELECT *
FROM sh.customers
WHERE cust_state_province = 'CA'
AND country_id=52790; 2 3 4 5
Explained.
SH@test>SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 2008213504
-------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 176 | 31856 | 405 (1)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| CUSTOMERS | 176 | 31856 | 405 (1)| 00:00:01 |
-------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
1 - filter("COUNTRY_ID"=52790 AND "CUST_STATE_PROVINCE"='CA')
13 rows selected.
--根据单个列的统计,优化器评估认为,查询在美国加利福尼亚的客户,返回176 ,而实际上,3341个客户是在加利福尼亚的,但是优化器并不知道CA和country_id=52790是同一个查询条件,所以导致优化器产生了错误的评估基数。可以通过组合列的统计信息,让优化器知道CA和country_id=52790在现实中是一回事。
SH@test>SELECT COUNT(*)
FROM sh.customers
WHERE cust_state_province = 'CA'
and country_id=52790
union all
SELECT COUNT(*)
FROM sh.customers
WHERE cust_state_province = 'CA' 2 3 4 5 6 7 8
9 ;
COUNT(*)
----------
3341
3341
SH@test>
-- 创建表,进行再次测试
14.1.2 Detecting Useful Column Groups for a Specific Workload
-- 创建表并收集统计信息
SH@test>CREATE TABLE customers_test AS SELECT * FROM customers;
Table created.
SH@test>EXEC DBMS_STATS.GATHER_TABLE_STATS(user, 'customers_test');
PL/SQL procedure successfully completed.
-- 启用workload 监控,300秒
SYS@test>exec DBMS_STATS.SEED_COL_USAGE(null,null,300);
PL/SQL procedure successfully completed.
-- 查询条件同时为洛杉矶,CA,52790 ,发现优化器评估的基数是1 ,实际一共有932条数据
SH@test>EXPLAIN PLAN FOR
SELECT *
FROM customers_test
WHERE cust_city = 'Los Angeles'
AND cust_state_province = 'CA'
AND country_id = 52790; 2 3 4 5 6
Explained.
SH@test>SELECT PLAN_TABLE_OUTPUT
FROM TABLE(DBMS_XPLAN.DISPLAY('plan_table', null,'basic rows')); 2
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 2112738156
----------------------------------------------------
| Id | Operation | Name | Rows |
----------------------------------------------------
| 0 | SELECT STATEMENT | | 1 |
| 1 | TABLE ACCESS FULL| CUSTOMERS_TEST | 1 |
----------------------------------------------------
8 rows selected.
-- 查询,按照country_id和州名进行查询,优化器评估基数是1949 ,实际值是145
SH@test>EXPLAIN PLAN FOR
SELECT country_id, cust_state_province, count(cust_city)
FROM customers_test
GROUP BY country_id, cust_state_province; 2 3 4
Explained.
SH@test>SELECT PLAN_TABLE_OUTPUT
FROM TABLE(DBMS_XPLAN.DISPLAY('plan_table', null,'basic rows')); 2
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 1820398555
-----------------------------------------------------
| Id | Operation | Name | Rows |
-----------------------------------------------------
| 0 | SELECT STATEMENT | | 1949 |
| 1 | HASH GROUP BY | | 1949 |
| 2 | TABLE ACCESS FULL| CUSTOMERS_TEST | 55500 |
-----------------------------------------------------
9 rows selected.
-- 查看有哪些列
SH@test>SET LONG 100000
SET LINES 120
SET PAGES 0
SELECT DBMS_STATS.REPORT_COL_USAGE(user, 'customers_test')
FROM DUAL;SH@test>SH@test>SH@test> 2
LEGEND:
.......
EQ : Used in single table EQuality predicate
RANGE : Used in single table RANGE predicate
LIKE : Used in single table LIKE predicate
NULL : Used in single table is (not) NULL predicate
EQ_JOIN : Used in EQuality JOIN predicate
NONEQ_JOIN : Used in NON EQuality JOIN predicate
FILTER : Used in single table FILTER predicate
JOIN : Used in JOIN predicate
GROUP_BY : Used in GROUP BY expression
...............................................................................
###############################################################################
COLUMN USAGE REPORT FOR SH.CUSTOMERS_TEST
.........................................
1. COUNTRY_ID : EQ
2. CUST_CITY : EQ
3. CUST_STATE_PROVINCE : EQ
4. (CUST_CITY, CUST_STATE_PROVINCE,
COUNTRY_ID) : FILTER
5. (CUST_STATE_PROVINCE, COUNTRY_ID) : GROUP_BY
###############################################################################
SH@test>
-- 创建列组合,因为是cblob字段,所以看到的结果是# ,记得11g中支持查询出来是值,而不是cblob
SH@test>SELECT DBMS_STATS.CREATE_EXTENDED_STATS(user, 'customers_test') FROM DUAL;
DBMS_STATS.CREATE_EXTENDED_STATS(USER,'CUSTOMERS_TEST')
--------------------------------------------------------------------------------
###############################################################################
SH@test>
-- 查看CBLOB内容如下,创建了两个组合,一个是针对filter,一个针对groupby 。
###############################################################################
EXTENSIONS FOR SH.CUSTOMERS_TEST
................................
1. (CUST_CITY, CUST_STATE_PROVINCE,
COUNTRY_ID) : SYS_STUMZ$C3AIHLPBROI#SKA58H_N exists
2. (CUST_STATE_PROVINCE, COUNTRY_ID) : SYS_STU#S#WF25Z#QAHIHE#MOFFMM_ exists
###############################################################################
-- 也可以这样查看,通过dbms_stats的SHOW_EXTENDED_STATS_NAME
SH@test>SELECT SYS.DBMS_STATS.SHOW_EXTENDED_STATS_NAME( 'sh','customers',
'(cust_state_province,country_id)' ) col_group_name
FROM DUAL; 2 3
COL_GROUP_NAME
------------------------------------------------------------------------------------------------------------------------
SYS_STU#S#WF25Z#QAHIHE#MOFFMM_
SH@test>
-- 或者这样查询
SH@test>SELECT EXTENSION_NAME, EXTENSION
FROM USER_STAT_EXTENSIONS
WHERE TABLE_NAME='CUSTOMERS'; 2 3
EXTENSION_NAME
------------------------------------------------------------------------------------------------------------------------
EXTENSION
--------------------------------------------------------------------------------
SYS_STU#S#WF25Z#QAHIHE#MOFFMM_
("CUST_STATE_PROVINCE","COUNTRY_ID")
SH@test>
--收集统计信息
SH@test>EXEC DBMS_STATS.GATHER_TABLE_STATS(user,'customers_test');
PL/SQL procedure successfully completed.
SH@test>
-- 查看统计信息
SH@test>SELECT COLUMN_NAME, NUM_DISTINCT, HISTOGRAM
FROM USER_TAB_COL_STATISTICS
WHERE TABLE_NAME = 'CUSTOMERS_TEST'
ORDER BY 1; 2 3 4
COLUMN_NAME NUM_DISTINCT HISTOGRAM
-------------------- ------------ ---------------
COUNTRY_ID 19 FREQUENCY
CUST_CITY 620 HYBRID
CUST_CITY_ID 620 NONE
CUST_CREDIT_LIMIT 8 NONE
CUST_EFF_FROM 1 NONE
CUST_EFF_TO 0 NONE
CUST_EMAIL 1699 NONE
CUST_FIRST_NAME 1300 NONE
CUST_GENDER 2 NONE
CUST_ID 55500 NONE
CUST_INCOME_LEVEL 12 NONE
COLUMN_NAME NUM_DISTINCT HISTOGRAM
-------------------- ------------ ---------------
CUST_LAST_NAME 908 NONE
CUST_MAIN_PHONE_NUMB 51344 NONE
ER
CUST_MARITAL_STATUS 11 NONE
CUST_POSTAL_CODE 623 NONE
CUST_SRC_ID 0 NONE
CUST_STATE_PROVINCE 145 FREQUENCY
CUST_STATE_PROVINCE_ 145 NONE
ID
COLUMN_NAME NUM_DISTINCT HISTOGRAM
-------------------- ------------ ---------------
CUST_STREET_ADDRESS 49900 NONE
CUST_TOTAL 1 NONE
CUST_TOTAL_ID 1 NONE
CUST_VALID 2 NONE
CUST_YEAR_OF_BIRTH 75 NONE
SYS_STU#S#WF25Z#QAHI 145 NONE
HE#MOFFMM_
SYS_STUMZ$C3AIHLPBRO 620 HYBRID
I#SKA58H_N
25 rows selected.
SH@test>
-- 或者,这样查看直方图
SH@test>
SH@test>SELECT e.EXTENSION col_group, t.NUM_DISTINCT, t.HISTOGRAM
FROM USER_STAT_EXTENSIONS e, USER_TAB_COL_STATISTICS t
WHERE e.EXTENSION_NAME=t.COLUMN_NAME
AND e.TABLE_NAME=t.TABLE_NAME
AND t.TABLE_NAME='CUSTOMERS'; 2 3 4 5
COL_GROUP NUM_DISTINCT HISTOGRAM
-------------------------------------------------------------------------------- ------------ ---------------
("CUST_STATE_PROVINCE","COUNTRY_ID") 145 FREQUENCY
SH@test>
-- 再次执行之前的查询语句,查看执行计划,新的执行计划,显示了更加准确的基数 。
SH@test>EXPLAIN PLAN FOR
SELECT *
FROM customers_test
WHERE cust_city = 'Los Angeles'
AND cust_state_province = 'CA'
AND country_id = 52790; 2 3 4 5 6
Explained.
SH@test>SELECT PLAN_TABLE_OUTPUT
FROM TABLE(DBMS_XPLAN.DISPLAY('plan_table', null,'basic rows')); 2
Plan hash value: 2112738156
----------------------------------------------------
| Id | Operation | Name | Rows |
----------------------------------------------------
| 0 | SELECT STATEMENT | | 856 |
| 1 | TABLE ACCESS FULL| CUSTOMERS_TEST | 856 |
----------------------------------------------------
8 rows selected.
SH@test>
SH@test>EXPLAIN PLAN FOR
SELECT country_id, cust_state_province, count(cust_city)
FROM customers_test
GROUP BY country_id, cust_state_province; 2 3 4
Explained.
SH@test>SELECT PLAN_TABLE_OUTPUT
FROM TABLE(DBMS_XPLAN.DISPLAY('plan_table', null,'basic rows')); 2
Plan hash value: 1820398555
-----------------------------------------------------
| Id | Operation | Name | Rows |
-----------------------------------------------------
| 0 | SELECT STATEMENT | | 145 |
| 1 | HASH GROUP BY | | 145 |
| 2 | TABLE ACCESS FULL| CUSTOMERS_TEST | 55500 |
-----------------------------------------------------
9 rows selected.
SH@test>
-- 收集列的统计信息
SH@test>BEGIN
DBMS_STATS.GATHER_TABLE_STATS( 'sh','customers',
METHOD_OPT => 'FOR ALL COLUMNS SIZE SKEWONLY ' ||
'FOR COLUMNS SIZE SKEWONLY (cust_state_province,country_id)' );
END;
/ 2 3 4 5 6
PL/SQL procedure successfully completed.
SH@test>
-- drop扩展统计
BEGIN
DBMS_STATS.DROP_EXTENDED_STATS( 'sh', 'customers','(cust_state_province, country_id)' );
END;
--对where条件里面有函数表达式的,进行扩展统计
SH@test>SELECT COUNT(*) FROM sh.customers WHERE cust_state_province='CA';
COUNT(*)
----------
3341
SH@test>
SH@test>EXPLAIN PLAN FOR SELECT * FROM sh.customers WHERE LOWER(cust_state_province)='ca';
Explained.
SH@test>select * from table(dbms_xplan.display);
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 2008213504
-------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 555 | 104K| 405 (1)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| CUSTOMERS | 555 | 104K| 405 (1)| 00:00:01 |
-------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
1 - filter(LOWER("CUST_STATE_PROVINCE")='ca')
13 rows selected.
SH@test>
-- 创建扩展
SH@test>SELECT DBMS_STATS.CREATE_EXTENDED_STATS(user, 'customers_test','(LOWER(cust_state_province))') FROM DUAL;
DBMS_STATS.CREATE_EXTENDED_STATS(USER,'CUSTOMERS_TEST','(LOWER(CUST_STATE_PROVIN'
--------------------------------------------------------------------------------
SYS_STUBPHJSBRKOIK9O2YV3W8HOUE
SH@test>
--收集统计信息
SH@test>BEGIN
DBMS_STATS.GATHER_TABLE_STATS('sh', 'customers', method_opt => 'FOR ALL COLUMNS SIZE SKEWONLY ' ||
'FOR COLUMNS (LOWER(cust_state_province)) SIZE SKEWONLY'
);
END; 2 3 4 5
6 /
PL/SQL procedure successfully completed.
SH@test>
-- 再次查看,看执行计划,这里看到返回的rows是3341 ,前面查询的count也是3341
SH@test>EXPLAIN PLAN FOR SELECT * FROM sh.customers WHERE LOWER(cust_state_province)='ca';
Explained.
SH@test>select * from table(dbms_xplan.display);
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 2008213504
-------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 3341 | 665K| 405 (1)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| CUSTOMERS | 3341 | 665K| 405 (1)| 00:00:01 |
-------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
1 - filter(LOWER("CUST_STATE_PROVINCE")='ca')
13 rows selected.
SH@test>
-- drop掉函数统计
SH@test>BEGIN
DBMS_STATS.DROP_EXTENDED_STATS(
'sh'
, 'customers'
, '(LOWER(cust_state_province))'
);
END;
/ 2 3 4 5 6 7 8
PL/SQL procedure successfully completed.
SH@test>
END
最后
以上就是俊秀鞋垫为你收集整理的扩展统计信息 - extended statistics的全部内容,希望文章能够帮你解决扩展统计信息 - extended statistics所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复