《Oracle查询优化改写》笔记(高级查询)

《Oracle查询优化改写》笔记(基础语句)

《Oracle查询优化改写》笔记(应用案例)

范围处理

定位连续值的范围

方法一:自关联

1
2
3
SELECT v1.proj_id AS 工程号,v1.proj_start 开始日期,v1.proj_end 结束日期
FROM v v1,v v2
WHERE v2.proj_start = v1.proj_end

方法二:分析函数 lead() over()

1
2
3
SELECT proj_id AS 工程号,proj_start 开始日期,proj_end 结束日期
lead(proj_start) over(ORDER BY proj_id) 下一工程开始日期
FROM v;

自关联需要扫描两次视图“V”,而使用分析函数只需要一次,大部分情况可以通过分析函数优化查询性能。

同组/分区中行间差值

方法一:自关联

1
2
3
4
5
6
7
8
9
WITH x0 AS
(SELECT rownum AS seq,登录名,登录时间 FROM (
SELECT 登录名,登录时间 FROM log ORDER BY 登录名,登录时间
))

SELECT e1.登录名,e1.登录时间,e2.登录时间 AS 下一登录时间
FROM x0 e1 LEFT JOIN x0 e2
ON (e2.登录名 = e1.登录名 AND e2.seq = e1.seq + 1)
ORDER BY 1, 2;

方法二:分析函数 lead() over()

1
2
3
SELECT 登录名,登录时间,
lead(登录时间) over(partition by 登录名 order by 登录时间) as 下一登录时间
FROM log;

连续值范围的开始/结束

1
2
3
4
5
6
7
8
9
SELECT 分组依据,MIN(开始日期) AS 开始日期,MAX(结束日期) AS 结束日期 FROM (
SELECT 编号,开始日期,结束日期,SUM(连续状态) over(ORDER BY 编号) 分组依据 FROM (
SELECT proj_id AS 编号,proj_start AS 开始日期,proj_end AS 结束日期,
CASE WHEN lag(proj_end) over(order by proj_id) = proj_start THEN 0 ELSE 1 连续状态
FROM v
)
)
GROUP BY 分组依据
ORDER BY 1;

合并连续或重叠时间段

  • 为处理重叠数据,与上一条数据判断时,需要“>=”
  • “BETWEEN unbounded precedin AND 1 preceding” 就是一个 “BETWEEN … AND …” 子句,意思是“第一行到上一行”,此例分析函数就是 “ORDER BY start_date” 后 “第一行到上一行” 范围内的 “MAX(end_date)”。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
WITH 
x0 AS (
SELECT task_id,start_date,end_date,
MAX(end_date) over(order by start_date rows BETWEEN unbounded preceding AND 1 preceding) AS max_end_date
FROM timesheets b
),
x1 AS (
SELECT start_date AS 开始时间,end_date AS 结束时间,max_end_date,
CASE WHEN max_end_date >= start_date THEN 0 ELSE 1 END AS 连续状态
FROM x0
),
x2 AS (
SELECT 开始时间,结束时间,SUM(连续状态) over(order by 开始时间) AS 分组依据
FROM x1
)

SELECT 分组依据,MIN(开始时间) AS 开始时间,MAX(结束时间) AS 结束时间
FROM x2
GROUP BY 分组依据
ORDER BY 分组依据;

高级查找

结果集分页

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*方法一(推荐)*/
/*3.根据前面生成的序号过滤掉6行以前的数据*/
SELECT rn AS 序号, ename AS 姓名, sal AS 工资 FROM (
/*2.取得排序后的序号,并过滤掉10行以后的数据*/
SELECT rownum AS rn, sal, ename FROM (
/*1.按sal排序*/
SELECT sal, ename FROM emp WHERE sal IS NOT NULL order by sal
) WHERE rownum <= 10
) WHERE rn >= 6;

/*方法二*/
SELECT rn AS 序号, ename AS 姓名, sal AS 工资 FROM (
SELECT row_number() over(order by sal) AS rn, sal, ename
FROM emp WHERE sal IS NOT NULL
) WHERE rn BETWEEN 6 AND 10;

重新生成字段值

1
2
3
4
5
6
CREATE TABLE hotel (floor_nbr, room_nbr) AS
SELECT 1,100 FROM dual UNION ALL
SELECT 1,100 FROM dual UNION ALL
SELECT 2,100 FROM dual UNION ALL
SELECT 2,100 FROM dual UNION ALL
SELECT 3,100 FROM dual;

房间号不对,按 101、102、201、202 的形式重新生成:

1
2
3
4
UPDATE hotel SET room_nbr = 
(floor_nbr*100) + row_number() over(partition by floor_nbr order by ROWID);

# ORA-30483:window 函数在此禁用
1
2
3
4
5
6
7
8
# 可更新VIEW
UPDATE (
SELECT ROWID,room_nbr,
(floor_nbr*100) + row_number() over(partition by floor_nbr order by ROWID) AS new_nbr
FROM hotel
) SET room_nbr = new_nbr;

# ORA-01732:此视图的数据操纵操作非法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*方法一(子查询执行了5次,对hotel有5次全表扫描)*/
UPDATE hotel a
SET room_nbr = (
SELECT room_nbr FROM (
SELECT (floor_nbr*100) + row_number() over(partition by floor_nbr order by ROWID) AS room_nbr
FROM hotel
) b WHERE a.rowid = b.rowid
)

/*方法二(MERGE语句,对hotel只访问1次,推荐)*/
MERGE INTO hotel a
USING (
SELECT ROWID AS rid,
(floor_nbr*100) + row_number() over(partition by floor_nbr order by ROWID) AS room_nbr
FROM hotel
) b
ON (a.rowid = b.rowid)
WHEN MATCHED THEN
UPDATE SET a.room_nbr = b.room_nbr;

跳过表中 n 行

使用场景:对数据进行抽样,用求余函数 mod 隔 n 行返回数据。

1
2
3
4
5
SELECT empno, ename, sal, MOD(rn, 2) AS m FROM (
SELECT rownum AS rn, empno, ename, sal FROM (
SELECT empno, ename, sal FROM emp ORDER BY ename
)
) WHERE MOD(rn, 2) = 1;

排列组合去重

数据环境模拟:

1
2
3
4
5
CREATE TABLE TEST (id,t1,t2,t3) AS
SELECT 1, '1', '3', '2' FROM dual UNION ALL
SELECT 2, '1', '3', '2' FROM dual UNION ALL
SELECT 3, '3', '2', '1' FROM dual UNION ALL
SELECT 4, '4', '2', '1' FROM dual;

前三行中 t1、t2、t3 的数据组合是重复的(都是1、2、3),要求去重:

  1. 把 t1、t2、t3 这三列用列转行合并为一列。
  2. 用 listagg 函数对各组字符排序并合并。
  3. 执行常用去重语句。

1
2
3
4
5
6
7
8
9
10
11
WITH 
x1 AS (
SELECT * FROM test unpivot(b2 FOR b3 IN(t1,t2,t3))
),
x2 AS (
SELECT id,listagg(b2,',') within GROUP(ORDER BY b2) AS b
FROM x1 GROUP BY id
)

SELECT id,b,row_number() over(partition by b order by id) AS sn
FROM x2;

找含最大/小值记录

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*方法一(扫描三次,若object_id是索引则速度可能比分析函数快)*/
SELECT object_name, object_id FROM test
WHERE object_id IN (
SELECT MIN(object_id) FROM test
UNION ALL
SELECT MAX(object_id) FROM test
)

/*方法二(扫描一次)*/
SELECT object_name,object_id FROM (
SELECT object_name,object_id,
MIN(object_id) over() min_id,
MAX(object_id) over() max_id
FROM test
) WHERE object_id IN (min_id,max_id);

报表和数据仓库运算

行转列

  1. CASE WHEN END 编写和维护较麻烦,但适合的场景较多。
  2. Oracle11g新增的 PIVOT 函数编写维护简单,但有较大限制。(只是写法简单,实际上仍用的是 CASE WHEN 语句)
1
2
3
4
5
6
7
8
SELECT job AS 工作,
SUM(CASE deptno WHEN 10 THEN sal END) AS 部门10工资,
SUM(CASE deptno WHEN 20 THEN sal END) AS 部门20工资,
SUM(CASE deptno WHEN 30 THEN sal END) AS 部门30工资,
SUM(sal) AS 合计工资
FROM emp
GROUP BY job
ORDER BY 1;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
SELECT * FROM (
/*job列未在pivot中,所以被当作分组条件*/
SELECT job,sal,deptno FROM emp
) pivot(
/*SUM、MAX等聚集函数+别名,若不设置,则默认使用后面in里设的别名,否则两个别名相加*/
SUM(sal) AS s
FOR deptno
IN (
/*相当于 sum(case when deptno = 10 then sal end) as d10 别名与前面的别名合并后为 D10_S*/
10 AS d10,
/*相当于 sum(case when deptno = 20 then sal end) as 20 若列别名不设置,则默认使用值作为别名,此处为20,与前面的合并后为20_S*/
20,
/*相当于 sum(case when deptno = 30 then sal end) as d30 别名与前面的别名合并后为 D30_S*/
30 AS d30
)
) ORDER BY 1;

如果还要增加提成的返回列,用 PIVOT 则只需要增加一个设定即可:

1
2
3
4
5
6
SELECT * FROM (
SELECT job,sal,comm,deptno FROM emp
) pivot(
SUM(sal) AS s,SUM(comm) AS c FOR deptno
IN (10 AS d10,20 as d20,30 as d30)
) ORDER BY 1;

而用 CASE WHEN 要增加三行语句:

1
2
3
4
5
6
7
8
9
10
SELECT job,
sum(case when deptno = 10 then sal end) AS d10_s,
sum(case when deptno = 20 then sal end) AS d20_s,
sum(case when deptno = 30 then sal end) AS d30_s,
sum(case when deptno = 10 then comm end) AS d10_c,
sum(case when deptno = 20 then comm end) AS d20_c,
sum(case when deptno = 30 then comm end) AS d30_c
FROM emp
GROUP BY job
ORDER BY 1;

PIVOT 一次只能按一个条件来完成“行转列”,如果同时把工作与部门转为列,并汇总为一行时,只能用 CASE WHEN

1
2
3
4
5
6
7
8
SELECT
COUNT(case when deptno = 10 then ename end) AS deptno_10,
COUNT(case when deptno = 20 then ename end) AS deptno_20,
COUNT(case when deptno = 30 then ename end) AS deptno_30,
COUNT(case when job = 'CLERK' then ename end) AS clerks,
COUNT(case when job = 'MANAGER' then ename end) AS mgrs,
COUNT(case when job = 'PRESIDENT' then ename end) AS prez
FROM emp;

列转行

1
2
3
4
5
6
7
8
9
10
11
/*以前用 UNION ALL来写*/
SELECT '10' AS 部门编码,DEPTION_10_CT AS 人次 FROM test
UNION ALL
SELECT '20' AS 部门编码,DEPTION_20_CT AS 人次 FROM test
UNION ALL
SELECT '30' AS 部门编码,DEPTION_30_CT AS 人次 FROM test;

/*UNPIVOT 函数,与PIVOT不一样,UNPIVOT只需扫描test一次*/
SELECT deptno AS 列名,substr(deptno,-5,2) AS 部门编码,人次
FROM test
unpivot(人次 FOR deptno IN(deptno_10_CT,deptno_20_CT,deptno_30_CT));

UPIVOT 同样有限制,如果同时有人次与工资合计要转换,就不能一次性完成,只有分别转换后再用 JOIN 连接:

1
2
3
4
5
6
7
8
9
10
11
12
/*使用参数 INCLUDE NULLS,这样即使数据为空,也显示一行*/
SELECT a.列名, a.部门编码, a.人次, b.工资 FROM (
SELECT substr(deptno, 1, 9) AS 列名, substr(deptno, -5, 2) 部门编码, 人次
FROM test
unpivot include NULLS(人次 FOR deptno IN(deptno_10_ct,deptno_20_ct,deptno_30_ct))
) a
INNER JOIN (
SELECT substr(deptno, 1, 9) AS 列名, 工资
FROM test
unpivot include NULLS(人次 FOR deptno IN(deptno_10_s,deptno_20_s,deptno_30_s))
) b
ON (b.列名 = a.列名);

是否有办法只用 UNPIVOT,而不用 JOIN 呢?

1
2
3
4
5
6
7
8
9
SELECT deptno,人次,deptno2,工资
FROM test
unpivot include NULLS(人次 FOR deptno IN(
deptno_10_ct AS 10,deptno_20_ct AS 20,deptno_30_ct AS 30
))
unpivot include NULLS(工资 FOR deptno2 IN(
deptno_10_s AS 10,deptno_20_s AS 20,deptno_30_s AS 30
))
ORDER BY 1,3;

当使用两个 UNPIVOT 时,生成的结果是一个笛卡尔积,其中粗体字部分才是我们需要的。

上面的语句实际上是一个嵌套语句,前一个 UNPIVOT 结果出来后,再执行另一个 UNPIVOT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
WITH x0 AS (
SELECT deptno,人次,deptno_10_s,deptno_20_s,deptno_30_s
FROM test
unpivot include NULLS(人次 FOR deptno IN (
deptno_10_ct AS 10,deptno_20_ct AS 20,deptno_30_ct AS 30
)))
)

SELECT deptno,人次,deptno2,工资
FROM x0
unpivot include NULLS(工资 FOR deptno2 IN (
deptno_10_s AS 10,deptno_20_s AS 20,deptno_30_s AS 30
))
ORDER BY 1,3;

针对需要的数据,在上面的查询上加一个过滤即可:

1
2
3
4
5
6
7
8
9
SELECT deptno AS 部门编码,人次,工资
FROM test
unpivot include NULLS(人次 FOR deptno IN(
deptno_10_ct AS 10,deptno_20_ct AS 20,deptno_30_ct AS 30
))
unpivot include NULLS(工资 FOR deptno2 IN(
deptno_10_s AS 10,deptno_20_s AS 20,deptno_30_s AS 30
))
WHERE deptno = deptno2;

结果集转置为一列

  • 与 UNION ALL 一样,要合并的数据列类型必须相同,如下 TO_CHAR(SAL)。
  • 如果不加 include nulls,将不会显示空行。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
SELECT EMPS FROM (
/*增加这一列来显示空行*/
SELECT ENAME,JOB,TO_CHAR(SAL) AS SAL,NULL AS T_COL
FROM EMP WHERE deptno = 10;
) UNPIVOT INCLUDE NULLS(EMPS FOR COL IN(
ENAME,JOB,SAL,T_COL
));

EMPS
------------
CLARK
MANAGER
2450

KING
PRESIDENT
5000

抑制结果集中重复值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
SELECT CASE 
/*当部门分类按姓名排序后与上一条内容相同时不显示*/
WHEN lag(job) over(ORDER BY job,ename) = job
THEN NULL
ELSE job
END AS JOB,
ENAME
FROM emp
WHERE deptno = 20
/*order by 子句中job加上前缀,否则优先使用“别名”,而不是“列名”*/
ORDER BY emp.job,ename;

JOB ENAME
-----------------
ANALYST FORD
SCOTT
CLERK ADAMS
SMITH
MANAGER JONES

利用“行转列”进行计算

使用场景:计算部门20与部门10及部门30之间的总工资差额。

1
2
3
4
5
6
7
8
9
SELECT d10_sal,d20_sal,d30_sal,
d20_sal-d10_sal AS d20_10_diff,
d20_sal-d30_sal AS d20_30_diff
FROM (
SELECT SUM(CASE WHEN deptno=10 THEN sal END) AS d10_sal,
SUM(CASE WHEN deptno=20 THEN sal END) AS d20_sal,
SUM(CASE WHEN deptno=30 THEN sal END) AS d30_sal
FROM emp
) totals_by_dept;

给数据分组

使用场景:为了方便打印,要求多行多列打印,如 emp.ename 类似下面显示:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
WITH
x1 AS
/*1. 排序*/
(SELECT ename FROM emp ORDER BY ename),
x2 AS
/*2. 生成序号*/
(SELECT rownum AS rn, ename FROM x1),
x3 AS
/*3. 分组*/
(SELECT ceil(rn/5) AS gp, ename FROM x2),
x4 AS
/*4. 分组生成序号*/
(SELECT gp, ename,
row_number() over(PARTITION BY gp ORDER BY ename) AS rn
FROM x3)

/*5. 行转列*/
SELECT * FROM x4 pivot(MAX(ename) FOR rn IN (
1 AS n1,
2 AS n2,
3 AS n3,
4 AS n4,
5 AS n5
));

对数据分组

使用场景:假期公司需要对雇员和经理分三组值班,用分析函数 ntile 来处理。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT ntile(3) over(ORDER BY empno) AS 组,
empno AS 编码,
ename AS 姓名
FROM emp
WHERE job IN ('CLERK','MANAGER');

组 编码 姓名
----------------------
1 7369 SMITH
1 7566 JONES
1 7698 BLAKE
2 7782 CLARK
2 7876 ADAMS
3 7900 JAMES
3 7934 MILLER

计算简单的小计

使用场景:生成报表数据时加一个总合计,可以使用 ROLLUP 或 UNION ALL:

1
2
3
4
5
6
7
8
SELECT deptno,SUM(sal) AS s_sal FROM emp GROUP BY ROLLUP(deptno);

DEPTNO S_SAL
---------------
10 8750
20 10875
30 9400
29025

与 UNION ALL 对照实例:

1
2
3
4
5
6
SELECT deptno AS 部门编码, 
job AS 工作,
mgr AS 主管,
SUM(sal) AS s_sal
FROM emp
GROUP BY ROLLUP(deptno, job, mgr);

相当于:

1
2
3
4
5
6
7
8
9
10
11
SELECT deptno AS 部门编码, job AS 工作, mgr AS 主管, SUM(sal) as s_sal
FROM emp GROUP BY deptno, job, mgr
UNION ALL
SELECT deptno AS 部门编码, job AS 工作, NULL/*工作小计*/ AS 主管, SUM(sal) as s_sal
FROM emp GROUP BY deptno, job
UNION ALL
SELECT deptno AS 部门编码, NULL/*部门小计*/ AS 工作, NULL AS 主管, SUM(sal) as s_sal
FROM emp GROUP BY deptno
UNION ALL
SELECT NULL/*总合计*/ AS 部门编码, NULL AS 工作, NULL AS 主管, SUM(sal) as s_sal
FROM emp;

判别非小计的行

GROUPING 函数,参数只能是 group by 后的列名。

1
2
3
4
5
6
7
8
9
/* 当列被汇总时,GROUPING 的返回值为1,否则为0 */
SELECT
CASE GROUPING(deptno) WHEN 1 THEN '总计' ELSE to_char(deptno) END AS 部门编码,
CASE WHEN GROUPING(deptno) = 1 THEN NULL WHEN GROUPING(job) = 1 THEN '小计' ELSE job END AS 工作,
CASE WHEN GROUPING(job) = 1 THEN NULL WHEN GROUPING(mgr) = 1 THEN '小计' ELSE to_char(mgr) END AS 主管,
MAX(CASE WHEN empno in (7788,7654,7902) THEN empno end) AS max_empno,
SUM(sal) AS 工资合计
FROM emp
GROUP BY ROLLUP(deptno, job, mgr);

计算所有组合小计

grouping_id 函数 与 GROUPING 的对比:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
SELECT CASE GROUPING(deptno) || GROUPING(job)
WHEN '00' THEN '按部门与工作分组'
WHEN '10' THEN '按工作分组'
WHEN '01' THEN '按部门分组'
WHEN '11' THEN '总合计'
END AS GROUPING,
/* 把如上的值转为十进制就是 grouping_id(deptno,job) 的值 */
CASE grouping_id(deptno,job)
WHEN 0 THEN '按部门与工作分组'
WHEN 2 THEN '按工作分组'
WHEN 1 THEN '按部门分组'
WHEN 3 THEN '总合计'
END AS grouping_id,
deptno AS 部门,
job AS 工作,
SUM(sal) AS 工资
FROM emp
GROUP BY CUBE(deptno, job)
ORDER BY GROUPING(job), GROUPING(deptno);

groupinggrouping_id 之间的关系及对应的 group by 对比如下:

人员在工作间的分布

要求每种工作显示为一列,每位员工显示一行,员工与工作对应时显示1,否则为空:

1
2
3
4
5
6
7
8
9
10
11
SELECT * FROM (
SELECT ENAME, JOB FROM EMP
) PIVOT(
SUM(1) FOR JOB IN(
'CLERK' AS IS_CLERK,
'SALESMAN' AS IS_SALES,
'MANAGER' AS IS_MGR,
'ANALYST' AS IS_ANALYST,
'PRESIDENT' AS IS_PREZ
)
) ORDER BY 2,3,4,5,6

创建稀疏矩阵

在上例工种的分布基础上,增加在部门间的分布,由于未对数据进行汇总,所以仍可以用 PIVOT 来处理:

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT * FROM (
SELECT empno, ename, ename AS ename2, deptno, job FROM emp
) pivot(MAX(ename) FOR deptno IN(
10 AS deptno_10,
20 AS deptno_20,
30 AS deptno_30
)) pivot(MAX(ename2) FOR job IN (
'CLERK' AS clerks,
'MANAGER' AS mgrs,
'PRESIDENT' AS prez,
'ANALYST' AS anals,
'SALESMAN' AS sales
)) ORDER BY 1;

如果对数据有汇总,就不能用两个 PIVOT 的方式,因为这种查询实际相当于两个 PIVOT 的子句嵌套,应用 CASE WHEN 语句:

1
2
3
4
5
6
7
8
9
10
SELECT 
COUNT(case when deptno=10 then ename end) as deptno_10,
COUNT(case when deptno=20 then ename end) as deptno_20,
COUNT(case when deptno=30 then ename end) as deptno_30,
COUNT(case when job='CLERK' then ename end) as clerks,
COUNT(case when job='MANAGER' then ename end) as mgrs,
COUNT(case when job='PRESIDENT' then ename end) as prez,
COUNT(case when job='ANALYST' then ename end) as anals,
COUNT(case when job='SALESMAN' then ename end) as sales
FROM emp;

若用 PIVOT:

1
2
3
4
5
6
7
8
9
10
11
12
13
SELECT * FROM (
SELECT ename, ename AS ename2, deptno, job FROM emp
) pivot(COUNT(ename) FOR deptno IN(
10 AS deptno_10,
20 AS deptno_20,
30 AS deptno_30
)) pivot(COUNT(ename2) FOR job IN (
'CLERK' AS clerks,
'MANAGER' AS mgrs,
'PRESIDENT' AS prez,
'ANALYST' AS anals,
'SALESMAN' AS sales
)) ORDER BY 1;

结果与 case when 的结果不一致,分析如下:

1
2
3
4
5
6
7
8
9
10
11
12
/* 嵌套示例第一步,相当于 group by ename2,job */
WITH x0 AS (
SELECT * FROM (
SELECT ename, ename AS ename2, deptno, job FROM emp
) pivot(COUNT(ename) FOR deptno IN(
10 AS deptno_10,
20 AS deptno_20,
30 AS deptno_30
))
)
SELECT * FROM x0;
/* 第一步返回列(ename2,job,deptno_10,deptno_20,deptno_30) */

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/* 嵌套示例第二步,去除(ename2,job),相当于group by(deptno_10,deptno_20,deptno_30) */
WITH x0 AS (
SELECT * FROM (
SELECT ename, ename AS ename2, deptno, job FROM emp
) pivot(COUNT(ename) FOR deptno IN(
10 AS deptno_10,
20 AS deptno_20,
30 AS deptno_30
))
)
SELECT * FROM x0
pivot(COUNT(ename2) FOR job IN (
'CLERK' AS clerks,
'MANAGER' AS mgrs,
'PRESIDENT' AS prez,
'ANALYST' AS anals,
'SALESMAN' AS sales
)) ORDER BY 1;

不同组/分区的同时聚集

要求列出员工所在部门及职位的人数:

没用分析函数前,用自关联:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* 写法复杂,且对表emp访问四次 */
SELECT e.ename 姓名,
e.deptno 部门,
s_d.cnt AS 部门人数,
e.job AS 职位,
s_j.cnt AS 职位人数,
(SELECT COUNT(*) AS cnt FROM emp WHERE deptno IN (10,20)) AS 总人数
FROM emp e
INNER JOIN (
SELECT deptno,COUNT(*) AS cnt FROM emp WHERE deptno IN (10,20) GROUP BY deptno
) s_d
ON (s_d.deptno = e.deptno)
INNER JOIN (
SELECT job,COUNT(*) AS cnt FROM emp WHERE deptno IN (10,20) GROUP BY job
) s_j
ON (s_j.job = e.job)
WHERE e.deptno IN (10,20);

改用分析函数:

1
2
3
4
5
6
7
8
SELECT ename 姓名,
deptno 部门,
COUNT(*) OVER(PARTITION BY deptno) AS 部门人数,
job AS 职位,
COUNT(*) OVER(PARTITION BY job) AS 职位人数,
COUNT(*) OVER() AS 总人数
FROM emp
WHERE deptno IN (10,20);

聚集移动范围的值

要求在员工明细表中显示之前90天(包含90)以内聘用人员的工资总和,以部门30为例,下面是标量及分析函数两种方式的示例:

1
2
3
4
5
6
7
8
9
SELECT hiredate AS 聘用日期,
sal AS 工资,
(SELECT SUM(b.sal) FROM emp b WHERE b.deptno=30 AND b.hiredate<=e.hiredate AND b.hiredate>=e.hiredate-90) AS 标量求值,
'(' || to_char(hiredate-90,'yy-mm-dd') || ' 到 ' || to_char(hiredate,'yy-mm-dd') || ')聘用人员工资和' AS 需求,
SUM(sal) over(ORDER BY hiredate RANGE BETWEEN 90 preceding AND CURRENT ROW) AS 分析函数求值,
(SELECT listagg(b.sal,'+') within GROUP(ORDER BY b.hiredate) FROM emp b WHERE b.deptno=30 AND b.hiredate<=e.hiredate AND b.hiredate>=e.hiredate-90) AS 模拟公式
FROM emp e
WHERE deptno=30
ORDER BY 1;

RANGE 关键字表示对相应字段做加减运算,所以只对日期与数值两类字段使用 RANGE 开窗。

对于日期 RANGE 开窗,默认单位是“天”,如果需求改为三个月内,可以使用 INTERVAL 来写明间隔单位:

1
2
3
4
5
6
SELECT hiredate AS 聘用日期,
sal AS 工资,
SUM(sal) over(ORDER BY hiredate RANGE BETWEEN INTERVAL '3' MONTH preceding AND CURRENT ROW) AS 三月合计
FROM emp e
WHERE deptno=30
ORDER BY 1;

按分钟开窗:

1
2
3
4
5
/* 用 1/24/60 或 INTERVAL '1' minute 都可以 */
SELECT c1,d1,
SUM(c1) over(ORDER BY d1 RANGE BETWEEN 1/24/60 preceding AND CURRENT ROW) AS s1,
SUM(c1) over(ORDER BY d1 RANGE BETWEEN (INTERVAL '2' minute) preceding AND CURRENT ROW) AS s2
FROM test;

常用分析函数开窗

工资排序后取第一行到当前行范围内的最小值:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT ename,sal,
/*因是按工资排序,所以这个语句返回的结果就是所有行的最小值*/
MIN(sal) over(ORDER BY sal) AS min_11,
/*上述语句默认参数如下,plan中可以看到*/
MIN(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND CURRENT ROW) AS min_12,
/*这种情况下,rows与RANGE返回数据一样*/
MIN(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND CURRENT ROW) AS min_13,
/*取所有行内最小值,可以与前面返回的值对比查看*/
MIN(sal) over() AS min_14,
/*如果明确写出上面min_14的范围就是*/
MIN(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND unbounded following) AS min_15,
/*这种情况下,rows与RANGE返回数据一样*/
MIN(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND unbounded following) AS min_16
FROM emp
WHERE deptno = 30;

工资排序后取第一行到当前行范围内的最大值:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT ename,sal,
/*因按工资排序,所以这个语句与上面sal返回的值一样*/
MAX(sal) over(ORDER BY sal) AS max_11,
/*上述语句默认参数如下,plan中可以看到*/
MAX(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND CURRENT ROW) AS max_12,
/*这种情况下,rows与RANGE返回数据一样*/
MAX(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND CURRENT ROW) AS max_13,
/*取所有行内的最大值,可以与前面返回的值对比查看*/
MAX(sal) over() AS max_14,
/*如果明确写出上面max_14的范围就是*/
MAX(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND unbounded following) AS max_15,
/*这种情况下,rows与RANGE返回数据一样*/
MAX(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND unbounded following) AS max_16
FROM emp
WHERE deptno = 30;

工资排序后取第一行到当前行范围内的工资和,这里要注意区别:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SELECT ename,sal,
/*累加工资,要注意工资重复时的现象*/
SUM(sal) over(ORDER BY sal) AS sum_11,
/*上述语句默认参数如下,plan中可以看到*/
SUM(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND CURRENT ROW) AS sum_12,
/*这种情况下,rows与RANGE返回数据一样,见第二行*/
SUM(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND CURRENT ROW) AS sum_13,
/*工资合计*/
SUM(sal) over() AS sum_14,
/*如果明确写出上面sum_14的范围就是*/
SUM(sal) over(ORDER BY sal RANGE BETWEEN unbounded preceding AND unbounded following) AS sum_15,
/*这种情况下,rows与RANGE返回数据一样*/
SUM(sal) over(ORDER BY sal rows BETWEEN unbounded preceding AND unbounded following) AS max_16
FROM emp
WHERE deptno = 30;

使用关键字“RANGE”时,第二行“SUM_11”、“SUM_12”对应的条件是“<=1250”,而1250有两个,所以:950+1250+1250=3450,而“SUM_13”:950+1250=2200。

前后都有限定条件:

1
2
3
4
5
6
7
SELECT ENAME,SAL,
/*当前行(+-1000)范围内的最大值*/
MAX(sal) over(ORDER BY sal RANGE BETWEEN 500 preceding AND 500 FOLLOWING) AS max_11,
/*前后各一行,共三行中的最大值*/
MAX(sal) over(ORDER BY sal ROWS BETWEEN 1 preceding AND 1 FOLLOWING) AS max_12
FROM emp
WHERE deptno = 30;

listagg九九乘法表

1
2
3
4
5
6
7
8
9
10
11
12
13
WITH 
x0 AS (
SELECT LEVEL AS lv FROM dual CONNECT BY LEVEL <= 9
),
x1 AS (
SELECT a.lv AS lv_a,b.lv AS lv_b,
to_char(b.lv) || ' X ' || to_char(a.lv) || ' = ' || rpad(to_char(a.lv*b.lv),2,' ') AS text
FROM x0 a,x0 b
WHERE b.lv <= a.lv
)
SELECT listagg(text,' ') within GROUP(ORDER BY lv_b) AS 九九乘法
FROM x1
GROUP BY lv_a;

分层查询

简单的树形查询

1
2
3
4
5
6
7
SELECT empno AS 员工编码,
ename AS 姓名,
mgr AS 主管编码,
(PRIOR ename) AS 主管姓名
FROM emp
START WITH empno = 7566
CONNECT BY (PRIOR empno) = mgr;
  1. 起点:以 “START WITH empno = 7566” 为起点向下递归查询。
  2. 通过 “PRIOR” 取上一级信息,如上:主管姓名 (PRIOR ename)。
  3. CONNECT BY 子句列出来递归的条件:(上一级编码)等于本级主管编码。

根/分支/叶子节点

  • level 返回当前行所在的等级,根节点为1级,其下为2级…
  • 若当前节点下没有其他节点,则 connect_by_isleaf 返回 1,否则返回 0
1
2
3
4
5
6
7
8
9
10
SELECT lpad('-',(LEVEL-1)*2,'-') || empno AS 员工编码,
ename AS 姓名,
mgr AS 主管编码,
LEVEL AS 级别,
decode(LEVEL,1,1) AS 根节点,
decode(connect_by_isleaf,1,1) AS 叶子节点,
CASE WHEN (connect_by_isleaf=0 AND LEVEL>1) THEN 1 END AS 分支节点
FROM emp
START WITH empno=7566
CONNECT BY (PRIOR empno)=mgr;

sys_connect_by_path

根节点到当前节点的路径:

1
2
3
4
5
6
7
SELECT empno AS 员工编码,
ename AS 姓名,
mgr AS 主管编码,
sys_connect_by_path(ename,',') AS enames
FROM emp
START WITH empno = 7566
CONNECT BY (PRIOR empno) = mgr;

也可以代替 listagg 分析函数来合并字符串,因为 Oracle11.2 之前没有 listagg

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
WITH
x1 AS (
/* 1.分组生成序号rn */
SELECT deptno,ename,
row_number() over(PARTITION BY deptno ORDER BY ename) AS rn
FROM emp
)
/* 2.用sys_connect_by_path合并字符串 */
SELECT deptno,sys_connect_by_path(ename,',') AS emps
FROM x1
WHERE connect_by_isleaf = 1/*过滤掉多余数据,只取叶子节点*/
START WITH rn = 1
CONNECT BY (PRIOR deptno)=deptno AND (PRIOR rn)=rn-1;

DEPTNO EMPS
-----------------------------------------------
10 ,CLARK,KING,MILLER
20 ,ADAMS,FORD,JONES.SCOTT,SMITH
30 ,ALLEN,BLAKE,JAMES,MARTIN,TURNER,WARD

树形查询中的排序

树形查询里直接用 ORDER BY 排序,数据会乱,无法再看清上下级关系。

对于树形数据,应该只对同一分支下的数据排序,用专用关键字 “SIBLINGS”:

1
2
3
4
5
6
7
8
SELECT lpad('-',(LEVEL-1)*2,'-') || empno AS 员工编码,
ename AS 姓名,
mgr AS 主管编码
FROM emp
START WITH empno = 7566
CONNECT BY (PRIOR empno) = mgr
ORDER SIBLINGS BY empno DESC;
/* 不能直接 ORDER BY empno DESC */

可以看到只对同分支7566下的7902、7788进行排序,没有影响树形结构。

树形查询中的WHERE

树形查询中的 WHERE 过滤对象是树形查询的结果,要过滤必须先用子查询嵌套一次,再进行树形查询:

1
2
3
4
5
6
7
8
SELECT empno AS 员工编码,
mgr AS 经理编码,
ename AS 姓名,
deptno AS 部门编码
FROM (SELECT * FROM emp WHERE deptno=20) emp
/* 这里不能用 WHERE deptno=20 直接过滤 */
START WITH mgr IS NULL
CONNECT BY (PRIOR empno) = mgr;

查询树形的一个分支

查询树形的一个分支不能用 WHERE,用 START WITH 指定分支的起点就可以:

1
2
3
4
SELECT empno AS 员工编码, mgr AS 主管编码, ename AS 姓名, LEVEL AS 级别
FROM emp
START WITH empno = 7698
CONNECT BY (PRIOR empno) = mgr;

剪去一个分支

要求减去 7698 开始的分支,同样不能在 WHERE 中加条件,因为树形查询递归是根据 (PRIOR empno)=mgr 进行的,如下即可:

1
2
3
4
5
6
SELECT empno AS 员工编码, mgr AS 主管编码, ename AS 姓名, LEVEL AS 级别
FROM emp
START WITH mgr IS NULL
CONNECT BY (PRIOR empno) = mgr
/* 剪去分支 */
AND empno != 7698;
0%