一、窗口函数核心概念
本质:对一组与当前行相关联的行执行计算,不改变原表行数
与聚合函数的区别:
SELECT department, AVG(salary) -- 普通聚合:每个部门一行
FROM employees
GROUP BY department;
SELECT name, salary,
AVG(salary) OVER(PARTITION BY department) -- 窗口函数:保留所有行
FROM employees;
二、窗口函数完整语法结构
函数名([参数]) OVER (
[PARTITION BY 分组字段]
[ORDER BY 排序字段 [ASC|DESC]]
[frame_clause]
)
1. 核心子句详解
三、常用窗口函数分类
1. 排序函数
SELECT
product_id,
sales,
ROW_NUMBER() OVER(ORDER BY sales DESC) AS rank1, -- 唯一连续序号
RANK() OVER(ORDER BY sales DESC) AS rank2, -- 允许并列跳号
DENSE_RANK() OVER(ORDER BY sales DESC) AS rank3 -- 允许并列不跳号
FROM sales_data;
2. 聚合窗口函数
SELECT
date,
revenue,
SUM(revenue) OVER(ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS 3d_avg,
AVG(revenue) OVER(PARTITION BY YEAR(date)) AS year_avg
FROM daily_sales;
3. 分布分析函数
SELECT
student_id,
score,
PERCENT_RANK() OVER(ORDER BY score) AS percentile, -- 相对百分比排名
CUME_DIST() OVER(ORDER BY score) AS cumulative_dist -- 累计分布
FROM exam_results;
4. 偏移函数
SELECT
date,
temperature,
LAG(temperature, 1) OVER(ORDER BY date) AS prev_day_temp, -- 前一行
LEAD(temperature, 1) OVER(ORDER BY date) AS next_day_temp, -- 后一行
FIRST_VALUE(temperature) OVER(PARTITION BY WEEK(date)) AS week_first_temp
FROM weather;
四、典型应用场景
1. 动态分组TopN
-- 获取每个部门薪资前三名
WITH ranked AS (
SELECT
name, department, salary,
DENSE_RANK() OVER(PARTITION BY department ORDER BY salary DESC) AS rk
FROM employees
)
SELECT * FROM ranked WHERE rk <= 3;
2. 累计计算
-- 计算累计销售额与移动平均
SELECT
order_date,
daily_sales,
SUM(daily_sales) OVER(ORDER BY order_date) AS cum_sum,
AVG(daily_sales) OVER(ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS 7d_ma
FROM orders;
3. 数据差异分析
-- 对比每月销售额与上月差异
SELECT
month,
revenue,
LAG(revenue, 1) OVER(ORDER BY month) AS prev_month,
revenue - LAG(revenue, 1) OVER(ORDER BY month) AS mom_diff
FROM monthly_sales;
五、性能优化策略
1. 索引设计
-- 为窗口函数涉及的字段建立组合索引
ALTER TABLE sales ADD INDEX idx_dept_time (department, order_date);
-- 执行计划检查
EXPLAIN
SELECT
product_id,
SUM(quantity) OVER(PARTITION BY product_id ORDER BY sale_date)
FROM sales;
2. 分区剪枝优化
-- 结合WHERE条件减少处理分区
SELECT *
FROM (
SELECT
user_id,
ROW_NUMBER() OVER(PARTITION BY city ORDER BY reg_date) AS rn
FROM users
WHERE city IN ('北京','上海') -- 提前过滤
) t
WHERE rn <= 100;
3. 避免重复计算
-- 使用WINDOW子句复用定义
SELECT
AVG(sales) OVER w AS avg_sales,
MAX(sales) OVER w AS max_sales
FROM sales_data
WINDOW w AS (PARTITION BY region ORDER BY month);
六、与临时表结合的高级用法
1. 分阶段计算
-- 第一阶段:计算基础窗口
CREATE TEMPORARY TABLE stage1 AS
SELECT
user_id,
SUM(amount) OVER(PARTITION BY user_id) AS total_amt
FROM transactions;
-- 第二阶段:二次聚合
SELECT
AVG(total_amt) AS avg_amt_per_user
FROM stage1;
2. 递归窗口计算
-- 计算员工管理链层级
WITH RECURSIVE emp_tree AS (
SELECT
emp_id,
manager_id,
1 AS level,
CAST(emp_id AS CHAR(100)) AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
SELECT
e.emp_id,
e.manager_id,
et.level + 1,
CONCAT(et.path, '->', e.emp_id)
FROM employees e
JOIN emp_tree et ON e.manager_id = et.emp_id
)
SELECT
emp_id,
level,
path,
RANK() OVER(ORDER BY level) AS hierarchy_rank
FROM emp_tree;
七、常见错误排查
错误现象 |
原因分析 |
解决方案 |
结果排序不符合预期 |
未正确使用ORDER BY |
明确指定排序字段和方向 |
窗口范围计算错误 |
ROWS与RANGE混淆使用 |
确认需要物理行偏移还是逻辑值范围 |
性能急剧下降 |
未加分区条件导致全表扫描 |
添加WHERE条件或分区过滤 |
出现重复排名 |
使用了ROW_NUMBER而非RANK |
根据业务需求选择正确的排名函数 |
八、版本特性差异
MySQL版本 |
窗口函数支持 |
5.x |
❌ 不支持 |
8.0.2+ |
✔️ 基础窗口函数 |
8.0.28+ |
✔️ 新增NTH_VALUE() 、NTILE() 等扩展函数 |
结语:最佳实践原则
- 明确窗口范围:始终指定
ROWS/RANGE
避免意外结果
- 慎用无界窗口:
UNBOUNDED PRECEDING
可能导致性能问题
- 结合索引优化:为
PARTITION BY
和ORDER BY
字段建立索引
- 分阶段处理:对复杂计算使用临时表拆分步骤
- 版本验证:生产环境确认MySQL版本支持情况
-- 查看窗口函数执行计划
EXPLAIN FORMAT=JSON
SELECT ... [包含窗口函数的查询];
-- 性能模式监控
SELECT * FROM performance_schema.events_stages_history_long
WHERE EVENT_NAME LIKE '%window%';