第六章:查询详解
最后更新: 2024-01-01
作者: PostgreSQL Team
页面目录
第六章:查询详解
6.1 高级查询技巧
6.1.1 DISTINCT 用法
-- 去重查询
SELECT DISTINCT status FROM orders;
-- 多列去重
SELECT DISTINCT status, payment_method FROM orders;
-- 聚合中使用 DISTINCT
SELECT COUNT(DISTINCT user_id) FROM orders;
SELECT COUNT(DISTINCT category) FROM products;
6.1.2 FETCH 子句
-- 获取前 N 行
SELECT * FROM users ORDER BY created_at DESC FETCH FIRST 5 ROWS ONLY;
-- 获取前 N 行(兼容语法)
SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
-- 跳过前 N 行
SELECT * FROM users ORDER BY id OFFSET 10;
-- 分页查询(每页 10 条,取第 3 页)
SELECT * FROM users
ORDER BY id
LIMIT 10 OFFSET 20;
-- 随机获取 N 行
SELECT * FROM users ORDER BY random() LIMIT 5;
6.1.3 NULL 处理
-- NULL 比较
SELECT * FROM users WHERE phone IS NULL;
SELECT * FROM users WHERE phone IS NOT NULL;
-- NULL 排序(NULL 值在前/后)
SELECT * FROM users ORDER BY phone NULLS FIRST;
SELECT * FROM users ORDER BY phone DESC NULLS LAST;
-- COALESCE 函数
SELECT
username,
COALESCE(phone, mobile, '无联系方式') AS contact
FROM users;
-- NULLIF 函数
SELECT NULLIF(value, 0) FROM products; -- 值为 0 返回 NULL
-- NVL 兼容写法
SELECT COALESCE(age, 0) AS age FROM users;
6.2 聚合函数高级用法
6.2.1 FILTER 子句
-- 使用 FILTER 进行条件聚合
SELECT
COUNT(*) AS total_orders,
COUNT(*) FILTER (WHERE status = 'completed') AS completed_orders,
COUNT(*) FILTER (WHERE status = 'pending') AS pending_orders,
AVG(total) FILTER (WHERE status = 'completed') AS avg_completed_amount
FROM orders;
6.2.2 GROUPING SETS
-- 多维度分组
SELECT
status,
payment_method,
COUNT(*) AS order_count,
SUM(total) AS total_amount
FROM orders
GROUP BY GROUPING SETS (
(status), -- 按状态分组
(payment_method), -- 按支付方式分组
(status, payment_method), -- 按状态和支付方式分组
() -- 总计
)
ORDER BY status, payment_method;
6.2.3 CUBE 和 ROLLUP
-- ROLLUP:层级汇总
SELECT
status,
DATE_TRUNC('month', created_at) AS month,
COUNT(*) AS order_count
FROM orders
GROUP BY ROLLUP (status, DATE_TRUNC('month', created_at))
ORDER BY status, month;
-- CUBE:所有组合汇总
SELECT
status,
payment_method,
COUNT(*) AS order_count
FROM orders
GROUP BY CUBE (status, payment_method)
ORDER BY status, payment_method;
6.3 多表连接
6.3.1 自然连接 (NATURAL JOIN)
-- 自然连接(基于同名列)
SELECT * FROM orders NATURAL JOIN users;
-- 谨慎使用,可能产生意外结果
6.3.2 交叉连接 (CROSS JOIN)
-- 交叉连接(笛卡尔积)
SELECT * FROM users CROSS JOIN products;
-- 生成测试数据
SELECT generate_series(1, 100) AS id;
6.3.3 多表连接
-- 多个表连接
SELECT
u.username,
o.order_id,
o.total_amount,
p.product_name,
oi.quantity
FROM users u
JOIN orders o ON u.id = o.user_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.status = 'completed'
ORDER BY o.created_at DESC;
6.3.4 自连接
-- 自连接查询员工及其上级
SELECT
e.employee_name AS employee,
m.employee_name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
-- 查找同类产品的价格差异
SELECT
p1.name AS product1,
p1.price AS price1,
p2.name AS product2,
p2.price AS price2,
p1.price - p2.price AS price_diff
FROM products p1
JOIN products p2 ON p1.category = p2.category AND p1.id > p2.id
WHERE p1.price > p2.price;
6.4 高级子查询
6.4.1 标量子查询
-- 返回单个值的子查询
SELECT
username,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count
FROM users;
-- 在表达式中使用
SELECT
username,
total_spent,
total_spent / (SELECT SUM(total_spent) FROM user_stats) * 100 AS percentage
FROM user_stats;
6.4.2 表子查询
-- 在 FROM 中使用子查询
SELECT
category,
AVG(price) AS avg_price,
COUNT(*) AS product_count
FROM (
SELECT
name AS product_name,
price,
CASE
WHEN price < 100 THEN '低价'
WHEN price < 500 THEN '中价'
ELSE '高价'
END AS category
FROM products
) t
GROUP BY category;
6.4.3 LATERAL 子查询
-- LATERAL 允许子查询引用左边的表
SELECT
p.name,
latest_comment.comment_text,
latest_comment.created_at
FROM products p
CROSS JOIN LATERAL (
SELECT comment_text, created_at
FROM comments
WHERE product_id = p.id
ORDER BY created_at DESC
LIMIT 1
) AS latest_comment;
-- 获取每个类别最贵的产品
SELECT DISTINCT ON (category) *
FROM products
ORDER BY category, price DESC;
6.5 窗口函数
窗口函数在不使用 GROUP BY 的情况下进行聚合计算。
6.5.1 窗口函数基础
-- 语法结构
aggregate_function() OVER (
PARTITION BY column_list
ORDER BY column_list
ROWS/RANGE BETWEEN frame_start AND frame_end
)
-- 示例数据准备
CREATE TABLE sales (
id SERIAL,
salesperson VARCHAR(50),
region VARCHAR(50),
sale_date DATE,
amount DECIMAL(10, 2)
);
6.5.2 排名函数
-- ROW_NUMBER:连续排名
SELECT
salesperson,
amount,
ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;
-- RANK:跳跃排名(有并列)
SELECT
salesperson,
amount,
RANK() OVER (ORDER BY amount DESC) AS rank
FROM sales;
-- DENSE_RANK:连续排名(无跳跃)
SELECT
salesperson,
amount,
DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank
FROM sales;
-- NTILE:分组编号
SELECT
salesperson,
amount,
NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;
6.5.3 聚合窗口函数
-- 累计求和
SELECT
sale_date,
amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;
-- 累计平均
SELECT
sale_date,
amount,
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg
FROM sales;
-- 分组累计
SELECT
salesperson,
sale_date,
amount,
SUM(amount) OVER (
PARTITION BY salesperson
ORDER BY sale_date
) AS personal_total
FROM sales;
-- 累计百分比
SELECT
sale_date,
amount,
ROUND(
amount / SUM(amount) OVER () * 100,
2
) AS percentage
FROM sales;
6.5.4 窗口框架
-- 相对于当前行的窗口
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS last_3_days_total,
AVG(amount) OVER (
ORDER BY sale_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) AS centered_avg
FROM sales;
-- 相对于当前行的窗口(范围)
SELECT
sale_date,
amount,
SUM(amount) OVER (
ORDER BY sale_date
RANGE BETWEEN INTERVAL '7 days' PRECEDING AND CURRENT ROW
) AS last_7_days_total
FROM sales;
6.5.5 FIRST_VALUE 和 LAST_VALUE
-- FIRST_VALUE:窗口内第一个值
SELECT
sale_date,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY salesperson
ORDER BY sale_date
) AS first_sale
FROM sales;
-- LAST_VALUE:窗口内最后一个值
SELECT
sale_date,
amount,
LAST_VALUE(amount) OVER (
PARTITION BY salesperson
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_sale
FROM sales;
-- NTH_VALUE:窗口内第 N 个值
SELECT
sale_date,
amount,
NTH_VALUE(amount, 3) OVER (
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS third_value
FROM sales;
6.5.6 LAG 和 LEAD
-- LAG:获取前 N 行数据
SELECT
sale_date,
amount,
LAG(amount, 1) OVER (ORDER BY sale_date) AS prev_amount,
amount - LAG(amount, 1) OVER (ORDER BY sale_date) AS diff_from_prev
FROM sales;
-- LEAD:获取后 N 行数据
SELECT
sale_date,
amount,
LEAD(amount, 1) OVER (ORDER BY sale_date) AS next_amount,
LEAD(amount, 1) OVER (ORDER BY sale_date) - amount AS diff_to_next
FROM sales;
-- 使用默认值
SELECT
sale_date,
amount,
LAG(amount, 1, 0) OVER (ORDER BY sale_date) AS prev_amount
FROM sales;
6.5.7 窗口函数综合示例
-- 销售报表
SELECT
sale_date,
salesperson,
region,
amount,
-- 排名
RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS regional_rank,
-- 累计
SUM(amount) OVER (PARTITION BY region ORDER BY sale_date) AS regional_running_total,
-- 占比
ROUND(amount / SUM(amount) OVER (PARTITION BY region) * 100, 2) AS regional_percentage,
-- 环比
ROUND(
(amount - LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date)) /
NULLIF(LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date), 0) * 100,
2
) AS month_over_month_growth
FROM sales
ORDER BY sale_date, regional_rank;
6.6 CTE(公用表表达式)
6.6.1 基础 CTE
-- 基本语法
WITH cte_name AS (
SELECT ...
)
SELECT * FROM cte_name;
-- 单个 CTE
WITH active_users AS (
SELECT id, username, email
FROM users
WHERE status = 'active'
)
SELECT * FROM active_users WHERE username LIKE 'A%';
-- 多个 CTE
WITH
user_orders AS (
SELECT user_id, COUNT(*) AS order_count, SUM(total) AS total_spent
FROM orders
GROUP BY user_id
),
top_users AS (
SELECT user_id FROM user_orders ORDER BY total_spent DESC LIMIT 10
)
SELECT
u.username,
uo.order_count,
uo.total_spent
FROM users u
JOIN user_orders uo ON u.id = uo.user_id
WHERE u.id IN (SELECT user_id FROM top_users);
6.6.2 递归 CTE
-- 生成序列
WITH RECURSIVE series AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM series WHERE n < 10
)
SELECT * FROM series;
-- 递归查询(组织架构)
WITH RECURSIVE org_tree AS (
-- 起始条件:CEO
SELECT id, name, manager_id, 1 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归条件:下属
SELECT e.id, e.name, e.manager_id, ot.level + 1
FROM employees e
JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT
REPEAT(' ', level - 1) || name AS employee_name,
level
FROM org_tree
ORDER BY level, name;
-- 生成日期序列
WITH RECURSIVE date_series AS (
SELECT DATE '2024-01-01' AS date
UNION ALL
SELECT date + 1 FROM date_series WHERE date < '2024-01-31'
)
SELECT * FROM date_series;
-- 遍历树形结构(评论系统)
WITH RECURSIVE comment_tree AS (
SELECT id, parent_id, content, 1 AS depth, ARRAY[id] AS path
FROM comments
WHERE parent_id IS NULL
UNION ALL
SELECT c.id, c.parent_id, c.content, ct.depth + 1, ct.path || c.id
FROM comments c
JOIN comment_tree ct ON c.parent_id = ct.id
)
SELECT REPEAT(' ', depth - 1) || content FROM comment_tree;
6.7 复杂查询示例
6.7.1 排名与分区统计
-- 用户购买行为分析
WITH user_stats AS (
SELECT
u.id,
u.username,
COUNT(o.id) AS order_count,
COALESCE(SUM(o.total), 0) AS total_spent,
MAX(o.created_at) AS last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username
)
SELECT
username,
order_count,
total_spent,
last_order_date,
RANK() OVER (ORDER BY total_spent DESC) AS spending_rank,
CASE
WHEN order_count = 0 THEN '新用户'
WHEN last_order_date < NOW() - INTERVAL '90 days' THEN '流失用户'
WHEN last_order_date < NOW() - INTERVAL '30 days' THEN '沉默用户'
ELSE '活跃用户'
END AS user_segment
FROM user_stats
ORDER BY total_spent DESC;
6.7.2 时间序列分析
-- 月度销售趋势
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', created_at) AS month,
SUM(total) AS revenue,
COUNT(*) AS order_count
FROM orders
WHERE created_at >= '2023-01-01'
GROUP BY DATE_TRUNC('month', created_at)
),
sales_with_growth AS (
SELECT
month,
revenue,
order_count,
LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue,
revenue - LAG(revenue) OVER (ORDER BY month) AS revenue_growth,
ROUND(
(revenue - LAG(revenue) OVER (ORDER BY month)) /
NULLIF(LAG(revenue) OVER (ORDER BY month), 0) * 100,
2
) AS growth_rate
FROM monthly_sales
)
SELECT
TO_CHAR(month, 'YYYY-MM') AS month,
revenue,
order_count,
COALESCE(revenue_growth, 0) AS revenue_growth,
COALESCE(growth_rate, 0) || '%' AS growth_rate
FROM sales_with_growth
ORDER BY month;
6.8 本章小结
本章深入介绍了 PostgreSQL 的高级查询技巧:
| 主题 | 关键功能 |
|---|---|
| 聚合函数 | FILTER, GROUPING SETS, ROLLUP, CUBE |
| 窗口函数 | ROW_NUMBER, RANK, SUM, AVG, LAG, LEAD |
| 子查询 | 标量、表、LATERAL |
| CTE | 基础 CTE, 递归 CTE |
📌 下一章预告
下一章将介绍 PostgreSQL 的索引机制和性能优化技巧。