第六章:查询详解

最后更新: 2024-01-01 作者: PostgreSQL Team
页面目录

第六章:查询详解

6.1 高级查询技巧

6.1.1 DISTINCT 用法

-- 去重查询
SELECT DISTINCT status FROM orders;

-- 多列去重
SELECT DISTINCT status, payment_method FROM orders;

-- 聚合中使用 DISTINCT
SELECT COUNT(DISTINCT user_id) FROM orders;
SELECT COUNT(DISTINCT category) FROM products;

6.1.2 FETCH 子句

-- 获取前 N 行
SELECT * FROM users ORDER BY created_at DESC FETCH FIRST 5 ROWS ONLY;

-- 获取前 N 行(兼容语法)
SELECT * FROM users ORDER BY created_at DESC LIMIT 5;

-- 跳过前 N 行
SELECT * FROM users ORDER BY id OFFSET 10;

-- 分页查询(每页 10 条,取第 3 页)
SELECT * FROM users 
ORDER BY id 
LIMIT 10 OFFSET 20;

-- 随机获取 N 行
SELECT * FROM users ORDER BY random() LIMIT 5;

6.1.3 NULL 处理

-- NULL 比较
SELECT * FROM users WHERE phone IS NULL;
SELECT * FROM users WHERE phone IS NOT NULL;

-- NULL 排序(NULL 值在前/后)
SELECT * FROM users ORDER BY phone NULLS FIRST;
SELECT * FROM users ORDER BY phone DESC NULLS LAST;

-- COALESCE 函数
SELECT 
    username,
    COALESCE(phone, mobile, '无联系方式') AS contact
FROM users;

-- NULLIF 函数
SELECT NULLIF(value, 0) FROM products;  -- 值为 0 返回 NULL

-- NVL 兼容写法
SELECT COALESCE(age, 0) AS age FROM users;

6.2 聚合函数高级用法

6.2.1 FILTER 子句

-- 使用 FILTER 进行条件聚合
SELECT 
    COUNT(*) AS total_orders,
    COUNT(*) FILTER (WHERE status = 'completed') AS completed_orders,
    COUNT(*) FILTER (WHERE status = 'pending') AS pending_orders,
    AVG(total) FILTER (WHERE status = 'completed') AS avg_completed_amount
FROM orders;

6.2.2 GROUPING SETS

-- 多维度分组
SELECT 
    status,
    payment_method,
    COUNT(*) AS order_count,
    SUM(total) AS total_amount
FROM orders
GROUP BY GROUPING SETS (
    (status),                    -- 按状态分组
    (payment_method),            -- 按支付方式分组
    (status, payment_method),    -- 按状态和支付方式分组
    ()                           -- 总计
)
ORDER BY status, payment_method;

6.2.3 CUBE 和 ROLLUP

-- ROLLUP:层级汇总
SELECT 
    status,
    DATE_TRUNC('month', created_at) AS month,
    COUNT(*) AS order_count
FROM orders
GROUP BY ROLLUP (status, DATE_TRUNC('month', created_at))
ORDER BY status, month;

-- CUBE:所有组合汇总
SELECT 
    status,
    payment_method,
    COUNT(*) AS order_count
FROM orders
GROUP BY CUBE (status, payment_method)
ORDER BY status, payment_method;

6.3 多表连接

6.3.1 自然连接 (NATURAL JOIN)

-- 自然连接(基于同名列)
SELECT * FROM orders NATURAL JOIN users;

-- 谨慎使用,可能产生意外结果

6.3.2 交叉连接 (CROSS JOIN)

-- 交叉连接(笛卡尔积)
SELECT * FROM users CROSS JOIN products;

-- 生成测试数据
SELECT generate_series(1, 100) AS id;

6.3.3 多表连接

-- 多个表连接
SELECT 
    u.username,
    o.order_id,
    o.total_amount,
    p.product_name,
    oi.quantity
FROM users u
JOIN orders o ON u.id = o.user_id
JOIN order_items oi ON o.order_id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.status = 'completed'
ORDER BY o.created_at DESC;

6.3.4 自连接

-- 自连接查询员工及其上级
SELECT 
    e.employee_name AS employee,
    m.employee_name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;

-- 查找同类产品的价格差异
SELECT 
    p1.name AS product1,
    p1.price AS price1,
    p2.name AS product2,
    p2.price AS price2,
    p1.price - p2.price AS price_diff
FROM products p1
JOIN products p2 ON p1.category = p2.category AND p1.id > p2.id
WHERE p1.price > p2.price;

6.4 高级子查询

6.4.1 标量子查询

-- 返回单个值的子查询
SELECT 
    username,
    (SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count
FROM users;

-- 在表达式中使用
SELECT 
    username,
    total_spent,
    total_spent / (SELECT SUM(total_spent) FROM user_stats) * 100 AS percentage
FROM user_stats;

6.4.2 表子查询

-- 在 FROM 中使用子查询
SELECT 
    category,
    AVG(price) AS avg_price,
    COUNT(*) AS product_count
FROM (
    SELECT 
        name AS product_name,
        price,
        CASE 
            WHEN price < 100 THEN '低价'
            WHEN price < 500 THEN '中价'
            ELSE '高价'
        END AS category
    FROM products
) t
GROUP BY category;

6.4.3 LATERAL 子查询

-- LATERAL 允许子查询引用左边的表
SELECT 
    p.name,
    latest_comment.comment_text,
    latest_comment.created_at
FROM products p
CROSS JOIN LATERAL (
    SELECT comment_text, created_at
    FROM comments
    WHERE product_id = p.id
    ORDER BY created_at DESC
    LIMIT 1
) AS latest_comment;

-- 获取每个类别最贵的产品
SELECT DISTINCT ON (category) *
FROM products
ORDER BY category, price DESC;

6.5 窗口函数

窗口函数在不使用 GROUP BY 的情况下进行聚合计算。

6.5.1 窗口函数基础

-- 语法结构
aggregate_function() OVER (
    PARTITION BY column_list
    ORDER BY column_list
    ROWS/RANGE BETWEEN frame_start AND frame_end
)

-- 示例数据准备
CREATE TABLE sales (
    id SERIAL,
    salesperson VARCHAR(50),
    region VARCHAR(50),
    sale_date DATE,
    amount DECIMAL(10, 2)
);

6.5.2 排名函数

-- ROW_NUMBER:连续排名
SELECT 
    salesperson,
    amount,
    ROW_NUMBER() OVER (ORDER BY amount DESC) AS row_num
FROM sales;

-- RANK:跳跃排名(有并列)
SELECT 
    salesperson,
    amount,
    RANK() OVER (ORDER BY amount DESC) AS rank
FROM sales;

-- DENSE_RANK:连续排名(无跳跃)
SELECT 
    salesperson,
    amount,
    DENSE_RANK() OVER (ORDER BY amount DESC) AS dense_rank
FROM sales;

-- NTILE:分组编号
SELECT 
    salesperson,
    amount,
    NTILE(4) OVER (ORDER BY amount DESC) AS quartile
FROM sales;

6.5.3 聚合窗口函数

-- 累计求和
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;

-- 累计平均
SELECT 
    sale_date,
    amount,
    AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_avg
FROM sales;

-- 分组累计
SELECT 
    salesperson,
    sale_date,
    amount,
    SUM(amount) OVER (
        PARTITION BY salesperson 
        ORDER BY sale_date
    ) AS personal_total
FROM sales;

-- 累计百分比
SELECT 
    sale_date,
    amount,
    ROUND(
        amount / SUM(amount) OVER () * 100, 
        2
    ) AS percentage
FROM sales;

6.5.4 窗口框架

-- 相对于当前行的窗口
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS last_3_days_total,
    AVG(amount) OVER (
        ORDER BY sale_date
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS centered_avg
FROM sales;

-- 相对于当前行的窗口(范围)
SELECT 
    sale_date,
    amount,
    SUM(amount) OVER (
        ORDER BY sale_date
        RANGE BETWEEN INTERVAL '7 days' PRECEDING AND CURRENT ROW
    ) AS last_7_days_total
FROM sales;

6.5.5 FIRST_VALUE 和 LAST_VALUE

-- FIRST_VALUE:窗口内第一个值
SELECT 
    sale_date,
    amount,
    FIRST_VALUE(amount) OVER (
        PARTITION BY salesperson 
        ORDER BY sale_date
    ) AS first_sale
FROM sales;

-- LAST_VALUE:窗口内最后一个值
SELECT 
    sale_date,
    amount,
    LAST_VALUE(amount) OVER (
        PARTITION BY salesperson 
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_sale
FROM sales;

-- NTH_VALUE:窗口内第 N 个值
SELECT 
    sale_date,
    amount,
    NTH_VALUE(amount, 3) OVER (
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS third_value
FROM sales;

6.5.6 LAG 和 LEAD

-- LAG:获取前 N 行数据
SELECT 
    sale_date,
    amount,
    LAG(amount, 1) OVER (ORDER BY sale_date) AS prev_amount,
    amount - LAG(amount, 1) OVER (ORDER BY sale_date) AS diff_from_prev
FROM sales;

-- LEAD:获取后 N 行数据
SELECT 
    sale_date,
    amount,
    LEAD(amount, 1) OVER (ORDER BY sale_date) AS next_amount,
    LEAD(amount, 1) OVER (ORDER BY sale_date) - amount AS diff_to_next
FROM sales;

-- 使用默认值
SELECT 
    sale_date,
    amount,
    LAG(amount, 1, 0) OVER (ORDER BY sale_date) AS prev_amount
FROM sales;

6.5.7 窗口函数综合示例

-- 销售报表
SELECT 
    sale_date,
    salesperson,
    region,
    amount,
    -- 排名
    RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS regional_rank,
    -- 累计
    SUM(amount) OVER (PARTITION BY region ORDER BY sale_date) AS regional_running_total,
    -- 占比
    ROUND(amount / SUM(amount) OVER (PARTITION BY region) * 100, 2) AS regional_percentage,
    -- 环比
    ROUND(
        (amount - LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date)) /
        NULLIF(LAG(amount, 1) OVER (PARTITION BY salesperson ORDER BY sale_date), 0) * 100, 
        2
    ) AS month_over_month_growth
FROM sales
ORDER BY sale_date, regional_rank;

6.6 CTE(公用表表达式)

6.6.1 基础 CTE

-- 基本语法
WITH cte_name AS (
    SELECT ...
)
SELECT * FROM cte_name;

-- 单个 CTE
WITH active_users AS (
    SELECT id, username, email
    FROM users
    WHERE status = 'active'
)
SELECT * FROM active_users WHERE username LIKE 'A%';

-- 多个 CTE
WITH 
    user_orders AS (
        SELECT user_id, COUNT(*) AS order_count, SUM(total) AS total_spent
        FROM orders
        GROUP BY user_id
    ),
    top_users AS (
        SELECT user_id FROM user_orders ORDER BY total_spent DESC LIMIT 10
    )
SELECT 
    u.username,
    uo.order_count,
    uo.total_spent
FROM users u
JOIN user_orders uo ON u.id = uo.user_id
WHERE u.id IN (SELECT user_id FROM top_users);

6.6.2 递归 CTE

-- 生成序列
WITH RECURSIVE series AS (
    SELECT 1 AS n
    UNION ALL
    SELECT n + 1 FROM series WHERE n < 10
)
SELECT * FROM series;

-- 递归查询(组织架构)
WITH RECURSIVE org_tree AS (
    -- 起始条件:CEO
    SELECT id, name, manager_id, 1 AS level
    FROM employees
    WHERE manager_id IS NULL
    
    UNION ALL
    
    -- 递归条件:下属
    SELECT e.id, e.name, e.manager_id, ot.level + 1
    FROM employees e
    JOIN org_tree ot ON e.manager_id = ot.id
)
SELECT 
    REPEAT('  ', level - 1) || name AS employee_name,
    level
FROM org_tree
ORDER BY level, name;

-- 生成日期序列
WITH RECURSIVE date_series AS (
    SELECT DATE '2024-01-01' AS date
    UNION ALL
    SELECT date + 1 FROM date_series WHERE date < '2024-01-31'
)
SELECT * FROM date_series;

-- 遍历树形结构(评论系统)
WITH RECURSIVE comment_tree AS (
    SELECT id, parent_id, content, 1 AS depth, ARRAY[id] AS path
    FROM comments
    WHERE parent_id IS NULL
    
    UNION ALL
    
    SELECT c.id, c.parent_id, c.content, ct.depth + 1, ct.path || c.id
    FROM comments c
    JOIN comment_tree ct ON c.parent_id = ct.id
)
SELECT REPEAT('  ', depth - 1) || content FROM comment_tree;

6.7 复杂查询示例

6.7.1 排名与分区统计

-- 用户购买行为分析
WITH user_stats AS (
    SELECT 
        u.id,
        u.username,
        COUNT(o.id) AS order_count,
        COALESCE(SUM(o.total), 0) AS total_spent,
        MAX(o.created_at) AS last_order_date
    FROM users u
    LEFT JOIN orders o ON u.id = o.user_id
    GROUP BY u.id, u.username
)
SELECT 
    username,
    order_count,
    total_spent,
    last_order_date,
    RANK() OVER (ORDER BY total_spent DESC) AS spending_rank,
    CASE 
        WHEN order_count = 0 THEN '新用户'
        WHEN last_order_date < NOW() - INTERVAL '90 days' THEN '流失用户'
        WHEN last_order_date < NOW() - INTERVAL '30 days' THEN '沉默用户'
        ELSE '活跃用户'
    END AS user_segment
FROM user_stats
ORDER BY total_spent DESC;

6.7.2 时间序列分析

-- 月度销售趋势
WITH monthly_sales AS (
    SELECT 
        DATE_TRUNC('month', created_at) AS month,
        SUM(total) AS revenue,
        COUNT(*) AS order_count
    FROM orders
    WHERE created_at >= '2023-01-01'
    GROUP BY DATE_TRUNC('month', created_at)
),
sales_with_growth AS (
    SELECT 
        month,
        revenue,
        order_count,
        LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue,
        revenue - LAG(revenue) OVER (ORDER BY month) AS revenue_growth,
        ROUND(
            (revenue - LAG(revenue) OVER (ORDER BY month)) /
            NULLIF(LAG(revenue) OVER (ORDER BY month), 0) * 100, 
            2
        ) AS growth_rate
    FROM monthly_sales
)
SELECT 
    TO_CHAR(month, 'YYYY-MM') AS month,
    revenue,
    order_count,
    COALESCE(revenue_growth, 0) AS revenue_growth,
    COALESCE(growth_rate, 0) || '%' AS growth_rate
FROM sales_with_growth
ORDER BY month;

6.8 本章小结

本章深入介绍了 PostgreSQL 的高级查询技巧:

主题 关键功能
聚合函数 FILTER, GROUPING SETS, ROLLUP, CUBE
窗口函数 ROW_NUMBER, RANK, SUM, AVG, LAG, LEAD
子查询 标量、表、LATERAL
CTE 基础 CTE, 递归 CTE

📌 下一章预告

下一章将介绍 PostgreSQL 的索引机制和性能优化技巧。