第十五章:运维管理
最后更新: 2024-01-01
作者: PostgreSQL Team
页面目录
第十五章:运维管理
15.1 服务管理
15.1.1 systemd 管理
# 启动服务
sudo systemctl start postgresql
# 停止服务
sudo systemctl stop postgresql
# 重启服务
sudo systemctl restart postgresql
# 重载配置(不重启)
sudo systemctl reload postgresql
# 查看状态
sudo systemctl status postgresql
# 启用开机自启
sudo systemctl enable postgresql
# 禁用开机自启
sudo systemctl disable postgresql
15.1.2 pg_ctl 命令
# 启动数据库
pg_ctl -D /var/lib/postgresql/data start
# 停止数据库(智能模式)
pg_ctl -D /var/lib/postgresql/data stop
# 停止数据库(快速模式,不等待客户端断开)
pg_ctl -D /var/lib/postgresql/data stop -m fast
# 立即停止(不推荐,可能导致数据损坏)
pg_ctl -D /var/lib/postgresql/data stop -m immediate
# 查看状态
pg_ctl -D /var/lib/postgresql/data status
# 重载配置
pg_ctl -D /var/lib/postgresql/data reload
15.2 配置管理
15.2.1 修改配置
-- 查看当前配置
SHOW all;
SHOW max_connections;
SHOW shared_buffers;
-- 查看特定配置
SELECT name, setting, unit FROM pg_settings WHERE name LIKE '%buffer%';
-- 设置参数(会话级)
SET statement_timeout = '5s';
-- 设置参数(事务级)
BEGIN;
SET LOCAL statement_timeout = '5s';
COMMIT;
-- 修改配置文件后重载
SELECT pg_reload_conf();
15.2.2 推荐配置参数
# postgresql.conf 优化配置
# 连接设置
max_connections = 200
superuser_reserved_connections = 3
# 内存设置(根据可用内存调整)
shared_buffers = '4GB' # 建议为系统内存的 25%
effective_cache_size = '12GB' # 建议为系统内存的 75%
work_mem = '64MB'
maintenance_work_mem = '512MB'
temp_buffers = '16MB'
# WAL 设置
wal_level = replica
wal_buffers = '64MB'
min_wal_size = '1GB'
max_wal_size = '4GB'
checkpoint_completion_target = 0.9
# 并行设置
max_worker_processes = 16
max_parallel_workers_per_gather = 4
max_parallel_workers = 8
parallel_leader_participation = on
# 查询优化
effective_io_concurrency = 200
random_page_cost = 1.1
default_statistics_target = 100
# 日志设置
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d.log'
log_rotation_age = 1d
log_min_duration_statement = 1000
# 异步提交
synchronous_commit = on
# 统计信息
track_activities = on
track_counts = on
track_io_timing = on
track_functions = pl
# 自动清理
autovacuum = on
autovacuum_max_workers = 4
autovacuum_naptime = 1min
15.3 监控指标
15.3.1 系统视图
-- 数据库状态
SELECT * FROM pg_stat_database WHERE datname = 'mydb';
-- 表访问统计
SELECT
schemaname,
relname,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze,
vacuum_count,
autovacuum_count
FROM pg_stat_user_tables
ORDER BY n_live_tup DESC;
-- 索引使用统计
SELECT
schemaname,
relname AS table_name,
indexrelname AS index_name,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC;
-- 连接统计
SELECT
state,
COUNT(*) AS count,
ARRAY_AGG(usename) AS users,
ARRAY_AGG(client_addr) AS clients
FROM pg_stat_activity
WHERE state IS NOT NULL
GROUP BY state;
-- 复制状态(主库)
SELECT
pid,
usesysid,
usename,
application_name,
client_addr,
client_hostname,
client_port,
backend_start,
backend_xmin,
state,
sent_lsn,
write_lsn,
flush_lsn,
replay_lsn,
write_lag,
flush_lag,
replay_lag,
sync_priority,
sync_state
FROM pg_stat_replication;
15.3.2 常用监控查询
-- 当前活跃查询
SELECT
pid,
now() - query_start AS duration,
usename,
datname,
state,
left(query, 100) AS query_preview
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- 长事务
SELECT
pid,
now() - xact_start AS transaction_duration,
usename,
xmin,
state,
left(query, 100) AS query_preview
FROM pg_stat_activity
WHERE xact_start IS NOT NULL
AND state != 'idle'
AND now() - xact_start > interval '5 minutes';
-- 大表
SELECT
schemaname,
relname AS table_name,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_indexes_size(relid)) AS index_size,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 20;
-- 缺失索引
SELECT
schemaname,
relname AS table_name,
seq_scan,
seq_tup_read,
idx_scan,
n_live_tup,
n_dead_tup
FROM pg_stat_user_tables
WHERE idx_scan = 0
AND n_live_tup > 1000
ORDER BY seq_tup_read DESC;
15.4 性能监控
15.4.1 使用 pg_stat_statements
-- 安装扩展
CREATE EXTENSION pg_stat_statements;
-- 查询最慢查询
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
max_exec_time,
min_exec_time,
rows,
shared_blks_hit,
shared_blks_read
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 20;
-- 查询调用次数最多的查询
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows,
ROUND(mean_exec_time::numeric, 2) AS avg_ms
FROM pg_stat_statements
ORDER BY calls DESC
LIMIT 10;
-- 查询消耗最多的 I/O
SELECT
query,
calls,
shared_blks_hit,
shared_blks_read,
temp_blks_read,
temp_blks_written
FROM pg_stat_statements
ORDER BY shared_blks_read DESC
LIMIT 10;
-- 重置统计
SELECT pg_stat_statements_reset();
15.4.2 pgBadger 日志分析
# 配置详细日志
# postgresql.conf
log_line_prefix = '%t [%p] [%l-1] user=%u,db=%d,app=%a client=%h '
log_duration = off
log_statement = 'all'
log_min_duration_statement = 0
# 生成报告
pgbadger /var/log/postgresql/postgresql.log -o /var/www/pgbadger/index.html
# 实时分析
pgbadger --preprocess --live-scan --log-prefix '%t [%p] ' /var/log/postgresql/
15.5 维护任务
15.5.1 VACUUM 操作
-- 手动 VACUUM
VACUUM ANALYZE users;
-- VACUUM 所有表
VACUUM (VERBOSE, ANALYZE);
-- VACUUM 特定表
VACUUM (VERBOSE, ANALYZE) orders;
-- 冻结事务
VACUUM (FREEZE) users;
-- 查看 VACUUM 状态
SELECT
schemaname,
relname,
last_vacuum,
last_autovacuum,
vacuum_count,
autovacuum_count,
n_dead_tup
FROM pg_stat_user_tables
WHERE n_dead_tup > 10000;
15.5.2 ANALYZE 操作
-- 分析表
ANALYZE users;
-- 分析表和索引
ANALYZE VERBOSE users;
-- 分析所有表
ANALYZE VERBOSE;
-- 查看统计信息过期
SELECT
schemaname,
relname,
last_analyze,
last_autoanalyze,
n_live_tup,
n_dead_tup,
last_analyze::timestamp - last_autoanalyze::timestamp AS analyze_interval
FROM pg_stat_user_tables
WHERE n_live_tup + n_dead_tup > 1000;
15.5.3 REINDEX 操作
-- 重建索引
REINDEX INDEX idx_users_email;
-- 重建表的所有索引
REINDEX TABLE users;
-- 重建所有表的索引(谨慎使用)
REINDEX DATABASE mydb;
-- 并行重建索引(需要独占锁)
REINDEX INDEX CONCURRENTLY idx_users_email;
-- 查看索引膨胀
SELECT
schemaname || '.' || relname AS table,
indexrelname AS index,
pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
idx_scan AS index_scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON ui.indexrelid = i.indexrelid
WHERE idx_scan = 0
ORDER BY pg_relation_size(i.indexrelid) DESC;
15.5.4 维护脚本
#!/bin/bash
# maintenance.sh - PostgreSQL 维护脚本
PGHOST=localhost
PGPORT=5432
PGUSER=postgres
PGDATABASE=mydb
# VACUUM
psql -h $PGHOST -p $PGPORT -U $PGUSER -d $PGDATABASE -c "VACUUM (VERBOSE, ANALYZE);"
# 重建膨胀的索引
psql -h $PGHOST -p $PGPORT -U $PGUSER -d $PGDATABASE << 'EOF'
SELECT 'REINDEX INDEX CONCURRENTLY ' || indexrelname || ';'
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND pg_relation_size(indexrelid) > 10 * 1024 * 1024;
EOF
# 清理 pg_stat_statements
psql -h $PGHOST -p $PGPORT -U $PGUSER -d $PGDATABASE -c "SELECT pg_stat_statements_reset();"
15.6 容量规划
-- 表大小分析
SELECT
schemaname,
relname,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_indexes_size(relid)) AS indexes_size,
n_live_tup,
n_dead_tup,
ROUND(n_live_tup * 100.0 / NULLIF(n_live_tup + n_dead_tup, 0), 2) AS live_ratio
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
-- 数据库总大小
SELECT
datname,
pg_size_pretty(pg_database_size(datname)) AS size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;
-- 预估表增长
SELECT
schemaname,
relname,
n_live_tup,
n_dead_tup,
n_tup_ins,
n_tup_upd,
n_tup_del,
last_vacuum,
last_autovacuum
FROM pg_stat_user_tables
WHERE n_live_tup > 10000;
15.7 故障排除
15.7.1 常见问题
-- 连接数过多
SELECT COUNT(*) FROM pg_stat_activity;
SELECT MAX(max_connections) FROM pg_settings;
-- 磁盘空间不足
SELECT pg_size_pretty(pg_tablespace_size('pg_default'));
SELECT * FROM pg_ls_waldir() ORDER BY modification DESC LIMIT 10;
-- 长查询
SELECT
pid,
now() - query_start AS duration,
state,
left(query, 200) AS query
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- 终止问题查询
SELECT pg_cancel_backend(pid); -- 优雅终止
SELECT pg_terminate_backend(pid); -- 强制终止
-- 锁等待
SELECT
l.locktype,
l.relation::regclass,
l.mode,
l.granted,
l.pid,
a.usename,
a.query
FROM pg_locks l
JOIN pg_stat_activity a ON l.pid = a.pid
WHERE NOT l.granted;
15.7.2 健康检查脚本
#!/bin/bash
# health_check.sh - PostgreSQL 健康检查
echo "=== PostgreSQL 健康检查 ==="
echo ""
# 检查服务状态
systemctl is-active postgresql || echo "WARNING: PostgreSQL 未运行"
# 检查连接数
psql -U postgres -t -c "SELECT count(*) FROM pg_stat_activity;"
MAX_CONN=$(psql -U postgres -t -c "SHOW max_connections;")
echo "最大连接数: $MAX_CONN"
# 检查复制延迟
psql -U postgres -c "SELECT client_addr, state,
pg_size_pretty(pg_wal_lsn_diff(sent_lsn, replay_lsn)) AS delay
FROM pg_stat_replication;"
# 检查数据库大小
psql -U postgres -c "SELECT datname, pg_size_pretty(pg_database_size(datname))
FROM pg_database ORDER BY pg_database_size(datname) DESC LIMIT 5;"
# 检查长查询
psql -U postgres -c "SELECT pid, now() - query_start AS duration, query
FROM pg_stat_activity
WHERE state != 'idle' AND query_start < now() - interval '5 minutes';"
15.8 本章小结
PostgreSQL 运维管理要点:
| 领域 | 关键任务 |
|---|---|
| 配置 | 内存优化、连接管理 |
| 监控 | 性能指标、慢查询 |
| 维护 | VACUUM、ANALYZE、REINDEX |
| 容量 | 空间分析、增长预估 |
| 故障排除 | 连接、锁、死锁 |
📌 教程总结
恭喜完成 PostgreSQL 权威教程!本教程涵盖了:
- 基础入门:安装配置、SQL 基础、数据类型
- 核心功能:表设计、查询、索引、视图函数
- 高级特性:事务、存储过程、触发器、分区表
- 运维实践:备份恢复、高可用、安全、监控管理