Shell脚本实战
最后更新: 2026-01-15
作者: Linux Team
目录
实战案例概述
本章将展示真实生产环境中常用的Shell脚本案例,涵盖系统监控、日志处理、数据备份、自动化部署等场景。
系统监控脚本
主机健康监控
#!/bin/bash
# health_check.sh - system health monitoring (CPU / memory / disk / services)
# NOTE(review): set -e is not enabled; each check reports its own status and
# main() aggregates them -- confirm this is intended.
set -uo pipefail
# Configuration: alert thresholds in percent
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEM=90
ALERT_THRESHOLD_DISK=85
ALERT_EMAIL="admin@example.com"
SLACK_WEBHOOK="" # optional Slack incoming-webhook URL (empty disables Slack alerts)
# ANSI color codes for terminal output
RED='\033[0;31m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
NC='\033[0m'
log() {
  # Print MESSAGE prefixed with a timestamp: [YYYY-mm-dd HH:MM:SS] MESSAGE
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1"
}
check_cpu() {
  # Overall CPU utilization computed as 100 - idle%.  The original read awk
  # field $2 (user time only), which under-reports combined load; parsing the
  # "id" (idle) field is also more robust across top output formats.
  local usage
  usage=$(top -bn1 | awk -F',' '/[Cc]pu\(s\)/ {
    for (i = 1; i <= NF; i++) if ($i ~ /id/) {
      gsub(/[^0-9.]/, "", $i); print int(100 - $i); exit
    }
  }')
  usage=${usage:-0}   # default to 0 if top output could not be parsed
  log "CPU Usage: ${usage}%"
  if [ "$usage" -gt "$ALERT_THRESHOLD_CPU" ]; then
    echo -e "${RED}ALERT: CPU usage is ${usage}% (threshold: ${ALERT_THRESHOLD_CPU}%)${NC}"
    return 1
  fi
  echo -e "${GREEN}CPU: OK${NC}"
  return 0
}
check_memory() {
  # Percentage of physical memory in use (used / total * 100, rounded).
  local pct
  pct=$(free | awk '/Mem/ {printf "%.0f", $3 / $2 * 100}')
  log "Memory Usage: ${pct}%"
  if [ "$pct" -gt "$ALERT_THRESHOLD_MEM" ]; then
    echo -e "${RED}ALERT: Memory usage is ${pct}% (threshold: ${ALERT_THRESHOLD_MEM}%)${NC}"
    return 1
  fi
  echo -e "${GREEN}Memory: OK${NC}"
  return 0
}
check_disk() {
  # Root filesystem usage as an integer percentage (int() strips the "%").
  local pct
  pct=$(df -h / | tail -1 | awk '{ print int($5) }')
  log "Disk Usage: ${pct}%"
  if [ "$pct" -gt "$ALERT_THRESHOLD_DISK" ]; then
    echo -e "${RED}ALERT: Disk usage is ${pct}% (threshold: ${ALERT_THRESHOLD_DISK}%)${NC}"
    return 1
  fi
  echo -e "${GREEN}Disk: OK${NC}"
  return 0
}
check_load() {
  # Log the 1-minute load average with the core count for context
  # (informational only; no alerting).
  local load_1min cores
  load_1min=$(awk '{print $1}' /proc/loadavg)
  cores=$(nproc)
  log "Load Average: $load_1min (cores: $cores)"
}
check_services() {
  # Verify each critical daemon is active according to systemd; returns
  # non-zero when at least one service is down.
  local critical=("nginx" "mysql" "sshd" "docker")
  local down=()
  local svc
  for svc in "${critical[@]}"; do
    if systemctl is-active --quiet "$svc" 2>/dev/null; then
      echo -e "${GREEN}[$svc] Running${NC}"
    else
      echo -e "${RED}[$svc] Not Running${NC}"
      down+=("$svc")
    fi
  done
  (( ${#down[@]} == 0 ))
}
check_process_count() {
  # Number of running processes.  `ps aux` prints a header line, so subtract
  # one (the original counted the header, over-reporting by one).
  local total
  total=$(( $(ps aux | wc -l) - 1 ))
  log "Total processes: $total"
  # Warning threshold is overridable via PROC_WARN_THRESHOLD (default 500).
  if [ "$total" -gt "${PROC_WARN_THRESHOLD:-500}" ]; then
    echo -e "${YELLOW}WARNING: High process count ($total)${NC}"
  fi
}
send_alert() {
# Send an alert through each configured channel.
# $1 - human-readable alert message
local message="$1"
# Email notification (requires a working local MTA for `mail`)
if [ -n "$ALERT_EMAIL" ]; then
echo "$message" | mail -s "[Alert] $(hostname) Health Check" "$ALERT_EMAIL"
fi
# Slack notification via incoming webhook.
# NOTE(review): $message is interpolated into the JSON payload unescaped --
# quotes or backslashes in the message would break it.
if [ -n "$SLACK_WEBHOOK" ]; then
curl -s -X POST -H 'Content-type: application/json' \
--data "{\"text\":\"$message\"}" "$SLACK_WEBHOOK"
fi
}
# Entry point: run every check, aggregate failures, alert if any.
main() {
  log "=== Starting Health Check ==="
  local overall=0
  check_cpu || overall=1
  check_memory || overall=1
  check_disk || overall=1
  check_load
  check_process_count
  echo
  check_services || overall=1
  log "=== Health Check Complete ==="
  if (( overall )); then
    log "ALERTS DETECTED!"
    send_alert "Health check failed on $(hostname)"
    exit 1
  fi
  exit 0
}
main "$@"
服务可用性监控
#!/bin/bash
# service_monitor.sh - service availability monitoring
set -uo pipefail
# Configuration: entries are "name:url".  http(s) URLs are probed with curl;
# tcp:// URLs with a raw TCP connect.
SERVICES=(
"nginx:http://localhost:80"
"mysql:tcp://localhost:3306"
"redis:tcp://localhost:6379"
"api:http://localhost:8080/api/health"
)
ALERT_EMAIL="admin@example.com"
check_http() {
  # Probe an HTTP endpoint; 200/301/302 count as healthy.
  local target=$1
  local status
  status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "$target")
  case "$status" in
    200|301|302)
      echo "OK (HTTP $status)"
      return 0
      ;;
    *)
      echo "FAILED (HTTP $status)"
      return 1
      ;;
  esac
}
check_tcp() {
  # Probe a TCP port using bash's /dev/tcp pseudo-device.
  # Parameter expansion replaces the original echo | sed | cut pipelines.
  local endpoint=${1#tcp://}
  local host=${endpoint%%:*}
  local port=${endpoint#*:}
  port=${port%%:*}
  if timeout 5 bash -c "echo > /dev/tcp/$host/$port" 2>/dev/null; then
    echo "OK"
    return 0
  else
    echo "FAILED"
    return 1
  fi
}
main() {
  # Probe every configured service; mail an alert listing any failures.
  local down=()
  local item name url result
  echo "=== Service Monitor ==="
  for item in "${SERVICES[@]}"; do
    name=${item%%:*}
    url=${item#*:}
    echo -n "[$name] "
    case "$url" in
      http*) result=$(check_http "$url") ;;
      *)     result=$(check_tcp "$url") ;;
    esac
    echo "$result"
    case "$result" in
      OK*) ;;
      *)   down+=("$name") ;;
    esac
  done
  if [ ${#down[@]} -gt 0 ]; then
    echo "FAILED SERVICES: ${down[*]}"
    # Send the alert email
    echo "Service ${down[*]} failed on $(hostname)" | \
    mail -s "[ALERT] Service Down" "$ALERT_EMAIL"
    exit 1
  fi
  exit 0
}
main "$@"
日志处理脚本
日志轮转与清理
#!/bin/bash
# log_rotation.sh - size-based log rotation, compression and cleanup
set -uo pipefail
# Configuration
LOG_DIRS=("/var/log/nginx" "/var/log/mysql" "/var/log/app")
RETENTION_DAYS=30 # delete logs older than this many days
MAX_SIZE_MB=100 # rotate a log once it grows past this size
COMPRESS_AFTER_DAYS=7 # gzip rotated logs untouched for this many days
# log MESSAGE - timestamped stdout logger
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
rotate_log() {
  # Rotate a single log file once it exceeds MAX_SIZE_MB: rename it with a
  # timestamp, ask the owning service to reopen handles, recreate the file.
  local logfile=$1
  local timestamp
  timestamp=$(date +%Y%m%d_%H%M%S)
  if [ ! -f "$logfile" ]; then
    return
  fi
  local size_mb
  size_mb=$(du -m "$logfile" | cut -f1)
  if [ "$size_mb" -gt "$MAX_SIZE_MB" ]; then
    log "Rotating $logfile (size: ${size_mb}MB)"
    local rotated="${logfile%.log}.${timestamp}.log"
    mv "$logfile" "$rotated"
    # Tell the service to reopen its log handle; until it does, writes still
    # land in the renamed file.
    if [[ "$logfile" == *nginx* ]]; then
      nginx -s reopen 2>/dev/null || true
    fi
    # Recreate the file and restore the rotated file's ownership so the
    # service can keep writing (the original always left a root-owned file
    # when run from root's cron).  Best-effort: chown may fail unprivileged.
    touch "$logfile"
    chmod 644 "$logfile"
    chown --reference="$rotated" "$logfile" 2>/dev/null || true
  fi
}
compress_old_logs() {
  # gzip any plain *.log file untouched for more than COMPRESS_AFTER_DAYS days.
  local target_dir=$1
  find "$target_dir" -type f -name "*.log" ! -name "*.gz" \
  -mtime +"$COMPRESS_AFTER_DAYS" -exec gzip {} \;
}
cleanup_old_logs() {
  # Delete compressed or plain logs older than the retention window.
  local target_dir=$1
  find "$target_dir" -type f \( -name "*.log" -o -name "*.gz" \) \
  -mtime +"$RETENTION_DAYS" -delete
  log "Cleaned logs older than $RETENTION_DAYS days in $target_dir"
}
main() {
  # For each configured directory: rotate oversized logs, then compress old
  # ones, then purge anything past retention.
  log "=== Log Rotation Started ==="
  local dir logfile
  for dir in "${LOG_DIRS[@]}"; do
    if [ ! -d "$dir" ]; then
      log "WARNING: Directory $dir not found"
      continue
    fi
    log "Processing $dir"
    for logfile in "$dir"/*.log; do
      [ -f "$logfile" ] && rotate_log "$logfile"
    done
    compress_old_logs "$dir"
    cleanup_old_logs "$dir"
  done
  log "=== Log Rotation Complete ==="
}
main "$@"
日志实时分析
#!/bin/bash
# log_analyzer.sh - access-log analysis (nginx combined-style format assumed)
set -uo pipefail
# Configuration: the first positional argument selects the log file.
# NOTE(review): main() also reads the positional arguments for a sub-command;
# double-check how the log path and the command are meant to be split.
LOG_FILE=${1:-/var/log/nginx/access.log}
THRESHOLD_RPS=100 # requests-per-second threshold (not referenced in this script)
# log MESSAGE - timestamped stdout logger
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
analyze_status_codes() {
  # Frequency table of HTTP status codes (field 9), most common first.
  printf '%s\n' "=== Status Codes ==="
  awk '{print $9}' "$LOG_FILE" | sort | uniq -c | sort -rn
}
analyze_top_ips() {
  # Ten most frequent client IPs (field 1).
  printf '%s\n' "=== Top 10 IPs ==="
  awk '{print $1}' "$LOG_FILE" | sort | uniq -c | sort -rn | head -10
}
analyze_top_pages() {
  # Ten most requested URIs (field 7).
  printf '%s\n' "=== Top 10 Pages ==="
  awk '{print $7}' "$LOG_FILE" | sort | uniq -c | sort -rn | head -10
}
analyze_response_time() {
  # Average of the next-to-last quoted field (assumes the log_format appends
  # response time in ms as a quoted field -- TODO confirm against nginx conf).
  # Guard count == 0: the original divided by zero on an empty log, which is
  # a fatal error in gawk.
  echo "=== Response Time Stats ==="
  awk -F'"' '{sum += $(NF-1); count++}
  END {
    if (count > 0) print "Avg:", sum / count "ms"
    else print "Avg: N/A (no data)"
  }' "$LOG_FILE"
}
# Count server errors (HTTP status >= 500) grouped by request path (field 7).
analyze_errors() {
echo "=== Error Analysis ==="
awk '$9 >= 500 {print $7, $9}' "$LOG_FILE" | sort | uniq -c | sort -rn
}
# NOTE(review): placeholder -- the loop body is a no-op, so this function only
# follows the file without computing anything.  The per-minute request count
# still needs to be implemented.  Also note `read` without -r mangles
# backslashes; fix when implementing.
realtime_monitor() {
echo "=== Real-time Monitoring (Ctrl+C to stop) ==="
tail -f "$LOG_FILE" | while read line; do
# Count requests over the most recent minute (not yet implemented)
:
done
}
generate_report() {
  # Write a full analysis snapshot to a timestamped report file in the CWD.
  local outfile
  outfile="report_$(date +%Y%m%d_%H%M%S).txt"
  {
    printf '%s\n' "=== Log Analysis Report ==="
    printf '%s\n' "Generated: $(date)"
    printf '%s\n' "Log file: $LOG_FILE"
    printf '\n'
    analyze_status_codes
    printf '\n'
    analyze_top_ips
    printf '\n'
    analyze_top_pages
    printf '\n'
    analyze_errors
  } > "$outfile"
  log "Report saved to $outfile"
}
main() {
  # $1 is consumed at the top of the script as LOG_FILE, so the sub-command
  # must come from $2.  The original read $1 here, which clashed with
  # LOG_FILE: `script.sh status` treated "status" as the log path.
  local cmd=${2:-report}
  case "$cmd" in
    report)
      generate_report
      ;;
    status)
      analyze_status_codes
      ;;
    ips)
      analyze_top_ips
      ;;
    pages)
      analyze_top_pages
      ;;
    errors)
      analyze_errors
      ;;
    *)
      echo "Usage: $0 <logfile> {report|status|ips|pages|errors}"
      exit 1
      ;;
  esac
}
main "$@"
备份脚本
数据库备份
#!/bin/bash
# mysql_backup.sh - MySQL database backup / restore helper
set -uo pipefail
# Configuration -- every value is overridable via environment variables.
MYSQL_HOST=${MYSQL_HOST:-localhost}
MYSQL_PORT=${MYSQL_PORT:-3306}
MYSQL_USER=${MYSQL_USER:-backup}
# NOTE(review): avoid passing this as -p<pass> on a command line (visible in
# `ps` output); MYSQL_PWD or --defaults-extra-file is safer.
MYSQL_PASSWORD=${MYSQL_PASSWORD:-}
BACKUP_DIR=${BACKUP_DIR:-/backup/mysql}
RETENTION_DAYS=${RETENTION_DAYS:-7}
DATABASES=${DATABASES:-"--all-databases"} # space-separated DB list, or the literal --all-databases
# log MESSAGE - timestamped stdout logger
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
backup_database() {
  # Dump one database to a timestamped, gzip-compressed file and verify it.
  # The password is passed via the MYSQL_PWD environment variable instead of
  # -p<pass> so it is not visible in `ps` output.
  local db=$1
  local timestamp backup_file
  timestamp=$(date +%Y%m%d_%H%M%S)
  backup_file="${BACKUP_DIR}/${db}_${timestamp}.sql.gz"
  log "Backing up database: $db"
  if MYSQL_PWD="$MYSQL_PASSWORD" mysqldump -h "$MYSQL_HOST" -P "$MYSQL_PORT" \
    -u "$MYSQL_USER" --single-transaction --quick "$db" | \
    gzip > "$backup_file"; then
    local size
    size=$(du -h "$backup_file" | cut -f1)
    log "Backup completed: $backup_file ($size)"
    # Integrity check: the gzip stream must be readable end to end.
    if gunzip -t "$backup_file" 2>/dev/null; then
      log "Backup verified: $db"
    else
      log "ERROR: Backup verification failed for $db"
      return 1
    fi
  else
    log "ERROR: Backup failed for $db"
    return 1
  fi
}
# Delete compressed dumps older than RETENTION_DAYS from BACKUP_DIR.
cleanup_old_backups() {
log "Cleaning backups older than $RETENTION_DAYS days..."
find "$BACKUP_DIR" -name "*.sql.gz" -mtime +"$RETENTION_DAYS" -delete
log "Cleanup completed"
}
restore_database() {
  # Restore a gzip-compressed SQL dump into an existing database.
  # $1 - path to .sql.gz backup, $2 - target database name
  local backup_file=$1
  local db_name=$2
  if [ ! -f "$backup_file" ]; then
    log "ERROR: Backup file not found: $backup_file"
    return 1
  fi
  log "Restoring $db_name from $backup_file..."
  # MYSQL_PWD keeps the password out of `ps`; check the pipeline so a failed
  # restore is not reported as success (the original always logged success).
  if gunzip -c "$backup_file" | MYSQL_PWD="$MYSQL_PASSWORD" mysql \
    -h "$MYSQL_HOST" -P "$MYSQL_PORT" -u "$MYSQL_USER" "$db_name"; then
    log "Restore completed"
  else
    log "ERROR: Restore failed for $db_name"
    return 1
  fi
}
main() {
  # Sub-commands: backup (default) | restore <file> <db> | list
  mkdir -p "$BACKUP_DIR"
  case "${1:-backup}" in
    backup)
      log "=== MySQL Backup Started ==="
      if [ "$DATABASES" = "--all-databases" ]; then
        local backup_file
        backup_file="${BACKUP_DIR}/all_databases_$(date +%Y%m%d_%H%M%S).sql.gz"
        log "Backing up all databases..."
        # Check the dump pipeline's status: the original unconditionally
        # logged success even when mysqldump failed.  MYSQL_PWD keeps the
        # password out of `ps` output.
        if MYSQL_PWD="$MYSQL_PASSWORD" mysqldump -h "$MYSQL_HOST" -P "$MYSQL_PORT" \
          -u "$MYSQL_USER" \
          --all-databases --single-transaction --quick | \
          gzip > "$backup_file"; then
          log "All databases backed up to $backup_file"
        else
          log "ERROR: Full backup failed"
          exit 1
        fi
      else
        local db
        for db in $DATABASES; do
          backup_database "$db"
        done
      fi
      cleanup_old_backups
      log "=== MySQL Backup Complete ==="
      ;;
    restore)
      if [ -z "${2:-}" ] || [ -z "${3:-}" ]; then
        echo "Usage: $0 restore <backup_file> <database>"
        exit 1
      fi
      restore_database "$2" "$3"
      ;;
    list)
      ls -lh "$BACKUP_DIR"/*.sql.gz 2>/dev/null || echo "No backups found"
      ;;
    *)
      echo "Usage: $0 {backup|restore|list}"
      exit 1
      ;;
  esac
}
main "$@"
文件系统备份
#!/bin/bash
# fs_backup.sh - filesystem backup with optional remote copy
# NOTE(review): the original header said "incremental", but every run creates
# a full tarball of each source dir -- confirm which behavior is wanted.
set -uo pipefail
# Configuration
SOURCE_DIRS=("/home" "/etc" "/var/www")
BACKUP_DIR="/backup/fs"
REMOTE_HOST="backup-server" # empty string disables the rsync step
REMOTE_PATH="/backup"
RETENTION_DAYS=30
EXCLUDE_FILE="/etc/backup_exclude.txt" # optional tar --exclude-from list
# log MESSAGE - timestamped stdout logger
log() {
echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
create_backup() {
  # Archive each source dir, record metadata, optionally rsync off-host,
  # then bundle everything into one timestamped tarball.
  local timestamp backup_name backup_path
  timestamp=$(date +%Y%m%d_%H%M%S)
  backup_name="backup_${timestamp}"
  backup_path="${BACKUP_DIR}/${backup_name}"
  # Hold exclusions in an array so an EXCLUDE_FILE path containing spaces
  # survives word-splitting (the original used an unquoted string).
  local exclude_opts=()
  if [ -f "$EXCLUDE_FILE" ]; then
    exclude_opts=(--exclude-from="$EXCLUDE_FILE")
  fi
  mkdir -p "$backup_path"
  local dir
  for dir in "${SOURCE_DIRS[@]}"; do
    if [ -d "$dir" ]; then
      log "Backing up $dir..."
      # Best effort: unreadable files must not abort the whole run.
      tar -czf "${backup_path}/$(basename "$dir").tar.gz" \
        -C "$(dirname "$dir")" \
        "${exclude_opts[@]}" \
        "$(basename "$dir")" 2>/dev/null || true
    fi
  done
  # Record what this backup contains.
  {
    echo "Backup Date: $(date)"
    echo "Source Dirs: ${SOURCE_DIRS[*]}"
    echo "Hostname: $(hostname)"
  } > "${backup_path}/backup_info.txt"
  log "Local backup created: $backup_path"
  # Optional off-host copy.
  if [ -n "$REMOTE_HOST" ]; then
    log "Copying to remote host..."
    rsync -avz --progress "$backup_path/" \
      "${REMOTE_HOST}:${REMOTE_PATH}/" || \
      log "WARNING: Remote backup failed"
  fi
  # Guard the cd: the original ignored a failed cd, which would have run
  # tar/rm -rf against the current working directory instead.
  cd "$BACKUP_DIR" || { log "ERROR: cannot cd to $BACKUP_DIR"; return 1; }
  tar -czf "${backup_name}.tar.gz" "$backup_name"
  rm -rf "$backup_name"
  log "Final backup: ${backup_path}.tar.gz"
}
# Delete bundled backups older than RETENTION_DAYS (verbose per-file removal).
cleanup_old_backups() {
log "Cleaning backups older than $RETENTION_DAYS days..."
find "$BACKUP_DIR" -name "backup_*.tar.gz" -mtime +"$RETENTION_DAYS" -exec rm -v {} \;
log "Cleanup completed"
}
verify_backup() {
  # An archive is considered good when tar can list its full contents.
  local archive=$1
  log "Verifying backup: $archive"
  if ! tar -tzf "$archive" > /dev/null 2>&1; then
    log "ERROR: Backup verification failed: $archive"
    return 1
  fi
  log "Backup verified: $archive"
  return 0
}
restore_backup() {
  # Unpack an archive into restore_dir (default: /).
  # The original logged "Restore completed" even when tar failed.
  local archive=$1
  local restore_dir=${2:-/}
  log "Restoring from $archive to $restore_dir..."
  if tar -xzf "$archive" -C "$restore_dir"; then
    log "Restore completed"
  else
    log "ERROR: Restore failed from $archive"
    return 1
  fi
}
main() {
  # Sub-commands: backup (default) | restore <file> [dir] | verify <file> | list
  mkdir -p "$BACKUP_DIR"
  case "${1:-backup}" in
    backup)
      log "=== Filesystem Backup Started ==="
      create_backup
      cleanup_old_backups
      log "=== Backup Complete ==="
      ;;
    restore)
      if [ -z "${2:-}" ]; then
        echo "Usage: $0 restore <backup_file> [restore_dir]"
        exit 1
      fi
      # Abort when verification fails -- the original ignored the result and
      # restored a corrupt archive anyway.
      verify_backup "$2" || exit 1
      restore_backup "$2" "${3:-/}"
      ;;
    verify)
      if [ -z "${2:-}" ]; then
        echo "Usage: $0 verify <backup_file>"
        exit 1
      fi
      verify_backup "$2"
      ;;
    list)
      ls -lh "$BACKUP_DIR"/backup_*.tar.gz 2>/dev/null
      ;;
    *)
      echo "Usage: $0 {backup|restore|verify|list}"
      exit 1
      ;;
  esac
}
main "$@"
自动化部署脚本
应用部署脚本
#!/bin/bash
# deploy.sh - application deployment automation
set -uo pipefail
# Configuration
APP_NAME="myapp"
APP_DIR="/opt/${APP_NAME}"
REPO_URL="git@github.com:org/${APP_NAME}.git"
REPO_BRANCH="main"
DEPLOY_USER="deploy"
ENVIRONMENT=${1:-staging} # deployment target, e.g. staging / production
CONFIG_DIR="/etc/${APP_NAME}"
# ANSI color codes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# log MESSAGE - timestamped stdout logger (-e: escape sequences expanded)
log() {
echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
# error MESSAGE - red error line on stderr
error() {
echo -e "${RED}[ERROR] $1${NC}" >&2
}
# success MESSAGE - green confirmation line on stdout
success() {
echo -e "${GREEN}[SUCCESS] $1${NC}"
}
backup_current() {
  # Snapshot the live deployment into /tmp before touching anything, so
  # rollback() has something to restore.
  if [ ! -d "$APP_DIR" ]; then
    return 0
  fi
  local snapshot="/tmp/${APP_NAME}_backup_$(date +%Y%m%d_%H%M%S)"
  log "Backing up current deployment..."
  cp -a "$APP_DIR" "$snapshot"
  success "Backup created: $snapshot"
}
# Fetch the application source: update an existing checkout in place, or
# clone fresh when APP_DIR is not yet a git repository.
clone_or_pull() {
if [ -d "$APP_DIR/.git" ]; then
log "Pulling latest changes..."
# NOTE(review): cd failure is not checked; git would then run in the CWD.
cd "$APP_DIR"
sudo -u "$DEPLOY_USER" git pull origin "$REPO_BRANCH"
else
log "Cloning repository..."
sudo -u "$DEPLOY_USER" git clone -b "$REPO_BRANCH" "$REPO_URL" "$APP_DIR"
fi
}
install_dependencies() {
  # Install runtime dependencies for whichever stacks the checkout contains
  # (npm / pip / composer manifests are each optional).
  log "Installing dependencies..."
  # Fail fast if the app dir is missing: the original ignored a failed cd and
  # would then have installed from whatever the current directory was.
  cd "$APP_DIR" || { error "App directory not found: $APP_DIR"; return 1; }
  if [ -f "package.json" ]; then
    npm install --production
  fi
  if [ -f "requirements.txt" ]; then
    pip install -r requirements.txt
  fi
  if [ -f "composer.json" ]; then
    composer install --no-dev
  fi
}
run_migrations() {
  # Apply database migrations.  cd explicitly: the original silently relied
  # on install_dependencies() having already changed into $APP_DIR.
  log "Running database migrations..."
  cd "$APP_DIR" || { error "App directory not found: $APP_DIR"; return 1; }
  if [ -f "manage.py" ]; then
    python manage.py migrate --noinput
  fi
  if [ -f "migrations.sh" ]; then
    ./migrations.sh
  fi
}
symlink_config() {
  # Point the app at environment-specific config kept outside the repo.
  log "Updating configuration..."
  if [ -d "$CONFIG_DIR" ]; then
    ln -sf "$CONFIG_DIR/${ENVIRONMENT}.env" "$APP_DIR/.env"
    # deploy/ may not exist in a fresh clone; create it so the nginx symlink
    # cannot fail (the original assumed the directory was present).
    mkdir -p "$APP_DIR/deploy"
    ln -sf "$CONFIG_DIR/nginx.conf" "$APP_DIR/deploy/nginx.conf"
  fi
}
# Restart the app service and nginx.  The app restart is best-effort
# (|| true) so a missing unit does not abort the deploy; nginx is not guarded.
restart_services() {
log "Restarting services..."
sudo systemctl restart "${APP_NAME}.service" || true
sudo systemctl restart nginx
}
health_check() {
  # Poll the health endpoint until it responds or we give up (30 tries, 2s
  # apart, ~60s total).
  log "Running health check..."
  local -r max_attempts=30
  local attempt
  for (( attempt = 0; attempt < max_attempts; attempt++ )); do
    if curl -sf "http://localhost:8080/health" > /dev/null 2>&1; then
      success "Health check passed!"
      return 0
    fi
    sleep 2
  done
  error "Health check failed after $max_attempts attempts"
  return 1
}
rollback() {
  # Restore the most recent snapshot taken by backup_current().
  error "Deployment failed! Rolling back..."
  local backup_dir
  # The original tested `[ -d /tmp/..._backup_* ]`, which breaks as soon as
  # more than one snapshot matches the glob ("too many arguments"); select
  # the newest snapshot explicitly instead.
  backup_dir=$(ls -td /tmp/"${APP_NAME}"_backup_* 2>/dev/null | head -1)
  if [ -n "$backup_dir" ] && [ -d "$backup_dir" ]; then
    rm -rf "$APP_DIR"
    cp -a "$backup_dir" "$APP_DIR"
    restart_services
    success "Rollback completed"
  else
    error "No backup found to roll back to"
  fi
}
# Orchestrate the full deployment; must run as root.
main() {
log "=== Deployment Started: ${APP_NAME} to ${ENVIRONMENT} ==="
# Require root (services are restarted and files written under /opt)
if [ "$EUID" -ne 0 ]; then
error "This script must be run as root"
exit 1
fi
# Roll back automatically when a later command fails.
# NOTE(review): the ERR trap plus the explicit rollback in the else-branch
# below can run rollback twice for some failure paths -- confirm acceptable.
trap rollback ERR
backup_current
clone_or_pull
install_dependencies
symlink_config
run_migrations
restart_services
if health_check; then
success "Deployment completed successfully!"
else
rollback
exit 1
fi
log "=== Deployment Finished ==="
}
main "$@"
定时任务设置
Crontab配置
# 编辑crontab
crontab -e
# 常用格式
# * * * * * command
# │ │ │ │ │
# │ │ │ │ └── 星期 (0-7, 0和7是周日)
# │ │ │ └──── 月份 (1-12)
# │ │ └────── 日 (1-31)
# │ └──────── 时 (0-23)
# └───────── 分 (0-59)
# 示例
# 每分钟执行
* * * * * /path/to/script.sh
# 每小时第5分钟
5 * * * * /path/to/script.sh
# 每天凌晨3点
0 3 * * * /path/to/backup.sh
# 每周日凌晨2点
0 2 * * 0 /path/to/weekly_task.sh
# 每月1日凌晨1点
0 1 1 * * /path/to/monthly_task.sh
# 每5分钟执行
*/5 * * * * /path/to/monitor.sh
# 特定时间范围
0 9-17 * * 1-5 /path/to/business_hours.sh
# 系统crontab
sudo vim /etc/crontab
# cron.daily / cron.hourly 是目录:把可执行脚本放入其中即可按周期自动运行
ls /etc/cron.daily/
ls /etc/cron.hourly/
cron管理
# 查看crontab
crontab -l
# 查看系统cron
ls -la /etc/cron.d/
# 删除crontab
crontab -r
# 备份crontab
crontab -l > crontab_backup.txt
# 恢复crontab
crontab crontab_backup.txt
# 查看cron日志
grep CRON /var/log/syslog
journalctl -u cron
课后练习
实践任务
- 根据实际环境修改并运行健康检查脚本
- 配置数据库备份脚本的定时任务
- 实现一套完整的日志分析脚本
- 编写自动化部署脚本并配置CI/CD
- 配置日志轮转的cron任务
下一篇预告:我们将学习系统安全加固,掌握Linux安全防护技能。