第十四章:实战案例

通过实际案例学习 Elasticsearch 的应用,包括日志分析、全文搜索、电商搜索等实战场景。

最后更新: 2024-01-15
页面目录

第十四章:实战案例

14.1 案例概述

本章通过三个典型场景,展示 Elasticsearch 在实际项目中的应用:

  • 日志分析平台:使用 ELK Stack 构建集中式日志分析
  • 电商搜索系统:构建高性能商品搜索服务
  • 全文搜索系统:构建企业内部文档搜索

14.2 日志分析平台

14.2.1 架构设计

┌─────────────────────────────────────────────────────────────┐
│                    Data Sources                             │
│  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐        │
│  │ Apache   │ │ Nginx    │ │ App Logs │ │ Syslog   │        │
│  └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘        │
│       │            │            │            │              │
│       └────────────┴─────┬──────┴────────────┘              │
│                          │                                  │
│                    ┌─────▼─────┐                            │
│                    │  Beats    │                            │
│                    │ (Filebeat)│                            │
│                    └─────┬─────┘                            │
└──────────────────────────┼──────────────────────────────────┘
┌──────────────────────────▼──────────────────────────────────┐
│                    Logstash                                 │
│  ┌──────────────────────────────────────────────────────┐   │
│  │ Input → Filter → Output                              │   │
│  └──────────────────────────────────────────────────────┘   │
└──────────────────────────┬──────────────────────────────────┘
┌──────────────────────────▼──────────────────────────────────┐
│                  Elasticsearch Cluster                      │
│  ┌───────────┐  ┌───────────┐  ┌───────────┐                │
│  │ Hot Node  │  │ Warm Node │  │ Cold Node │                │
│  └───────────┘  └───────────┘  └───────────┘                │
└──────────────────────────┬──────────────────────────────────┘
                    ┌──────▼──────┐
                    │   Kibana    │
                    │  Dashboard  │
                    └─────────────┘

14.2.2 创建日志索引模板

# Index template applied to every index matching nginx-logs-*.
# It defines flat, typed fields for the nginx access log (status, path,
# request_time, ...) and attaches the ILM policy "nginx-logs-policy", which
# must be created separately.
# `host` is mapped as an object: events shipped by Beats carry host.name etc.,
# so mapping `host` as a scalar `ip` would reject those documents.
PUT /_index_template/nginx-logs
{
  "index_patterns": ["nginx-logs-*"],
  "priority": 100,
  "template": {
    "settings": {
      "number_of_shards": 2,
      "number_of_replicas": 1,
      "index.lifecycle.name": "nginx-logs-policy",
      "refresh_interval": "5s"
    },
    "mappings": {
      "properties": {
        "@timestamp": {
          "type": "date"
        },
        "host": {
          "properties": {
            "name": {
              "type": "keyword"
            }
          }
        },
        "client_ip": {
          "type": "ip"
        },
        "method": {
          "type": "keyword"
        },
        "path": {
          "type": "keyword"
        },
        "status": {
          "type": "short"
        },
        "body_bytes_sent": {
          "type": "long"
        },
        "request_time": {
          "type": "float"
        },
        "user_agent": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "referer": {
          "type": "keyword"
        }
      }
    }
  }
}

14.2.3 Logstash 配置

# /etc/logstash/conf.d/nginx.conf
#
# Pipeline: receive events from Beats on port 5044, parse nginx access and
# error log lines, and index them into daily nginx-logs-* indices.
# Access-log values are written to flat top-level fields (client_ip, method,
# path, status, body_bytes_sent, request_time, user_agent, referer) so that
# they match the nginx-logs index template and the queries built on it.

input {
  beats {
    port => 5044
  }
}

filter {
  if [fileset][name] == "access" {
    grok {
      # The :int / :float suffixes make grok emit numbers instead of strings.
      match => { "message" => '%{IPORHOST:client_ip} - %{DATA:user_name} \[%{HTTPDATE:time}\] "%{WORD:method} %{DATA:path} HTTP/%{NUMBER:http_version}" %{NUMBER:status:int} %{NUMBER:body_bytes_sent:int} "%{DATA:referer}" "%{DATA:user_agent}" %{NUMBER:request_time:float}' }
    }

    date {
      match => [ "time", "dd/MMM/yyyy:HH:mm:ss Z" ]
      target => "@timestamp"
      # The raw timestamp string is only needed to populate @timestamp.
      remove_field => [ "time" ]
    }

    geoip {
      source => "client_ip"
      target => "[geoip]"
    }

    useragent {
      source => "user_agent"
      target => "[ua]"
    }
  } else if [fileset][name] == "error" {
    grok {
      match => { "message" => '(?<timestamp>%{YEAR}[./-]%{MONTHNUM}[./-]%{MONTHDAY}[- ]%{TIME}) \[%{LOGLEVEL:level}\] %{POSINT:pid}#%{NUMBER}: %{GREEDYDATA:message}' }
      # Without overwrite, capturing into the existing "message" field would
      # append to it and turn it into an array.
      overwrite => [ "message" ]
    }

    date {
      match => [ "timestamp", "yyyy/MM/dd HH:mm:ss" ]
      target => "@timestamp"
      remove_field => [ "timestamp" ]
    }
  }
}

output {
  elasticsearch {
    hosts => ["http://es01:9200"]
    index => "nginx-logs-%{+YYYY.MM.dd}"
  }
}

14.2.4 Kibana 可视化

// Nginx access-log dashboard: simplified panel definitions.
// Field names are the ones defined in the nginx-logs index template.

// 1. Request volume over time (bucketed on the @timestamp date field)
{
  "title": "Nginx Requests Over Time",
  "type": "line",
  "metrics": [
    {
      "aggregation": "count",
      "field": "_doc"
    }
  ],
  "groupBy": [
    {
      "field": "@timestamp",
      "interval": "auto"
    }
  ]
}

// 2. HTTP status code distribution
{
  "title": "HTTP Status Distribution",
  "type": "pie",
  "metrics": [
    {
      "aggregation": "count",
      "field": "_doc"
    }
  ],
  "breakdown": [
    {
      "field": "status",
      "size": 10
    }
  ]
}

// 3. Top 10 request paths
{
  "title": "Top 10 Request Paths",
  "type": "table",
  "metrics": [
    {
      "aggregation": "count",
      "field": "_doc"
    }
  ],
  "dimensions": [
    {
      "field": "path",
      "size": 10,
      "sort": "desc"
    }
  ]
}

// 4. Slow-request analysis: strictly more than 1 second, matching the title
{
  "title": "Slow Requests (>1s)",
  "type": "data_table",
  "query": {
    "range": {
      "request_time": { "gt": 1 }
    }
  },
  "columns": ["client_ip", "method", "path", "status", "request_time"]
}

14.2.5 告警规则

# 5xx error-rate alert.
# Every 5 minutes, search the last 5 minutes of nginx-logs-* and notify Slack
# when more than 5% of the requests returned a status >= 500.
# - The total request count is taken from hits.total, so no aggregation on the
#   _id field is needed.
# - The search response is exposed to the script as ctx.payload.aggregations.
# - The ratio is computed in floating point and guarded against an empty window.
# - The webhook must POST, and its body has to be a string.
PUT /_watcher/watch/high_error_rate
{
  "trigger": {
    "schedule": { "interval": "5m" }
  },
  "input": {
    "search": {
      "request": {
        "indices": ["nginx-logs-*"],
        "body": {
          "size": 0,
          "track_total_hits": true,
          "query": {
            "range": {
              "@timestamp": {
                "gte": "now-5m"
              }
            }
          },
          "aggs": {
            "errors": {
              "filter": {
                "range": { "status": { "gte": 500 } }
              }
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "source": "def total = ctx.payload.hits.total; return total > 0 && (ctx.payload.aggregations.errors.doc_count * 1.0 / total) > 0.05"
    }
  },
  "actions": {
    "slack": {
      "webhook": {
        "method": "POST",
        "url": "https://hooks.slack.com/services/xxx",
        "headers": { "Content-Type": "application/json" },
        "body": "{\"text\": \"Alert: Nginx 5xx error rate > 5% in last 5 minutes\"}"
      }
    }
  }
}

14.3 电商搜索系统

14.3.1 需求分析

  • 商品名称、描述的全文搜索
  • 支持拼音搜索
  • 分类筛选、价格区间筛选
  • 品牌筛选、属性筛选
  • 排序(价格、销量、新品)
  • 高亮显示匹配词
  • 自动补全

14.3.2 索引设计

# Product catalogue index.
# - name: indexed with IK tokens plus pinyin variants and searched with ik_smart.
#   The pinyin filter keeps the original token (keep_original), otherwise only
#   pinyin forms would be indexed and a Chinese query could never match;
#   keep_joined_full_pinyin additionally indexes the joined full pinyin so a
#   whole-word pinyin query can match.
# - name.autocomplete: edge n-grams are produced at index time only; the query
#   text is analyzed with ik_smart so it is not itself split into prefixes.
# - attributes: nested, so a name/value pair is matched within one attribute.
# - suggest: completion field for the suggester.
# Requires the IK and pinyin analysis plugins.
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true
        },
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "ik_pinyin_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["pinyin_filter", "lowercase"]
        },
        "autocomplete_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["edge_ngram_filter", "lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "keyword" },
      "name": {
        "type": "text",
        "analyzer": "ik_pinyin_analyzer",
        "search_analyzer": "ik_smart",
        "fields": {
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer",
            "search_analyzer": "ik_smart"
          }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "category": {
        "type": "keyword"
      },
      "category_path": {
        "type": "keyword"
      },
      "brand": {
        "type": "keyword"
      },
      "price": {
        "type": "scaled_float",
        "scaling_factor": 100
      },
      "original_price": {
        "type": "scaled_float",
        "scaling_factor": 100
      },
      "stock": { "type": "integer" },
      "sales_count": { "type": "integer" },
      "rating": { "type": "float" },
      "tags": { "type": "keyword" },
      "attributes": {
        "type": "nested",
        "properties": {
          "name": { "type": "keyword" },
          "value": { "type": "keyword" }
        }
      },
      "is_active": { "type": "boolean" },
      "created_at": { "type": "date" },
      "updated_at": { "type": "date" },
      "suggest": {
        "type": "completion",
        "analyzer": "ik_max_word",
        "preserve_separators": true,
        "preserve_position_increments": true,
        "max_input_length": 50
      }
    }
  }
}

14.3.3 搜索实现

# Main product search.
# - query: full-text match on name (boosted x3), description and tags,
#   restricted to active products that are in stock.
# - functions: the summed function score multiplies the text score
#   (score_mode "sum", boost_mode "multiply"). The bare { "weight": 1 } entry is
#   a baseline: without it a product with sales_count 0 that matches neither
#   filter would get a function score of log1p(0) = 0 and its relevance would
#   be multiplied down to zero.
# - post_filter: narrows the hits to one category after the aggregations have
#   been computed, so the facet counts still cover every category.
POST /products/_search
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "iPhone 手机",
                "fields": ["name^3", "description", "tags"],
                "type": "best_fields"
              }
            }
          ],
          "filter": [
            { "term": { "is_active": true } },
            { "range": { "stock": { "gt": 0 } } }
          ]
        }
      },
      "functions": [
        {
          "weight": 1
        },
        {
          "filter": { "range": { "sales_count": { "gte": 1000 } } },
          "weight": 2
        },
        {
          "filter": { "range": { "rating": { "gte": 4.5 } } },
          "weight": 1.5
        },
        {
          "field_value_factor": {
            "field": "sales_count",
            "factor": 0.0001,
            "modifier": "log1p",
            "missing": 1
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  },
  "post_filter": {
    "bool": {
      "should": [
        { "term": { "category": "electronics" } }
      ]
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "pre_tags": ["<em>"],
        "post_tags": ["</em>"]
      },
      "description": {
        "fragment_size": 100,
        "number_of_fragments": 3
      }
    }
  },
  "aggs": {
    "categories": {
      "terms": { "field": "category", "size": 20 }
    },
    "brands": {
      "terms": { "field": "brand", "size": 30 }
    },
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "key": "0-500", "to": 500 },
          { "key": "500-1000", "from": 500, "to": 1000 },
          { "key": "1000-3000", "from": 1000, "to": 3000 },
          { "key": "3000-5000", "from": 3000, "to": 5000 },
          { "key": "5000+", "from": 5000 }
        ]
      }
    }
  }
}

14.3.4 自动补全

# Completion-suggester lookup on the "suggest" field: up to 10 fuzzy,
# de-duplicated suggestions for the typed prefix, returning only the
# name, price and brand of each suggested product.
POST /products/_search
{
  "_source": ["name", "price", "brand"],
  "suggest": {
    "product_suggest": {
      "prefix": "iphon",
      "completion": {
        "field": "suggest",
        "fuzzy": { "fuzziness": "AUTO" },
        "skip_duplicates": true,
        "size": 10
      }
    }
  }
}

14.3.5 分类聚合筛选

# Facet data for the filter sidebar: match products by name, then bucket the
# matches by category, brand and tag, and compute price statistics.
POST /products/_search
{
  "query": {
    "bool": {
      "must": [ { "match": { "name": "手机" } } ]
    }
  },
  "aggs": {
    "price_stats": { "stats": { "field": "price" } },
    "selected_tags": { "terms": { "size": 20, "field": "tags" } },
    "selected_brand": { "terms": { "field": "brand" } },
    "selected_category": { "terms": { "field": "category" } }
  }
}

14.4 全文搜索系统

14.4.1 需求分析

  • 文档标题和内容的全文检索
  • 支持中英文混合搜索
  • 高亮显示匹配内容
  • 相关文档推荐
  • 搜索建议和纠错

14.4.2 索引设计

# Internal document-search index.
# - content_analyzer: IK tokens, lower-cased and ASCII-folded; used for both
#   title and content.
# - title.autocomplete: edge n-grams (2-15 chars) are generated at index time
#   only. The query side uses content_analyzer, so the typed text is matched
#   against the stored prefixes instead of being split into prefixes itself.
# - suggest: completion field for the suggester.
# Requires the IK analysis plugin.
PUT /documents
{
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "content_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "asciifolding"]
        },
        "autocomplete_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "edge_ngram_filter"]
        }
      },
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 15
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "keyword" },
      "title": {
        "type": "text",
        "analyzer": "content_analyzer",
        "fields": {
          "keyword": { "type": "keyword" },
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer",
            "search_analyzer": "content_analyzer"
          }
        }
      },
      "content": {
        "type": "text",
        "analyzer": "content_analyzer"
      },
      "author": {
        "type": "keyword"
      },
      "department": {
        "type": "keyword"
      },
      "tags": {
        "type": "keyword"
      },
      "file_type": {
        "type": "keyword"
      },
      "created_at": {
        "type": "date"
      },
      "updated_at": {
        "type": "date"
      },
      "word_count": {
        "type": "integer"
      },
      "suggest": {
        "type": "completion"
      }
    }
  }
}

14.4.3 搜索实现

# Full-text document search: match the query on title (boosted x2) and content,
# keep only documents updated on or after 2023-01-01, wrap highlighted terms in
# <mark>, and return facets per department, author and file type plus a
# monthly histogram of update dates.
POST /documents/_search
{
  "query": {
    "bool": {
      "filter": [
        { "range": { "updated_at": { "gte": "2023-01-01" } } }
      ],
      "must": [
        {
          "multi_match": {
            "type": "best_fields",
            "fields": ["title^2", "content"],
            "query": "elasticsearch 集群管理"
          }
        }
      ]
    }
  },
  "aggs": {
    "by_file_type": { "terms": { "field": "file_type" } },
    "by_department": { "terms": { "field": "department" } },
    "by_author": { "terms": { "size": 10, "field": "author" } },
    "recent_docs": {
      "date_histogram": { "calendar_interval": "month", "field": "updated_at" }
    }
  },
  "highlight": {
    "post_tags": ["</mark>"],
    "pre_tags": ["<mark>"],
    "fields": {
      "content": {
        "fragmenter": "span",
        "number_of_fragments": 3,
        "fragment_size": 150
      },
      "title": { "number_of_fragments": 0 }
    }
  }
}

14.4.4 相关文档推荐

# Related-document recommendation: find the 5 documents whose title/content are
# most similar to the document with _id "doc_123", returning only title,
# author and updated_at.
POST /documents/_search
{
  "size": 5,
  "_source": ["title", "author", "updated_at"],
  "query": {
    "more_like_this": {
      "like": [ { "_index": "documents", "_id": "doc_123" } ],
      "fields": ["title", "content"],
      "minimum_should_match": "30%",
      "max_query_terms": 25,
      "min_doc_freq": 1,
      "min_term_freq": 1
    }
  }
}

14.4.5 搜索建议

# Search-box suggestions from the completion field "suggest": up to 5
# de-duplicated entries for the typed prefix, with fuzziness 2.
POST /documents/_search
{
  "suggest": {
    "title_suggest": {
      "completion": {
        "fuzzy": { "fuzziness": 2 },
        "skip_duplicates": true,
        "size": 5,
        "field": "suggest"
      },
      "prefix": "elasti"
    }
  }
}

14.5 性能优化实践

14.5.1 批量导入优化

# Bulk-import procedure for the products index: loosen settings, load the
# data, then put everything back and merge segments.

# 1. Temporarily raise the recovery throttle.
#    indices.memory.index_buffer_size is a static node setting and is rejected
#    by this API; to change it, set "indices.memory.index_buffer_size: 20%" in
#    elasticsearch.yml and restart the node.
PUT /_cluster/settings
{
  "persistent": {
    "indices.recovery.max_bytes_per_sec": "200mb"
  }
}

# 2. Disable refresh and drop replicas while loading
PUT /products/_settings
{
  "index": {
    "refresh_interval": "-1",
    "number_of_replicas": 0
  }
}

# 3. Run the bulk import
POST /_bulk
{ "index": { "_index": "products" } }
{ "product_data": "..." }

# 4. Restore the index settings and reset the recovery throttle
PUT /products/_settings
{
  "index": {
    "refresh_interval": "5s",
    "number_of_replicas": 1
  }
}

PUT /_cluster/settings
{
  "persistent": {
    "indices.recovery.max_bytes_per_sec": null
  }
}

# 5. Force merge. max_num_segments is a query-string parameter; the API takes
#    no request body.
POST /products/_forcemerge?max_num_segments=1

14.5.2 查询优化

# 1. Filter context: both clauses sit in bool.filter, so they restrict the
#    result set without contributing to the score
POST /products/_search
{
  "query": {
    "bool": {
      "filter": [
        { "range": { "price": { "lte": 1000, "gte": 100 } } },
        { "term": { "category": "electronics" } }
      ]
    }
  }
}

# 2. Source filtering: return only the listed fields for each hit
POST /products/_search
{
  "query": { "match_all": {} },
  "_source": ["id", "name", "price", "brand"]
}

# 3. Deep pagination: continue after the sort values of the previous page's
#    last hit instead of using from/size
POST /products/_search
{
  "search_after": [10.5, "product_123"],
  "sort": [
    "_score",
    { "id": "asc" }
  ],
  "size": 10
}

14.6 总结

本章通过三个实际案例展示了 Elasticsearch 的应用场景和实现方法:

  1. 日志分析平台:使用 ELK Stack 构建集中式日志分析,支持实时监控和告警
  2. 电商搜索系统:构建高性能商品搜索,支持全文搜索、过滤、排序和自动补全
  3. 全文搜索系统:构建企业内部文档搜索,支持高亮、相关推荐等功能

这些案例可以作为实际项目的参考模板,根据业务需求进行调整和扩展。