第十四章:实战案例
通过实际案例学习 Elasticsearch 的应用,包括日志分析、全文搜索、电商搜索等实战场景。
最后更新: 2024-01-15
页面目录
第十四章:实战案例
14.1 案例概述
本章通过三个典型场景,展示 Elasticsearch 在实际项目中的应用:
- 日志分析平台:使用 ELK Stack 构建集中式日志分析
- 电商搜索系统:构建高性能商品搜索服务
- 全文搜索系统:构建企业内部文档搜索
14.2 日志分析平台
14.2.1 架构设计
┌─────────────────────────────────────────────────────────────┐
│                        Data Sources                         │
│  ┌──────────┐   ┌──────────┐   ┌──────────┐   ┌──────────┐  │
│  │  Apache  │   │  Nginx   │   │ App Logs │   │  Syslog  │  │
│  └────┬─────┘   └────┬─────┘   └────┬─────┘   └────┬─────┘  │
│       │              │              │              │        │
│       └──────────────┴───────┬──────┴──────────────┘        │
│                              │                              │
│                        ┌─────▼─────┐                        │
│                        │   Beats   │                        │
│                        │ (Filebeat)│                        │
│                        └─────┬─────┘                        │
└──────────────────────────────┼──────────────────────────────┘
                               │
┌──────────────────────────────▼──────────────────────────────┐
│                          Logstash                           │
│  ┌───────────────────────────────────────────────────────┐  │
│  │                Input → Filter → Output                │  │
│  └───────────────────────────────────────────────────────┘  │
└──────────────────────────────┬──────────────────────────────┘
                               │
┌──────────────────────────────▼──────────────────────────────┐
│                    Elasticsearch Cluster                    │
│     ┌───────────┐      ┌───────────┐      ┌───────────┐     │
│     │ Hot Node  │      │ Warm Node │      │ Cold Node │     │
│     └───────────┘      └───────────┘      └───────────┘     │
└──────────────────────────────┬──────────────────────────────┘
                               │
                        ┌──────▼──────┐
                        │   Kibana    │
                        │  Dashboard  │
                        └─────────────┘
14.2.2 创建日志索引模板
# Composable index template applied to every index matching nginx-logs-*.
# `host` is mapped as an object (host.name) because Beats ships `host` as an
# object; mapping it as `ip` makes Elasticsearch reject those events with a
# mapper_parsing_exception.
PUT /_index_template/nginx-logs
{
  "index_patterns": ["nginx-logs-*"],
  "priority": 100,
  "template": {
    "settings": {
      "number_of_shards": 2,
      "number_of_replicas": 1,
      "index.lifecycle.name": "nginx-logs-policy",
      "refresh_interval": "5s"
    },
    "mappings": {
      "properties": {
        "@timestamp": {
          "type": "date"
        },
        "host": {
          "properties": {
            "name": {
              "type": "keyword"
            }
          }
        },
        "client_ip": {
          "type": "ip"
        },
        "method": {
          "type": "keyword"
        },
        "path": {
          "type": "keyword"
        },
        "status": {
          "type": "short"
        },
        "body_bytes_sent": {
          "type": "long"
        },
        "request_time": {
          "type": "float"
        },
        "user_agent": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "referer": {
          "type": "keyword"
        }
      }
    }
  }
}
14.2.3 Logstash 配置
# /etc/logstash/conf.d/nginx.conf
# Receive events shipped by Filebeat.
input {
  beats {
    port => 5044
  }
}

filter {
  # Access log. Captured field names are flat so they line up with the
  # nginx-logs index template (client_ip, method, path, status,
  # body_bytes_sent, referer, user_agent, request_time). Numeric captures are
  # converted so they are sent as numbers rather than strings.
  if [fileset][name] == "access" {
    grok {
      match => { "message" => '%{IPORHOST:client_ip} - %{DATA:user_name} \[%{HTTPDATE:time}\] "%{WORD:method} %{DATA:path} HTTP/%{NUMBER:http_version}" %{NUMBER:status:int} %{NUMBER:body_bytes_sent:int} "%{DATA:referer}" "%{DATA:user_agent}" %{NUMBER:request_time:float}' }
    }
    # Use the request time from the log line as the event timestamp.
    date {
      match => [ "time", "dd/MMM/yyyy:HH:mm:ss Z" ]
      target => "@timestamp"
    }
    geoip {
      source => "client_ip"
      target => "[geoip]"
    }
    useragent {
      source => "user_agent"
      target => "[ua]"
    }
  }

  # Error log.
  if [fileset][name] == "error" {
    grok {
      match => { "message" => '(?<timestamp>%{YEAR}[./-]%{MONTHNUM}[./-]%{MONTHDAY}[- ]%{TIME}) \[%{LOGLEVEL:level}\] %{POSINT:pid}#%{NUMBER}: %{GREEDYDATA:message}' }
      # Without overwrite, grok appends the capture to the existing `message`
      # field and turns it into an array instead of replacing it.
      overwrite => [ "message" ]
    }
    date {
      match => [ "timestamp", "yyyy/MM/dd HH:mm:ss" ]
      target => "@timestamp"
    }
  }
}

# Write to one index per day; the name matches the nginx-logs-* template pattern.
output {
  elasticsearch {
    hosts => ["http://es01:9200"]
    index => "nginx-logs-%{+YYYY.MM.dd}"
  }
}
14.2.4 Kibana 可视化
// Nginx 访问日志仪表盘配置
// 1. 请求量时序图
// Line chart: document count bucketed over time.
// The time axis uses `@timestamp`, the date field defined in the nginx-logs
// index template; there is no mapped `time` date field to bucket on.
{
  "title": "Nginx Requests Over Time",
  "type": "line",
  "metrics": [
    {
      "aggregation": "count",
      "field": "_doc"
    }
  ],
  "groupBy": [
    {
      "field": "@timestamp",
      "interval": "auto"
    }
  ]
}
// 2. 状态码分布
// Pie chart: document count broken down by the `status` field (10 slices).
{
  "title": "HTTP Status Distribution",
  "type": "pie",
  "metrics": [
    { "aggregation": "count", "field": "_doc" }
  ],
  "breakdown": [
    { "field": "status", "size": 10 }
  ]
}
// 3. Top 10 请求路径
// Table: document count per `path`, 10 rows, sorted descending.
{
  "title": "Top 10 Request Paths",
  "type": "table",
  "metrics": [
    { "aggregation": "count", "field": "_doc" }
  ],
  "dimensions": [
    { "field": "path", "size": 10, "sort": "desc" }
  ]
}
// 4. 慢请求分析
// Data table: requests whose `request_time` is at least 1, showing the listed columns.
{
  "title": "Slow Requests (>1s)",
  "type": "data_table",
  "query": {
    "range": {
      "request_time": { "gte": 1 }
    }
  },
  "columns": [
    "client_ip",
    "method",
    "path",
    "status",
    "request_time"
  ]
}
14.2.5 告警规则
# 5xx 错误率告警
# Every 5 minutes: count all nginx log documents of the last 5 minutes and the
# subset with status >= 500, then notify Slack when the ratio exceeds 5%.
#
# - The total comes from hits.total instead of a value_count on `_id`, which
#   would require fielddata on the `_id` field.
# - The search response is exposed to scripts under ctx.payload.aggregations
#   (not ctx.payload.aggs).
# - The script guards against an empty window and forces floating-point division.
# - The webhook must POST, and its body must be a string containing the JSON payload.
PUT /_watcher/watch/high_error_rate
{
  "trigger": {
    "schedule": { "interval": "5m" }
  },
  "input": {
    "search": {
      "request": {
        "indices": ["nginx-logs-*"],
        "body": {
          "size": 0,
          "query": {
            "range": {
              "@timestamp": {
                "gte": "now-5m"
              }
            }
          },
          "aggs": {
            "errors": {
              "filter": {
                "range": { "status": { "gte": 500 } }
              }
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "source": "def total = ctx.payload.hits.total; if (total == 0) { return false; } return (ctx.payload.aggregations.errors.doc_count * 1.0 / total) > 0.05;"
    }
  },
  "actions": {
    "slack": {
      "webhook": {
        "method": "post",
        "url": "https://hooks.slack.com/services/xxx",
        "headers": {
          "Content-Type": "application/json"
        },
        "body": "{\"text\": \"Alert: Nginx 5xx error rate > 5% in last 5 minutes\"}"
      }
    }
  }
}
14.3 电商搜索系统
14.3.1 需求分析
- 商品名称、描述的全文搜索
- 支持拼音搜索
- 分类筛选、价格区间筛选
- 品牌筛选、属性筛选
- 排序(价格、销量、新品)
- 高亮显示匹配词
- 自动补全
14.3.2 索引设计
# Product index with IK + pinyin analysis, an edge n-gram autocomplete
# sub-field and a completion suggester field.
#
# - pinyin_filter keeps the original token (keep_original) so Chinese queries
#   analyzed with ik_smart still match `name`; without it only pinyin tokens
#   are indexed. keep_joined_full_pinyin adds the joined pinyin of each token
#   (e.g. "shouji") so a full-pinyin query can match as a single term.
# - name.autocomplete sets its own search_analyzer: multi-fields do not inherit
#   the parent's analyzers, and edge n-grams should only be produced at index time.
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_joined_full_pinyin": true,
          "keep_original": true
        },
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "ik_pinyin_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["pinyin_filter", "lowercase"]
        },
        "autocomplete_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["edge_ngram_filter", "lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "keyword" },
      "name": {
        "type": "text",
        "analyzer": "ik_pinyin_analyzer",
        "search_analyzer": "ik_smart",
        "fields": {
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer",
            "search_analyzer": "ik_smart"
          }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "category": {
        "type": "keyword"
      },
      "category_path": {
        "type": "keyword"
      },
      "brand": {
        "type": "keyword"
      },
      "price": {
        "type": "scaled_float",
        "scaling_factor": 100
      },
      "original_price": {
        "type": "scaled_float",
        "scaling_factor": 100
      },
      "stock": { "type": "integer" },
      "sales_count": { "type": "integer" },
      "rating": { "type": "float" },
      "tags": { "type": "keyword" },
      "attributes": {
        "type": "nested",
        "properties": {
          "name": { "type": "keyword" },
          "value": { "type": "keyword" }
        }
      },
      "is_active": { "type": "boolean" },
      "created_at": { "type": "date" },
      "updated_at": { "type": "date" },
      "suggest": {
        "type": "completion",
        "analyzer": "ik_max_word",
        "preserve_separators": true,
        "preserve_position_increments": true,
        "max_input_length": 50
      }
    }
  }
}
14.3.3 搜索实现
# Product search: a multi_match query restricted to active, in-stock products,
# re-scored by sales/rating functions, with a category post_filter,
# highlighting, and category/brand/price-range aggregations.
POST /products/_search
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "iPhone 手机",
                "fields": ["name^3", "description", "tags"],
                "type": "best_fields"
              }
            }
          ],
          "filter": [
            { "term": { "is_active": true } },
            { "range": { "stock": { "gt": 0 } } }
          ]
        }
      },
      "functions": [
        {
          "filter": { "range": { "sales_count": { "gte": 1000 } } },
          "weight": 2
        },
        {
          "filter": { "range": { "rating": { "gte": 4.5 } } },
          "weight": 1.5
        },
        {
          "field_value_factor": {
            "field": "sales_count",
            "factor": 0.0001,
            "modifier": "log1p",
            "missing": 1
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  },
  "post_filter": {
    "bool": {
      "should": [
        { "term": { "category": "electronics" } }
      ]
    }
  },
  "highlight": {
    "fields": {
      "name": {
        "pre_tags": ["<em>"],
        "post_tags": ["</em>"]
      },
      "description": {
        "fragment_size": 100,
        "number_of_fragments": 3
      }
    }
  },
  "aggs": {
    "categories": {
      "terms": { "field": "category", "size": 20 }
    },
    "brands": {
      "terms": { "field": "brand", "size": 30 }
    },
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "key": "0-500", "to": 500 },
          { "key": "500-1000", "from": 500, "to": 1000 },
          { "key": "1000-3000", "from": 1000, "to": 3000 },
          { "key": "3000-5000", "from": 3000, "to": 5000 },
          { "key": "5000+", "from": 5000 }
        ]
      }
    }
  }
}
14.3.4 自动补全
# Completion suggester on the `suggest` field for the prefix "iphon", with
# fuzzy matching and duplicate suggestions skipped; only name, price and
# brand are returned from _source.
POST /products/_search
{
  "_source": ["name", "price", "brand"],
  "suggest": {
    "product_suggest": {
      "prefix": "iphon",
      "completion": {
        "field": "suggest",
        "size": 10,
        "skip_duplicates": true,
        "fuzzy": { "fuzziness": "AUTO" }
      }
    }
  }
}
14.3.5 分类聚合筛选
# Match products by name and compute facets over the matching set:
# category, brand and tag term buckets plus price statistics.
POST /products/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "手机" } }
      ]
    }
  },
  "aggs": {
    "selected_category": { "terms": { "field": "category" } },
    "selected_brand": { "terms": { "field": "brand" } },
    "selected_tags": { "terms": { "field": "tags", "size": 20 } },
    "price_stats": { "stats": { "field": "price" } }
  }
}
14.4 全文搜索系统
14.4.1 需求分析
- 文档标题和内容的全文检索
- 支持中英文混合搜索
- 高亮显示匹配内容
- 相关文档推荐
- 搜索建议和纠错
14.4.2 索引设计
# Document index: IK-based content analyzer, an edge n-gram autocomplete
# sub-field on `title`, and a completion suggester field.
# title.autocomplete uses content_analyzer at search time so the query text is
# not expanded into edge n-grams as well; n-grams are only needed at index time.
PUT /documents
{
  "settings": {
    "number_of_shards": 2,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "content_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "asciifolding"]
        },
        "autocomplete_analyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "edge_ngram_filter"]
        }
      },
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 15
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": { "type": "keyword" },
      "title": {
        "type": "text",
        "analyzer": "content_analyzer",
        "fields": {
          "keyword": { "type": "keyword" },
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer",
            "search_analyzer": "content_analyzer"
          }
        }
      },
      "content": {
        "type": "text",
        "analyzer": "content_analyzer"
      },
      "author": {
        "type": "keyword"
      },
      "department": {
        "type": "keyword"
      },
      "tags": {
        "type": "keyword"
      },
      "file_type": {
        "type": "keyword"
      },
      "created_at": {
        "type": "date"
      },
      "updated_at": {
        "type": "date"
      },
      "word_count": {
        "type": "integer"
      },
      "suggest": {
        "type": "completion"
      }
    }
  }
}
14.4.3 搜索实现
# Full-text search over title (boosted x2) and content, limited to documents
# updated since 2023-01-01, with <mark> highlighting and aggregations by
# department, author, file type and month of update.
POST /documents/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "elasticsearch 集群管理",
            "fields": ["title^2", "content"],
            "type": "best_fields"
          }
        }
      ],
      "filter": [
        { "range": { "updated_at": { "gte": "2023-01-01" } } }
      ]
    }
  },
  "highlight": {
    "pre_tags": ["<mark>"],
    "post_tags": ["</mark>"],
    "fields": {
      "title": { "number_of_fragments": 0 },
      "content": {
        "fragment_size": 150,
        "number_of_fragments": 3,
        "fragmenter": "span"
      }
    }
  },
  "aggs": {
    "by_department": { "terms": { "field": "department" } },
    "by_author": { "terms": { "field": "author", "size": 10 } },
    "by_file_type": { "terms": { "field": "file_type" } },
    "recent_docs": {
      "date_histogram": {
        "field": "updated_at",
        "calendar_interval": "month"
      }
    }
  }
}
14.4.4 相关文档推荐
# Related documents: more_like_this seeded with document doc_123, compared on
# title and content; returns the top 5 hits with a reduced _source.
POST /documents/_search
{
  "size": 5,
  "_source": ["title", "author", "updated_at"],
  "query": {
    "more_like_this": {
      "fields": ["title", "content"],
      "like": [
        { "_index": "documents", "_id": "doc_123" }
      ],
      "min_term_freq": 1,
      "min_doc_freq": 1,
      "max_query_terms": 25,
      "minimum_should_match": "30%"
    }
  }
}
14.4.5 搜索建议
# Completion suggester on the `suggest` field for the prefix "elasti",
# allowing up to 2 edits and skipping duplicate suggestions.
POST /documents/_search
{
  "suggest": {
    "title_suggest": {
      "prefix": "elasti",
      "completion": {
        "field": "suggest",
        "size": 5,
        "skip_duplicates": true,
        "fuzzy": { "fuzziness": 2 }
      }
    }
  }
}
14.5 性能优化实践
14.5.1 批量导入优化
# 1. 临时调整设置
# Raise the recovery throughput limit for the duration of the import.
# indices.memory.index_buffer_size is a static node-level setting and is
# rejected by the cluster settings API; to raise the indexing buffer, set
#   indices.memory.index_buffer_size: 20%
# in elasticsearch.yml on each data node and restart the node.
PUT /_cluster/settings
{
  "transient": {
    "indices.recovery.max_bytes_per_sec": "200mb"
  }
}
# 2. 禁用刷新
# Turn off periodic refresh and drop replicas on `products` while loading.
PUT /products/_settings
{
  "index": {
    "number_of_replicas": 0,
    "refresh_interval": "-1"
  }
}
# 3. 执行批量导入
# Bulk body is newline-delimited JSON: an action line followed by its document
# source, each on a single line. The second line here is a placeholder document.
POST /_bulk
{ "index": { "_index": "products" } }
{ "product_data": "..." }
# 4. 恢复设置
# Re-enable a 5s refresh interval and one replica on `products` after loading.
PUT /products/_settings
{
  "index": {
    "number_of_replicas": 1,
    "refresh_interval": "5s"
  }
}
# 5. 强制合并
# max_num_segments is a query parameter of the force merge API, not a
# request-body field; the API takes no request body.
POST /products/_forcemerge?max_num_segments=1
14.5.2 查询优化
# 1. 使用过滤器缓存
# Both clauses run in filter context: no scoring, results eligible for caching.
POST /products/_search
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "category": "electronics" } },
        { "range": { "price": { "gte": 100, "lte": 1000 } } }
      ]
    }
  }
}
# 2. 限制返回字段
# Return only the listed _source fields for every matching document.
POST /products/_search
{
  "query": { "match_all": {} },
  "_source": [
    "id",
    "name",
    "price",
    "brand"
  ]
}
# 3. 深度分页优化
# Next page of 10 hits: search_after carries the sort values (_score, id)
# of the last hit from the previous page.
POST /products/_search
{
  "size": 10,
  "sort": [
    "_score",
    "id"
  ],
  "search_after": [
    10.5,
    "product_123"
  ]
}
14.6 总结
本章通过三个实际案例展示了 Elasticsearch 的应用场景和实现方法:
- 日志分析平台:使用 ELK Stack 构建集中式日志分析,支持实时监控和告警
- 电商搜索系统:构建高性能商品搜索,支持全文搜索、过滤、排序和自动补全
- 全文搜索系统:构建企业内部文档搜索,支持高亮、相关推荐等功能
这些案例可以作为实际项目的参考模板,根据业务需求进行调整和扩展。