第六章:查询 DSL

深入学习 Elasticsearch 的查询 DSL,包括全文查询、结构化查询、复合查询等。

最后更新: 2024-01-15
页面目录

第六章:查询 DSL

6.1 查询概述

Elasticsearch 提供了功能强大的 Query DSL,支持多种查询类型。

6.1.1 查询分类

Query DSL
├── 全文查询 (Full Text Queries)
│   ├── match
│   ├── multi_match
│   ├── query_string
│   └── simple_query_string
├── 词项查询 (Term Level Queries)
│   ├── term
│   ├── terms
│   ├── range
│   └── exists
├── 布尔查询 (Boolean Queries)
│   ├── must
│   ├── should
│   ├── must_not
│   └── filter
├── 复合查询 (Compound Queries)
│   ├── constant_score
│   ├── dis_max
│   └── function_score
└── 其他查询
    ├── match_all
    ├── match_none
    └── script

6.1.2 查询与过滤

特性 Query Filter
评分 计算相关性分数 不计算分数
性能 相对较慢 更快
缓存 不缓存 可缓存
用途 全文搜索 精确匹配

6.2 全文查询

6.2.1 match 查询

# 基本匹配
GET /products/_search
{
  "query": {
    "match": {
      "name": "iPhone"
    }
  }
}

# 布尔运算
GET /products/_search
{
  "query": {
    "match": {
      "name": {
        "query": "iPhone 15",
        "operator": "and"  # or / and
      }
    }
  }
}

# 短语匹配
GET /products/_search
{
  "query": {
    "match_phrase": {
      "name": {
        "query": "iPhone 15",
        "slop": 1  # 允许词间隔
      }
    }
  }
}

# 短语前缀
GET /products/_search
{
  "query": {
    "match_phrase_prefix": {
      "name": "iPh"
    }
  }
}

6.2.2 multi_match 查询

# 多字段匹配
GET /products/_search
{
  "query": {
    "multi_match": {
      "query": "iPhone 15 Pro",
      "fields": ["name^3", "description", "tags^2"],
      "type": "best_fields"  # best_fields / most_fields / cross_fields / phrase / phrase_prefix
    }
  }
}

# 跨字段匹配
GET /products/_search
{
  "query": {
    "multi_match": {
      "query": "John Smith",
      "fields": ["first_name", "last_name"],
      "type": "cross_fields"
    }
  }
}

6.2.3 query_string 查询

# 高级语法搜索
GET /products/_search
{
  "query": {
    "query_string": {
      "default_field": "name",
      "query": "(iPhone AND 15) OR (Samsung AND galaxy)",
      "default_operator": "AND"
    }
  }
}

# 多字段 query_string
GET /products/_search
{
  "query": {
    "query_string": {
      "fields": ["name", "description", "tags"],
      "query": "手机 -二手"  # 包含手机,不包含二手
    }
  }
}

6.3 词项查询

6.3.1 term 查询

# 精确匹配 keyword 字段
GET /products/_search
{
  "query": {
    "term": {
      "category": {
        "value": "electronics",
        "boost": 2.0
      }
    }
  }
}

# 多个精确值
GET /products/_search
{
  "query": {
    "terms": {
      "category": ["electronics", "gadgets"],
      "boost": 2.0
    }
  }
}

6.3.2 range 查询

# 数值范围
GET /products/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 1000,
        "lte": 5000,
        "boost": 2.0
      }
    }
  }
}

# 日期范围
GET /products/_search
{
  "query": {
    "range": {
      "created_at": {
        "gte": "2024-01-01",
        "lte": "now",
        "format": "yyyy-MM-dd"
      }
    }
  }
}

# 日期数学
GET /products/_search
{
  "query": {
    "range": {
      "created_at": {
        "gte": "now-30d/d",
        "lte": "now/d"
      }
    }
  }
}

6.3.3 exists 和 missing

# 字段存在
GET /products/_search
{
  "query": {
    "exists": {
      "field": "description"
    }
  }
}

# 字段不存在(missing 查询已在 5.0 版本移除,改用 must_not + exists)
GET /products/_search
{
  "query": {
    "bool": {
      "must_not": [
        { "exists": { "field": "description" } }
      ]
    }
  }
}

6.3.4 前缀和通配符

# 前缀匹配
GET /products/_search
{
  "query": {
    "prefix": {
      "name": {
        "value": "iPh",
        "case_insensitive": true
      }
    }
  }
}

# 通配符查询
GET /products/_search
{
  "query": {
    "wildcard": {
      "name": {
        "value": "iPh*",
        "case_insensitive": true
      }
    }
  }
}

# 正则查询
GET /products/_search
{
  "query": {
    "regexp": {
      "name": "iPh[0-9]+"
    }
  }
}

6.4 布尔查询

6.4.1 must(必须匹配)

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "iPhone" } },
        { "range": { "price": { "lte": 10000 } } }
      ]
    }
  }
}

6.4.2 filter(过滤器)

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "iPhone" } }
      ],
      "filter": [
        { "term": { "category": "electronics" } },
        { "range": { "price": { "gte": 5000, "lte": 10000 } } }
      ]
    }
  }
}

6.4.3 should(应该匹配)

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "手机" } }
      ],
      "should": [
        { "term": { "brand": "苹果" } },
        { "term": { "brand": "华为" } }
      ],
      "minimum_should_match": 1
    }
  }
}

6.4.4 must_not(必须不匹配)

GET /products/_search
{
  "query": {
    "bool": {
      "must_not": [
        { "term": { "status": "deleted" } },
        { "term": { "category": "二手" } }
      ]
    }
  }
}

6.4.5 综合示例

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "iPhone 手机",
            "fields": ["name^3", "description"],
            "type": "best_fields"
          }
        }
      ],
      "filter": [
        { "term": { "is_active": true } },
        { "range": { "price": { "gte": 5000 } } },
        { "range": { "stock": { "gt": 0 } } }
      ],
      "should": [
        { "term": { "category": "旗舰店" } },
        { "range": { "sales_count": { "gte": 1000 } } }
      ],
      "must_not": [
        { "term": { "tags": "二手" } }
      ],
      "minimum_should_match": 0,
      "boost": 1.0
    }
  }
}

6.5 复合查询

6.5.1 constant_score

# 固定评分查询:filter 不计算相关性,命中文档的得分恒等于 boost 值
GET /products/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "term": { "category": "electronics" }
      },
      "boost": 1.2
    }
  }
}

6.5.2 dis_max

# 最佳字段查询
GET /products/_search
{
  "query": {
    "dis_max": {
      "queries": [
        { "match": { "name": "iPhone" } },
        { "match": { "description": "iPhone" } }
      ],
      "tie_breaker": 0.3
    }
  }
}

6.5.3 function_score

# 评分函数
GET /products/_search
{
  "query": {
    "function_score": {
      "query": {
        "match": { "name": "iPhone" }
      },
      "functions": [
        {
          "filter": { "range": { "sales_count": { "gte": 1000 } } },
          "weight": 2
        },
        {
          "field_value_factor": {
            "field": "sales_count",
            "factor": 1.2,
            "modifier": "log1p",
            "missing": 1
          }
        },
        {
          "random_score": {
            "seed": 12345,
            "field": "_seq_no"
          }
        },
        {
          "gauss": {
            "price": {
              "origin": "5000",
              "scale": "2000",
              "decay": 0.5
            }
          }
        }
      ],
      "score_mode": "sum",
      "boost_mode": "multiply"
    }
  }
}

6.6 分页与排序

6.6.1 分页

GET /products/_search
{
  "from": 0,
  "size": 20,
  "query": {
    "match": { "name": "iPhone" }
  }
}

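# 深度分页问题:from + size 默认不能超过 10000(index.max_result_window),越往后翻页开销越大
# 使用 search_after:传入上一页最后一条命中结果的 sort 值,从该位置继续向后取
# 注意:排序必须包含唯一的 tiebreaker;8.x 默认限制对 _id 排序,建议配合 PIT 使用 _shard_doc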
GET /products/_search
{
  "size": 10,
  "query": { "match_all": {} },
  "sort": [
    { "price": "asc" },
    { "_id": "asc" }
  ],
  "search_after": [5000, "doc_id_123"]
}

6.6.2 排序

GET /products/_search
{
  "query": {
    "match": { "name": "iPhone" }
  },
  "sort": [
    { "_score": "desc" },
    { "price": { "order": "asc", "mode": "avg" } },
    { "sales_count": "desc" },
    { "_doc": "asc" }
  ]
}

6.7 高亮显示

GET /products/_search
{
  "query": {
    "match": { "name": "iPhone" }
  },
  "highlight": {
    "pre_tags": ["<em>"],
    "post_tags": ["</em>"],
    "fields": {
      "name": {
        "fragment_size": 100,
        "number_of_fragments": 3
      },
      "description": {}
    }
  }
}

6.8 常用查询汇总

查询类型 用途
match_all 匹配所有文档
match 全文搜索
term 精确值查询
terms 多值精确查询
range 范围查询
bool 布尔组合查询
multi_match 多字段搜索
query_string 高级语法搜索

6.9 总结

本章全面介绍了 Elasticsearch 的查询 DSL,包括全文查询、词项查询、布尔查询和复合查询等。熟练掌握这些查询语法是构建高效搜索功能的关键。下一章将学习聚合分析。