zoukankan      html  css  js  c++  java
  • Elasticsearch中文文档,内容不全

    注意

    内容不全,这是观看中文文档进行操作的

    文档地址

    旧版中文文档,部分内容过期 https://www.elastic.co/guide/cn/elasticsearch/guide/current/index.html

    1.0.0 基础入门

    1.1.0 你知道的,为了搜索...

    1.1.1 索引员工文档

    • megacorp 索引名称 -> 数据库
    • employee 类型名称 -> 表
    • 1 特定雇员的ID -> 主键
    • 请求体 JSON文档 -> 行
    PUT /megacorp/employee/1
    {
        "first_name" : "John",
        "last_name" :  "Smith",
        "age" :        25,
        "about" :      "I love to go rock climbing",
        "interests": [ "sports", "music" ]
    }
    PUT /megacorp/employee/2
    {
        "first_name" :  "Jane",
        "last_name" :   "Smith",
        "age" :         32,
        "about" :       "I like to collect rock albums",
        "interests":  [ "music" ]
    }
    PUT /megacorp/employee/3
    {
        "first_name" :  "Douglas",
        "last_name" :   "Fir",
        "age" :         35,
        "about":        "I like to build cabinets",
        "interests":  [ "forestry" ]
    }
    

    执行聚合"aggs"需要设置"fielddata":true

    PUT megacorp/employee/_mapping
    {
      "properties": {
        "interests":{
          "type": "text", 
          "fielddata": true
        }
      }
    }
    

    1.1.2 检索文档

    根据索引 类型 id查询指定文档

    GET /megacorp/employee/1
    

    结果

    {
      "_index": "megacorp",
      "_type": "employee",
      "_id": "1",
      "_version": 4,
      "found": true,
      "_source": {//原始JSON文档
        "first_name": "John",
        "last_name": "Smith",
        "age": 25,
        "about": "I love to go rock climbing",
        "interests": [
          "sports",
          "music"
        ]
      }
    }
    

    1.1.3 轻量搜索

    根据索引 类型查询全部文档

    GET /megacorp/employee/_search
    

    结果

    {
      "took": 1,
      "timed_out": false,
      "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": 3,//匹配到3条
        "max_score": 1,
        "hits": [//显示匹配的记录
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "2",
            "_score": 1,
            "_source": {
              "first_name": "Jane",
              "last_name": "Smith",
              "age": 32,
              "about": "I like to collect rock albums",
              "interests": [
                "music"
              ]
            }
          },
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "1",
            "_score": 1,
            "_source": {
              "first_name": "John",
              "last_name": "Smith",
              "age": 25,
              "about": "I love to go rock climbing",
              "interests": [
                "sports",
                "music"
              ]
            }
          },
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "3",
            "_score": 1,
            "_source": {
              "first_name": "Douglas",
              "last_name": "Fir",
              "age": 35,
              "about": "I like to build cabinets",
              "interests": [
                "forestry"
              ]
            }
          }
        ]
      }
    }
    

    1.1.4 使用查询表达式搜索

    查询 last_name=smith

    GET /megacorp/employee/_search
    {
        "query" : {
            "match" : {"last_name" : "Smith"}
        }
    }
    

    结果

    ...
      "hits": {
        "total": 2,
        "max_score": 0.2876821,
        "hits": [
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "2",
            "_score": 0.2876821,
            "_source": {
              "first_name": "Jane",
              "last_name": "Smith",
              "age": 32,
              "about": "I like to collect rock albums",
              "interests": [
                "music"
              ]
            }
          },
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "1",
            "_score": 0.2876821,
            "_source": {
              "first_name": "John",
              "last_name": "Smith",
              "age": 25,
              "about": "I love to go rock climbing",
              "interests": [
                "sports",
                "music"
              ]
            }
          }
        ]
      }
    ...
    

    1.1.5 更复杂的搜索

    查询 last_name=smith and age>30

    GET /megacorp/employee/_search
    {
        "query" : {
            "bool": {
                "must": {
                    "match" : {
                        "last_name" : "smith" 
                    }
                },
                "filter": {
                    "range" : {
                        "age" : { "gt" : 30 } 
                    }
                }
            }
        }
    }
    

    结果

    ...
      "hits": {
        "total": 1,
        "max_score": 0.2876821,
        "hits": [
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "2",
            "_score": 0.2876821,
            "_source": {
              "first_name": "Jane",
              "last_name": "Smith",
              "age": 32,
              "about": "I like to collect rock albums",
              "interests": [
                "music"
              ]
            }
          }
        ]
      }
    ...
    

    1.1.6 全文检索

    查询about中含有词rock climbing
    根据匹配得分_score进行排序

    GET /megacorp/employee/_search
    {
        "query" : {
            "match" : {"about" : "rock climbing"}
        }
    }
    

    结果

    ...
      "hits": {
        "total": 2,//匹配2条
        "max_score": 0.5753642,//最大得分
        "hits": [
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "1",
            "_score": 0.5753642,
            "_source": {
              "first_name": "John",
              "last_name": "Smith",
              "age": 25,
              "about": "I love to go rock climbing",
              "interests": [
                "sports",
                "music"
              ]
            }
          },
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "2",
            "_score": 0.2876821,//得分低 因为"about"只包含了"rock"
            "_source": {
              "first_name": "Jane",
              "last_name": "Smith",
              "age": 32,
              "about": "I like to collect rock albums",
              "interests": [
                "music"
              ]
            }
          }
        ]
      }
    ...
    

    1.1.7 短语搜索

    仅匹配"about"中含有"rock climbing"短语
    根据匹配得分"_score"进行排序

    GET /megacorp/employee/_search
    {
        "query" : {
            "match_phrase" : {"about" : "rock climbing"}
        }
    }
    

    结果

    ...
      "hits": {
        "total": 1,
        "max_score": 0.5753642,
        "hits": [
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "1",
            "_score": 0.5753642,
            "_source": {
              "first_name": "John",
              "last_name": "Smith",
              "age": 25,
              "about": "I love to go rock climbing",
              "interests": [
                "sports",
                "music"
              ]
            }
          }
        ]
      }
    ...
    

    1.1.8 高亮搜索

    根据匹配得分"_score"进行排序
    在"highlight"中使用"em"标签封装了"about"中匹配到的词

    GET /megacorp/employee/_search
    {
        "query" : {
            "match_phrase" : {"about" : "rock climbing"}
        },
        "highlight": {
            "fields" : {"about" : {}}
        }
    }
    

    结果

    ...
      "hits": {
        "total": 1,
        "max_score": 0.5753642,
        "hits": [
          {
            "_index": "megacorp",
            "_type": "employee",
            "_id": "1",
            "_score": 0.5753642,
            "_source": {
              "first_name": "John",
              "last_name": "Smith",
              "age": 25,
              "about": "I love to go rock climbing",
              "interests": [
                "sports",
                "music"
              ]
            },
            "highlight": {//匹配到的使用了"em"标签封装
              "about": [
                "I love to go <em>rock</em> <em>climbing</em>"
              ]
            }
          }
        ]
      }
    ...
    

    1.1.9 分析

    按照"interests"进行分组

    GET /megacorp/employee/_search
    {
      "aggs": {
        "all_interests": {//聚合桶名称
          "terms": { "field": "interests" }
        }
      }
    }
    

    结果

    ...
      "aggregations": {
        "all_interests": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "music",
              "doc_count": 2
            },
            {
              "key": "forestry",
              "doc_count": 1
            },
            {
              "key": "sports",
              "doc_count": 1
            }
          ]
        }
      }
    ...
    

    查询 last_name=smith 并按照"interests"中的内容进行分组

    GET /megacorp/employee/_search
    {
      "query": {
        "match": { "last_name": "smith"}
      },
      "aggs": {
        "all_interests": {
          "terms": {"field": "interests" }
        }
      }
    }
    

    结果

    ...
      "aggregations": {
        "all_interests": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "music",
              "doc_count": 2
            },
            {
              "key": "sports",
              "doc_count": 1
            }
          ]
        }
      }
    ...
    

    先聚合桶再进行度量

    GET /megacorp/employee/_search
    {
        "aggs" : {
            "all_interests" : {
                "terms" : { "field" : "interests" },
                "aggs" : {
                    "avg_age" : {"avg" : { "field" : "age" }}
                }
            }
        }
    }
    

    结果

    ...
      "aggregations": {
        "all_interests": {//聚合桶名称
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "music",
              "doc_count": 2,
              "avg_age": {
                "value": 28.5
              }
            },
            {
              "key": "forestry",
              "doc_count": 1,
              "avg_age": {
                "value": 35
              }
            },
            {
              "key": "sports",
              "doc_count": 1,
              "avg_age": {
                "value": 25
              }
            }
          ]
        }
      }
    ...
    

    1.2.0 集群内的原理

    1.2.1 集群健康

    GET /_cluster/health
    

    结果

    • green 所有的主分片和副本分片都正常运行。
    • yellow 所有的主分片都正常运行,但不是所有的副本分片都正常运行。
    • red 有主分片没能正常运行。
    {
      "cluster_name": "docker-cluster",
      "status": "yellow",//①
      "timed_out": false,
      "number_of_nodes": 1,
      "number_of_data_nodes": 1,
      "active_primary_shards": 23,
      "active_shards": 23,
      "relocating_shards": 0,
      "initializing_shards": 0,
      "unassigned_shards": 20,
      "delayed_unassigned_shards": 0,
      "number_of_pending_tasks": 0,
      "number_of_in_flight_fetch": 0,
      "task_max_waiting_in_queue_millis": 0,
      "active_shards_percent_as_number": 53.48837209302325
    }
    

    1.2.2 添加索引

    PUT /blogs
    {
       "settings" : {
          "number_of_shards" : 3,//分片
          "number_of_replicas" : 1//副本
       }
    }
    

    1.3.0 数据输入和输出

    1.3.1 文档元数据

    • _index 放在哪个数据库 这个名字必须小写,不能以下划线开头,不能包含逗号
    • _type 放在哪个表 可以是大写或者小写,但是不能以下划线或者句号开头,不应该包含逗号,并且长度限制为256个字符
    • _id 文档唯一标识 字符串类型,不指定会自动生成

    1.3.2 索引文档

    指定id创建文档

    PUT /website/blog/123
    {
      "title": "My first blog entry",
      "text":  "Just trying this out...",
      "date":  "2014/01/01"
    }
    

    不指定id创建文档

    POST /website/blog
    {
      "title": "My first blog entry",
      "text":  "Just trying this out...",
      "date":  "2014/01/01"
    }
    

    1.3.3 取回一个文档

    GET website/blog/123
    

    结果

    {
      "_index": "website",
      "_type": "blog",
      "_id": "123",
      "_version": 1,
      "found": true,
      "_source": {
        "title": "My first blog entry",
        "text": "Just trying this out...",
        "date": "2014/01/01"
      }
    }
    

    只返回原始文档_source

    GET website/blog/123/_source
    

    结果

    {
      "title": "My first blog entry",
      "text": "Just trying this out...",
      "date": "2014/01/01"
    }
    

    1.3.4 检查文档是否存在

    使用HEAD代替GET,只返回响应头,没有响应体

    HEAD /website/blog/123
    

    返回 200 - OK

    HEAD /website/blog/124
    

    返回 404 - Not Found

    1.3.5 更新整个文档

    删除旧文档,创建一个新文档,如果不存在就创建一个新文档

    PUT /website/blog/123
    {
      "title": "My first blog entry",
      "text":  "I am starting to get the hang of this...",
      "date":  "2014/01/02"
    }
    

    1.3.6 创建新文档

    加参数 存在不能创建

    POST /website/blog/123?op_type=create
    {
     "name":"taopanfeng" 
    }
    

    等同于下面

    POST /website/blog/123/_create
    {
     "name":"taopanfeng1" 
    }
    

    都是返回 结果

    {
      "error": {
        "root_cause": [
          {
            "type": "version_conflict_engine_exception",
            "reason": "[blog][123]: version conflict, document already exists (current version [5])",
            "index_uuid": "reL04BFdQN-YCE3l9THqjA",
            "shard": "0",
            "index": "website"
          }
        ],
        "type": "version_conflict_engine_exception",
        "reason": "[blog][123]: version conflict, document already exists (current version [5])",
        "index_uuid": "reL04BFdQN-YCE3l9THqjA",
        "shard": "0",
        "index": "website"
      },
      "status": 409
    }
    

    不加参数

    POST /website/blog/111
    {
     "name":"taopanfeng" 
    }
    

    不存在 创建 "result": "created",_version是1

    {
      "_index": "website",
      "_type": "blog",
      "_id": "111",
      "_version": 1,
      "result": "created",
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 1,
      "_primary_term": 1
    }
    

    存在 更新 "result": "updated",_version加1

    {
      "_index": "website",
      "_type": "blog",
      "_id": "111",
      "_version": 2,
      "result": "updated",
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 2,
      "_primary_term": 1
    }
    

    1.3.7 删除文档

    DELETE /website/blog/123
    

    已存在 结果 "result": "deleted"

    {
      "_index": "website",
      "_type": "blog",
      "_id": "123",
      "_version": 6,
      "result": "deleted",//删除成功
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 5,
      "_primary_term": 1
    }
    

    不存在 结果 "result": "not_found"

    {
      "_index": "website",
      "_type": "blog",
      "_id": "123",
      "_version": 1,
      "result": "not_found",//未找到
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 6,
      "_primary_term": 1
    }
    

    1.3.8 处理冲突

    两个人在购买同一种物品,原库存100,A读取到,B也读取到,A买了一件这个商品,
    此时商品库存为99,但是B读取到的仍是100,B买了一件之后写回的还是99(实际应为98),就形成了冲突

    悲观锁 我修改之后,别人才可以修改
    乐观锁 假设不会发生冲突,如果数据在读取之后被别人修改,更新就会失败
    elasticsearch就是使用乐观锁

    1.3.9 乐观并发控制

    创建一篇文章

    PUT /website/blog/888/_create
    {
     "name":"taopanfeng888" 
    }
    

    获取数据

    GET /website/blog/888
    

    结果,其中_version就是版本号

    {
      "_index": "website",
      "_type": "blog",
      "_id": "888",
      "_version": 1,
      "found": true,
      "_source": {
        "name": "taopanfeng888"
      }
    }
    

    现在修改文档,指定版本号为当前版本号1

    PUT /website/blog/888?version=1
    {
     "name":"taopanfeng888-update"
    }
    

    结果 修改成功 _version版本号+1 "result": "updated"

    {
      "_index": "website",
      "_type": "blog",
      "_id": "888",
      "_version": 2,
      "result": "updated",
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 1,
      "_primary_term": 1
    }
    

    如果仍然执行上面 version=1
    结果 修改失败 因为需要指定version等于当前版本号才可以修改

    {
      "error": {
        "root_cause": [
          {
            "type": "version_conflict_engine_exception",
            "reason": "[blog][888]: version conflict, current version [2] is different than the one provided [1]",
            "index_uuid": "reL04BFdQN-YCE3l9THqjA",
            "shard": "2",
            "index": "website"
          }
        ],
        "type": "version_conflict_engine_exception",
        "reason": "[blog][888]: version conflict, current version [2] is different than the one provided [1]",
        "index_uuid": "reL04BFdQN-YCE3l9THqjA",
        "shard": "2",
        "index": "website"
      },
      "status": 409
    }
    

    但是我们可以使用 version_type指定external来设置
    但是指定的version要大于当前版本号,小于等于都不可以

    PUT /website/blog/888?version=99&version_type=external
    {
     "name":"taopanfeng888-v99" 
    }
    

    结果 修改成功 版本号也改为了99

    {
      "_index": "website",
      "_type": "blog",
      "_id": "888",
      "_version": 99,
      "result": "updated",
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 2,
      "_primary_term": 1
    }
    

    1.3.10 文档的部分更新

    先添加一篇文章

    PUT /website/blog/3/_create
    {
      "name":"taopanfeng"
    }
    

    使用POST请求加上参数_update来对doc添加指定更新
    如果doc中指定属性不存在则添加
    如果存在 age=26 又改为 age=27 则会更新成功
    如果存在 age=27 又改为 age=27 则不会进行更新,返回 "result": "noop"
    如果为多个字段 例如 age=27 and sex=man 更新其中任何一个就可以更新成功

    POST /website/blog/3/_update
    {
      "doc": {
        "age":26
      }
    }
    

    结果 更新成功 版本号+1 "result": "updated"

    {
      "_index": "website",
      "_type": "blog",
      "_id": "3",
      "_version": 2,
      "result": "updated",
      "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
      },
      "_seq_no": 2,
      "_primary_term": 1
    }
    

    检索 GET /website/blog/3

    {
      "_index": "website",
      "_type": "blog",
      "_id": "3",
      "_version": 2,
      "found": true,
      "_source": {
        "name": "taopanfeng",
        "age": 26
      }
    }
    

    1.3.11 取回多个文档

    查询website/blog/2 website/blog/1并只显示name字段 website/pageviews/1

    GET /_mget
    {
       "docs" : [
          {
             "_index" : "website",
             "_type" :  "blog",
             "_id" :    2
          },
          {
             "_index" : "website",
             "_type" :  "blog",
             "_id" :    1,
             "_source": "name"//只显示"name"属性
          },
          {
             "_index" : "website",
             "_type" :  "pageviews",//查询类型为"pageviews"
             "_id" :    1
          }
       ]
    }
    

    结果

    {
      "docs": [
        {
          "_index": "website",
          "_type": "blog",
          "_id": "2",
          "_version": 2,
          "found": true,
          "_source": {
            "name": "taopanfeng",
            "age": 26
          }
        },
        {
          "_index": "website",
          "_type": "blog",
          "_id": "1",
          "_version": 2,
          "found": true,
          "_source": {
            "name": "taopanfeng"//指定了"_source": "name"
          }
        },
        {
          "_index": "website",
          "_type": "pageviews",
          "_id": "1",
          "found": false//未找到
        }
      ]
    }
    

    查询 id为2 1 55

    GET /website/blog/_mget
    {
       "ids" : [ "2", "1" ,"55"]
    }
    

    结果

    {
      "docs": [
        {
          "_index": "website",
          "_type": "blog",
          "_id": "2",
          "_version": 2,
          "found": true,
          "_source": {
            "name": "taopanfeng",
            "age": 26
          }
        },
        {
          "_index": "website",
          "_type": "blog",
          "_id": "1",
          "_version": 2,
          "found": true,
          "_source": {
            "name": "taopanfeng",
            "age": 26
          }
        },
        {
          "_index": "website",
          "_type": "blog",
          "_id": "55",
          "found": false
        }
      ]
    }
    

    1.3.12 代价较小的批量操作

    • 每个操作都是独立的,互不影响

    bulk允许多次 create,index,update,delete
    create 成功201 失败409
    index 成功201
    update 成功200 失败404 _id不存在
    delete 成功200 失败404 _id不存在

    POST _bulk
    { "create":  { "_index": "website", "_type": "blog", "_id": "1" }}
    { "title":    "My first blog post" }
    { "create":  { "_index": "website", "_type": "blog", "_id": "2" }}
    { "title":    "My first blog post" }
    { "index":  { "_index": "website", "_type": "blog"}}
    { "title":    "My first blog post" }
    { "delete": { "_index": "website", "_type": "blog", "_id": "1" }}
    { "update":{ "_index": "website", "_type": "blog", "_id": "2" }}
    {"doc":{"content":"I'm content...."}}
    

    改进 都是操作同一个索引 类型

    POST /website/blog/_bulk
    { "create":  {"_id": "1" }}
    { "title":    "My first blog post" }
    { "create":  {"_id": "2" }}
    { "title":    "My first blog post" }
    { "index":  {}}
    { "title":    "My first blog post" }
    { "delete": {"_id": "1" }}
    { "update":{"_id": "2" }}
    {"doc":{"content":"I'm content...."}}
    

    1.4.0 搜索——最基本的工具

    1.4.1 多索引,多类型

    //查找所有索引的所有文档
    GET _search
    
    //查找cars索引的所有文档
    GET cars/_search
    
    //*代表[0,多]
    GET website,cars/_search
    GET *sit*,*ar*/_search
    
    //it开头的索引的所有文档
    GET it*/_search
    
    //查询"site"结尾,"it"开头,含有"ar"的所有索引的所有文档
    GET *site,it*,*ar*/_search
    
    //查询"item"和"website"索引的"blog"类型的文档
    GET item,website/blog/_search
    
    //查找所有索引的"blog" "user"类型文档
    GET _all/blog,user/_search
    

    1.4.2 分页

    size默认10 代表"hits"数组显示的数量,最小为0,大于等于"total"都会显示全部
    from默认0 代表要跳过几条
    例如 一共五条 size=2 from=1 则只会显示第2 3两条数据

    GET _search
    {
     "size":10,
     "from":0
    }
    
    //等同于
    
    GET _search?size=10&from=0
    

    1.5.0 映射和分析

    1.5.1 映射

    获取映射

    GET cars/transactions/_mapping
    

    结果

    {
      "cars": {
        "mappings": {
          "transactions": {
            "properties": {
              "color": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "make": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "price": {
                "type": "long"
              },
              "sold": {
                "type": "date"
              }
            }
          }
        }
      }
    }
    
    • 字符串类型
      • text 可分词,不可聚合
      • keyword 可聚合,不可分词
    • 基本数值类型
      • long、integer、short、byte、double、float、half_float
    • 日期类型
      • date 建议存为 long类型

    创建映射字段
    设置"index": "false"不能对该字段进行搜索
    text类型默认会进行分词,也可以指定分词器"analyzer": "分词器"
    text想设置聚合需要设置 "fielddata": true

    PUT 索引/类型/_mapping/
    {
      "properties": {
        "字段名称": {
          "type": "text",
          "analyzer": "ik_max_word"
        },
        "字段名称": {
          "type": "keyword",
          "index": "false"
        },
        "字段名称": {
          "type": "float"
        }
      }
    }
    

    查看映射关系

    GET 索引/类型/_mapping
    

    删除索引

    DELETE 索引
    

    1.6.0 请求体查询

    1.6.1 查询表达式

    查询全部索引的全部类型
    请求体为空可以省略
    全部索引 _all 可以省略

    GET _search
    
    GET _search
    {}
    
    GET _search
    {
      "query": {
        "match_all": {}
      }
    }
    
    GET _all/_search
    {}
    

    指定索引 类型查询
    *可以表示零个或多个字符,例如 ca* *ar* *s 都可以匹配cars

    查询多个索引的全部类型的全部文档

    GET 索引1,索引2/_search
    

    查询多个索引的多个类型的全部文档

    GET 索引1,索引2/类型1,类型2/_search
    

    查询 price=15000

    GET cars/transactions/_search
    {
      "query": {
        "match": {
          "price": 15000
        }
      }
    }
    

    1.6.2 最重要的查询

    match_all默认的查询,匹配所有文档

    GET a1/student/_search
    {
      "query": {
    	"match_all": {}
      }
    }
    

    结果

    {
      "took": 4,//查询耗时4毫秒
      "timed_out": false,//没有超时
      "_shards": {//分片
    	"total": 5,//一共5个
    	"successful": 5,//成功5个
    	"skipped": 0,//跳过0个
    	"failed": 0//失败0个
      },
      "hits": {//查询到的数据
    	"total": 3,//查询总条数
    	"max_score": 1,//最大得分1
    	"hits": [//查询到的数据所有文档
    	  {//一个文档
    		"_index": "a1",//数据库
    		"_type": "student",//表
    		"_id": "2",//注解 每个文档的唯一标识
    		"_score": 1,//得分是1 满分是最大得分
    		"_source": {//查询到的数据 包括 字段 字段值 -> k:v
    		  "name": "大米手机",
    		  "age": 22
    		}
    	  },
    	  {
    		"_index": "a1",
    		"_type": "student",
    		"_id": "CA2Yqm0Bmr19jrNQ7nRL",
    		"_score": 1,
    		"_source": {
    		  "name": "小米手机",
    		  "age": 11
    		}
    	  },
    	  {
    		"_index": "a1",
    		"_type": "student",
    		"_id": "3",
    		"_score": 1,
    		"_source": {
    		  "name": "小米电视4K",
    		  "age": 33,
    		  "address": "安徽阜阳小米酒店101"
    		}
    	  }
    	]
      }
    }
    

    match查询
    text类型会分词查询字符串
    数字,日期,布尔或not_analyzed字符串字段,就会精准匹配

    { "match": { "tweet": "About Search" }}
    
    { "match": { "age":    26           }}
    
    { "match": { "date":   "2014-09-01" }}
    
    { "match": { "public": true         }}
    
    { "match": { "tag":    "full_text"  }}
    
    //测试笔记
    match
    查询 name=小米电视
    因为使用了分词,默认是or 所以可匹配 -> 小米 or 电视
    GET a1/_search
    {
      "query": {
        "match": {
          "name": "小米电视"
        }
      }
    }
    
    查询分词,指定and可匹配 -> 小米 and 电视
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米电视",
            "operator": "and"
          }
        }
      }
    }
    
    可以指定分词的个数,
    1 -> 匹配任意一个词
    2 -> 匹配任意两个词
    3 -> 因为超过了分词量,所以匹配不到
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米电视",
            "minimum_should_match": 1
          }
        }
      }
    }
    
    3x0.66=1.98,因为1.98<2 所以匹配任意一个
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米智能电视",
            "minimum_should_match": "66%"
          }
        }
      }
    }
    
    3x0.67=2.01,因为2.01>2 所以匹配任意两个
    GET a1/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米智能电视",
            "minimum_should_match": "67%"
          }
        }
      }
    }
    

    multi_match多字段搜索 name like '%大米%' or f1 like '%大米%'

    GET a1/student/_search
    {
      "query": {
        "multi_match": {
          "query": "大米",
          "fields": ["name","f1"]
        }
      }
    }
    

    range查询 10<=age<=20
    lt <
    lte <=
    gt >
    gte >=

    GET a1/student/_search
    {
      "query": {
        "range": {
          "age": {
            "gte": 10,
            "lte": 20
          }
        }
      }
    }
    

    term精准匹配

    GET a1/student/_search
    {
      "query": {
        "term": {
          "age": {
            "value": 11
          }
        }
      }
    }
    

    terms多值匹配,满足一个即可

    GET a1/student/_search
    {
      "query": {
        "terms": {
          "age": [11,22,77]
        }
      }
    }
    

    exists查询存在指定字段的文档

    GET a1/student/_search
    {
      "query": {
        "exists":{
          "field":"address"
        }
      }
    }
    

    1.6.3 组合多查询

    must[{1},{2}] 满足所有
    查询"name"分词有"小米"并且"age"等于11或者22

    GET a1/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    

    must_not[{1},{2}] 不满足所有
    查询"name"分词没有"小米"并且"age"不等于11或者22

    GET a1/_search
    {
      "query": {
        "bool": {
          "must_not": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    

    should[{1},{2}] 满足任意一个

    GET a1/_search
    {
      "query": {
        "bool": {
          "should": [
            {
              "match": {
                "name": "小米"
              }
            },
            {
              "terms": {
                "age": [11,22]
              }
            }
          ]
        }
      }
    }
    

    结果过滤

    查询字段只显示 name age
    并且查询 age in [11,22,77]
    GET a1/_search
    {
      "_source": ["name","age"],
      "query": {
        "terms": {
          "age": [11,22,77]
        }
      }
    }
    
    查询所有 只显示"address"字段,没有此字段的显示空
    GET a1/_search
    {
      "_source": {
        "includes": ["address"]
      }
    }
    
    查询所有只除了"address"字段,其他全显示
    GET a1/_search
    {
      "_source": {
        "excludes": ["address"]
      }
    }
    

    过滤 filter不参与评分计算(只有filter没有查询时评分为0),不会使评分对结果产生影响
    查询"name"=小米并且10<=age<=20

    GET a1/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "小米"
              }
            }
          ],
          "filter": {
            "range": {
              "age": {
                "gte": 10,
                "lte": 20
              }
            }
          }
        }
      }
    }
    

    1.7.0 排序与相关性

    1.7.1 排序

    排序
    查询"name"=小米并按照年龄降序

    GET a1/_search
    {
      "query": {
        "match": {
          "name": "小米"
        }
      },
      "sort": [
        {
          "age": {
            "order": "desc"
          }
        }
      ]
    }
    

    多字段排序 先排序第一个,第一个相同才会对第二个进行排序

    GET a1/_search
    {
      "query": {
        "match": {
          "name": "小米"
        }
      },
      "sort": [
        {"age": {"order": "desc"}},
    	{"_score": {"order": "asc"}}
      ]
    }
    

    1.7.2 什么是相关性?

    • 检索词频率 一个词在字段中出现次数越多,相关性越高
    • 反向文档频率 一个词只在一个文档中存在,相关性高,如果在十个文档中存在,相关性低
    • 字段长度准则 "a"在"ab"中相关性比在"abcd"中要高,长度越短越高

    1.8.0 索引管理

    1.8.1 创建一个索引

    手动创建索引

    PUT 索引名
    {
    	"settings":{...},
    	"mappings":{
    		"类型名称1":{...},
    		"类型名称2":{...}
    	}
    }
    

    设置禁止自动创建索引
    找到 config/elasticsearch.yml
    在每个节点下添加 action.auto_create_index: false

    1.8.2 删除一个索引

    • 删除指定索引

    DELETE /my_index

    • 删除多个索引

    DELETE /index_one,index_two
    DELETE /index_*

    • 删除全部索引

    DELETE /_all
    DELETE /*

    如果不想使用 _all 或 * 来批量删除索引
    设置elasticsearch.yml配置action.destructive_requires_name: true

    1.8.3 索引设置

    number_of_shards 分片默认5
    number_of_replicas 副本默认1

    PUT /my_temp_index
    {
        "settings": {
            "number_of_shards" :   1,
            "number_of_replicas" : 0
        }
    }
    

    可以修改副本,不能修改分片

    PUT /my_temp_index/_settings
    {
        "number_of_replicas": 1
    }
    

    1.8.4 索引别名

    创建一个索引g

    PUT g
    

    查看g

    GET g
    

    结果 此时aliases为空

    {
      "g": {
        "aliases": {},
        "mappings": {},
        "settings": {
          "index": {
            "creation_date": "1570706049853",
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "N0uDV7bmSRGBYG3Vnk51Og",
            "version": {
              "created": "6050099"
            },
            "provided_name": "g"
          }
        }
      }
    }
    

    创建一个别名g1

    PUT g/_alias/g1
    

    此时执行 GET g 和 GET g1 是一样的效果,返回

    {
      "g": {
        "aliases": {
          "g1": {}
        },
    ...
    

    批量操作
    指定g删除别名g1
    指定g添加别名g2
    指定g添加别名g3

    POST _aliases
    {
      "actions": [
        {"remove": {"index": "g","alias": "g1"}},
        {"add": {"index": "g","alias": "g2"}},
        {"add": {"index": "g","alias": "g3"}}
    
      ]
    }
    

    结果

    {
      "g": {
        "aliases": {
          "g2": {},
          "g3": {}
        },
    ...
    

    2.0.0 深入搜索

    2.1.0 结构化搜索

    2.1.1 精确值查找

    先设置类型,设置productID不分词,可以精确查找

    PUT my_store/products/_mapping
    {
      "properties": {
        "productID":{
          "type": "keyword"
        }
      }
    }
    

    添加数据

    POST /my_store/products/_bulk
    { "index": { "_id": 1 }}
    { "price" : 10, "productID" : "XHDK-A-1293-#fJ3" }
    { "index": { "_id": 2 }}
    { "price" : 20, "productID" : "KDKE-B-9947-#kL5" }
    { "index": { "_id": 3 }}
    { "price" : 30, "productID" : "JODL-X-1937-#pV7" }
    { "index": { "_id": 4 }}
    { "price" : 30, "productID" : "QQPX-R-3956-#aD8" }
    

    精确查找 price=20

    GET my_store/products/_search
    {
      "query": {
        "term" : {
          "price" : 20
        }
      }
    }
    

    查找精确值时不希望进行评分计算,只需要对文档进行包括或排除的判断
    使用 constant_score 以非评分模式执行 term 查询,这样做可以优化速度,所有匹配文档的评分统一为 1

    GET /my_store/products/_search
    {
        "query" : {
            "constant_score" : { 
                "filter" : {
                    "term" : { 
                        "price" : 20
                    }
                }
            }
        }
    }
    

    查询 productID=XHDK-A-1293-#fJ3

    GET /my_store/products/_search
    {
        "query" : {
            "constant_score" : {
                "filter" : {
                    "term" : {
                        "productID" : "XHDK-A-1293-#fJ3"
                    }
                }
            }
        }
    }
    

    2.1.2 组合过滤器

    • 布尔过滤器 bool
      • must:[{1},{2}] 都满足 and
      • must_not:[{1},{2}] 都不满足 not
      • should:[{1},{2}] 满足一个即可 or

    组合 (price=20 or productID=XHDK-A-1293-#fJ3) and price!=30

    GET /my_store/products/_search
    {
       "query" : {
          "bool" : {
            "should" : [
               { "term" : {"price" : 20}}, 
               { "term" : {"productID" : "XHDK-A-1293-#fJ3"}} 
            ],
            "must_not" : {
               "term" : {"price" : 30} 
            }
         }
       }
    }
    

    嵌套 price=30 and (productID!=JODL-X-1937-#pV7)

    GET my_store/products/_search
    {
      "query": {
        "bool": {
          "must": [
            {"term": {"price": 30}},
            {"bool": {"must_not": [
              {"term": {"productID": "JODL-X-1937-#pV7"}}
            ]}}
          ]
        }
      }
    }
    

    2.1.3 查找多个精确值

    查找 price in (20,30)

    GET /my_store/products/_search
    {
        "query" : {
            "constant_score" : {
                "filter" : {
                    "terms" : { 
                        "price" : [20, 30]
                    }
                }
            }
        }
    }
    

    2.1.4 范围

    数值范围 price >= 20 AND price < 40

    GET /my_store/products/_search
    {
        "query" : {
            "constant_score" : {
                "filter" : {
                    "range" : {
                        "price" : {
                            "gte" : 20,
                            "lt"  : 40
                        }
                    }
                }
            }
        }
    }
    

    日期范围
    定义日期类型

    PUT my_store
    {
      "mappings": {
        "products": {
          "properties": {
            "date":{
              "type": "date",
              "format": "yyyy-MM-dd HH:mm:ss"
            }
          }
        }
      }
    }
    

    插入数据

    POST /my_store/products/_bulk
    { "index": { "_id": 5 }}
    {  "date":"2019-10-10 15:35:20"}
    { "index": { "_id": 6 }}
    { "date":"2019-10-09 16:32:12"}
    { "index": { "_id": 7 }}
    { "date":"2019-09-09 16:32:12"}
    

    可以使用now表示当前时间
    因为我们定义格式为"yyyy-MM-dd HH:mm:ss",所以 y M d H m s 表示年月日时分秒

    查询date小于等于当前时间的上一个月(now-1M 表示当前时间减去一个月)

    GET my_store/products/_search
    {
      "query": {
        "range": {
          "date": {
            "lte": "now-1M"
          }
        }
      }
    }
    

    指定时间 查询大于"2019-09-10 11:11:11"

    GET my_store/products/_search
    {
      "query": {
        "range": {
          "date": {
            "gt": "2019-09-10 11:11:11"
          }
        }
      }
    }
    

    2.1.5 处理null值

    查询存在date字段的文档,并且date IS NOT NULL

    GET my_store/products/_search
    {
      "query": {
        "exists":{
          "field":"date"
        }
      }
    }
    

    2.2.0 全文检索

    写到这里,就不往下写了,这时我去看7.4的官方文档了

  • 相关阅读:
    sed替换
    【工作提升】好员工秘诀十二条
    【工作提升】怎么样做工作才有好的效果
    vim删除空行和注释
    Java实现几种常见排序方法
    数据背后的二八定律,揭示程序员担忧的主要问题
    【新闻】国内第一本介绍程序员996现象的技术图书上市,“再谈工作996,生病ICU”一文受关注
    10年磨一剑:《SOD框架“企业级”应用数据架构实战》一书正式上市
    程序员肺被切掉一块还得去加班... 再谈“工作996,生病ICU”
    win10编译pdfium
  • 原文地址:https://www.cnblogs.com/taopanfeng/p/11684483.html
Copyright © 2011-2022 走看看