zoukankan      html  css  js  c++  java
  • Elasticsearch【基础入门】

    一.操作index

    1.查看index
    GET /_cat/indices?v
    
    2.增加index
    PUT /index名
    
    3.删除index
    DELETE /index名
    

    二.操作document

    1.新增document

    --PUT /index名/type名/id
    PUT /movie_index/movie/1
    { "id":1,
      "name":"operation red sea",
      "doubanScore":8.5,
      "actorList":[  
        {"id":1,"name":"zhang yi"},
        {"id":2,"name":"hai qing"},
        {"id":3,"name":"zhang han yu"}
      ]
    }
    PUT /movie_index/movie/2
    {
      "id":2,
      "name":"operation meigong river",
      "doubanScore":8.0,
      "actorList":[  
        {"id":3,"name":"zhang han yu"}
      ]
    }
    

    注意: 如果之前没建过 index 或者 type,es 会自动创建。

    2.查询type 全部数据

    GET /index名/type名/_search
    

    3.查找指定 id 的 document 数据

    GET /index名/type名/id值
    

    4.修改 document

    修改分两种: 整体替换 和 只修改某个字段

    • 整体修改:和新增document差不多

      PUT /movie_index/movie/3
      {
        "id":"3",
        "name":"incident red sea",
        "doubanScore":"8.0",
        "actorList":[  
          {"id":"1","name":"zhang chen"}
        ]
      }
      
    • 只修改某个字段 :使用post方法

      POST /movie_index/movie/3/_update
      {
        "doc": {
          --字段值 : 更新后的值
          "doubanScore":"8.1" 
        }
      }
      
      

    5.删除一个 document

    DELETE /movie_index/movie/3
    

    6.条件查询

    原始数据格式
    { "id":1,
      "name":"operation red sea",
      "doubanScore":8.5,
      "actorList":[  
        {"id":1,"name":"zhang yi"},
        {"id":2,"name":"hai qing"},
        {"id":3,"name":"zhang han yu"}
      ]
    }
    
    查询全部
    GET /movie_index/movie/_search
    {
      "query": {
        "match_all": {}
      }
    }
    ------等价于
    GET /movie_index/movie/_search
    
    按照指定字段值查询
    GET /movie_index/movie/_search
    {
      "query": {
        "match": {
          "name": "sea" --字段值
        }
      }
    }
    
    按照字段子属性查询
    GET /movie_index/movie/_search
    {
      "query": {
        "match": {
          "actorList.name": "zhang" --json数组取子元素字段
        }
      }
    }
    

    7.按照短语整体查询

    按照短语查询是指: 查询内容仍然会被分词, 但要求分出的所有词都出现在该 field 中, 并且顺序一致、位置相邻, 即把整个短语当作一个整体来匹配

    GET /movie_index/movie/_search
    {
      "query": {
        "match_phrase": {
          "name": "operation red"
        }
      }
    }
    

    说明: 把operation red作为一个整体来看待

    对比一下

    --包含 operation 或者 red 的都会被查找出来
    GET /movie_index/movie/_search
    {
      "query": {
        "match": {
          "name": "operation red"
        }
      }
    }
    

    8.模糊查询

    校正匹配分词,当一个单词都无法准确匹配,es 通过一种算法对非常接近的单词也给与一定的评分,能够查询出来,但是消耗更多的性能。

    GET /movie_index/movie/_search
    {
      "query": {
        "fuzzy": {
          "name": "red"
        }
      }
    }
    

    9.查询后过滤

    GET /movie_index/movie/_search
    {
      "query": {
        "match": {
          "name": "red"
        }
      },
      "post_filter": {
        "term": {
          "actorList.id": "3"
        }
      }
    }
    

    10.查询前过滤(推荐)

    --条件:actorList.id=3 and actorList.id=1 and name contains "zhang" (filter 数组中的多个条件是"且"的关系)
    GET movie_index/movie/_search
    {
      "query": {
        "bool": {
          "filter": [
            {"term": 
              {"actorList.id": 3}
            },
            {
              "term":
                {"actorList.id": 1}
            }
          ],
          "must": 
            {"match": {
              "name": "zhang"
            }}
          
        }
      }
    }
    
    must、should、must_not区别

    must可以理解为 &,should理解为 |, must_not理解为!【与、或、非的关系】

    must+should的使用可以理解为:先按照must过滤,过滤出来的结果的score分数增加。should只是辅助作用

    must

    年龄39 且 性别'女'

    GET /bank/_search
    {
      "query": {
        "bool": {
          "must": [
            {"match": {
              "age": "39"
            }},
            {"match": {
              "gender": "F"
            }}
          ]
        }
      }
    }
    
    should

    年龄39 或 性别'女'

    GET /bank/_search
    {
      "query": {
        "bool": {
          "should": [
            {"match": {
              "age": "39"
            }},
            {"match": {
              "gender": "F"
            }}
          ]
        }
      }
    }
    
    must_not

    年龄不是39 且 年龄不是40

    GET /bank/_search
    {
      "query": {
        "bool": {
          "must_not": [
            {"match": {
              "age": "39"
            }},
            {"match": {
              "age": "40"
            }}
          ]
        }
      }
    }
    
    must+should

    结果和must结果一样,不同就是“must+should”的结果的score增加。

    GET /bank/_search
    {
      "query": {
        "bool": {
          "must": [
            {"match": {
              "age": "39"
            }}
          ],
          "should": [
            {"match": {
              "gender": "F"
            }}
          ]
        }
      }
    }
    

    11.按照范围过滤

    lt:小于,lte:小于等于,gt:大于,gte:大于等于

    GET movie_index/movie/_search
    {
      "query": {
        "bool": {
          "filter": {
            "range": {
              "doubanScore": {
                "gt": 5,
                "lt": 9
              }
            }
          }
        }
      }
    }
    

    12.排序

    GET movie_index/movie/_search
    {
      "query":{
        "match": {"name":"red operation"}
      }
      , "sort": [
        {
          "doubanScore": {  --指定排序字段
            "order": "desc" --指定排序规则
          }
        }
      ]
    }
    

    13.分页查询

    GET movie_index/movie/_search
    {
      "query": { "match_all": {} },
      "from": 10, --从第几条开始查询
      "size": 10  --展示几条
    }
    

    14.聚合

    select count(*) from bank group by gender
    GET /bank/_search
    {
      
      "aggs": {
        "groupby_gender": {
          "terms": {
            "field": "gender.keyword",
            "size": 1
          }
        }
      }
    }
    
    多组聚合

    相当于 SQL 中两个 group by 语句的查询结果

    select sum(balance), max(balance) from .. group by gender

    select sum(balance) from .. group by age

    GET /bank/_search
    {
      "query": {
        "match": {
          "address": "Terrace"
        }
      }, 
      
      "aggs": {
        --按照gender聚合
        "groupby_gender": {
          "terms": {
            "field": "gender.keyword",
            "size": 2
          },
          "aggs": {
            "b_sum": {
              "sum": {
                "field": "balance"
              }
            },
            "b_max":{
              "max": {
                "field": "balance"
              }
            }
          }
        },
        ----按照age聚合
        "groupby_age": {
          "terms": {
            "field": "age",
            "size": 100
          },
          "aggs": {
            "b_sum": {
              "sum": {
                "field": "balance"
              }
            }
          }
        }
      },
      "sort": [
        {
          "age": {
            "order": "desc"
          }
        }
      ],
      "_source": ["balance", "age"]
    }
    

    三. Scala API插入数据到ES

    使用java API同样可以实现

    1.ES中新建测试Index

    GET /user/_search --查询
    
    --向Index插入一条数据,同时创建Index
    PUT /user/_doc/1
    {
      "name":"zhangsan",
      "age":10
    }
    

    2.User样例类

    case class User(name:String,age:Int)
    

    3.ES工具类

    import io.searchbox.client.{JestClient, JestClientFactory}
    import io.searchbox.client.config.HttpClientConfig
    import io.searchbox.core.{Bulk, Index}
    
    
    /**
     * @description: ES工具类
     * @author: HaoWu
     * @create: 2020年09月09日
     */
    object ESUtil {

      import scala.collection.JavaConverters._

      // Elasticsearch HTTP endpoints. The config builder is given a Java collection here,
      // hence `.asJava`; a single URL can be configured instead of a collection.
      val esServerUrl = Set("http://hadoop102:9200", "http://hadoop103:9200", "http://hadoop104:9200").asJava

      // Client factory, configured once when this object is initialised.
      private val factory = new JestClientFactory
      // NOTE(review): the timeouts are presumably milliseconds (10 s) — confirm against the Jest docs.
      var conf: HttpClientConfig = new HttpClientConfig.Builder(esServerUrl)
        .multiThreaded(true)
        .maxTotalConnection(100)
        .connTimeout(10 * 1000)
        .readTimeout(10 * 1000)
        .build()
      factory.setHttpClientConfig(conf)

      /**
       * Returns a Jest client obtained from the shared factory.
       * Callers are responsible for closing it.
       */
      def getESClient(): JestClient = factory.getObject

      /**
       * Builds the index-action builder for one input element.
       *
       * - `(id, doc)`: `doc` is indexed under `id.toString`. The id is converted rather than
       *   matched as a String so that numeric ids such as `(99, doc)` are honoured; previously
       *   the bulk path fell through for them and indexed the whole tuple as the document.
       * - anything else: the value itself is the document and no explicit id is set.
       *
       * A document may be a case-class instance or a JSON string.
       */
      private def indexBuilder(source: Any): Index.Builder = source match {
        case (id, doc) => new Index.Builder(doc).id(id.toString)
        case doc       => new Index.Builder(doc)
      }

      /**
       * Inserts a single document.
       *
       * @param index  target index
       * @param source either a document, or an `(id, document)` tuple
       */
      def insertSingle(index: String, source: Any): Unit = {
        val client: JestClient = getESClient()
        try {
          val action = indexBuilder(source)
            .index(index)
            .`type`("_doc")
            .build()
          client.execute(action)
        } finally {
          // Release the client even when execute() throws.
          client.close()
        }
      }

      /**
       * Inserts many documents with one bulk request.
       *
       * @param index   target index
       * @param sources documents or `(id, document)` tuples; an Iterator so it can be fed
       *                directly from a mapPartitions-style batch
       */
      def insertBluk(index: String, sources: Iterator[Object]): Unit = {
        // Nothing to index: skip creating a client and sending an empty bulk request.
        if (sources.hasNext) {
          val client: JestClient = getESClient()
          try {
            // Index and type are set once as bulk-level defaults, so the actions omit them.
            val builder: Bulk.Builder = new Bulk.Builder()
              .defaultIndex(index)
              .defaultType("_doc")
            sources.foreach(source => builder.addAction(indexBuilder(source).build()))
            client.execute(builder.build())
          } finally {
            // Release the client even when execute() throws.
            client.close()
          }
        }
      }
    }
    
    

    4.测试

    插入单条数据
        val source1 = User("lisi", 20) //id随机生成
        val source2 = ("11",User("lisi", 20)) //id为11
        insertSingle("user", source1)
        insertSingle("user", source2)
    

    查询结果

          {
            "_index": "user",
            "_type": "_doc",
            "_id": "pwvTcXQBrKDUC6YPHEQZ",
            "_score": 1,
            "_source": {
              "name": "lisi",
              "age": 20
            }
          },
    	   {
            "_index": "user",
            "_type": "_doc",
            "_id": "11",
            "_score": 1,
            "_source": {
              "name": "lisi",
              "age": 20
            }
          }
    
    批量插入数据
    //不指定id 和 指定id
    val sources = Iterator(User("wangwu", 21), (99,User("zhaoliu", 30)))
    insertBluk("user",sources)
    

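    查询结果(注意: 第二条数据的 id 是 Int 类型的 99; 如果 insertBluk 只匹配 String 类型的 id, 整个元组会被当作文档内容写入, 如下面的 _1、_2 字段所示, 此时 _id 由 ES 自动生成)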

         {
            "_index": "user",
            "_type": "_doc",
            "_id": "qwvgcXQBrKDUC6YPbETl",
            "_score": 1,
            "_source": {
              "name": "wangwu",
              "age": 21
            }
          },
    	  	{
            "_index": "user",
            "_type": "_doc",
            "_id": "rAvgcXQBrKDUC6YPbETl",
            "_score": 1,
            "_source": {
              "_1": 99,
              "_2": {
                "name": "zhaoliu",
                "age": 30
              }
            }
          }
    

    四.JavaAPI 条件查询ES 解析

    需求:根据日期(date)和关键字(sku_name)过滤,并且分别对年龄(age)和性别(gender)分组,根据当前页数(startPage),每页展示条数(pageSize)

    DSL查询

    GET /gmall0421_sale_detail/_search
    {
      "query": {
        "bool": {
          "filter": {
            "term": {
              "dt": "2020-09-12" //日期date
            }
          },
          "must": [
            {"match": {
              "sku_name": {
                "query": "手机小米", //关键字keyword
                "operator": "and"
              }
            }}
          ]
        }
      },
      
      "aggs": {
        "group_by_gender": {
          "terms": {
            "field": "user_gender", //按性别分组(gender)
            "size": 10
          }
        },
          "group_by_age": {
          "terms": {
            "field": "user_age",  //按年龄分组(age)
            "size": 150
          }
        }
        
      }, 
      
      
      "from": 0,  //起始偏移量(从第几条开始, 首页为0), 不是页码
      "size": 1000 //每页记录数(pageSize)
    }
    

    ESUtil

    scala代码,写DSL语句更美观

    package com.bigdata.real_time_project.gmallpublisher.uitl
    
    import io.searchbox.client.config.HttpClientConfig
    import io.searchbox.client.{JestClient, JestClientFactory}
    import io.searchbox.core.{Bulk, Index, Search, SearchResult}
    
    import scala.collection.JavaConverters._
    
    /**
     * @description: ES工具类
     * @author: HaoWu
     * @create: 2020年09月09日
     */
    object ESUtil {
      // Elasticsearch HTTP endpoints, converted to a Java collection for the Jest config builder.
      val esServerUrl = Set("http://hadoop102:9200", "http://hadoop103:9200", "http://hadoop104:9200")
        .asJava
      // Client factory, configured once when this object is initialised.
      private val factory = new JestClientFactory
      // NOTE(review): the timeouts are presumably milliseconds (10 s) — confirm against the Jest docs.
      var conf: HttpClientConfig = new HttpClientConfig.Builder(esServerUrl)
        .multiThreaded(true)
        .maxTotalConnection(100)
        .connTimeout(10 * 1000)
        .readTimeout(10 * 1000)
        .build()
      factory.setHttpClientConfig(conf)

      /**
       * Returns a Jest client obtained from the shared factory.
       * Callers are responsible for closing it.
       */
      def getESClient(): JestClient = {
        factory.getObject
      }

      /**
       * Escapes a value so it can be placed between double quotes in a JSON document.
       * Quotes, backslashes and control characters are escaped; `null` is rendered as the
       * text "null", which is what plain string interpolation would have produced.
       */
      private def escapeJson(raw: String): String = {
        val text = if (raw == null) "null" else raw
        val out = new StringBuilder(text.length + 16)
        text.foreach {
          case '"'          => out.append("\\\"")
          case '\\'         => out.append("\\\\")
          case '\n'         => out.append("\\n")
          case '\r'         => out.append("\\r")
          case '\t'         => out.append("\\t")
          case c if c < ' ' => out.append("\\u").append("%04x".format(c.toInt))
          case c            => out.append(c)
        }
        out.toString
      }

      /**
       * Builds the search DSL: filter by date, match all keyword terms against `sku_name`,
       * aggregate by gender and by age, and return one page of hits.
       *
       * @param date      value for the `dt` term filter
       * @param keyword   text matched against `sku_name` with operator "and"
       * @param startPage 1-based page number; values below 1 are treated as the first page
       * @param pageSize  number of hits per page
       * @return the request body as a JSON string
       */
      def getQueryDSL(date: String, keyword: String, startPage: Int, pageSize: Int): String = {
        // `date` and `keyword` come from the caller's request, so they are escaped before
        // being spliced into the JSON text.
        val safeDate = escapeJson(date)
        val safeKeyword = escapeJson(keyword)
        // Elasticsearch's `from` is the offset of the first hit, not a page number.
        // Computed as Long so that large page numbers cannot overflow into a negative offset.
        val offset = math.max(0L, (startPage.toLong - 1L) * pageSize)
        val dsl =
          s"""
             |{
             |  "query": {
             |    "bool": {
             |      "filter": {
             |        "term": {
             |          "dt": "${safeDate}"
             |        }
             |      },
             |      "must": [
             |        {"match": {
             |          "sku_name": {
             |            "query": "${safeKeyword}",
             |            "operator": "and"
             |          }
             |        }}
             |      ]
             |    }
             |  },
             |
             |  "aggs": {
             |    "group_by_gender": {
             |      "terms": {
             |        "field": "user_gender",
             |        "size": 10
             |      }
             |    },
             |      "group_by_age": {
             |      "terms": {
             |        "field": "user_age",
             |        "size": 150
             |      }
             |    }
             |
             |  },
             |
             |
             |  "from": ${offset},
             |  "size": ${pageSize}
             |}
             |""".stripMargin
        dsl
      }
    }
    

    查询并解析

    Java代码解析

    //前端访问:http://localhost:8070/sale_detail?date=2019-05-20&keyword=手机小米&startpage=1&size=5
    /**
     * Runs the sale-detail search and flattens the response into a map with four entries:
     * "total" (hit count reported by the search result), "detail" (list of hit sources),
     * "genderAgg" and "ageAgg" (bucket key -> document count).
     *
     * @param date      value passed to the query builder as the date filter
     * @param keyword   value passed to the query builder as the keyword
     * @param startpage page argument passed to the query builder
     * @param size      page-size argument passed to the query builder
     * @return the assembled result map
     * @throws IOException if executing the search or closing the client fails
     */
    public Map<String, Object> getSailDetailAndAggs(String date, String keyword, int startpage, int size) throws IOException {
        // 1. Query Elasticsearch; the client is closed even when execute() throws.
        JestClient client = ESUtil.getESClient();
        SearchResult searchResult;
        try {
            Search.Builder builder = new Search.Builder(ESUtil.getQueryDSL(date, keyword, startpage, size))
                    .addIndex("gmall0421_sale_detail")
                    .addType("_doc");
            searchResult = client.execute(builder.build());
        } finally {
            client.close();
        }

        // 2. Parse the response.
        Map<String, Object> result = new HashMap<>();
        // 2.1 total
        result.put("total", searchResult.getTotal());
        // 2.2 details: the source of every hit
        List<Map> details = new ArrayList<>();
        for (SearchResult.Hit<Map, Void> hit : searchResult.getHits(Map.class)) {
            details.add(hit.source);
        }
        result.put("detail", details);
        // 2.3 gender aggregation
        result.put("genderAgg", bucketCounts(searchResult, "group_by_gender"));
        // 2.4 age aggregation
        result.put("ageAgg", bucketCounts(searchResult, "group_by_age"));
        return result;
    }

    /**
     * Collects bucket key -> document count for the named terms aggregation.
     * Returns an empty map when the response does not contain that aggregation,
     * instead of failing with a NullPointerException.
     */
    private Map<String, Object> bucketCounts(SearchResult searchResult, String aggName) {
        Map<String, Object> counts = new HashMap<>();
        TermsAggregation terms = searchResult.getAggregations().getTermsAggregation(aggName);
        if (terms == null) {
            return counts;
        }
        for (TermsAggregation.Entry bucket : terms.getBuckets()) {
            counts.put(bucket.getKey(), bucket.getCount());
        }
        return counts;
    }
    
  • 相关阅读:
    文件夹对比工具
    删除eval key
    Mongodb Backup and Restore
    RabbitMQ Management界面中查看NaN的队列
    css文字描边
    网页适配 iPhoneX,就是这么简单
    VSCode 配置使用less转成.min.css压缩
    css3随着外层包裹的旋转里面的图标为正
    安装nginx+免费https证书
    在线微博数据可视化
  • 原文地址:https://www.cnblogs.com/wh984763176/p/13657140.html
Copyright © 2011-2022 走看看