zoukankan      html  css  js  c++  java
  • es学习

    1.官网直接安装

    官网下载:https://www.elastic.co/cn/downloads/elasticsearch

    mac安装es:

    下载完成后,运行bin文件夹下的elasticsearch文件即可启动,注意jdk版本要正确。因为我电脑上有两个jdk版本,默认是jdk7,所以需要先修改环境变量,切换到es支持的jdk版本:

    执行vim ~/.bash_profile

     

    按i进入编辑模式,修改JAVA_HOME等jdk相关配置后,按esc退出编辑模式,输入:wq保存并退出vim,再执行source ~/.bash_profile使修改生效

    安装成功,访问http://localhost:9200/

    2.docker安装

    mac安装docker:https://www.runoob.com/docker/macos-docker-install.html

    3.安装kibana

    下载地址:https://www.elastic.co/cn/downloads/kibana

    安装:打开bin文件夹,打开文件kibana

    安装遇到错误:

          Error: getaddrinfo ENOTFOUND localhost,是由于localhost没有绑定到127.0.0.1,在/etc/hosts文件中添加一行 127.0.0.1 localhost 即可解决

    启动后,在浏览器上打开 http://localhost:5601/

    如果想修改Kibana连接的Elasticsearch地址,或是Kibana自身的端口5601,可以在Kibana目录下的config下面的kibana.yml文件中进行修改; 

    4.es分词器安装

    安装指南:https://github.com/medcl/elasticsearch-analysis-ik

    两种安装方式:

    a.下载解压后安装,下载地址:https://github.com/medcl/elasticsearch-analysis-ik/releases

    b.直接命令安装:./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.9.0/elasticsearch-analysis-ik-7.9.0.zip

    安装完成后重启es才能生效(注意:ik插件版本必须与es版本完全一致,请按实际的es版本替换上面链接中的版本号)

    5.term vectors

    官网文档:https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-term-info

    PUT /my-index-000001
    {
      "settings": {
        "index": {
          "number_of_shards": 1,
          "number_of_replicas": 0
        },
        "analysis": {
          "analyzer": {
            "fulltext_analyzer": {
              "type": "custom",
              "tokenizer": "whitespace",
              "filter": ["lowercase", "type_as_payload"]
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "text": {
            "type": "text",
            "term_vector": "with_positions_offsets_payloads",
            "store": true,
            "analyzer": "fulltext_analyzer"
          },
          "fullname": {
            "type": "text",
            "term_vector": "with_positions_offsets_payloads",
            "analyzer": "fulltext_analyzer"
          }
        }
      }
    }
    
    PUT /my-index-000001/_doc/1
    { "fullname": "John Doe", "text": "test test test " }

    PUT /my-index-000001/_doc/2?refresh=wait_for
    { "fullname": "Jane Doe", "text": "Another test ..." }

    PUT /my-index-000001/_doc/3?refresh=wait_for
    { "fullname": "huyanxia liangming", "text": "test Another baby ..." }
    
    GET /my-index-000001/_termvectors/1
    {
      "fields": ["text"],
      "offsets": true,
      "payloads": true,
      "positions": true,
      "term_statistics": true,
      "field_statistics": true
    }
    
    GET /my-index-000001/_termvectors
    {
      "doc": {
        "fullname": "John Doe diannao",
        "text": "test test test"
      },
      "filter": { "max_num_terms": 3, "min_term_freq": 1, "min_doc_freq": 1 }
    }

    6.聚合计算,es版本7.9.1

    PUT /user_profiles1
    {
      "settings": {
        "index": {
          "number_of_shards": "32",
          "number_of_replicas": "1"
        }
      },
      "mappings": {
        "properties": {
          "type": { "type": "keyword" },
          "user_id": { "type": "keyword" },
          "item_id": { "type": "keyword" },
          "boost": { "type": "double" },
          "created": {
            "type": "date",
            "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "keywords": {
            "type": "nested",
            "properties": {
              "word": { "type": "keyword" },
              "weight": { "type": "double" }
            }
          }
        }
      }
    }
    
    PUT /user_profiles1/_doc/1_1_1001
    {
      "type": "1",
      "user_id": "1",
      "item_id": "1001",
      "factor": 1.2,
      "created": "2020-09-07 14:54:37",
      "keywords": [
        { "word": "中国", "weight": 3.2 },
        { "word": "美国", "weight": 1.4 }
      ]
    }

    PUT /user_profiles1/_doc/1_1_1002
    {
      "type": "1",
      "user_id": "1",
      "item_id": "1002",
      "factor": 1.2,
      "created": "2020-09-07 14:54:37",
      "keywords": [
        { "word": "中国辅导费", "weight": 6.2 },
        { "word": "美国当时的", "weight": 1.9 }
      ]
    }
    POST /user_profiles1/_search
    {
      "size": 0,
      "query": {
        "bool": {
          "must": [
            { "terms": { "type": ["1"] } },
            { "term": { "user_id": { "value": "1" } } },
            { "range": { "created": { "gte": "2020-09-07 14:54:37" } } }
          ]
        }
      },
      "aggs": {
        "agg_keywords": {
          "nested": { "path": "keywords" },
          "aggs": {
            "agg_word": {
              "terms": {
                "field": "keywords.word",
                "order": { "agg_score": "desc" },
                "size": 2 // determines how many buckets are returned
              },
              "aggs": {
                "agg_score": {
                  "sum": { "field": "keywords.weight" }
                }
              }
            }
          }
        }
      }
    }
    

      7.从本地读取文件 

    8.termVector的Java API用法(es版本2.1)

    //第一种
    TermVectorsResponse termVectorResponse = ElasticSearchUtils.getEsClient()
                    .prepareTermVectors()
                    .setIndex("knowledge_items")
                    .setType("knowledge_items")
                    .setId(itemId)
                    .setSelectedFields("content")
                    .setTermStatistics(true)
                    .setFieldStatistics(false)
                    .setOffsets(false)
                    .setPayloads(false)
                    .setPositions(false)
                    .execute()
                    .actionGet();
    //第二种
    TermVectorsRequest termVectorsRequest = new TermVectorsRequest();
     //设置参数            ElasticSearchUtils.getEsClient().termVectors(termVectorsRequest).actionGet();

     结果json化输出

     try {
                XContentBuilder builder = XContentFactory.jsonBuilder();
                builder.startObject();
                termVectorResponse.toXContent(builder, ToXContent.EMPTY_PARAMS);
                builder.endObject();
                System.out.println("json termVectorResponse:" + builder.string());
            } catch (IOException e) {
                e.printStackTrace();
            }
    

     结果遍历

     

    // Walk every field of the term-vector response, collect per-term statistics and turn them
    // into tf-idf weighted keywords. itemBoost, userActionTypeEnum and keyWordEntities are
    // declared outside this snippet.
    try {
        Fields fields = termVectorResponse.getFields();
        for (String field : fields) {
            Terms terms = fields.terms(field);
            // Number of documents containing this field (part of field_statistics).
            int docCount = terms.getDocCount();
            TermsEnum termsEnum = terms.iterator();
            int currentTotalTermFreq = 0;
            List<TermInfoEntity> termInfoEntities = new ArrayList<>();
            BytesRef term;
            // Visit each term of the field; next() returns null once the terms are exhausted.
            while ((term = termsEnum.next()) != null) {
                String termName = term.utf8ToString();
                // Skip purely numeric terms and single-character terms.
                if (NumUtils.isNum(termName) || termName.length() == 1) {
                    // Pass the term as a logging argument so the {} placeholder is filled in.
                    LOG.info("termName filter:{}", termName);
                    continue;
                }
                int docFreq = termsEnum.docFreq();
                int termFreq = termsEnum.postings(null, PostingsEnum.FREQS).freq();
                currentTotalTermFreq += termFreq;

                termInfoEntities.add(new TermInfoEntity(termName, termFreq, docFreq));
            }
            // Lambdas may only capture effectively final locals, hence the copies.
            int finalCurrentTotalTermFreq = currentTotalTermFreq;
            double finalItemBoost = itemBoost;
            // Compute tf-idf for every collected term.
            termInfoEntities.forEach(termInfoEntity -> {
                double tf = (double) termInfoEntity.getTermFreq() / (double) finalCurrentTotalTermFreq;
                // Divide in floating point: integer division would truncate the ratio
                // (e.g. 5/2 -> 2, 1/2 -> 0) before the logarithm is taken.
                double idf = Math.log10((double) docCount / termInfoEntity.getDocFreq()) + 1;
                double tfIDf = NumUtils.doubleValueScale(6, tf * idf);
                KeyWordEntity keyWordEntity = new KeyWordEntity(termInfoEntity.getTermName(), tfIDf * userActionTypeEnum.getBoost() * finalItemBoost);
                keyWordEntities.add(keyWordEntity);
            });
        }
    } catch (IOException e) {
        LOG.error("es termVectorResponse 遍历失败:", e);
    }
    

      

      

  • 相关阅读:
    用SNMP协议实现系统信息监控--Windows Server 2008
    Apache与Tomcat区别联系
    Oracle数据库的创建与验证
    oracle监听服务开启
    【VS Code 开发工具】在VS Code中使用Markdown语法
    【SQLServer数据库】SQLServer分库分表
    【SQLServer数据库】SQLServer死锁与优化
    【SQLServer数据库】SQLServer悲观锁和乐观锁
    【HTTP】HTTP Body
    【SQLServer数据库】SQLServer视图
  • 原文地址:https://www.cnblogs.com/zhima-hu/p/13613908.html
Copyright © 2011-2022 走看看