一、创建索引时,自定义拼音分词和ik分词
# Create index "my_index" with two custom analyzers that run an IK tokenizer
# followed by the "my_pinyin" and "word_delimiter" token filters.
#
# The analysis config is placed under the top-level "settings" key of the
# create-index body. NOTE(review): the original used a bare top-level "index"
# key; that form is presumably only accepted by older releases - confirm
# against the cluster version in use.
#
# Analyzers:
#   ik_pinyin_analyzer  custom analyzer name; tokenizer is ik_smart
#   pinyin_analyzer     custom analyzer name; tokenizer is ik_max_word
#
# "my_pinyin" filter options:
#   keep_separate_first_letter  when enabled, first letters are kept as
#                               separate terms, e.g. 刘德华 > l, d, h.
#                               Default: false. Note: query results may be too
#                               fuzzy because such terms are very frequent.
#   keep_full_pinyin            when enabled, e.g. 刘德华 > [liu, de, hua].
#                               Default: true.
#   keep_original               when enabled, the original input is kept as
#                               well. Default: false.
#   limit_first_letter_length   max length of the first_letter result.
#                               Default: 16.
#   lowercase                   lowercase non-Chinese letters. Default: true.
#   remove_duplicated_term      when enabled, duplicated terms are removed to
#                               save index space, e.g. de的 > de.
#                               Default: false. Note: position-related queries
#                               may be affected.
PUT /my_index
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "ik_pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "filter": ["my_pinyin", "word_delimiter"]
          },
          "pinyin_analyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": ["my_pinyin", "word_delimiter"]
          }
        },
        "filter": {
          "my_pinyin": {
            "type": "pinyin",
            "keep_separate_first_letter": false,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        }
      }
    }
  }
}
二、创建mapping时,设置字段分词(注:相同索引下建不同的type时,相同字段名属性必须设一样)
# Define the mapping for type "user" in index "my_index".
#
# userName / reason:
#   analyzer  "ik_pinyin_analyzer" is the name of the custom analyzer.
#   store     when store is true, the field's data is stored separately; if
#             that field is then requested, ES recognises it is stored and
#             loads it from the field's stored block instead of from _source.
#             Here it is disabled. The legacy string "no" is written as the
#             boolean false, which carries the same meaning.
#   boost     NOTE(review): index-time boost in mappings is deprecated in
#             recent versions - consider a query-time boost; confirm for the
#             version in use.
# userName.raw:
#   type "keyword" means this sub-field is not analyzed.
POST /my_index/user/_mapping
{
  "user": {
    "properties": {
      "id": {
        "type": "integer"
      },
      "userName": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10,
        "fielddata": true,
        "fields": {
          "raw": {
            "type": "keyword"
          }
        }
      },
      "reason": {
        "type": "text",
        "store": false,
        "term_vector": "with_positions_offsets",
        "analyzer": "ik_pinyin_analyzer",
        "boost": 10
      }
    }
  }
}
测试
# Index two test documents of type "user". Both share the same userName and
# differ only in id and reason.
PUT /my_index/user/1
{
  "id": 1,
  "userName": "刘德华",
  "reason": "大帅哥"
}

PUT /my_index/user/2
{
  "id": 2,
  "userName": "刘德华",
  "reason": "中华人民"
}
不分词查询
# Match against the keyword sub-field userName.raw using the full value.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "刘德华"
    }
  }
}

# Recorded response: both documents are returned.
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}
分词查询
# Match against the analyzed userName field using a single character.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "刘"
    }
  }
}

# Recorded response: both documents are returned.
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}
拼音分词
# Match against the reason field using the pinyin text "shuai".
GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}

# Recorded response: only document 1 (reason "大帅哥") is returned.
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}
分组聚合
# Match userName by the pinyin text "liu", return at most 2 hits, and group
# the matches with a terms aggregation on the keyword sub-field userName.raw.
GET /my_index/user/_search
{
  "size": 2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

# Recorded response: two hits, and a single bucket holding both documents.
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "刘德华",
          "doc_count": 2
        }
      ]
    }
  }
}