The standard analyzer (the default): splits on word boundaries and lowercases tokens, but does no stemming or stop-word removal.
GET _analyze { "analyzer": "standard", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2", "start_offset" : 0,分词 "end_offset" : 1, "type" : "<NUM>", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>", "position" : 2 }, { "token" : "brawn", "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>", "position" : 3 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>", "position" : 6 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>", "position" : 7 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>", "position" : 8 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "<ALPHANUM>", "position" : 9 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "<ALPHANUM>", "position" : 10 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>", "position" : 11 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>", "position" : 12 } ] }
The simple analyzer: splits on any non-letter character and lowercases tokens; note that the digit 2 is dropped.
GET _analyze { "analyzer": "simple", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 0 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 1 }, { "token" : "brawn", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 2 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 8 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 9 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 11 } ] }
The whitespace analyzer: splits on whitespace only and does no other processing; case, hyphens, and punctuation are preserved (Quick, brawn-foxes, evening.).
GET _analyze { "analyzer": "whitespace", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 1 }, { "token" : "Quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 2 }, { "token" : "brawn-foxes", "start_offset" : 16, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 8 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 9 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening.", "start_offset" : 62, "end_offset" : 70, "type" : "word", "position" : 11 } ] }
The stop analyzer: splits like the simple analyzer, but also removes stop words such as "in" and "the".
GET _analyze { "analyzer": "stop", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 0 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 1 }, { "token" : "brawn", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 2 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 3 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 4 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 5 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 6 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 7 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 10 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 11 } ] }
The keyword analyzer: no tokenization at all; the entire input is emitted as a single token.
GET _analyze { "analyzer": "keyword", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2 running Quick brawn-foxes leap over lazy dogs in the summer evening.", "start_offset" : 0, "end_offset" : 70, "type" : "word", "position" : 0 } ] }
The pattern analyzer: splits on a regular expression; the default pattern \W+ splits on any non-word character.
GET _analyze { "analyzer": "pattern", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "word", "position" : 0 }, { "token" : "running", "start_offset" : 2, "end_offset" : 9, "type" : "word", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "word", "position" : 2 }, { "token" : "brawn", "start_offset" : 16, "end_offset" : 21, "type" : "word", "position" : 3 }, { "token" : "foxes", "start_offset" : 22, "end_offset" : 27, "type" : "word", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "word", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "word", "position" : 6 }, { "token" : "lazy", "start_offset" : 38, "end_offset" : 42, "type" : "word", "position" : 7 }, { "token" : "dogs", "start_offset" : 43, "end_offset" : 47, "type" : "word", "position" : 8 }, { "token" : "in", "start_offset" : 48, "end_offset" : 50, "type" : "word", "position" : 9 }, { "token" : "the", "start_offset" : 51, "end_offset" : 54, "type" : "word", "position" : 10 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "word", "position" : 11 }, { "token" : "evening", "start_offset" : 62, "end_offset" : 69, "type" : "word", "position" : 12 } ] }
The english analyzer: stems tokens (running → run, foxes → fox, lazy → lazi, evening → even) and removes English stop words.
GET _analyze { "analyzer": "english", "text": "2 running Quick brawn-foxes leap over lazy dogs in the summer evening." } { "tokens" : [ { "token" : "2", "start_offset" : 0, "end_offset" : 1, "type" : "<NUM>", "position" : 0 }, { "token" : "run", "start_offset" : 2, "end_offset" : 9, "type" : "<ALPHANUM>", "position" : 1 }, { "token" : "quick", "start_offset" : 10, "end_offset" : 15, "type" : "<ALPHANUM>", "position" : 2 }, { "token" : "brawn", "start_offset" : 16, "end_offset" : 21, "type" : "<ALPHANUM>", "position" : 3 }, { "token" : "fox", "start_offset" : 22, "end_offset" : 27, "type" : "<ALPHANUM>", "position" : 4 }, { "token" : "leap", "start_offset" : 28, "end_offset" : 32, "type" : "<ALPHANUM>", "position" : 5 }, { "token" : "over", "start_offset" : 33, "end_offset" : 37, "type" : "<ALPHANUM>", "position" : 6 }, { "token" : "lazi", "start_offset" : 38, "end_offset" : 42, "type" : "<ALPHANUM>", "position" : 7 }, { "token" : "dog", "start_offset" : 43, "end_offset" : 47, "type" : "<ALPHANUM>", "position" : 8 }, { "token" : "summer", "start_offset" : 55, "end_offset" : 61, "type" : "<ALPHANUM>", "position" : 11 }, { "token" : "even", "start_offset" : 62, "end_offset" : 69, "type" : "<ALPHANUM>", "position" : 12 } ] }
Chinese text with the standard analyzer: the text is split one character at a time, which is usually not what you want for Chinese search.
POST _analyze { "analyzer": "standard", "text": "他说的确实在理" } { "tokens" : [ { "token" : "他", "start_offset" : 0, "end_offset" : 1, "type" : "<IDEOGRAPHIC>", "position" : 0 }, { "token" : "说", "start_offset" : 1, "end_offset" : 2, "type" : "<IDEOGRAPHIC>", "position" : 1 }, { "token" : "的", "start_offset" : 2, "end_offset" : 3, "type" : "<IDEOGRAPHIC>", "position" : 2 }, { "token" : "确", "start_offset" : 3, "end_offset" : 4, "type" : "<IDEOGRAPHIC>", "position" : 3 }, { "token" : "实", "start_offset" : 4, "end_offset" : 5, "type" : "<IDEOGRAPHIC>", "position" : 4 }, { "token" : "在", "start_offset" : 5, "end_offset" : 6, "type" : "<IDEOGRAPHIC>", "position" : 5 }, { "token" : "理", "start_offset" : 6, "end_offset" : 7, "type" : "<IDEOGRAPHIC>", "position" : 6 } ] }