zoukankan html css js c++ java

Lucene中Analyzer语句分析

Lucene 中 Analyzer 语句分析：利用 Lucene 自带的词法分析工具 Analyzer 对句子进行分词分析。

源代码如下：

 1 package com.test;
 2 
 3 import java.io.IOException;
 4 import java.io.StringReader;
 5 import java.util.List;
 6 
 7 import org.apache.lucene.analysis.Analyzer;
 8 import org.apache.lucene.analysis.SimpleAnalyzer;
 9 import org.apache.lucene.analysis.StopAnalyzer;
10 import org.apache.lucene.analysis.Token;
11 import org.apache.lucene.analysis.TokenStream;
12 import org.apache.lucene.analysis.WhitespaceAnalyzer;
13 import org.apache.lucene.analysis.standard.StandardAnalyzer;
14 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
15 import org.apache.lucene.util.Version;
16 
17 import com.bean.mashupDerscriptionTest;
18 import com.daoImpl.MashupDaoImpl;
19 import com.gargoylesoftware.htmlunit.javascript.host.Comment;
20 
21 public class KeyWordsTest {
22 
23     /**
24      * @param args
25      */
26     public static void main(String[] args) {
27         MashupDaoImpl mashupDao = new MashupDaoImpl();
28         List<mashupDerscriptionTest> list = mashupDao
29                 .findAllmashupDescripteonTest();
30         int i = 1;
31         String comment = null;
32         for (mashupDerscriptionTest mashup : list) {
33             // 描述为空去名字作为描述
34             if (mashup.getComments().equals("")) {
35                 comment = mashup.getName();
36             } else {
37                 comment = mashup.getComments();
38             }
39 //            System.out.println(comment);
40             //对读取的描述利用Lucene中的Analyzer进行句子分析产生
41             //空格及各种符号分割,去掉停止词，停止词包括 is,are,in,on,the等无实际意义的词  
42             StringReader reader = new StringReader(comment);
43             Analyzer analyzer = new StopAnalyzer();
44             TokenStream tStream = analyzer.tokenStream("", reader);
45             Token t;
46             try {
47                 while ((t = tStream.next()) != null) {
48                     //对每个单词采用
49                     System.out.print(t.termText()+" ");
50                 }
51                 System.out.println((i++)+"条描述分词结束！");
52             } catch (IOException e) {
53                 e.printStackTrace();
54             }    
55         }
56     }
57 }

　　注：数据来源于数据库……

查看全文

相关阅读:
服务器时钟同步
 vue父组件向子组件传递数值 props
sql 语句in 使用占位符
 vagrant 打包box 快速部署统一开发环境
 Memcache安装使用 linux系统
 centos 7 搭建lnmp环境搭建 yum 源安装
 vagrant搭建lnmp 环境（环境contos7+php72w+mariaDB10.2)
linux定时任务 Cron Crontab命令
 vue使用el-upload 跨域上传文件跳坑小记
 vue key得理解

原文地址：https://www.cnblogs.com/rememberme/p/Lucene_Analyzer.html