Lucene 是一个全文检索类库,Solr、Nutch 和 Elasticsearch 都是基于 Lucene 构建的。个人认为,在学习这些高级搜索引擎应用之前,有必要先了解 Lucene。
开发环境:IntelliJ IDEA、Maven、Spring Boot
开始贴代码:
maven配置
<!-- Maven build fragment for the Lucene demo: Spring Boot parent, Thymeleaf web views, Lucene 7.1.0 modules. -->
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.4.3.RELEASE</version>
</parent>

<properties>
    <java.version>1.8</java.version>
    <!-- All Lucene modules must be on the same release; keep the version in one place. -->
    <lucene.version>7.1.0</lucene.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-thymeleaf</artifactId>
    </dependency>

    <!-- hot swapping, disable cache for template, enable live reload -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-devtools</artifactId>
        <optional>true</optional>
    </dependency>

    <!-- Lucene: core indexing and search API -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <!-- Lucene: Chinese word segmentation; the general-purpose (common) analyzers are aimed at English text -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-smartcn</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <!-- Lucene: query string parsing -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>${lucene.version}</version>
    </dependency>
    <!-- Lucene: highlighting of matched search keywords -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-highlighter</artifactId>
        <version>${lucene.version}</version>
    </dependency>

    <!-- Test-only: keep JUnit off the runtime classpath and out of the packaged jar. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Package as an executable jar/war -->
        <plugin>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-maven-plugin</artifactId>
        </plugin>
    </plugins>
</build>
辅助类
/**
 * Shared constants for the Lucene demo.
 *
 * <p>This class only holds constants, so it is {@code final} and cannot be instantiated.
 * The mixed-case names ({@code IndexDir}, {@code DataDir}, {@code ArticleDir}) are kept
 * as-is because other classes reference them.
 */
public final class LuceneConstants {

    // NOTE(review): the three names below look like index field names, but the indexing
    // code shown alongside this class uses string literals instead - confirm usage.
    public static final String CONTENTS = "contents";
    public static final String FILE_NAME = "filename";
    public static final String FILE_PATH = "filepath";

    /** Upper bound on the number of hits requested from a search. */
    public static final int MAX_SEARCH = 10;

    /** Absolute Windows path of the directory that holds the on-disk index. */
    public static final String IndexDir = "E:\\Lucene\\Index";

    /** Absolute Windows path of a data directory. */
    public static final String DataDir = "E:\\Lucene\\Data";

    /** Absolute Windows path of a single text file (despite the "Dir" suffix). */
    public static final String ArticleDir = "E:\\Lucene\\Files\\article.txt";

    /** Not instantiable: constants holder only. */
    private LuceneConstants() {
    }
}
调用Lucene
public class Indexer { public void addEntity() throws IOException { Article article = new Article(); //article.setId(1); //article.setTitle("Lucene全文检索"); //article.setContent("Lucene是apache软件基金会4 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。"); article.setId(2); article.setTitle("Solr搜索引擎"); article.setContent("Solr是基于Lucene框架的搜索莹莹程序,是一个开放源代码的全文检索引擎。"); final Path path = Paths.get(LuceneConstants.IndexDir); Directory directory = FSDirectory.open(path);//索引存放目录 存在磁盘 //Directory RAMDirectory= new RAMDirectory();// 存在内存 Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//更新或创建索引 Document document = new Document(); document.add(new TextField("id", article.getId().toString(), Field.Store.YES)); document.add(new TextField("title", article.getTitle(), Field.Store.YES)); document.add(new TextField("content", article.getContent(), Field.Store.YES)); indexWriter.addDocument(document); indexWriter.close(); } public void addFile() throws IOException { final Path path = Paths.get(LuceneConstants.IndexDir); Directory directory = FSDirectory.open(path); Analyzer analyzer=new StandardAnalyzer(); IndexWriterConfig indexWriterConfig=new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter indexWriter=new IndexWriter(directory,indexWriterConfig); InputStreamReader isr = new InputStreamReader(new FileInputStream(LuceneConstants.ArticleDir), "GBK");//.txt文档,不设置格式会乱码 BufferedReader bufferedReader=new BufferedReader(isr); String content=""; while ((content=bufferedReader.readLine())!=null){ Document document=new Document(); document.add(new TextField("content",content,Field.Store.YES) ); 
indexWriter.addDocument(document); } bufferedReader.close(); indexWriter.close(); } public List<String> SearchFiles() throws IOException, ParseException { String queryString = "Solr"; final Path path = Paths.get(LuceneConstants.IndexDir); Directory directory = FSDirectory.open(path);//索引存储位置 Analyzer analyzer = new StandardAnalyzer();//分析器 //单条件 //关键词解析 //QueryParser queryParser=new QueryParser("content",analyzer); //Query query=queryParser.parse(queryString); //多条件 Query mQuery = MultiFieldQueryParser.parse(new String[]{"Solr"},new String[]{"content"},new StandardAnalyzer()); IndexReader indexReader = DirectoryReader.open(directory);//索引阅读器 IndexSearcher indexSearcher = new IndexSearcher(indexReader);//查询 //TopDocs topDocs=indexSearcher.search(query,3); TopDocs topDocs=indexSearcher.search(mQuery,10); long count = topDocs.totalHits; ScoreDoc[] scoreDocs = topDocs.scoreDocs; List<String> list=new ArrayList<String>(); list.add(String.valueOf(count)); Integer cnt=0; for (ScoreDoc scoreDoc : scoreDocs) { Document document = indexSearcher.doc(scoreDoc.doc); //list.add(cnt.toString()+"-"+"相关度:"+scoreDoc.score+"-----time:"+document.get("time")); //list.add("|||"); //list.add(cnt.toString()+"-"+document.get("content")); list.add(document.get("content")); cnt++; } return list; } }
查看运行效果
/**
 * Web endpoints that trigger the {@code Indexer} demo operations and report the outcome
 * through the {@code message} model attribute of the {@code welcome} view.
 */
@Controller
public class LuceneController {

    /** Model attribute that carries the status or result text. */
    private static final String MESSAGE_KEY = "message";

    /** Name of the view returned by every endpoint. */
    private static final String VIEW_NAME = "welcome";

    /**
     * Handles {@code /add}: indexes the sample article.
     *
     * @param model view model; receives {@code "Success"} or, on an I/O error, {@code "Failure"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/add")
    public String welcomepage(Map<String, Object> model) {
        try {
            new Indexer().addEntity();
            model.put(MESSAGE_KEY, "Success");
        } catch (IOException ex) {
            // The exception detail is discarded; only the status text reaches the view.
            model.put(MESSAGE_KEY, "Failure");
        }
        return VIEW_NAME;
    }

    /**
     * Handles {@code /file}: rebuilds the index from the article text file.
     *
     * @param model view model; receives {@code "SuccessF"} or, on an I/O error, {@code "FailureF"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/file")
    public String fileindex(Map<String, Object> model) {
        try {
            new Indexer().addFile();
            model.put(MESSAGE_KEY, "SuccessF");
        } catch (IOException ex) {
            // The exception detail is discarded; only the status text reaches the view.
            model.put(MESSAGE_KEY, "FailureF");
        }
        return VIEW_NAME;
    }

    /**
     * Handles {@code /search}: runs the demo search and joins the results into one string,
     * each entry followed by a single space.
     *
     * @param model view model; receives the joined results or, on any exception, {@code "FailureF"}
     * @return the view name {@code "welcome"}
     */
    @RequestMapping("/search")
    public String searchindex(Map<String, Object> model) {
        try {
            List<String> hits = new Indexer().SearchFiles();
            StringBuilder joined = new StringBuilder();
            for (String hit : hits) {
                joined.append(hit).append(" ");
            }
            model.put(MESSAGE_KEY, joined.toString());
        } catch (Exception ex) {
            // The exception detail is discarded; only the status text reaches the view.
            model.put(MESSAGE_KEY, "FailureF");
        }
        return VIEW_NAME;
    }
}