package com.merlin.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class LuceneIndexDemo {
public static void main(String[] args) throws IOException, ParseException {
LuceneIndexDemo demo = new LuceneIndexDemo();
// demo.createIndex(); 创建索引
demo.searcher("merlin");
//删除
demo.delete();
demo.query();
}
private void delete() {
IndexWriter writer = null;
try {
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
writer = new IndexWriter(FSDirectory.open(new File("D:\index")), indexWriterConfig);
//参数是一个选项,可以是一个Query,也可以是一个Term,Term是一个精确查找的值
//此时删除的文档并不会完全被删除,而是存储在一个回收站中,可以恢复
//使用Reader可以有效的恢复取到的文档数
writer.deleteDocuments(new Term("path","E:\wamp\www\meal\Application\Common\Conf\config.php"));
} catch (Exception e) {
e.printStackTrace();
}finally{
if(writer!=null){
try {
writer.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
/**
* 被删除的索引查询
*/
public void query(){
try {
IndexReader indexReader = IndexReader.open(FSDirectory.open(new File("D:\index")));
System.out.println("存储的文档数:" + indexReader.numDocs());
System.out.println("总存储量:" + indexReader.maxDoc());
System.out.println("被删除的文档:" + indexReader.numDeletedDocs());
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* 更新 索引
*/
public void update(){
IndexWriter indexWriter = null;
try {
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
indexWriter = new IndexWriter(FSDirectory.open(new File("D:\index")), indexWriterConfig);
//Luence并没有提供更新,这里的更新操作其实是先删除再添加的操作合集
Document document = new Document();
//更新path 为 d: est est的数据
indexWriter.updateDocument(new Term("path","D:\test\test"), document);
} catch (Exception e) {
e.printStackTrace();
}finally{
if(indexWriter!=null){
try {
indexWriter.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
/**
* 1.创建Directory 2.创建IndexWriter 3.创建Document对象 4.为Document添加Field 为本地文件夹创建
* 索引
*/
public void createIndex() {
String indexPath = "D:\index";// 索引存放路径
String docsPath = "E:\wamp\www\meal";// 为该文件夹下的所有文件建立索引
boolean create = true; // 创建
final File docDir = new File(docsPath);
if (!docDir.exists() || !docDir.canRead()) {
System.out
.println("Document directory '"
+ docDir.getAbsolutePath()
+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
System.out.println("Indexing to directory '" + indexPath + "'...");
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9,
analyzer);
if (create) {
// Create a new index in the directory, removing any
// previously indexed documents:
iwc.setOpenMode(OpenMode.CREATE);
} else {
// Add new documents to an existing index:
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, docDir);
writer.close();
Date end = new Date();
System.out.println(end.getTime() - start.getTime()
+ " total milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass()
+ "
with message: " + e.getMessage());
}
}
/**
* 创建Directory 2.创建IndexReader 3.根据IndexReader创建IndexSearcher 4.创建搜索的Query
* 5.根据Searcher搜索并且返回TopDocs 6.根据TopDocs获取ScoreDoc对象
* 7.根据Seacher和ScoreDoc对象获取具体的Document对象 8.根据Document对象获取需要的值
* 9.关闭IndexReader
*
* @throws IOException
* @throws ParseException
*/
public void searcher( String querystring) throws IOException, ParseException {
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(
"D:\index")));
IndexSearcher searcher = new IndexSearcher(reader);
// :Post-Release-Update-Version.LUCENE_XY:
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents",
analyzer);
// 搜索文件中含有querystring的文件列表
Query query = parser.parse(querystring);
TopDocs results = searcher.search(query, 10);
ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching documents");
for (int i = 0; i < hits.length; i++) {
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
System.out.println(path);
}
}
private void indexDocs(IndexWriter writer, File file) throws IOException {
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
}
}
} else {
FileInputStream fis;
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
return;
}
try {
Document doc = new Document();
Field pathField = new StringField("path", file.getPath(),
Field.Store.YES);
doc.add(pathField);
doc.add(new LongField("modified", file.lastModified(),
Field.Store.NO));
doc.add(new TextField("contents", new BufferedReader(
new InputStreamReader(fis, StandardCharsets.UTF_8))));
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.getPath()),
doc);
}
} finally {
fis.close();
}
}
}
}
}