zoukankan      html  css  js  c++  java
  • Lucene 4.9 Document 的简单应用

    package com.merlin.lucene;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;
    import java.util.Date;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.LongField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    public class LuceneIndexDemo {
    
    	public static void main(String[] args) throws IOException, ParseException {
    
    		LuceneIndexDemo demo = new LuceneIndexDemo();
    		
    //		demo.createIndex(); 创建索引
    		demo.searcher("merlin");
    		
    		//删除
    		demo.delete();
    		demo.query();
    	}
    
    	private void delete() {
    		
    		IndexWriter writer = null;
    		 
            try {
            	
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
                writer = new IndexWriter(FSDirectory.open(new File("D:\index")), indexWriterConfig);
                
                //参数是一个选项,可以是一个Query,也可以是一个Term,Term是一个精确查找的值
                //此时删除的文档并不会完全被删除,而是存储在一个回收站中,可以恢复
                //使用Reader可以有效的恢复取到的文档数
                
                writer.deleteDocuments(new Term("path","E:\wamp\www\meal\Application\Common\Conf\config.php"));
                
            } catch (Exception e) {
                e.printStackTrace();
            }finally{
                if(writer!=null){
                    try {
                    	writer.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }		
    	}
    
    	/**
    	 *  被删除的索引查询
    	 */
    	public void query(){
            try {
                IndexReader indexReader = IndexReader.open(FSDirectory.open(new File("D:\index")));
                System.out.println("存储的文档数:" + indexReader.numDocs());
                System.out.println("总存储量:" + indexReader.maxDoc());
                System.out.println("被删除的文档:" + indexReader.numDeletedDocs());
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    	
    	/**
    	 * 更新 索引
    	 */
    
    	public void update(){
            IndexWriter indexWriter = null;
            try {
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
                indexWriter = new IndexWriter(FSDirectory.open(new File("D:\index")), indexWriterConfig);
                //Luence并没有提供更新,这里的更新操作其实是先删除再添加的操作合集
                Document document = new Document();
                //更新path 为 d:	est	est的数据
                indexWriter.updateDocument(new Term("path","D:\test\test"), document);
            } catch (Exception e) {
                e.printStackTrace();
            }finally{
                if(indexWriter!=null){
                    try {
                        indexWriter.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
    }
    	
    	/**
    	 * 1.创建Directory 2.创建IndexWriter 3.创建Document对象 4.为Document添加Field 为本地文件夹创建
    	 * 索引
    	 */
    	public void createIndex() {
    
    		String indexPath = "D:\index";// 索引存放路径
    		String docsPath = "E:\wamp\www\meal";// 为该文件夹下的所有文件建立索引
    		boolean create = true; // 创建
    
    		final File docDir = new File(docsPath);
    		if (!docDir.exists() || !docDir.canRead()) {
    			System.out
    					.println("Document directory '"
    							+ docDir.getAbsolutePath()
    							+ "' does not exist or is not readable, please check the path");
    			System.exit(1);
    		}
    
    		Date start = new Date();
    
    		try {
    			System.out.println("Indexing to directory '" + indexPath + "'...");
    
    			Directory dir = FSDirectory.open(new File(indexPath));
    			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9,
    					analyzer);
    
    			if (create) {
    				// Create a new index in the directory, removing any
    				// previously indexed documents:
    				iwc.setOpenMode(OpenMode.CREATE);
    			} else {
    				// Add new documents to an existing index:
    				iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    			}
    
    			IndexWriter writer = new IndexWriter(dir, iwc);
    			indexDocs(writer, docDir);
    
    			writer.close();
    
    			Date end = new Date();
    			System.out.println(end.getTime() - start.getTime()
    					+ " total milliseconds");
    
    		} catch (IOException e) {
    			System.out.println(" caught a " + e.getClass()
    					+ "
     with message: " + e.getMessage());
    		}
    	}
    
    	/**
    	 * 创建Directory 2.创建IndexReader 3.根据IndexReader创建IndexSearcher 4.创建搜索的Query
    	 * 5.根据Searcher搜索并且返回TopDocs 6.根据TopDocs获取ScoreDoc对象
    	 * 7.根据Seacher和ScoreDoc对象获取具体的Document对象 8.根据Document对象获取需要的值
    	 * 9.关闭IndexReader
    	 * 
    	 * @throws IOException
    	 * @throws ParseException
    	 */
    	public void searcher( String querystring) throws IOException, ParseException {
    
    		IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(
    				"D:\index")));
    
    		IndexSearcher searcher = new IndexSearcher(reader);
    		// :Post-Release-Update-Version.LUCENE_XY:
    
    		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    		QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents",
    				analyzer);
    
    		// 搜索文件中含有querystring的文件列表
    		Query query = parser.parse(querystring);
    
    		TopDocs results = searcher.search(query, 10);
    		ScoreDoc[] hits = results.scoreDocs;
    
    		int numTotalHits = results.totalHits;
    		System.out.println(numTotalHits + " total matching documents");
    
    		for (int i = 0; i < hits.length; i++) {
    			Document doc = searcher.doc(hits[i].doc);
    			String path = doc.get("path");
    			System.out.println(path);
    		}
    
    	}
    
    	private void indexDocs(IndexWriter writer, File file) throws IOException {
    
    		if (file.canRead()) {
    
    			if (file.isDirectory()) {
    
    				String[] files = file.list();
    				if (files != null) {
    					for (int i = 0; i < files.length; i++) {
    						indexDocs(writer, new File(file, files[i]));
    					}
    				}
    			} else {
    
    				FileInputStream fis;
    				try {
    					fis = new FileInputStream(file);
    				} catch (FileNotFoundException fnfe) {
    					return;
    				}
    
    				try {
    
    					Document doc = new Document();
    
    					Field pathField = new StringField("path", file.getPath(),
    							Field.Store.YES);
    					doc.add(pathField);
    					doc.add(new LongField("modified", file.lastModified(),
    							Field.Store.NO));
    					doc.add(new TextField("contents", new BufferedReader(
    							new InputStreamReader(fis, StandardCharsets.UTF_8))));
    
    					if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    						System.out.println("adding " + file);
    						writer.addDocument(doc);
    					} else {
    						System.out.println("updating " + file);
    						writer.updateDocument(new Term("path", file.getPath()),
    								doc);
    					}
    
    				} finally {
    					fis.close();
    				}
    			}
    		}
    	}
    }
    

      

  • 相关阅读:
    Fluent UDF【1】:简介
    CFD使用者应当了解的一些事情
    关于CAE的那点儿破事儿【二】
    sau交流学习社区--看小说的lovebook一个无线端BS应用
    sau交流学习社区--songEagle开发系列:Vue.js + Koa.js项目中使用JWT认证
    mysql数据库NO CONNECTION问题分析以及解决方案
    sau交流学习社区第三方登陆github--oauth来实现用户登录
    微信小程序初体验,入门练手项目--通讯录,部署上线(二)
    快速新建简单的koa2后端服务
    docker初体验,搭建自用的gitlab服务
  • 原文地址:https://www.cnblogs.com/merlini/p/4043889.html
Copyright © 2011-2022 走看看