zoukankan      html  css  js  c++  java
  • Lucene4.3入门

          辞职交接期间闲来无事,研究了一下搜索引擎。Java社区里比较火的当然是Lucene,于是想写一个简单的小例子。在网上找了些资料,不过都不是针对4.3版本的,所以自己摸索了一下。

    下载地址:http://lucene.apache.org/core/

    项目结构


    Constants.java 是常量类

    LuceneIndex.java 建立索引类

    LuceneSearch.java 搜索类

    数据文件:



    package com.xin;
    
    /**
     * File-system locations shared by the indexing and searching examples.
     *
     * Backslashes in Windows paths must be written as {@code \\} inside a Java
     * string literal; a single backslash starts an escape sequence ({@code \t}
     * is a TAB, and {@code \l} / {@code \i} are not legal escapes at all).
     */
    public class Constants {
        /** Directory holding the source files that are to be indexed. */
        public final static String INDEX_FILE_PATH = "e:\\lucene\\test";
        /** Directory in which the index itself is stored. */
        public final static String INDEX_STORE_PATH = "e:\\lucene\\index";
    }
    package com.xin;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.util.Date;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    /**
     * Builds an index from the regular files found directly inside
     * {@link Constants#INDEX_FILE_PATH} and writes it to
     * {@link Constants#INDEX_STORE_PATH}.
     *
     * Each file becomes one document with two fields: "path" (the absolute
     * file path) and "contents" (the file text, supplied through a reader).
     *
     * @author chongxin
     * @since 2013/6/19
     * @version Lucene 4.3.1
     */
    public class LuceneIndex {
    	// Index writer; opened by the constructor, released by close().
    	private IndexWriter writer = null;

    	/**
    	 * Opens the index writer on {@link Constants#INDEX_STORE_PATH}.
    	 *
    	 * @throws IllegalStateException if the index directory or the writer
    	 *         cannot be opened; the original cause is attached. Failing
    	 *         here avoids a later NullPointerException on a null writer.
    	 */
    	public LuceneIndex() {
    		try {
    			// Where the index files are kept.
    			Directory dir = FSDirectory.open(new File(Constants.INDEX_STORE_PATH));
    			// Analyzer used to tokenize the "contents" field.
    			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    			// Writer configuration.
    			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    			// Create mode; OpenMode.CREATE_OR_APPEND would be append mode.
    			iwc.setOpenMode(OpenMode.CREATE);

    			writer = new IndexWriter(dir, iwc);
    		} catch (Exception e) {
    			throw new IllegalStateException(
    					"Unable to open index writer at " + Constants.INDEX_STORE_PATH, e);
    		}
    	}

    	/**
    	 * Wraps one file in a Document with a "path" and a "contents" field.
    	 *
    	 * @param f      the file being indexed; its absolute path is stored in "path"
    	 * @param reader reader over the file's text, used for the "contents" field;
    	 *               the caller owns it and must close it after the document
    	 *               has been added to the index
    	 * @return the assembled document
    	 */
    	private Document getDocument(File f, Reader reader) throws Exception {
    		Document doc = new Document();
    		// Available field types include StringField, LongField and TextField.
    		Field pathField = new StringField("path", f.getAbsolutePath(), Field.Store.YES);
    		Field contenField = new TextField("contents", reader);
    		doc.add(contenField);
    		doc.add(pathField);
    		return doc;
    	}

    	/**
    	 * Indexes a single file and always closes the reader opened for it,
    	 * even when adding the document fails.
    	 *
    	 * @param file the regular file to index
    	 */
    	private void indexFile(File file) throws Exception {
    		// NOTE(review): no charset is given, so the platform default is used
    		// to decode the file - confirm this matches the data files' encoding.
    		Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
    		try {
    			System.out.println("正在建立索引 : " + file);
    			writer.addDocument(getDocument(file, reader));
    		} finally {
    			reader.close();
    		}
    	}

    	/**
    	 * Adds every regular file located directly in
    	 * {@link Constants#INDEX_FILE_PATH} to the index. Does nothing when
    	 * that path is not a readable directory; sub-directories are skipped.
    	 *
    	 * @throws Exception if a file cannot be read or added to the index
    	 */
    	public void writeToIndex() throws Exception {
    		File folder = new File(Constants.INDEX_FILE_PATH);

    		// listFiles() yields null when the path is not a directory or cannot be read.
    		File[] files = folder.listFiles();
    		if (files == null) {
    			return;
    		}
    		for (int i = 0; i < files.length; i++) {
    			File file = files[i];
    			if (!file.isFile()) {
    				// A directory cannot be opened as a stream; skip it
    				// instead of aborting the whole run.
    				continue;
    			}
    			indexFile(file);
    		}
    	}

    	/**
    	 * Closes the underlying index writer.
    	 *
    	 * @throws Exception if closing the writer fails
    	 */
    	public void close() throws Exception {
    		writer.close();
    	}

    	/**
    	 * Builds the index, prints the elapsed time in milliseconds, and
    	 * closes the writer even if indexing fails.
    	 */
    	public static void main(String[] args) throws Exception {
    		LuceneIndex indexer = new LuceneIndex();
    		try {
    			long start = System.currentTimeMillis();
    			indexer.writeToIndex();
    			long end = System.currentTimeMillis();

    			System.out.println("建立索引用时" + (end - start) + "毫秒");
    		} finally {
    			indexer.close();
    		}
    	}
    }

    执行结果:

    正在建立索引 : e:\lucene\test\a.txt
    正在建立索引 : e:\lucene\test\b.txt
    正在建立索引 : e:\lucene\test\c.txt
    正在建立索引 : e:\lucene\test\d.txt
    建立索引用时109毫秒
    

    生成的索引文件:

    查找:

    package com.xin;
    
    import java.io.File;
    import java.util.Date;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    /**
     * Runs keyword queries against the "contents" field of the index stored
     * at {@link Constants#INDEX_STORE_PATH} and prints the "path" field of
     * every hit.
     *
     * @author chongxin
     * @since 2013/6/19
     * @version Lucene 4.3.1
     */
    public class LuceneSearch {
    	// Maximum number of hits requested per query.
    	private static final int MAX_HITS = 10;

    	// Reader over the index; kept so that close() can release it.
    	private IndexReader reader = null;
    	// Searcher built on top of the reader.
    	private IndexSearcher searcher = null;
    	// The most recently parsed query.
    	private Query query = null;
    	// Name of the field that keywords are matched against.
    	private String field = "contents";

    	/**
    	 * Opens the index at {@link Constants#INDEX_STORE_PATH} for searching.
    	 *
    	 * @throws IllegalStateException if the index cannot be opened; the
    	 *         original cause is attached. Failing here avoids a later
    	 *         NullPointerException on a null searcher.
    	 */
    	public LuceneSearch() {
    		try {
    			reader = DirectoryReader.open(FSDirectory.open(new File(Constants.INDEX_STORE_PATH)));
    			searcher = new IndexSearcher(reader);
    		} catch (Exception e) {
    			throw new IllegalStateException(
    					"Unable to open index at " + Constants.INDEX_STORE_PATH, e);
    		}
    	}

    	/**
    	 * Parses the keyword into a query, runs it, and prints the elapsed time.
    	 *
    	 * @param keyword the query text, parsed against the "contents" field
    	 * @return the top hits, or {@code null} when parsing or searching
    	 *         fails (the stack trace is printed in that case)
    	 */
    	public final TopDocs search(String keyword) {
    		System.out.println("正在检索关键字 : " + keyword);
    		try {
    			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    			QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
    			// Wrap the keyword in a Query object.
    			query = parser.parse(keyword);
    			long start = System.currentTimeMillis();
    			TopDocs results = searcher.search(query, MAX_HITS);
    			long end = System.currentTimeMillis();
    			System.out.println("检索完成,用时" + (end - start)
    					+ "毫秒");
    			return results;
    		} catch (Exception e) {
    			e.printStackTrace();
    			return null;
    		}
    	}

    	/**
    	 * Prints the stored "path" of every hit, followed by a separator line.
    	 *
    	 * @param results hits returned by {@link #search(String)}; a
    	 *        {@code null} value (failed search) is reported the same way
    	 *        as an empty result instead of raising a NullPointerException
    	 */
    	public void printResult(TopDocs results) {
    		ScoreDoc[] h = (results == null) ? new ScoreDoc[0] : results.scoreDocs;
    		if (h.length == 0) {
    			System.out.println("对不起,没有找到您要的结果。");
    		} else {
    			for (int i = 0; i < h.length; i++) {
    				try {
    					Document doc = searcher.doc(h[i].doc);
    					System.out.print("这是第" + i + "个检索到的结果,文件名为:");
    					System.out.println(doc.get("path"));
    				} catch (Exception e) {
    					// A hit that cannot be loaded is reported; the rest are still printed.
    					e.printStackTrace();
    				}
    			}
    		}
    		System.out.println("--------------------------");
    	}

    	/**
    	 * Releases the index reader opened by the constructor.
    	 *
    	 * @throws Exception if closing the reader fails
    	 */
    	public void close() throws Exception {
    		reader.close();
    	}

    	/**
    	 * Runs three sample queries, prints their results, and closes the
    	 * index reader even if a query fails.
    	 */
    	public static void main(String[] args) throws Exception {
    		LuceneSearch test = new LuceneSearch();
    		try {
    			TopDocs h = null;
    			h = test.search("中国");
    			test.printResult(h);
    			h = test.search("人民");
    			test.printResult(h);
    			h = test.search("共和国");
    			test.printResult(h);
    		} finally {
    			test.close();
    		}
    	}

    }


  • 相关阅读:
    十个男人看了,九个成了富人
    win7下编译安装osgearth
    gdal源码编译安装
    win7下编译boost库总结
    everything && executor
    cursor:hand与cursor:pointer的区别介绍
    web程序记录当前在线人数
    汉字转拼音
    40多个非常有用的Oracle 查询语句
    asp.net 使用IHttpModule 做权限检查 登录超时检查(转)
  • 原文地址:https://www.cnblogs.com/dyllove98/p/3144919.html
Copyright © 2011-2022 走看看