zoukankan      html  css  js  c++  java
  • lucene 在项目中的使用

    开源全文搜索工具包Lucene3.0.1的使用。

    项目环境Struts2 (2.18)+Hibernate(3.0)+Spring(2.5) JDK:1.6 IDE:myEclipse 8.5 

    项目需求:对站内发布的新闻进行全文检索

    新闻实体News

    /**
     * News entity whose id, title and contents are written to and read back
     * from the Lucene index.
     */
    public class News {
    	/** Identifier of the news item. */
    	private int id;
    	/** Title of the news item. */
    	private String title;
    	/** Body text of the news item. */
    	private String contents;

    	/** @return the identifier of this news item */
    	public int getId() {
    		return id;
    	}

    	/** @param id the identifier to assign */
    	public void setId(int id) {
    		this.id = id;
    	}

    	/** @return the title, possibly {@code null} if never set */
    	public String getTitle() {
    		return title;
    	}

    	/** @param title the title to assign */
    	public void setTitle(String title) {
    		this.title = title;
    	}

    	/** @return the body text, possibly {@code null} if never set */
    	public String getContents() {
    		return contents;
    	}

    	/** @param contents the body text to assign */
    	public void setContents(String contents) {
    		this.contents = contents;
    	}
    }
    新闻实体的luceneDao
    package com.hkrt.dao;
    
    import com.hkrt.domain.LuceneSearchResult;
    import com.hkrt.domain.News;
    /**
     * Index maintenance and full-text search operations for {@link News}.
     */
    public interface NewsLuceneDao {
    	/** Name of the index field holding the news id. */
    	String FIELD_ID="id";
    	/** Name of the index field holding the news title. */
    	String FIELD_TITLE = "title";
    	/** Name of the index field holding the news contents. */
    	String FIELD_CONTENTS = "contents";
    	/**
    	 * Directory in which the index is stored: the path of the context class
    	 * loader's root resource followed by {@code index_dir}. Evaluated once,
    	 * when this interface is initialized.
    	 */
    	String INDEX_DIR = Thread.currentThread().getContextClassLoader().getResource("").getPath()+"index_dir";

    	/**
    	 * Rebuilds the index for all news items from scratch.
    	 */
    	void rebuildAllIndex();

    	/**
    	 * Indexes a single news item and appends it to the existing index.
    	 *
    	 * @param news the news item to index
    	 */
    	void doIndexSingle(News news);

    	/**
    	 * Searches by keyword and returns one page of matching news items.
    	 *
    	 * @param keyword  the search keyword
    	 * @param pageNo   the page to return
    	 * @param pageSize the number of records shown per page
    	 * @return a {@link LuceneSearchResult} describing the requested page
    	 */
    	LuceneSearchResult<News> doSeacher(String keyword, int pageNo,int pageSize);

    	/**
    	 * Updates the index entry of a news item.
    	 *
    	 * @param news the news item whose index entry is refreshed
    	 */
    	void updateIndex(News news);

    	/**
    	 * Deletes the index entry with the given id.
    	 *
    	 * @param id the id of the news item to remove from the index
    	 */
    	void deleteIndex(Integer id);
    }
    新闻lucene dao的实现
    package com.hkrt.dao.impl;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryParser.MultiFieldQueryParser;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.util.Version;
    
    import com.hkrt.dao.NewsDao;
    import com.hkrt.dao.NewsLuceneDao;
    import com.hkrt.domain.LuceneSearchResult;
    import com.hkrt.domain.News;
    public class NewsLuceneDaoImpl implements NewsLuceneDao {
    	private NewsDao newsDao;
    	/**
    	 * Creates the text analyzer used by this DAO.
    	 *
    	 * @return a new {@link StandardAnalyzer} configured for {@code Version.LUCENE_30}
    	 */
    	public Analyzer getAnalyzer() {
    		final Analyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
    		return standardAnalyzer;
    	}
    
    	/**
    	 * Opens the directory in which the index is stored ({@code INDEX_DIR}).
    	 *
    	 * @return the opened Lucene directory; never {@code null}
    	 * @throws IllegalStateException if the directory cannot be opened
    	 */
    	public Directory openDirectory() {
    		File indexDir = new File(INDEX_DIR);
    		System.out.println(indexDir	+ "-------打开索引--------------");
    		try {
    			return FSDirectory.open(indexDir);
    		} catch (IOException e) {
    			// Returning null here only postponed the failure to an anonymous
    			// NullPointerException inside IndexWriter/IndexReader/IndexSearcher;
    			// fail right away and keep the real cause attached.
    			throw new IllegalStateException("Cannot open Lucene index directory: " + indexDir, e);
    		}
    	}
    
    	/**
    	 * Maps a news item onto a Lucene document with one field per property.
    	 * The id is stored but not analyzed so it can be matched exactly; title
    	 * and contents are stored and analyzed for full-text search.
    	 *
    	 * @param news the news item to convert
    	 * @return the document representing {@code news}
    	 */
    	public Document createForumuploadDocument(News news) {
    		// A missing title/contents becomes an empty string. String.valueOf(null)
    		// would index and store the literal text "null", which then matches a
    		// search for "null" and is shown to the user.
    		String title = news.getTitle() == null ? "" : news.getTitle();
    		String contents = news.getContents() == null ? "" : news.getContents();

    		Document doc = new Document();
    		// id field
    		doc.add(new Field(FIELD_ID, String.valueOf(news.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
    		// title field
    		doc.add(new Field(FIELD_TITLE, title, Field.Store.YES, Field.Index.ANALYZED));
    		// contents field
    		doc.add(new Field(FIELD_CONTENTS, contents, Field.Store.YES, Field.Index.ANALYZED));
    		return doc;
    	}
    
    	/**
    	 * Deletes every indexed document whose id field equals the given id.
    	 * An {@link IOException} raised while opening, deleting or closing is
    	 * printed via {@code printStackTrace()} and not propagated.
    	 *
    	 * @param id the id of the news item to remove from the index
    	 */
    	public void deleteIndex(Integer id) {
    		final Term idTerm = new Term(FIELD_ID, String.valueOf(id));
    		IndexReader reader = null;
    		try {
    			// Second argument false: the reader is opened writable so it can delete.
    			reader = IndexReader.open(this.openDirectory(), false);
    			reader.deleteDocuments(idTerm);
    		} catch (IOException deleteFailure) {
    			deleteFailure.printStackTrace();
    		} finally {
    			try {
    				if (reader != null) {
    					reader.close();
    				}
    			} catch (IOException closeFailure) {
    				closeFailure.printStackTrace();
    			}
    		}
    	}
    
    	/**
    	 * Indexes a single news item and appends it to the index, creating the
    	 * index first when none exists yet. I/O failures are printed via
    	 * {@code printStackTrace()} and not propagated.
    	 *
    	 * @param news the news item to index
    	 */
    	@Override
    	public void doIndexSingle(News news) {
    		IndexWriter indexWriter = null;
    		try {
    			// Constructor without the create flag: appends to an existing index and
    			// creates one when missing. Passing create=false made the very first
    			// add fail (and be silently dropped) until rebuildAllIndex() had run.
    			indexWriter = new IndexWriter(openDirectory(), getAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    			Document doc = this.createForumuploadDocument(news);
    			indexWriter.addDocument(doc);
    			// NOTE(review): optimize() after every single add merges the whole
    			// index each time; consider running it only in rebuildAllIndex().
    			indexWriter.optimize();
    		} catch (IOException e) {
    			// CorruptIndexException and LockObtainFailedException are IOExceptions
    			// and were handled identically, so one clause covers them.
    			e.printStackTrace();
    		} finally {
    			try {
    				if (indexWriter != null) {
    					indexWriter.close(); // flushes buffered documents to disk
    				}
    			} catch (IOException e) {
    				e.printStackTrace();
    			}
    		}
    	}
    
    	/**
    	 * Searches the title and contents fields for the keyword and returns one
    	 * page of hits with the matched terms wrapped in a red {@code <span>}.
    	 *
    	 * <p>The result always carries a non-null data list. A blank keyword, a
    	 * non-positive page number or page size, or a page beyond the available
    	 * hits yields an empty page rather than an exception. Parse and I/O
    	 * failures are printed via {@code printStackTrace()} and also yield an
    	 * empty page.
    	 *
    	 * @param keyword  the search keyword (Lucene query syntax)
    	 * @param pageNo   the 1-based page to return
    	 * @param pageSize the number of records per page
    	 * @return the search result for the requested page
    	 */
    	@Override
    	public LuceneSearchResult<News> doSeacher(String keyword, int pageNo,int pageSize) {
    		LuceneSearchResult<News> lsr = new LuceneSearchResult<News>();
    		lsr.setPageNo(pageNo);
    		lsr.setPageSize(pageSize);
    		lsr.setKeyword(keyword);
    		List<News> ais = new ArrayList<News>();
    		// Set up front so the view never receives a null list, even on failure.
    		lsr.setDatas(ais);
    		if (keyword == null || keyword.trim().length() == 0) {
    			return lsr; // nothing to parse; avoids an NPE in the query parser
    		}
    		IndexSearcher searcher = null;
    		try {
    			// Read-only searcher over the index directory.
    			searcher = new IndexSearcher(this.openDirectory(), true);
    			// One analyzer for parsing and highlighting instead of one per hit.
    			Analyzer analyzer = this.getAnalyzer();
    			// Match the keyword in either the title or the contents field.
    			Query query = MultiFieldQueryParser.parse(Version.LUCENE_30, keyword,
    					new String[] { FIELD_TITLE, FIELD_CONTENTS },
    					new BooleanClause.Occur[] { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD },
    					analyzer);
    			long begin = System.currentTimeMillis();
    			TopDocs ts = searcher.search(query, null, 100000);
    			// Total number of matches (may exceed the number of returned hits).
    			lsr.setRecordCount(ts.totalHits);
    			// Highlight with a red span instead of the default <b>..</b>.
    			SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style=color:red>", "</span>");
    			Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
    			highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
    			ScoreDoc[] hits = ts.scoreDocs;

    			// Page window [start, end), clamped to the hits actually returned.
    			// The old "recordCount % pageSize" rule produced an empty last page
    			// when the count was an exact multiple of pageSize, and indexed past
    			// the array for out-of-range pages or more than 100000 matches.
    			int start = 0;
    			int end = 0;
    			if (pageNo > 0 && pageSize > 0) {
    				long first = (long) (pageNo - 1) * pageSize;
    				if (first < hits.length) {
    					start = (int) first;
    					end = (int) Math.min(first + pageSize, (long) hits.length);
    				}
    			}
    			if (start < end) {
    				lsr.setStratNo(start + 1);
    				lsr.setEndNo(end);
    			}
    			for (int i = start; i < end; i++) {
    				// Load the stored document by its internal document number.
    				Document doc = searcher.doc(hits[i].doc);
    				News news = new News();
    				// The id is stored in the index; copy it so the view can show it.
    				String storedId = doc.get(FIELD_ID);
    				if (storedId != null) {
    					news.setId(Integer.parseInt(storedId));
    				}
    				// Title: highlighted fragment, or the plain title when nothing matched.
    				String title = doc.getField(FIELD_TITLE).stringValue();
    				String title2 = highlighter.getBestFragment(analyzer, FIELD_TITLE, title);
    				news.setTitle(title2 == null ? title : title2);
    				// Contents: same rule, with long highlighted text cut at 512 chars.
    				String contents1 = doc.getField(FIELD_CONTENTS).stringValue();
    				String contents2 = highlighter.getBestFragment(analyzer, FIELD_CONTENTS, contents1);
    				if (contents2 == null) {
    					news.setContents(contents1);
    				} else if (contents2.length() > 512) {
    					// NOTE(review): the cut can land inside a highlight <span> tag.
    					news.setContents(contents2.substring(0, 512) + "...");
    				} else {
    					news.setContents(contents2);
    				}
    				ais.add(news);
    			}
    			lsr.setTime((System.currentTimeMillis() - begin) / 1000.0); // elapsed seconds
    		} catch (IOException e) {
    			e.printStackTrace();
    		} catch (ParseException e) {
    			e.printStackTrace();
    		} catch (InvalidTokenOffsetsException e) {
    			e.printStackTrace();
    		} finally {
    			if (searcher != null) {
    				try {
    					searcher.close();
    				} catch (Exception e) {
    					e.printStackTrace();
    				}
    			}
    		}
    		return lsr;
    	}
    
    	/**
    	 * Rebuilds the whole index from the news items returned by the news DAO.
    	 * Existing files in the index directory are deleted and a fresh index is
    	 * written. I/O failures while writing are printed via
    	 * {@code printStackTrace()} and not propagated.
    	 */
    	@Override
    	public void rebuildAllIndex() {
    		// Load the data first: if the DAO call fails, the old index is still intact.
    		List<News> data = this.newsDao.findAll();

    		File file = new File(INDEX_DIR);
    		if (file.exists()) {
    			// listFiles() returns null when the path is not a directory or cannot be read.
    			File[] staleFiles = file.listFiles();
    			if (staleFiles != null) {
    				for (File subFile : staleFiles) {
    					subFile.delete();
    				}
    			}
    		} else {
    			file.mkdirs();
    		}

    		IndexWriter indexWriter = null;
    		try {
    			// create=true: start a new index, replacing whatever is there.
    			indexWriter = new IndexWriter(this.openDirectory(), getAnalyzer(),true, IndexWriter.MaxFieldLength.UNLIMITED);
    			int size = data == null ? 0 : data.size();
    			for (int i = 0; i < size; i++) {
    				indexWriter.addDocument(createForumuploadDocument(data.get(i)));
    				// Commit after every 20th document; "i % 20 == 0" fired after the
    				// very first one. The remainder is committed by close().
    				if ((i + 1) % 20 == 0) {
    					indexWriter.commit();
    				}
    			}
    			indexWriter.optimize(); // merge segments once the bulk load is done
    		} catch (IOException e) {
    			// CorruptIndexException and LockObtainFailedException are IOExceptions
    			// and were handled identically, so one clause covers them.
    			e.printStackTrace();
    		} finally {
    			try {
    				if (indexWriter != null) {
    					indexWriter.close(); // flushes buffered documents to disk
    				}
    			} catch (IOException e) {
    				e.printStackTrace();
    			}
    		}
    	}
    	/**
    	 * Refreshes the index entry of a news item by deleting the entry with its
    	 * id and then indexing the item again.
    	 *
    	 * @param news the news item whose index entry is refreshed
    	 */
    	@Override
    	public void updateIndex(News news) {
    		final Integer newsId = news.getId();
    		deleteIndex(newsId);
    		doIndexSingle(news);
    	}
    	/** @return the news DAO used to load the items to index */
    	public NewsDao getNewsDao() {
    		return this.newsDao;
    	}
    	/** @param newsDao the news DAO used to load the items to index */
    	public void setNewsDao(NewsDao newsDao) {
    		final NewsDao injected = newsDao;
    		this.newsDao = injected;
    	}
    }
    

    对查询结果进行分页处理

    package com.hkrt.domain;
    import java.util.List;
    /**
     * One page of search results together with paging and timing information.
     *
     * @param <T> the type of the items on the page
     */
    public class LuceneSearchResult<T> {
    	/** Current page; defaults to 1. */
    	private int pageNo = 1;
    	/** Number of records shown per page; defaults to 5. */
    	private int pageSize = 5;
    	/** Total number of matching records. */
    	private int recordCount;
    	/** Time the search took. */
    	private double time;
    	/** Items of the current page. */
    	private List<T> datas;
    	/** Number of the first record on the page (property name spelled "stratNo"). */
    	private int stratNo;
    	/** Number of the last record on the page. */
    	private int endNo;
    	/** Keyword that was searched for. */
    	private String keyword;

    	/** @return the current page */
    	public int getPageNo() {
    		return this.pageNo;
    	}

    	/** @param pageNo the current page */
    	public void setPageNo(int pageNo) {
    		this.pageNo = pageNo;
    	}

    	/** @return the number of records per page */
    	public int getPageSize() {
    		return this.pageSize;
    	}

    	/** @param pageSize the number of records per page */
    	public void setPageSize(int pageSize) {
    		this.pageSize = pageSize;
    	}

    	/** @return the total number of matching records */
    	public int getRecordCount() {
    		return this.recordCount;
    	}

    	/** @param recordCount the total number of matching records */
    	public void setRecordCount(int recordCount) {
    		this.recordCount = recordCount;
    	}

    	/** @return the items of the current page */
    	public List<T> getDatas() {
    		return this.datas;
    	}

    	/** @param datas the items of the current page */
    	public void setDatas(List<T> datas) {
    		this.datas = datas;
    	}

    	/** @return the time the search took */
    	public double getTime() {
    		return this.time;
    	}

    	/** @param time the time the search took */
    	public void setTime(double time) {
    		this.time = time;
    	}

    	/** @return the keyword that was searched for */
    	public String getKeyword() {
    		return this.keyword;
    	}

    	/** @param keyword the keyword that was searched for */
    	public void setKeyword(String keyword) {
    		this.keyword = keyword;
    	}

    	/** @return the number of the first record on the page */
    	public int getStratNo() {
    		return this.stratNo;
    	}

    	/** @param stratNo the number of the first record on the page */
    	public void setStratNo(int stratNo) {
    		this.stratNo = stratNo;
    	}

    	/** @return the number of the last record on the page */
    	public int getEndNo() {
    		return this.endNo;
    	}

    	/** @param endNo the number of the last record on the page */
    	public void setEndNo(int endNo) {
    		this.endNo = endNo;
    	}
    }
    

    代码已经实现对 News 建立索引,并按关键字进行搜索
    lucene3.0.1 中需要的jar 包


    建立索引:


    搜索页面数据展示

     <%-- Result summary. The keyword is user input, so it is HTML-escaped with c:out.
          The summary sits before the table because a div is not valid table content. --%>
     <div class="title">搜索结果:搜索关键字【<c:out value="${lsr.keyword}"/>】,共搜索到【${lsr.recordCount }】个文件,耗时:${lsr.time}秒,当前显示${lsr.stratNo}—${lsr.endNo}记录</div>  
     <table width="100%" height="92" border="0" cellpadding="0" cellspacing="1">  
            <%-- title and contents carry the highlight span markup added by the search DAO,
                 so they are written out unescaped. --%>
            <c:forEach items="${request.lsr.datas}" var="news">  
    	        <tr>
    	          <td height="30" colspan="6" align="left" bgcolor="#f2f2f2" class="left_txt">  
    	           ${news.id}  
    	          </td>
    	          <td height="30" colspan="6" align="left" bgcolor="#f2f2f2" class="left_txt">  
    	          ${news.title}  
    	          </td>  
    	          <td height="30" colspan="6" align="left" bgcolor="#f2f2f2" class="left_txt">${news.contents}</td>  
    	        </tr>  
             </c:forEach>  
          </table> 
    最终搜索结果:



  • 相关阅读:
    .net core 获取程序中文件路径
    关于autofac的一些具体的用法
    Ztree节点增加删除修改和Icheck的用法
    C# 多线程之Task(任务
    C#开启异步 线程的四种方式
    c# thread数线程的创建
    图片转二进制 ,
    循环中如果有某一次循环抛出异常了整个循环就停止的解决办法
    JAVA 8 Optional的使用
    react 获取指定表单对象中的对象的值
  • 原文地址:https://www.cnblogs.com/java20130726/p/3218373.html
Copyright © 2011-2022 走看看