zoukankan      html  css  js  c++  java
  • lucene入门(二)

    上篇文章给出了如何创建索引的例子,在这里我将介绍全文检索的第二大部分:索引的搜索操作。

    索引搜索其实也就是一个先读索引、然后再查索引的操作,理解了原理就能很快上手,个人认为关键是要多实践。

    直接上代码了(这是我个人写的一个小demo,后面会给出源码):

    	/**
    	 * Opens the on-disk Lucene index directory configured by
    	 * {@code SysConstant.LUCENE_INDEX_DIR}.
    	 *
    	 * NOTE(review): every call opens a NEW Directory that the caller is
    	 * responsible for closing — callers here never close it (leak).
    	 *
    	 * @return a freshly opened FSDirectory over the index path
    	 * @throws IOException if the index path cannot be opened
    	 */
    	private static Directory getDirectory() throws IOException{
    		return FSDirectory.open(Paths.get(SysConstant.LUCENE_INDEX_DIR));
    	}
    	
    	/**
    	 * Opens a new DirectoryReader over a freshly opened index Directory.
    	 *
    	 * NOTE(review): each call allocates a new Directory + reader; the
    	 * reader must be closed by the caller or file handles leak.
    	 *
    	 * @return a new reader positioned over the current index state
    	 * @throws IOException if the index cannot be opened or read
    	 */
    	private static DirectoryReader getDirectoryReader() throws IOException{
    		return DirectoryReader.open(getDirectory());
    	}
    	
    	/**
    	 * Builds an IndexSearcher over a brand-new reader.
    	 *
    	 * NOTE(review): name is a typo for "getSearcher" (kept — callers use
    	 * it). Each invocation opens a new, never-closed DirectoryReader, so
    	 * callers should invoke this ONCE per request and not once per hit.
    	 *
    	 * @return a searcher over a newly opened reader
    	 * @throws IOException if the index cannot be opened
    	 */
    	private static IndexSearcher getSercher() throws IOException{
    		return new IndexSearcher(getDirectoryReader());
    	}
    	/**
    	 * Plain (non-highlighted) full-text search over one field.
    	 *
    	 * Fix vs. original: the original called {@code getSercher()} once for
    	 * the search and then AGAIN inside the loop for every hit — each call
    	 * opens a new FSDirectory + DirectoryReader that is never closed
    	 * (file-handle leak), and hit ids from one searcher were resolved
    	 * against a different one. A single reader is now opened once and
    	 * closed via try-with-resources.
    	 *
    	 * @param type the document field to search in
    	 * @param key  the raw query string to parse
    	 * @return one map per matching document (at most 6000 hits), as
    	 *         produced by {@code LuceneDocToBean.DocToBean}
    	 * @throws ParseException if {@code key} is not a valid query
    	 * @throws IOException if the index cannot be read
    	 */
    	public static List<Map<String,Object>> serch(String type,String key) throws ParseException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException{
    		List<Map<String,Object>> objs = new ArrayList<Map<String,Object>>();
    		QueryParser parser = new QueryParser(type, new StandardAnalyzer());
    		Query query = parser.parse(key);
    		// One reader/searcher for the whole request; closed on exit.
    		try (DirectoryReader reader = getDirectoryReader()) {
    			IndexSearcher searcher = new IndexSearcher(reader);
    			TopDocs results = searcher.search(query, 10 * 600);
    			for (ScoreDoc hit : results.scoreDocs) {
    				Document doc = searcher.doc(hit.doc);
    				objs.add(LuceneDocToBean.DocToBean(doc, new TxtTable()));
    			}
    		}
    		return objs;
    	}
    	/**
    	 * Full-text search that also produces an HTML-highlighted fragment
    	 * (matches wrapped in {@code <b><font color='red'>...</font></b>}).
    	 *
    	 * Fixes vs. original:
    	 * 1) One DirectoryReader/IndexSearcher is opened per call and closed
    	 *    (the original opened a new, never-closed reader per hit).
    	 * 2) Highlighting now re-analyzes the field text with the SAME
    	 *    SmartChineseAnalyzer used to parse the query; the original used
    	 *    a StandardAnalyzer here, whose Chinese tokenization differs and
    	 *    can miss or misalign highlight fragments.
    	 *
    	 * @param type the document field to search and highlight
    	 * @param key  the raw query string to parse
    	 * @return one bean per hit carrying the highlighted HTML (may be
    	 *         empty when the field is absent) and the mapped document
    	 * @throws ParseException if {@code key} is not a valid query
    	 * @throws IOException if the index cannot be read
    	 * @throws InvalidTokenOffsetsException if highlighting token offsets
    	 *         fall outside the stored field text
    	 */
    	public static List<HigerSerchBean> searchHigher(String type, String key) throws ParseException, IOException, InvalidTokenOffsetsException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    		List<HigerSerchBean> list = new ArrayList<HigerSerchBean>();
    		SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
    		QueryParser parser = new QueryParser(type, analyzer);
    		Query query = parser.parse(key);

    		QueryScorer scorer = new QueryScorer(query);
    		SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    		Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    		highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

    		// One reader/searcher for the whole request; closed on exit.
    		try (DirectoryReader reader = getDirectoryReader()) {
    			IndexSearcher searcher = new IndexSearcher(reader);
    			TopDocs hits = searcher.search(query, 10 * 600);
    			for (ScoreDoc scoreDoc : hits.scoreDocs) {
    				HigerSerchBean bean = new HigerSerchBean();
    				Document doc = searcher.doc(scoreDoc.doc);
    				String value = doc.get(type);
    				String higer = "";
    				if (value != null) {
    					// Same analyzer as the query parser — see fix note above.
    					higer = highlighter.getBestFragment(analyzer, type, value);
    				}
    				bean.setHigherSerchHtml(higer);
    				bean.setMap(LuceneDocToBean.DocToBean(doc, new TxtTable()));
    				list.add(bean);
    			}
    		}
    		return list;
    	}
    	 /**
          * @Title: wildcardQuery  
          * @Description: 通配符搜索
          * @param @param key:搜索关键词
          * @param @param type:搜索类型
          * @param @param pagenow
          * @param @param pageSize
          * @param @return
          * @param @throws Exception    参数  
          * @return List<Map<String,Object>>    返回类型  
          * @throws
    	  */
    	public static PageBean<Map<String,Object>> wildcardQuery(String key, String type, Integer pagenow, Integer pageSize)throws Exception{
    		List<Map<String,Object>> list = new ArrayList<Map<String,Object>>();
    		Term t=new Term(type,key);
    		Query query=new WildcardQuery(t);
    		int total = (int)getSercher().search(query, SysConstant.LUCENE_PAGESIZE_ALL).totalHits;
    		if(1 == pagenow){			
    			TopDocs hits0=getSercher().search(query, pageSize);
    			for(ScoreDoc scoreDoc:hits0.scoreDocs){
    				Document doc=getSercher().doc(scoreDoc.doc);
    				list.add(LuceneDocToBean.DocToBean(doc, new TxtTable()));
    			}
    		}else if(pagenow > 1){			
    			TopDocs hits0=getSercher().search(query, pageSize*(pagenow-1));
    			TopDocs hits1=getSercher().searchAfter(hits0.scoreDocs[hits0.scoreDocs.length-1], query, 10);
    			for(ScoreDoc scoreDoc:hits1.scoreDocs){
    				Document doc=getSercher().doc(scoreDoc.doc);
    				list.add(LuceneDocToBean.DocToBean(doc, new TxtTable()));
    			}
    		}
    		PageBean<Map<String,Object>> pageinfo = new PageBean<Map<String,Object>>(pagenow, pageSize, list ,total);
    		return pageinfo;
    	}
    

     注意的是,我这里的代码仅仅是为了提供参考的,其中有很多的是我自己封装的方法,主要是需要了解搜索索引的过程最重要,代码我会在后续给出。

  • 相关阅读:
    Error (10327): VHDL error at xd.vhd(17): can't determine definition of operator ""+"" -- found 0 pos
    FPGA 起脚nCEO/IO管教设置问题
    使用Cross-validation (CV) 调整Extreme learning Machine (ELM) 最优参数的实现(matlab)
    Tools that help you scrape web data----帮助你收集web数据的工具
    采集网页数据---Using Java
    使用正则表达式自动对文本按照字符排序
    Apriori算法实例----Weka,R, Using Weka in my javacode
    关于FP-Growth 算法一个很好的ppt-学习分享
    ARFF文件格式
    Weka-学习
  • 原文地址:https://www.cnblogs.com/advanceBlog/p/9116836.html
Copyright © 2011-2022 走看看