zoukankan      html  css  js  c++  java
  • IKAnalyzer2012FF_u1.jar + lucene4.0 简单实例 悟寰轩

      1 import java.io.File;
      2 import java.io.IOException;
      3 import java.io.StringReader;
      4 
      5 import org.apache.lucene.analysis.Analyzer;
      6 import org.apache.lucene.analysis.TokenStream;
      7 import org.apache.lucene.document.Document;
      8 import org.apache.lucene.document.TextField;
      9 import org.apache.lucene.document.Field.Store;
     10 import org.apache.lucene.index.IndexReader;
     11 import org.apache.lucene.index.IndexWriter;
     12 import org.apache.lucene.index.IndexWriterConfig;
     13 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
     14 import org.apache.lucene.queryparser.classic.ParseException;
     15 import org.apache.lucene.queryparser.classic.QueryParser;
     16 import org.apache.lucene.search.IndexSearcher;
     17 import org.apache.lucene.search.Query;
     18 import org.apache.lucene.search.ScoreDoc;
     19 import org.apache.lucene.search.TopDocs;
     20 import org.apache.lucene.search.TopScoreDocCollector;
     21 import org.apache.lucene.search.highlight.Highlighter;
     22 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
     23 import org.apache.lucene.search.highlight.QueryScorer;
     24 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
     25 import org.apache.lucene.store.Directory;
     26 import org.apache.lucene.store.FSDirectory;
     27 import org.apache.lucene.util.Version;
     28 import org.wltea.analyzer.lucene.IKAnalyzer;
     29 
     30 public class IndexTools {
     31     /**
     32      * 获得indexwriter对象
     33      * 
     34      * @param dir
     35      * @return
     36      * @throws IOException
     37      * @throws Exception
     38      */
     39     private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException {
     40         IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
     41         return new IndexWriter(dir, iwc);
     42     }
     43     
     44     /**
     45      * 关闭indexwriter对象
     46      * 
     47      * @throws IOException
     48      * 
     49      * @throws Exception
     50      */
     51     private void closeWriter(IndexWriter indexWriter) throws IOException {
     52         if (indexWriter != null) {
     53             indexWriter.close();
     54         }
     55     }
     56     
     57     /**
     58      * 创建索引
     59      * 
     60      * @throws InvalidTokenOffsetsException
     61      */
     62     public void createIndex() throws InvalidTokenOffsetsException {
     63         String indexPath = "D://luceneindex"; // 建立索引文件的目录
     64         // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分
     65         Analyzer analyzer = new IKAnalyzer(true);
     66         IndexWriter indexWriter = null;
     67         Directory directory = null;
     68         try {
     69             directory = FSDirectory.open(new File(indexPath));
     70             indexWriter = getIndexWriter(directory, analyzer);
     71         } catch (Exception e) {
     72             System.out.println("索引打开异常!");
     73         }
     74         // 添加索引
     75         try {
     76             Document document = new Document();
     77             document.add(new TextField("filename", "标题:起点", Store.YES));
     78             document.add(new TextField("content", "内容:我是一名程序员", Store.YES));
     79             indexWriter.addDocument(document);
     80             Document document1 = new Document();
     81             document1.add(new TextField("filename", "标题:终点", Store.YES));
     82             document1.add(new TextField("content", "内容:我不再只是程序员", Store.YES));
     83             indexWriter.addDocument(document1);
     84             indexWriter.commit();
     85         } catch (IOException e1) {
     86             System.out.println("索引创建异常!");
     87         }
     88         try {
     89             closeWriter(indexWriter);
     90         } catch (Exception e) {
     91             System.out.println("索引关闭异常!");
     92         }
     93     }
     94     
     95     /**
     96      * 搜索
     97      * 
     98      * @throws ParseException
     99      * @throws IOException
    100      * @throws InvalidTokenOffsetsException
    101      */
    102     @SuppressWarnings("deprecation")
    103     public void searchIndex() throws ParseException, IOException, InvalidTokenOffsetsException {
    104         String indexPath = "D://luceneindex"; // 建立索引文件的目录
    105         // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分
    106         Analyzer analyzer = new IKAnalyzer(true);
    107         Directory directory = null;
    108         try {
    109             directory = FSDirectory.open(new File(indexPath));
    110         } catch (Exception e) {
    111             System.out.println("索引打开异常!");
    112         }
    113         IndexReader ireader = null;
    114         IndexSearcher isearcher = null;
    115         try {
    116             ireader = IndexReader.open(directory);
    117         } catch (IOException e) {
    118             System.out.println("打开索引文件!");
    119         }
    120         isearcher = new IndexSearcher(ireader);
    121         String keyword = "程序员";
    122         // 使用QueryParser查询分析器构造Query对象
    123         // eg:单个字段查询
    124         // String fieldName = "content";
    125         // QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
    126         String[] fields = { "filename", "content" };
    127         QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer);
    128         qp.setDefaultOperator(QueryParser.AND_OPERATOR);
    129         Query query = qp.parse(keyword);
    130         // 搜索相似度最高的5条记录
    131         TopDocs topDocs = isearcher.search(query, 25);
    132         System.out.println("命中:" + topDocs.totalHits);
    133         // 输出结果
    134         ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    135         for (int i = 0; i < topDocs.totalHits; i++) {
    136             Document targetDoc = isearcher.doc(scoreDocs[i].doc);
    137             System.out.println("内容:" + targetDoc.toString());
    138         }
    139         // 分页,高亮显示
    140         higherIndex(analyzer, isearcher, query, topDocs);
    141     }
    142     
    143     public static void main(String[] args) {
    144         IndexTools tool = new IndexTools();
    145         try {
    146             tool.searchIndex();
    147         } catch (ParseException e) {
    148             System.out.println("解析错误");
    149         } catch (IOException e) {
    150             System.out.println("读取文件流错误");
    151         } catch (InvalidTokenOffsetsException e) {
    152             System.out.println("查询失败");
    153         }
    154     }
    155     
    156     /**
    157      * 分页,高亮显示
    158      * 
    159      * @param analyzer
    160      * @param isearcher
    161      * @param query
    162      * @param topDocs
    163      * @throws IOException
    164      * @throws InvalidTokenOffsetsException
    165      */
    166     public void higherIndex(Analyzer analyzer, IndexSearcher isearcher, Query query, TopDocs topDocs)
    167             throws IOException, InvalidTokenOffsetsException {
    168         TopScoreDocCollector results = TopScoreDocCollector.create(topDocs.totalHits, false);
    169         isearcher.search(query, results);
    170         // 分页取出指定的doc(开始条数, 取几条)
    171         ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs;
    172         for (int i = 0; i < docs.length; i++) {
    173             Document targetDoc = isearcher.doc(docs[i].doc);
    174             System.out.println("内容:" + targetDoc.toString());
    175         }
    176         // 关键字高亮显示的html标签,需要导入lucene-highlighter-3.5.0.jar
    177         SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    178         Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
    179         for (int i = 0; i < docs.length; i++) {
    180             Document doc = isearcher.doc(docs[i].doc);
    181             // 标题增加高亮显示
    182             TokenStream tokenStream1 = analyzer.tokenStream("filename", new StringReader(doc.get("filename")));
    183             String title = highlighter.getBestFragment(tokenStream1, doc.get("filename"));
    184             // 内容增加高亮显示
    185             TokenStream tokenStream2 = analyzer.tokenStream("content", new StringReader(doc.get("content")));
    186             String content = highlighter.getBestFragment(tokenStream2, doc.get("content"));
    187             System.out.println(doc.get("filename") + " : " + title + " : " + content);
    188         }
    189     }
    190 }
  • 相关阅读:
    自定义View的ToolBar布局报错Error:(2) No resource identifier found for attribute 'context' in package 'c
    在学git之主分支 branch
    获取发布版SHA1
    关于开启线程与UI的操作
    播放音频和视频(VideoView控件)
    通知栏Notification的应用
    Android 真机调式 Installation failed with message 远程主机强迫关闭了一个现有的连接。. It is possible that this issue is resolved by uninstalling an existing version of the apk if it is present, and then re-installing. WA
    运行程序申请危险权限
    mysql乐观锁总结和实践
    Nginx配置文件nginx.conf中文详解
  • 原文地址:https://www.cnblogs.com/sunxucool/p/2799805.html
Copyright © 2011-2022 走看看