zoukankan      html  css  js  c++  java
  • Lucene——字符串检索

    Lucene版本太多,最新的已经到8,而网络教程大都还是3,并且有类似于ES、Solr这样的封装框架,学习收益较低,因此并没有花很多时间研究的打算。

    之前双十一购物时,有了个思考:“如何检索一个商品名称?”

    用数据库的Like语句,功能实在太单薄,因此,用搜索引擎搞一个吧。

    以下代码可以微调,设计成文件检索索引。

            <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>3.6.2</version>
            </dependency>
    package lucese.test;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Small Lucene 3.6 demo that indexes plain strings and searches them.
     *
     * <p>Two directories are kept: an on-disk {@link FSDirectory} and a {@link RAMDirectory}
     * that is constructed from the on-disk one in {@link #init(String)}. Strings can be indexed
     * into either directory, queried from either directory, and the in-memory index can be
     * written out to disk with {@link #synce()}.
     *
     * <p>All state is held in static fields and no synchronization is performed.
     * {@link #init(String)} must be called before any other method.
     */
    public class StringIndexer {
      /** Name of the single field every document stores its text under. */
      private static final String KEY = "string";
      /** Maximum number of hits printed by a query. */
      private static final int MAX_HITS = 100;

      /** Path handed to {@link #init(String)}; kept for reference only. */
      private static String path;
      private static Directory indexDirectory;
      private static Directory ramDirectory;
      private static Analyzer analyzer;
      private static IndexWriterConfig ramConfig;
      private static IndexWriterConfig discConfig;

      /**
       * Opens the on-disk index directory, builds the in-memory directory from it and prepares
       * the analyzer and writer configurations.
       *
       * <p>If the class was already initialized, the previously opened directories are closed
       * first so that repeated calls do not leak them.
       *
       * @param out file-system path of the index directory
       * @throws IOException if a directory cannot be opened, copied or closed
       */
      public static void init(String out) throws IOException {
        // Release directories left over from an earlier init() call.
        if (ramDirectory != null) {
          ramDirectory.close();
        }
        if (indexDirectory != null) {
          indexDirectory.close();
        }

        path = out;
        indexDirectory = FSDirectory.open(new File(out));
        ramDirectory = new RAMDirectory(indexDirectory);

        // Standard tokenizer/analyzer.
        analyzer = new StandardAnalyzer(Version.LUCENE_36);

        ramConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);

        // CREATE mode: a writer opened with this config starts a fresh on-disk index.
        discConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        discConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
      }

      /**
       * Adds {@code str} as a new document to the in-memory index.
       *
       * @param str text to index
       * @throws IllegalStateException if {@link #init(String)} has not been called
       * @throws RuntimeException wrapping the {@link IOException} if writing fails
       */
      public static void createIndex(String str) {
        requireInitialized();
        try (IndexWriter writer = new IndexWriter(ramDirectory, ramConfig)) {
          write(writer, str);
        } catch (IOException e) {
          throw new RuntimeException("create IndexWriter error:", e);
        }
      }

      /**
       * Adds {@code str} as a new document directly to the on-disk index.
       *
       * <p>NOTE: the disk writer configuration uses {@code OpenMode.CREATE}, so each call
       * starts a new on-disk index instead of appending to the existing one.
       *
       * @param str text to index
       * @throws IllegalStateException if {@link #init(String)} has not been called
       * @throws RuntimeException wrapping the {@link IOException} if writing fails
       */
      public static void createIndexInDisc(String str) {
        requireInitialized();
        try (IndexWriter writer = new IndexWriter(indexDirectory, discConfig)) {
          write(writer, str);
        } catch (IOException e) {
          throw new RuntimeException("create IndexWriter error:", e);
        }
      }

      /**
       * Wraps {@code str} in a single-field document (stored and analyzed) and adds it through
       * the given writer.
       *
       * @param writer open writer to add the document to
       * @param str text to index
       * @throws IOException if the writer fails to add the document
       */
      public static void write(IndexWriter writer, String str) throws IOException {
        Document document = new Document();
        document.add(new Field(KEY, str, Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(document);
      }

      /**
       * Parses {@code str} as a query, runs it against the in-memory index and prints the
       * stored text of up to {@value #MAX_HITS} hits to standard output.
       *
       * @param str query text in {@link QueryParser} syntax
       * @throws IllegalStateException if {@link #init(String)} has not been called
       * @throws RuntimeException wrapping any failure while parsing or searching
       */
      public static void query(String str) {
        requireInitialized();
        search(ramDirectory, str);
      }

      /**
       * Parses {@code str} as a query, runs it against the on-disk index and prints the
       * stored text of up to {@value #MAX_HITS} hits to standard output.
       *
       * @param str query text in {@link QueryParser} syntax
       * @throws IllegalStateException if {@link #init(String)} has not been called
       * @throws RuntimeException wrapping any failure while parsing or searching
       */
      public static void queryFromDisc(String str) {
        requireInitialized();
        search(indexDirectory, str);
      }

      /**
       * Data synchronization: writes the in-memory index to disk.
       *
       * <p>The disk writer is opened in {@code OpenMode.CREATE}, so the on-disk index is
       * rebuilt from the in-memory one rather than appended to. The in-memory directory was
       * seeded from disk in {@link #init(String)}.
       *
       * @throws IllegalStateException if {@link #init(String)} has not been called
       * @throws RuntimeException wrapping the {@link IOException} if writing fails
       */
      public static void synce() {
        requireInitialized();
        try (IndexWriter fsIndexWriter = new IndexWriter(indexDirectory, discConfig)) {
          fsIndexWriter.addIndexes(ramDirectory);
        } catch (IOException e) {
          throw new RuntimeException("create IndexWriter error:", e);
        }
      }

      /**
       * Demo entry point: indexes one sentence in memory, syncs it to disk and queries the
       * on-disk index.
       *
       * @param args optional; {@code args[0]} overrides the default index directory
       *     {@code D:/lucene/index}
       * @throws IOException if the index directory cannot be opened
       */
      public static void main(String[] args) throws IOException {
        String indexPath = args.length > 0 ? args[0] : "D:/lucene/index";
        StringIndexer.init(indexPath);
        StringIndexer.createIndex("Mr.css is a teacher!");
        StringIndexer.synce();
        // Alternatives: query("Mr.css") searches the in-memory index instead, and
        // createIndexInDisc(...) writes straight to disk without going through memory.
        StringIndexer.queryFromDisc("Mr.css");
      }

      /** Shared implementation of both query methods: search {@code directory} and print hits. */
      private static void search(Directory directory, String str) {
        try (IndexSearcher indexSearcher = new IndexSearcher(directory)) {
          // QueryParser arguments: match version, default field name, analyzer.
          QueryParser queryParser = new QueryParser(Version.LUCENE_36, KEY, analyzer);
          Query query = queryParser.parse(str);

          // Fetch the top MAX_HITS matching documents; fewer are returned if fewer match.
          TopDocs hits = indexSearcher.search(query, MAX_HITS);
          for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = indexSearcher.doc(scoreDoc.doc);
            System.out.println(doc.get(KEY));
          }
        } catch (Exception e) {
          // Broad catch kept on purpose: every parse or I/O failure is reported the same way.
          throw new RuntimeException("query error:", e);
        }
      }

      /** Fails fast with a clear message when {@link #init(String)} has not been called yet. */
      private static void requireInitialized() {
        if (indexDirectory == null || ramDirectory == null || analyzer == null) {
          throw new IllegalStateException("StringIndexer.init(String) must be called first");
        }
      }
    }
  • 相关阅读:
    JavaScript创建对象及对象继承
    Shell基础学习小结
    深入理解Java反射
    STL"源码"剖析-重点知识总结
    Java IO工作机制分析
    优先队列原理与实现
    CleanBlog(个人博客+源码)
    线性时间排序
    深入理解FTP协议
    Spring学习之AOP总结帖
  • 原文地址:https://www.cnblogs.com/chenss15060100790/p/11954744.html
Copyright © 2011-2022 走看看