zoukankan      html  css  js  c++  java
  • lucene索引的创建与搜索

    package com.cs.multi;

    import java.io.File;
    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriter.MaxFieldLength;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.LockObtainFailedException;

    import com.cs.itcast.FileToDocumentUtil;
    import com.cs.tool.PrintDocumentUtil;

    public class MultiDocument {

    private static String dataPath = "D:\work-tool\workspace\luencedemo\datasource";

    private static String indexPath = "D:\work-tool\workspace\luencedemo\directory";

    private static Analyzer analyzer = new StandardAnalyzer();

    public static void main(String[] args) throws Exception {

    //createIndex();
    //search();
    }

    //创建索引

    private static void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException, Exception {
    File file = new File(dataPath);

    IndexWriter indexWriter = new IndexWriter(indexPath, analyzer, true, MaxFieldLength.LIMITED);//四个参数是1:建立索引的位置,2:分词器,3:是否创建索引,4:索引的最大文档内容长度.MaxFieldLength.LIMITED表示:10000,MaxFieldLength.UNLIMITED表示无限大

    if(file!=null&&file.isDirectory()){

    File[] files = file.listFiles();
    for (File f : files) {
    Document document = FileToDocumentUtil.fileToDocument(f.getAbsolutePath());
    indexWriter.addDocument(document);//添加文档索引

    }
    indexWriter.close();
    }else{
    return ;
    }
    }

    //搜索
    private static void search() throws CorruptIndexException, IOException{

    IndexSearcher indexSearcher = new IndexSearcher(indexPath);  //搜索索引的位置
    /*第一种 构建Query

    String keyword = "MySQL";
    Query query = new TermQuery(new Term("content", keyword.toLowerCase()));   //Term参数是filed字段,要查找的关键词

    */

    /*第二种 构建Query

    String[] fields = {"path","content","name"};

    QueryParser queryParser = new MultiFieldQueryParser(fields,analyzer);//语言分析器 多字段的语言分析器  这个分词器要和创建索引的分词器一样
    Query query = queryParser.parse(keyword);

    */
    TopDocs topDocs = indexSearcher.search(query, null, 10000);  //查找并返回结果集

    int totalHits = topDocs.totalHits; //查询出来的文档总数

    System.out.println("查询出来的文档总数为:【"+totalHits+"】条记录数");

    //遍历结果集
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    int doc = scoreDoc.doc;//文档编号
    Document document = indexSearcher.doc(doc);//得到该编号的文档
    PrintDocumentUtil.print(document);//打印文档内容
    }

    indexSearcher.close();
    }



    }

    //工具类  将File转化为Document  该Document是lucene的

    import java.io.File;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;

    import com.cs.tool.FileToContent;

    /**
     * Utility that turns a file on disk into a Lucene {@link Document}.
     */
    public class FileToDocumentUtil {

        /**
         * Builds a document describing the file at {@code path}.
         *
         * <p>Fields are added in this order, all with {@code Store.YES}:
         * {@code name} ({@code Index.ANALYZED}), {@code path}
         * ({@code Index.NOT_ANALYZED}, canonical path), {@code content}
         * ({@code Index.ANALYZED}, obtained from {@code FileToContent}) and
         * {@code length} ({@code Index.NO}, the file length as a decimal string).
         *
         * @param path path of the file to convert
         * @return the populated document
         * @throws Exception if resolving the canonical path or reading the content fails
         */
        public static Document fileToDocument(String path) throws Exception {
            final File source = new File(path);
            final Document result = new Document();

            // The last two constructor arguments select storing and indexing/tokenizing.
            final Field nameField =
                    new Field("name", source.getName(), Store.YES, Index.ANALYZED);
            result.add(nameField);

            final Field pathField =
                    new Field("path", source.getCanonicalPath(), Store.YES, Index.NOT_ANALYZED);
            result.add(pathField);

            final Field contentField =
                    new Field("content", FileToContent.fileToContent(source), Store.YES, Index.ANALYZED);
            result.add(contentField);

            final Field lengthField =
                    new Field("length", String.valueOf(source.length()), Store.YES, Index.NO);
            result.add(lengthField);

            return result;
        }
    }

    package com.cs.tool;

    import org.apache.lucene.document.Document;

    /**
     * Utility that dumps the stored fields of a Lucene {@link Document} to standard output.
     */
    public class PrintDocumentUtil {

        /**
         * Prints the {@code path}, {@code name}, {@code content} and {@code length}
         * fields of the document, one labelled line each, in that order.
         *
         * @param doc the document whose fields are printed
         */
        public static void print(Document doc) {
            // Each row pairs the printed label with the field it is read from.
            final String[][] labelledFields = {
                {"文档路径为", "path"},
                {"文档名称为", "name"},
                {"文档内容为", "content"},
                {"文档长度为", "length"},
            };

            for (String[] row : labelledFields) {
                System.out.println(row[0] + doc.get(row[1]));
            }
        }
    }

  • 相关阅读:
    JVM 垃圾收集与内存分配
    JVM 内存管理机制
    JVM 启动调优总结
    Visual Studio 2019 秘钥
    dubbo初学采坑记
    Intellij idea 一个窗口打开多模块并添加依赖
    Intellij idea 自动生成serialVersionUID
    office visio 2019 下载激活
    ASP.NET Core中的配置
    electron快捷键
  • 原文地址:https://www.cnblogs.com/hjy9420/p/4135233.html
Copyright © 2011-2022 走看看