zoukankan      html  css  js  c++  java
  • Lucene Hello World

    一个使用 Lucene 创建索引和搜索索引的示例:

    创建索引:

    public class Indexer {
    private  IndexWriter indexWriter;
    /**
     * 构造器实例化indexWriter
     * @throws Exception 
     */
    public Indexer(String indexPath) throws Exception {
    Directory directory = FSDirectory.open(Paths.get(indexPath));//索引存储的位置
    Analyzer analyzer = new StandardAnalyzer();//标准分析器
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    indexWriter = new IndexWriter(directory, iwc);
    }
    /**
     * 关闭indexWriter
     * @param indexWriter
     * @throws IOException 
     */
    public void close() throws Exception {
    indexWriter.close();
    }
    /**
     * 获取文档Document
     * @throws FileNotFoundException 
     */
    public Document getDocumnet(File f) throws Exception {
    Document doc = new Document();
    doc.add(new TextField("content", new FileReader(f)));
    doc.add(new TextField("tittle",f.getName(),Field.Store.YES));
    doc.add(new TextField("path",f.getCanonicalPath(), Field.Store.YES));
    return doc;
    }
    /**
     * 索引当个文件
     * @throws Exception 
     */
    public void indexFile(File f) throws Exception {
    System.out.println(f.getName());
    Document doc = this.getDocumnet(f);
    indexWriter.addDocument(doc);
    }
    /**
     * 索引一个目录下的所有文件
     * @param filePath 目录路径
     * @return 索引文件的个数
     * @throws Exception 
     */
    public int index(String filePath) throws Exception {
    File[] files = new File(filePath).listFiles();
    for(File f:files) {
    this.indexFile(f);
    }
    return indexWriter.numDocs();
    }
    public static void main(String[] args) {
            String indexPath = "G:\工作\luence\index";
            String dataPath = "G:\工作\luence\data";
            Indexer indexer = null;
            int indexNum=0;
            try {
                indexer = new Indexer(indexPath);
                indexNum = indexer.index(dataPath);
            } catch (Exception e) {
                e.printStackTrace();
            }finally {
                try {
                    indexer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            System.out.println("索引了"+indexNum+"个文件");
        }
    }

    查找索引:

    public class Searcher {
    public static void search(String indexPath,String searchStr) throws Exception {
     
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    IndexReader indeReader = DirectoryReader.open(dir);
    IndexSearcher indexSearch = new IndexSearcher(indeReader);
     
    Analyzer analyzer = new StandardAnalyzer();//标准分词器
    QueryParser parser = new QueryParser("content", analyzer);
    Query query = parser.parse(searchStr);
    TopDocs td = indexSearch.search(query, 10);
    for(ScoreDoc sc:td.scoreDocs) {
    Document doc = indexSearch.doc(sc.doc);
    System.out.println(doc.get("tittle"));
    System.out.println(doc.get("path"));
    }
    }
    public static void main(String[] args) throws Exception {
    Searcher.search("G:\工作\luence\index\", "Hollywood");
    }
    }
  • 相关阅读:
    Python模块进阶、标准库、扩展库
    python垃圾回收机制
    VMWare workstation 安装 CentOS 8后自适应调整分辨率(如1920x1080)
    使用 Zeal 打造属于自己的文档
    Erlang 开发者的福音:IntelliJ IDEA 的 Erlang 插件
    Intellij IDEA 14的注册码
    在Intellij IDEA或者PhpStorm下用X-debug调试PHP
    PHPCMS 核心代码与 www 分离部署
    PHPCMS如何实现后台访问限制?
    推荐:PHPCMS v9 安全防范教程!
  • 原文地址:https://www.cnblogs.com/jnba/p/10522723.html
Copyright © 2011-2022 走看看