zoukankan html css js c++ java

Lucene3.6第一篇创建索引

Lucene 3.6 source and binary code download

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

public class Indexer {
    public static void main(String[] args) throws Exception {
//        args=new String[2];
//        args[0]="D:/workspace/LuceneAction/lib/IndexDir";
//        args[1]="D:/workspace/HaiduShare/src/com/weishangye/share";
        if (args.length != 2) {
            throw new Exception("Usege Java: " + Indexer.class.getName()
                    + "<indexDir> <dataDir>");
        }
        //索引文件存放路径
        Directory indexDir = new SimpleFSDirectory(new File(args[0]));
        //原始数据文件
        File dataDir = new File(args[1]);

        long start = System.currentTimeMillis();

        //遍历文件夹创建索引文件
        int numIndexed = Index(indexDir, dataDir);

        long end = System.currentTimeMillis();

        System.out.println("Index " + numIndexed + "files took "
                + (end - start) + " milliseconds");
    }

    public static int Index(Directory indexDir, File dataDir) throws IOException {
        if (!dataDir.exists() || !dataDir.isDirectory()) {
            throw new IOException(dataDir+"is not exist or a directory");
        }

        IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
        IndexWriter writer=new IndexWriter(indexDir,config);
        //是否合并索引文件(过时)
        //writer.setUseCompoundFile(false);
        IndexDirectory(writer,dataDir);
        int numIndexed=writer.numDocs();
        //优化索引(过时)
        //writer.optimize();
        writer.close();
        
        return numIndexed;
    }
    
    private static void IndexDirectory(IndexWriter writer,File dir) throws IOException{
        File[] files =dir.listFiles();

        for (int i = 0; i < files.length; i++) {
            File f=files[i];
            if (f.isDirectory()) {
                IndexDirectory(writer, f);
            }
            else {
                IndexFile(writer,f);
            }
        }    
    }
    
    private static void IndexFile(IndexWriter writer,File f) throws IOException {
        if (f.isHidden()||!f.exists()||!f.canRead()) {
            return;
        }
        
        System.out.println("Indexing "+f.getCanonicalPath());
        Document doc=new Document();
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename",new FileReader(f.getCanonicalPath())));
        writer.addDocument(doc);
    }
}

package com.meetlucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line tool that runs a query against an index built by the
 * companion indexer and prints the top matches.
 *
 * <p>Usage: {@code java Searcher <indexDir> <query>}
 */
public class Searcher {
    public static final String FIELD_FILENAME = "filename";
    public static final String FIELD_CONTENTS = "contents";

    /**
     * Entry point: validates the arguments and runs the search.
     *
     * @param args {@code <indexDir> <query>}
     * @throws IllegalArgumentException if exactly two arguments are not given
     *                                  or the index directory is missing
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
                    + " <indexDir> <query>");
        }

        // Index directory previously created by the indexer.
        File indexDir = new File(args[0]);
        String q = args[1];

        if (!indexDir.exists() || !indexDir.isDirectory()) {
            throw new IllegalArgumentException(indexDir
                    + " does not exist or is not a directory");
        }
        search(indexDir, q);
    }

    /**
     * Parses {@code q} against the {@code contents} field, searches the index
     * in {@code indexDir}, and prints the hit count, the elapsed time, and one
     * line per hit for up to ten hits.
     *
     * @param indexDir directory containing the Lucene index
     * @param q        query string in QueryParser syntax
     * @throws Exception if the query is malformed or the index cannot be read
     */
    public static void search(File indexDir, String q) throws Exception {
        Directory directory = new SimpleFSDirectory(indexDir);
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
                QueryParser queryParser = new QueryParser(Version.LUCENE_36,
                        FIELD_CONTENTS, analyzer);
                Query query = queryParser.parse(q);

                // Time only the search call itself.
                long start = System.currentTimeMillis();
                TopDocs topDocs = indexSearcher.search(query, 10);
                long end = System.currentTimeMillis();

                System.out.println("Found " + topDocs.totalHits + " document(s)(in "
                        + (end - start) + " milliseconds) that matched query '" + q
                        + "':");

                for (ScoreDoc sd : topDocs.scoreDocs) {
                    Document doc = indexSearcher.doc(sd.doc);
                    // doc.get(...) returns null for a field that was not stored
                    // at index time.
                    System.out.println(sd.doc + "-->" + doc.get(FIELD_FILENAME)
                            + "-->" + doc.get(FIELD_CONTENTS));
                }
            } finally {
                // The searcher was built on this reader, so release it here.
                indexReader.close();
            }
        } finally {
            directory.close();
        }
    }
}

查看全文

相关阅读:
Java学习-IO流-read()和write()详解
 JAVA中String类常用构造方法
 java的System.exit(0)和System.exit(1)区别。
Eclipse快捷键大全
 Java Arraylist的遍历
 Java Map的遍历
 C++求最大公约数，最小公倍数
 C++sort使用实例
 [Project Euler] 题目汇总
 [leetcode]做过的题的目录

原文地址：https://www.cnblogs.com/a282421083/p/2698622.html