zoukankan      html  css  js  c++  java
  • Lucene3.6第一篇创建索引

    Lucene 3.6 source and binary code download

    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.SimpleFSDirectory;
    import org.apache.lucene.util.Version;
    
    public class Indexer {
        public static void main(String[] args) throws Exception {
    //        args=new String[2];
    //        args[0]="D:/workspace/LuceneAction/lib/IndexDir";
    //        args[1]="D:/workspace/HaiduShare/src/com/weishangye/share";
            if (args.length != 2) {
                throw new Exception("Usege Java: " + Indexer.class.getName()
                        + "<indexDir> <dataDir>");
            }
            //索引文件存放路径
            Directory indexDir = new SimpleFSDirectory(new File(args[0]));
            //原始数据文件
            File dataDir = new File(args[1]);
    
            long start = System.currentTimeMillis();
    
            //遍历文件夹创建索引文件
            int numIndexed = Index(indexDir, dataDir);
    
            long end = System.currentTimeMillis();
    
            System.out.println("Index " + numIndexed + "files took "
                    + (end - start) + " milliseconds");
        }
    
        public static int Index(Directory indexDir, File dataDir) throws IOException {
            if (!dataDir.exists() || !dataDir.isDirectory()) {
                throw new IOException(dataDir+"is not exist or a directory");
            }
    
            IndexWriterConfig config=new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
            IndexWriter writer=new IndexWriter(indexDir,config);
            //是否合并索引文件(过时)
            //writer.setUseCompoundFile(false);
            IndexDirectory(writer,dataDir);
            int numIndexed=writer.numDocs();
            //优化索引(过时)
            //writer.optimize();
            writer.close();
            
            return numIndexed;
        }
        
        private static void IndexDirectory(IndexWriter writer,File dir) throws IOException{
            File[] files =dir.listFiles();
    
            for (int i = 0; i < files.length; i++) {
                File f=files[i];
                if (f.isDirectory()) {
                    IndexDirectory(writer, f);
                }
                else {
                    IndexFile(writer,f);
                }
            }    
        }
        
        private static void IndexFile(IndexWriter writer,File f) throws IOException {
            if (f.isHidden()||!f.exists()||!f.canRead()) {
                return;
            }
            
            System.out.println("Indexing "+f.getCanonicalPath());
            Document doc=new Document();
            doc.add(new Field("contents", new FileReader(f)));
            doc.add(new Field("filename",new FileReader(f.getCanonicalPath())));
            writer.addDocument(doc);
        }
    }
    package com.meetlucene;
    
    import java.io.File;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.SimpleFSDirectory;
    import org.apache.lucene.util.Version;
    
    /**
     * Command-line tool that runs a query against the {@code contents} field of a
     * Lucene 3.6 index and prints the top ten hits.
     */
    public class Searcher {
        public static final String FIELD_FILENAME = "filename";
        public static final String FIELD_CONTENTS = "contents";

        /**
         * Entry point.
         *
         * @param args {@code args[0]} is the index directory,
         *             {@code args[1]} is the query string
         * @throws Exception if the arguments are wrong, the index directory is invalid,
         *                   or the search fails
         */
        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
                        + " <indexDir> <query>");
            }

            // Index directory to search.
            File indexDir = new File(args[0]);
            String q = args[1];

            if (!indexDir.exists() || !indexDir.isDirectory()) {
                throw new Exception(indexDir
                        + " does not exist or is not a directory");
            }
            search(indexDir, q);
        }

        /**
         * Parses {@code q} against the {@code contents} field, searches the index in
         * {@code indexDir} and prints the hit count, elapsed time and one line per hit.
         *
         * @param indexDir directory holding the Lucene index
         * @param q        query string in QueryParser syntax
         * @throws Exception if the index cannot be opened or the query cannot be parsed
         */
        public static void search(File indexDir, String q) throws Exception {
            Directory fsDir = new SimpleFSDirectory(indexDir);
            try {
                IndexReader indexReader = IndexReader.open(fsDir);
                try {
                    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                    try {
                        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
                        QueryParser queryParser = new QueryParser(Version.LUCENE_36,
                                FIELD_CONTENTS, analyzer);

                        Query query = queryParser.parse(q);

                        long start = System.currentTimeMillis();
                        // TopDocs replaces the deprecated Hits API.
                        TopDocs topDocs = indexSearcher.search(query, 10);
                        long end = System.currentTimeMillis();

                        System.out.println("Found " + topDocs.totalHits + " document(s)(in "
                                + (end - start) + " milliseconds) that matched query '" + q
                                + "':");

                        for (ScoreDoc sd : topDocs.scoreDocs) {
                            Document doc = indexSearcher.doc(sd.doc);

                            // NOTE(review): doc.get(...) presumably yields null for any
                            // field that was indexed without being stored - confirm
                            // against how the index was built.
                            System.out.println(sd.doc + "-->" + doc.get(FIELD_FILENAME)
                                    + "-->" + doc.get(FIELD_CONTENTS));
                        }
                    } finally {
                        indexSearcher.close();
                    }
                } finally {
                    // A searcher built from an existing reader does not close it.
                    indexReader.close();
                }
            } finally {
                fsDir.close();
            }
        }
    }
  • 相关阅读:
    HDU1260DP
    HDU1114 背包
    HDU1078记忆化搜索
    HDU1024 最大m子段和
    Codeforces Round #401 (Div. 2) A,B,C,D,E
    HDU3666 差分约束
    HDU1540 区间合并
    HDU3308 线段树(区间合并)
    Codeforces Round #403 (Div. 2) B 三分 C dfs
    HDU1573 线性同余方程(解的个数)
  • 原文地址:https://www.cnblogs.com/a282421083/p/2698622.html
Copyright © 2011-2022 走看看