zoukankan      html  css  js  c++  java
  • 一:Lucene初体验

    package com.cmy.lucene.lucene;
    
    import java.io.File;
    import java.io.FileReader;
    import java.nio.file.Paths;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    public class Indexer {
    
        private IndexWriter writer;
        
        /**
         * 构造方法,实例化indexwriter
         * @param indexDir
         * @throws Exception
         */
        public Indexer(String indexDir) throws Exception{
            Directory directory  = FSDirectory.open(Paths.get(indexDir));
            Analyzer analyzer = new StandardAnalyzer();//标准分词器
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(directory, indexWriterConfig);
        }
        
        /**
         * 
         * @throws Exception
         */
        public void close() throws Exception{
            writer.close();
        }
        
        /**
         * 
         * @param dataDir
         * @throws Exception
         */
        public int index(String dataDir) throws Exception{
            File []files = new File(dataDir).listFiles();
            for(File file:files){
                IndexFile(file);
            }
            return writer.numDocs();//返回索引文件的数量
        }
    
        /**
         * 索引指定文件
         * @param file
         * @throws Exception 
         */
        private void IndexFile(File file) throws Exception {
            System.out.println("索引文件:"+file.getCanonicalPath());//返回规范化的绝对路径
            Document document = getDocument(file);
            writer.addDocument(document);;
        }
    
        /**
         * 获取文档,文档里再设置每个字段
         * @param file
         * @return 
         */
        private Document getDocument(File file) throws Exception{
            Document document = new Document();//定义文档对象
            document.add(new TextField("contents",new FileReader(file)));//在文档中引入字段(key,value)形式
            document.add(new TextField("fileName",file.getName(),Field.Store.YES));
            document.add(new TextField("fullPath",file.getCanonicalPath(),Field.Store.YES));
            return document;
        }
        
        public static void main(String[] args) {
            String indexDir = "D:\lucene";
            String dataDir = "E:\JavaEE\luceneData";
            Indexer indexer = null;
            int numIndexed = 0;
            long start = System.currentTimeMillis();
            try {
                indexer = new Indexer(indexDir);
                numIndexed = indexer.index(dataDir);
            } catch (Exception e) {
                e.printStackTrace();
                e.printStackTrace();
            }finally {
                try {
                    indexer.close();
                } catch (Exception e2) {
                    e2.printStackTrace();
                }
            }
            long end = System.currentTimeMillis();
            System.out.println("索引: "+numIndexed+" 个文件,话费了"+(end-start)+" s");
        }
    }

    package com.cmy.lucene.lucene;
    
    import java.nio.channels.ScatteringByteChannel;
    import java.nio.file.Paths;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    public class Searcher {
    
        public static void search(String indexDir,String qString) throws Exception{
            
            Directory directory = FSDirectory.open(Paths.get(indexDir));
            IndexReader reader = DirectoryReader.open(directory);//读取完整路径下的reader
            IndexSearcher iSearcher = new IndexSearcher(reader);//索引查询器,参数是Indexreader
            Analyzer analyzer = new StandardAnalyzer();//标准分词器
            QueryParser parser = new QueryParser("contents", analyzer);//解析制定内容,使用制定分词器
            Query query = parser.parse(qString);
            long start = System.currentTimeMillis();
            TopDocs hits = iSearcher.search(query, 10);//传入query对象,返回的数据数量,此处返回前十条,哎,那总该有个顺序吧,怎么搞
            long end = System.currentTimeMillis();
            System.out.println("匹配"+qString+",总共花费"+(end-start)+" 毫秒");
            //遍历结果集,获取文档
            for(ScoreDoc scoreDoc:hits.scoreDocs){
                Document document = iSearcher.doc(scoreDoc.doc);//获取结果集中的doc主键(id)并据此查询获取文档对象
                System.out.println("fullPath: "+document.get("fullPath"));//获取完整的fullPath,
                
            }
            reader.close();
        }
        public static void main(String[] args) {
            String indexDir = "D:\lucene";
            String dataDir = "Zygmunt Saloni";
            try {
                search(indexDir,dataDir);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

  • 相关阅读:
    上班5个月总结
    使用余弦定理计算两篇文章的相似性
    9月10日 小结
    软件测试
    《增长黑客》笔记
    统计学术语
    数据分析师:数据分析工作常见七种错误及其规避技巧(转自经管之家)
    输入一个日期,计算这个日期与 2018-03-12差多少天;
    求输入数字的阶乘 及加和 #s=1!+2!+3!+…..+n!
    列表去重
  • 原文地址:https://www.cnblogs.com/tingbogiu/p/6072190.html
Copyright © 2011-2022 走看看