zoukankan      html  css  js  c++  java
  • Lucene全文检索引擎

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
      <!-- Maven coordinates of this demo project -->
      <groupId>demo.lucene</groupId>
      <artifactId>Lucene01</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <build/>
    
      <!-- All three Lucene artifacts below are pinned to the same version (5.3.1) -->
      <dependencies>
        <!-- Lucene core library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- Lucene query parser library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <!-- Lucene common analyzers library -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
      </dependencies>
    </project>
    

      

    import java.io.File;
    import java.io.FileReader;
    import java.nio.file.Paths;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    /**
     * 建立索引的类
     * @author Ni Shengwu
     *
     */
    public class Indexer {
    
        private IndexWriter writer; //写索引实例
    
        //构造方法,实例化IndexWriter
        public Indexer(String indexDir) throws Exception {
            Directory dir = FSDirectory.open(Paths.get(indexDir));
            Analyzer analyzer = new StandardAnalyzer(); //标准分词器,会自动去掉空格啊,is a the等单词
            IndexWriterConfig config = new IndexWriterConfig(analyzer); //将标准分词器配到写索引的配置中
            writer = new IndexWriter(dir, config); //实例化写索引对象
        }
        //关闭写索引
        public void close() throws Exception {
            writer.close();
        }
        //索引指定目录下的所有文件
        public int indexAll(String dataDir) throws Exception {
            File[] files = new File(dataDir).listFiles(); //获取该路径下的所有文件
            for(File file : files) {
                indexFile(file); //调用下面的indexFile方法,对每个文件进行索引
            }
            return writer.numDocs(); //返回索引的文件数
        }
        //索引指定的文件
        private void indexFile(File file) throws Exception {
            System.out.println("索引文件的路径:" + file.getCanonicalPath());
            Document doc = getDocument(file); //获取该文件的document
            writer.addDocument(doc); //调用下面的getDocument方法,将doc添加到索引中
        }
        //获取文档,文档里再设置每个字段,就类似于数据库中的一行记录
        private Document getDocument(File file) throws Exception{
            Document doc = new Document();
            //添加字段
            doc.add(new TextField("contents", new FileReader(file))); //添加内容
            doc.add(new TextField("fileName", file.getName(), Field.Store.YES)); //添加文件名,并把这个字段存到索引文件里
            doc.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES)); //添加文件路径
            return doc;
        }
        public static void main(String[] args) {
            String indexDir = "D:\lucene"; //将索引保存到的路径
            String dataDir = "D:\lucene\data"; //需要索引的文件数据存放的目录
            Indexer indexer = null;
            int indexedNum = 0;
            long startTime = System.currentTimeMillis(); //记录索引开始时间
            try {
                indexer = new Indexer(indexDir);
                indexedNum = indexer.indexAll(dataDir);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                try {
                    indexer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            long endTime = System.currentTimeMillis(); //记录索引结束时间
            System.out.println("索引耗时" + (endTime-startTime) + "毫秒");
            System.out.println("共索引了" + indexedNum + "个文件");
        }
    }
    

      

    import java.nio.file.Paths;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    public class Searcher {
    	
    	public static void search(String indexDir, String q) throws Exception {
    
            Directory dir = FSDirectory.open(Paths.get(indexDir)); //获取要查询的路径,也就是索引所在的位置
            IndexReader reader = DirectoryReader.open(dir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(); //标准分词器,会自动去掉空格啊,is a the等单词
            QueryParser parser = new QueryParser("contents", analyzer); //查询解析器
            Query query = parser.parse(q); //通过解析要查询的String,获取查询对象
    
            long startTime = System.currentTimeMillis(); //记录索引开始时间
            TopDocs docs = searcher.search(query, 10);//开始查询,查询前10条数据,将记录保存在docs中
            long endTime = System.currentTimeMillis(); //记录索引结束时间
            System.out.println("匹配" + q + "共耗时" + (endTime-startTime) + "毫秒");
            System.out.println("查询到" + docs.totalHits + "条记录");
    
            for(ScoreDoc scoreDoc : docs.scoreDocs) { //取出每条查询结果
                Document doc = searcher.doc(scoreDoc.doc); //scoreDoc.doc相当于docID,根据这个docID来获取文档
                System.out.println(doc.get("fullPath")); //fullPath是刚刚建立索引的时候我们定义的一个字段
            }
            reader.close();
        }
        public static void main(String[] args) {
            String indexDir = "D:\lucene";
            String q = "generate-maven-artifacts"; //查询这个字符串
            try {
                search(indexDir, q);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    

      

    pom.xml

  • 相关阅读:
    105个软件测试工具大放送
    2016年开源巨献:来自百度的71款开源项目
    开源代码:Http请求封装类库HttpLib介绍、使用说明
    C#的HTTP开发包 HttpLib
    dropzonejs中文翻译手册 DropzoneJS是一个提供文件拖拽上传并且提供图片预览的开源类库.
    Windows平台分布式架构实践
    Windows平台下利用APM来做负载均衡方案
    C# .net dotnet属性定义属性,以提供显示明称,默认值
    细说ASP.NET Forms身份认证
    IIS 7.5 Application Warm-Up Module
  • 原文地址:https://www.cnblogs.com/Jansens520/p/7813924.html
Copyright © 2011-2022 走看看