zoukankan      html  css  js  c++  java
  • Lucene入门示例

      注意:本示例中的lucene版本需在jdk7以上使用。

    一、pom.xml

    <!-- Maven build descriptor for the Lucene 5.0.0 getting-started demo. -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>cd.jeryzhi</groupId>
        <artifactId>luceneDemo</artifactId>
        <version>1.0</version>
        <name>${project.artifactId}</name>

        <properties>
            <!-- The demo source contains non-ASCII string literals, so the build must not
                 fall back to the platform default charset.
                 NOTE(review): assumes the .java files are saved as UTF-8; confirm. -->
            <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
            <!-- Lucene 5.0.0 needs JDK 7 or newer; make the compiler level explicit
                 instead of relying on the compiler plugin's default. -->
            <maven.compiler.source>1.7</maven.compiler.source>
            <maven.compiler.target>1.7</maven.compiler.target>
        </properties>

        <dependencies>
            <!-- Core indexing and search API. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>5.0.0</version>
            </dependency>
            <!-- Classic QueryParser used to turn the search string into a Query. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>5.0.0</version>
            </dependency>
            <!-- Provides StandardAnalyzer. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>5.0.0</version>
            </dependency>
        </dependencies>

    </project>

    二、代码:

      

    package luceneDemo;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    
    public class FindTxtManager {
    
        public static void main(String[] args) {
            
            find("C:\Users\Administrator\Desktop\新建文件夹", "Application Strategy and Integration","pdf");
    
        }
    
        public static void find(String dirPath, String findStr,String fileType) {
            try {
                Directory directory = new RAMDirectory();
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriterConfig config = new IndexWriterConfig(analyzer);
                IndexWriter iwriter = new IndexWriter(directory, config);
    
                File[] files = new File(dirPath).listFiles();
                List<File> fileList = new ArrayList<File>();
                StringBuffer sb = new StringBuffer();
                for (File file : files) {
                    if (file.getName().lastIndexOf("."+fileType) > 0) {
                        fileList.add(file);
                        String fileStr = txt2String(file);
                        sb.append(fileStr);
                        Document document = new Document();
                        document.add(new TextField("filename", file.getName(), Store.YES));
                        document.add(new TextField("content", fileStr, Store.YES));
                        document.add(new TextField("path", file.getPath(), Store.YES));
                        iwriter.addDocument(document);
                        // iwriter.commit();
                    }
                }
                iwriter.close();
    
                DirectoryReader ireader = DirectoryReader.open(directory);
                IndexSearcher isearcher = new IndexSearcher(ireader);
    
                QueryParser parser = new QueryParser("content", analyzer);
                Query query = parser.parse(findStr);
                ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    
                System.out.println(hits.length);
                for (int i = 0; i < hits.length; i++) {
                    Document hitDoc = isearcher.doc(hits[i].doc);
                    System.out.println("____________________________");
    //                System.out.println(hitDoc.get("filename"));
    //                System.out.println(hitDoc.get("content"));
                    System.out.println(hitDoc.get("path"));
                    System.out.println("____________________________");
                }
                ireader.close();
                directory.close();
    
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        public static String txt2String(File file) {
            StringBuffer sb = new StringBuffer();
            try {
                BufferedReader br = new BufferedReader(new FileReader(file));// 构造一个BufferedReader类来读取文件
                String s = null;
                while ((s = br.readLine()) != null) {// 使用readLine方法,一次读一行
                    sb.append("
    ").append(s);
                }
                br.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            return sb.toString();
        }
    }

    输出:

    1
    ____________________________
    C:\Users\Administrator\Desktop\新建文件夹\java.pdf
    ____________________________
  • 相关阅读:
    OnMeasureItem和OnDrawItem的区别和联系
    DockPanel 类
    C# 源码 AForge.NET
    System.Windows.Forms.SplitContainer : ContainerControl, ISupportInitialize
    System.Windows.Forms.Control : Component, IOleControl, IOleObject, IOleInPlaceObject, IOleInPlaceActiveObject....
    System.ComponentModel.Component : MarshalByRefObject, IComponent, IDisposable
    System.Windows.Forms.ListView : Control
    vs2013 密钥_
    系统封装 EasyBoot如何将WIN7安装版提取到光盘
    系统封装 ES3使用方法
  • 原文地址:https://www.cnblogs.com/shoubianxingchen/p/6479544.html
Copyright © 2011-2022 走看看