zoukankan      html  css  js  c++  java
  • Lucene 4.9索引txt文件

    暂时只是跑起来了,不知道是否正确,困了,睡觉了,改天再弄。搜索那块是分页的,也没仔细弄。。。

    参考着 http://blog.csdn.net/kingskyleader/article/details/8444739

    在data下放了三个txt...

    S:\lucene\data\永生.txt

    S:\lucene\data\1.txt

    S:\lucene\data\2.txt

    永生是本小说,汉语的应该没有英文。

    1.txt 内容: hello

    2.txt 内容: hi hello  哈哈

    程序运行之后控制台打印的信息:

    adding [Ljava.io.File;@3f611531
    adding [Ljava.io.File;@3f611531
    adding [Ljava.io.File;@3f611531
    S:\lucene\data\1.txt
    1407857427736
    S:\lucene\data\2.txt
    1407857444245

    具体改天再研究。

    下面是代码:

    pom:

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <groupId>LuceneTest</groupId>
      <artifactId>lucene</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>lucene</name>
      <url>http://maven.apache.org</url>
    
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      </properties>
    
      <dependencies>
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        
        <!-- lucene -->
        <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>4.9.0</version>
        </dependency>
        
        <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>4.9.0</version>
        </dependency>
        
        <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>4.9.0</version>
        </dependency>
        
        <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-highlighter</artifactId>
        <version>4.9.0</version>
        </dependency>
        
      </dependencies>
    </project>

    建立索引:

    package lucene;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.LongField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    public class InitIndex {
    
        public void creatIndex() throws IOException {
    
            boolean create = true;
    
            File data = new File("S:\lucene\data");
            File index = new File("S:\lucene\index");
    
            Directory dir = FSDirectory.open(index);
    
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9,
                    analyzer);
    
            if (create) {
                // Create a new index in the directory, removing any
                // previously indexed documents:
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                // Add new documents to an existing index:
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }
    
            IndexWriter iw = new IndexWriter(dir, iwc);
    
            File[] file = data.listFiles();
            FileInputStream fis = null;
            for (File f : file) {
                
                fis = new FileInputStream(f);
    
                Document doc = new Document();
    
                Field pathField = new StringField("path", f.getPath(),
                        Field.Store.YES);
                doc.add(pathField);
    
                doc.add(new LongField("modified", f.lastModified(), Field.Store.YES));
    
                doc.add(new TextField("contents", new BufferedReader(
                        new InputStreamReader(fis, "GBK"))));
    
                if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can
                    // be there):
                    System.out.println("adding " + file);
                    iw.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed) so
                    // we use updateDocument instead to replace the old one matching
                    // the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    iw.updateDocument(new Term("path", f.getPath()), doc);
                }
    
            }
            iw.close();
            fis.close();
            
        }
    }

    搜索:

    package lucene;
    
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    public class Search {
    
        public void query() throws IOException, ParseException {
    
            String queries = "hello";
    
            int hitsPerPage = 10; 
            
            File index = new File("S:\lucene\index");
    
            IndexReader reader = DirectoryReader.open(FSDirectory.open(index));
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
            
            QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents", analyzer);  
    
            Query query = parser.parse(queries);
            
            TopDocs results = searcher.search(query, 5 * hitsPerPage);
            ScoreDoc[] hits = results.scoreDocs;  
            int numTotalHits = results.totalHits;  
             
            
            int start = 0;  
            int end = Math.min(numTotalHits, hitsPerPage);  
            
            for (int i = start; i < end; i++) {  
          
                Document doc = searcher.doc(hits[i].doc);  
                String path = doc.get("path");  
               System.out.println(path);
               String modified=doc.get("modified");
               System.out.println(modified);
                            
              }  
          
    
          
        
    
        }
    }

    主函数:

    package lucene;
    
    import java.io.IOException;
    
    import org.apache.lucene.queryparser.classic.ParseException;
    
    public class Main {
    
        /**
         * Entry point: builds the index from the data directory, then runs
         * the sample search against it.
         *
         * @param args unused
         * @throws IOException    if indexing or searching fails on I/O
         * @throws ParseException if the sample query cannot be parsed
         */
        public static void main(String[] args) throws IOException, ParseException {
            // Index first so the search below has something to hit.
            new InitIndex().creatIndex();
            new Search().query();
        }
    }
  • 相关阅读:
    【转】寻找最好的笔记软件:海选篇 (v1.0)
    【转】git rebase简介(基本篇)
    【转】学会这13个原则写UI界面文案,用户才能秒懂
    sqlserver巧用row_number和partition by分组取top数据
    使用SQL语句清空数据库所有表的数据
    在 SQL Server 2005 中配置数据库邮件
    SQL compute by 的使用
    SQL Cursor 基本用法
    Sqlserver双机热备文档(无域)
    查询分页的几种Sql写法
  • 原文地址:https://www.cnblogs.com/acehalo/p/3908760.html
Copyright © 2011-2022 走看看