zoukankan      html  css  js  c++  java
  • lucene 建立索引的不同方式

    1.创建一个简单的索引:

    package lia.meetlucene;
    
    import java.io.File;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    
    /**
     * Minimal Lucene 3.0 indexing example: builds a fresh index that contains a
     * single document with a "title" field and a "content" field, both analyzed
     * and stored.
     */
    public class BasicIndexer {

        /**
         * Creates the index in a hard-coded directory and adds one document to it.
         *
         * @param args ignored
         * @throws java.io.IOException if the index directory cannot be opened or
         *         the index cannot be written
         */
        public static void main(String[] args) throws java.io.IOException {
            String indexDir = "C:/Users/Administrator/Desktop/xdj";

            Directory dir = FSDirectory.open(new File(indexDir));

            // The boolean argument is Lucene's "create" flag: true asks for a new
            // index (see the IndexWriter constructor documentation).
            IndexWriter writer = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30),
                    true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                Document doc = new Document();

                // Field "title": stored and analyzed (tokenized) for searching.
                String title = "i love china";
                doc.add(new Field("title", title, Field.Store.YES,
                        Field.Index.ANALYZED));

                // Field "content": stored and analyzed as well.
                String content = "i love you, my mother land! ";
                doc.add(new Field("content", content, Field.Store.YES,
                        Field.Index.ANALYZED));

                writer.addDocument(doc);
            } finally {
                // Close the writer even when adding the document fails, so the
                // index is never left open by a half-finished run.
                writer.close();
            }

            System.out.println("Index Created!");
        }
    }
    View Code

    2.创建一个复杂点的索引:

    package lia.meetlucene;
    
    import java.io.File;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    
    /**
     * Lucene 3.0 indexing example with several documents: builds a fresh index
     * holding three documents, each with "title", "content" and "time" fields.
     */
    public class BasicIndexer {

        /**
         * Creates the index in a hard-coded directory and adds three documents.
         *
         * @param args ignored
         * @throws java.io.IOException if the index directory cannot be opened or
         *         the index cannot be written
         */
        public static void main(String[] args) throws java.io.IOException {
            String indexDir = "C:/Users/Administrator/Desktop/xdj";

            Directory dir = FSDirectory.open(new File(indexDir));

            // The boolean argument is Lucene's "create" flag: true asks for a new
            // index (see the IndexWriter constructor documentation).
            IndexWriter writer = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30),
                    true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                // Document 1
                addDocument(writer, "i love china",
                        "i love you, my mother land! ", "2007-05-31");
                // Document 2
                addDocument(writer, "i love mom",
                        "i love you, my mother! ", "2007-05-31");
                // Document 3
                addDocument(writer, "i love xiaoyue",
                        "i love you, my wife! ", "2007-05-31");
            } finally {
                // Close the writer even when one of the additions fails.
                writer.close();
            }

            System.out.println("Index Three Created!");
        }

        /**
         * Builds one document and adds it to the index.
         *
         * @param writer  open index writer that receives the document
         * @param title   value of the "title" field (stored, indexed as-is without
         *                analysis)
         * @param content value of the "content" field (stored, indexed as-is
         *                without analysis)
         * @param time    value of the "time" field (stored only, not indexed)
         * @throws java.io.IOException if the writer fails to add the document
         */
        private static void addDocument(IndexWriter writer, String title,
                String content, String time) throws java.io.IOException {
            Document doc = new Document();
            doc.add(new Field("title", title, Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("content", content, Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("time", time, Field.Store.YES, Field.Index.NO));
            writer.addDocument(doc);
        }
    }
    View Code

    3.为文件创建一个索引

    package lia.meetlucene;
    
    import java.io.File;
    import java.io.FileReader;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    
    /**
     * Lucene 3.0 example that indexes files: builds a fresh index with one
     * document per file, holding the file name, its text content and its path.
     */
    public class BasicIndexer {

        /**
         * Creates the index in a hard-coded directory and indexes two hard-coded
         * XML files.
         *
         * @param args ignored
         * @throws java.io.IOException if the index directory cannot be opened, a
         *         file cannot be read, or the index cannot be written
         */
        public static void main(String[] args) throws java.io.IOException {
            String indexDir = "C:/Users/Administrator/Desktop/xdj";

            Directory dir = FSDirectory.open(new File(indexDir));

            // The boolean argument is Lucene's "create" flag: true asks for a new
            // index (see the IndexWriter constructor documentation).
            IndexWriter writer = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30),
                    true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                addFileDocument(writer, new File(
                        "E:/xdj/tengxun/a_______________mm/2014-02-19 06.59.53.xml"));
                addFileDocument(writer, new File(
                        "E:/xdj/tengxun/a_______________mm/2014-02-04 11.43.01.xml"));
            } finally {
                // Close the writer even when indexing one of the files fails.
                writer.close();
            }

            System.out.println("File Index Created!");
        }

        /**
         * Builds a document for one file and adds it to the index.
         *
         * <p>Fields: "name" (file name, stored, indexed without analysis),
         * "content" (text supplied through a reader) and "path" (stored only).
         *
         * @param writer open index writer that receives the document
         * @param f      file to index
         * @throws java.io.IOException if the file cannot be opened or the writer
         *         fails to add the document
         */
        private static void addFileDocument(IndexWriter writer, File f)
                throws java.io.IOException {
            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm that this matches the encoding of the XML files.
            FileReader reader = new FileReader(f);
            try {
                Document doc = new Document();
                doc.add(new Field("name", f.getName(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                doc.add(new Field("content", reader));
                doc.add(new Field("path", f.getPath(), Field.Store.YES,
                        Field.Index.NO));
                writer.addDocument(doc);
            } finally {
                // Release the file handle even when addDocument fails; closing a
                // reader that is already closed has no effect.
                reader.close();
            }
        }
    }
    View Code

    4.为某个文件夹的所有文件创建索引

    package lia.meetlucene;
    
    /**
     * Copyright Manning Publications Co.
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;
    
    import java.io.File;
    import java.io.FileFilter;
    import java.io.IOException;
    import java.io.FileReader;
    
    // From chapter 1
    
    /**
     * This code was originally written for Erik's Lucene intro java.net article
     */
    /**
     * Indexes every readable, non-hidden regular file of a directory that passes
     * a {@link FileFilter} (here: files ending in ".txt").
     *
     * <p>The numeric markers in the comments (1 to 10) are the callouts of the
     * original book listing.
     */
    public class Indexer {

        /** Index directory used when no command-line arguments are given. */
        private static final String DEFAULT_INDEX_DIR =
                "C:/Users/Administrator/Desktop/xdj/suoyin";

        /** Data directory used when no command-line arguments are given. */
        private static final String DEFAULT_DATA_DIR =
                "C:/Users/Administrator/Desktop/xdj/data";

        /**
         * Builds the index and reports how many documents it holds and how long
         * indexing took.
         *
         * @param args either empty (the built-in default directories are used) or
         *             exactly {@code <index dir> <data dir>}
         * @throws IllegalArgumentException if one argument or more than two
         *         arguments are given
         * @throws Exception if indexing fails
         */
        public static void main(String[] args) throws Exception {
            String indexDir;
            String dataDir;
            if (args.length == 2) {
                indexDir = args[0]; // 1
                dataDir = args[1]; // 2
            } else if (args.length == 0) {
                indexDir = DEFAULT_INDEX_DIR;
                dataDir = DEFAULT_DATA_DIR;
            } else {
                throw new IllegalArgumentException("Usage: java "
                        + Indexer.class.getName() + " <index dir> <data dir>");
            }

            long start = System.currentTimeMillis();
            Indexer indexer = new Indexer(indexDir);
            int numIndexed;
            try {
                numIndexed = indexer.index(dataDir, new TextFilesFilter());
            } finally {
                indexer.close();
            }
            long end = System.currentTimeMillis();

            System.out.println("Indexing " + numIndexed + " files took "
                    + (end - start) + " milliseconds");
        }

        private final IndexWriter writer;

        /**
         * Opens an index writer on the given directory.
         *
         * @param indexDir path of the directory that holds the index
         * @throws IOException if the directory or the writer cannot be opened
         */
        public Indexer(String indexDir) throws IOException {
            Directory dir = FSDirectory.open(new File(indexDir));

            writer = new IndexWriter(dir, // 3 create the Lucene IndexWriter
                    new SmartChineseAnalyzer(Version.LUCENE_20), // 3
                    true, // 3
                    IndexWriter.MaxFieldLength.UNLIMITED); // 3
        }

        /**
         * Closes the underlying index writer.
         *
         * @throws IOException if closing the writer fails
         */
        public void close() throws IOException {
            writer.close(); // 4 close the Lucene IndexWriter
        }

        /**
         * Indexes the files located directly in {@code dataDir}; sub-directories
         * are skipped, not descended into.
         *
         * @param dataDir directory whose files are indexed
         * @param filter  decides which files are indexed; {@code null} accepts
         *                every file
         * @return the document count reported by the writer
         * @throws IOException if {@code dataDir} cannot be listed
         * @throws Exception if indexing a file fails
         */
        public int index(String dataDir, FileFilter filter) throws Exception {
            File[] files = new File(dataDir).listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or an
                // I/O error occurs; fail with a clear message instead of an NPE.
                throw new IOException("Cannot list files in data directory: "
                        + dataDir);
            }

            for (File f : files) {
                if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                        && (filter == null || filter.accept(f))) {
                    indexFile(f);
                }
            }

            return writer.numDocs(); // 5 return the number of indexed documents
        }

        /** Accepts files whose lower-cased name ends in ".txt". */
        private static class TextFilesFilter implements FileFilter {
            public boolean accept(File path) {
                return path.getName().toLowerCase() // 6 index .txt files only, using FileFilter
                        .endsWith(".txt"); // 6
            }
        }

        /**
         * Builds the Lucene document for one file.
         *
         * <p>{@code Field.Store} controls whether the raw value is kept in the
         * index. {@code Field.Index} controls indexing: {@code ANALYZED} tokenizes
         * the value before indexing it, {@code NOT_ANALYZED} indexes the value
         * as-is without tokenizing, {@code NO} does not index it at all.
         *
         * @param f file to convert
         * @return a document with the fields "contents", "filename" and "fullpath"
         * @throws Exception if the file cannot be opened or its path resolved
         */
        protected Document getDocument(File f) throws Exception {
            Document doc = new Document();
            // NOTE(review): FileReader decodes with the platform default charset,
            // and the reader is not closed here -- presumably Lucene closes it
            // after tokenizing the field; confirm.
            doc.add(new Field("contents", new FileReader(f))); // 7 index file content
            doc.add(new Field("filename", f.getName(), // 8 index file name
                    Field.Store.YES, Field.Index.NOT_ANALYZED)); // 8
            doc.add(new Field("fullpath", f.getCanonicalPath(), // 9 index file full path
                    Field.Store.YES, Field.Index.NOT_ANALYZED)); // 9
            return doc;
        }

        /**
         * Logs the file's canonical path and adds its document to the index.
         *
         * @param f file to index
         * @throws Exception if building or adding the document fails
         */
        private void indexFile(File f) throws Exception {
            System.out.println("Indexing " + f.getCanonicalPath());
            Document doc = getDocument(f);
            writer.addDocument(doc); // 10 add the document to the Lucene index
        }
    }
    
    /*
     * #1 Create index in this directory #2 Index *.txt files from this directory #3
     * Create Lucene IndexWriter #4 Close IndexWriter #5 Return number of documents
     * indexed #6 Index .txt files only, using FileFilter #7 Index file content #8
     * Index file name #9 Index file full path #10 Add document to Lucene index
     */
    View Code

    5.<Lucene in action>第二版索引demo

    package lia.meetlucene;
    
    /**
     * Copyright Manning Publications Co.
     *
     * Licensed under the Apache License, Version 2.0 (the "License");
     * you may not use this file except in compliance with the License.
     * You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */
    
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;
    
    import java.io.File;
    import java.io.FileFilter;
    import java.io.IOException;
    import java.io.FileReader;
    
    // From chapter 1
    
    /**
     * This code was originally written for Erik's Lucene intro java.net article
     */
    /**
     * Indexes every readable, non-hidden regular file of a directory that passes
     * a {@link FileFilter} (here: files ending in ".xml").
     *
     * <p>The numeric markers in the comments (1 to 10) are the callouts of the
     * original book listing.
     */
    public class Indexer {

        /** Index directory used when no command-line arguments are given. */
        private static final String DEFAULT_INDEX_DIR =
                "C:/Users/Administrator/Desktop/xdj/suoyin";

        /** Data directory used when no command-line arguments are given. */
        private static final String DEFAULT_DATA_DIR =
                "C:/Users/Administrator/Desktop/xdj/tengxun/A__Vae";

        /**
         * Builds the index and reports how many documents it holds and how long
         * indexing took.
         *
         * @param args either empty (the built-in default directories are used) or
         *             exactly {@code <index dir> <data dir>}
         * @throws IllegalArgumentException if one argument or more than two
         *         arguments are given
         * @throws Exception if indexing fails
         */
        public static void main(String[] args) throws Exception {
            String indexDir;
            String dataDir;
            if (args.length == 2) {
                indexDir = args[0]; // 1
                dataDir = args[1]; // 2
            } else if (args.length == 0) {
                indexDir = DEFAULT_INDEX_DIR;
                dataDir = DEFAULT_DATA_DIR;
            } else {
                throw new IllegalArgumentException("Usage: java "
                        + Indexer.class.getName() + " <index dir> <data dir>");
            }

            long start = System.currentTimeMillis();
            Indexer indexer = new Indexer(indexDir);
            int numIndexed;
            try {
                numIndexed = indexer.index(dataDir, new TextFilesFilter());
            } finally {
                indexer.close();
            }
            long end = System.currentTimeMillis();

            System.out.println("Indexing " + numIndexed + " files took "
                    + (end - start) + " milliseconds");
        }

        private final IndexWriter writer;

        /**
         * Opens an index writer on the given directory.
         *
         * @param indexDir path of the directory that holds the index
         * @throws IOException if the directory or the writer cannot be opened
         */
        public Indexer(String indexDir) throws IOException {
            Directory dir = FSDirectory.open(new File(indexDir));

            writer = new IndexWriter(dir, // 3 create the Lucene IndexWriter
                    new SmartChineseAnalyzer(Version.LUCENE_20), // 3
                    true, // 3
                    IndexWriter.MaxFieldLength.UNLIMITED); // 3
        }

        /**
         * Closes the underlying index writer.
         *
         * @throws IOException if closing the writer fails
         */
        public void close() throws IOException {
            writer.close(); // 4 close the Lucene IndexWriter
        }

        /**
         * Indexes the files located directly in {@code dataDir}; sub-directories
         * are skipped, not descended into.
         *
         * @param dataDir directory whose files are indexed
         * @param filter  decides which files are indexed; {@code null} accepts
         *                every file
         * @return the document count reported by the writer
         * @throws IOException if {@code dataDir} cannot be listed
         * @throws Exception if indexing a file fails
         */
        public int index(String dataDir, FileFilter filter) throws Exception {
            File[] files = new File(dataDir).listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a directory or an
                // I/O error occurs; fail with a clear message instead of an NPE.
                throw new IOException("Cannot list files in data directory: "
                        + dataDir);
            }

            for (File f : files) {
                if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                        && (filter == null || filter.accept(f))) {
                    indexFile(f);
                }
            }

            return writer.numDocs(); // 5 return the number of indexed documents
        }

        /** Accepts files whose lower-cased name ends in ".xml". */
        private static class TextFilesFilter implements FileFilter {
            public boolean accept(File path) {
                return path.getName().toLowerCase() // 6 index .xml files only, using FileFilter
                        .endsWith(".xml"); // 6
            }
        }

        /**
         * Builds the Lucene document for one file.
         *
         * <p>{@code Field.Store} controls whether the raw value is kept in the
         * index. {@code Field.Index} controls indexing: {@code ANALYZED} tokenizes
         * the value before indexing it, {@code NOT_ANALYZED} indexes the value
         * as-is without tokenizing, {@code NO} does not index it at all.
         *
         * @param f file to convert
         * @return a document with the fields "contents", "filename" and "fullpath"
         * @throws Exception if the file cannot be opened or its path resolved
         */
        protected Document getDocument(File f) throws Exception {
            Document doc = new Document();
            // NOTE(review): FileReader decodes with the platform default charset,
            // and the reader is not closed here -- presumably Lucene closes it
            // after tokenizing the field; confirm.
            doc.add(new Field("contents", new FileReader(f))); // 7 index file content
            doc.add(new Field("filename", f.getName(), // 8 index file name
                    Field.Store.YES, Field.Index.NOT_ANALYZED)); // 8
            doc.add(new Field("fullpath", f.getCanonicalPath(), // 9 index file full path
                    Field.Store.YES, Field.Index.NOT_ANALYZED)); // 9
            return doc;
        }

        /**
         * Logs the file's canonical path and adds its document to the index.
         *
         * @param f file to index
         * @throws Exception if building or adding the document fails
         */
        private void indexFile(File f) throws Exception {
            System.out.println("Indexing " + f.getCanonicalPath());
            Document doc = getDocument(f);
            writer.addDocument(doc); // 10 add the document to the Lucene index
        }
    }
    
    /*
     * #1 Create index in this directory #2 Index *.xml files from this directory #3
     * Create Lucene IndexWriter #4 Close IndexWriter #5 Return number of documents
     * indexed #6 Index .xml files only, using FileFilter #7 Index file content #8
     * Index file name #9 Index file full path #10 Add document to Lucene index
     */
    View Code
  • 相关阅读:
    ASP.NET MVC案例教程(基于ASP.NET MVC beta)——第二篇:第一个页面
    HTML5网页录音和压缩,边猜边做..(附源码)
    策划编写一个新的Helper类
    正由另一进程使用,因此该进程无法访问此文件。
    第三方组件引用另一个第三方组件的悲剧
    数据库连接池的计数器设计
    让Ajax更简单
    更新Literacy
    多说
    利用C#自带组件强壮程序日志
  • 原文地址:https://www.cnblogs.com/XDJjy/p/4433226.html
Copyright © 2011-2022 走看看