zoukankan      html  css  js  c++  java
  • lucene创建索引

    1、需要添加的依赖

     <!-- Lucene core -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>7.2.1</version>
            </dependency>
            <!-- Lucene解析库 -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>7.2.1</version>
            </dependency>
            <!-- Lucene附加的分析库 -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>7.2.1</version>
            </dependency>
            <!-- 中文分词器(smartcn),也可以选用其他分词器;注意:下文示例代码使用的是 IKAnalyzer,需另行引入对应的依赖 -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-smartcn</artifactId>
                <version>7.2.1</version>
            </dependency>

    2、索引创建及测试

    package com.zxf.lucene.util;
    
    import com.zxf.lucene.analyzer.lucene.IKAnalyzer;
    import org.apache.lucene.document.*;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Paths;
    
    /**
     * Created by jiangyang on 2019/3/8.
     */
    /**
     * Minimal Lucene indexing example: builds one sample {@link Document} and
     * writes it to a file-system index using an {@link IndexWriter}.
     */
    public class LuceneDemo {

        /**
         * Builds the sample document that gets indexed.
         *
         * <p>Field choices used here:
         * <ul>
         *   <li>{@code TextField} - the value is tokenized by the analyzer. With
         *       {@code Field.Store.NO} the value is searchable, but it cannot be read
         *       back with {@code document.get(key)} from a search hit.</li>
         *   <li>{@code StringField} - the value is indexed as a single token (no
         *       tokenization). Because it becomes one term, the value is subject to
         *       Lucene's term length limit; exceeding it raises an exception.</li>
         *   <li>{@code NumericDocValuesField} - numeric value, not tokenized.</li>
         *   <li>{@code StoredField} - stores the value so it can be retrieved,
         *       comparable to {@code Field.Store.YES}.</li>
         * </ul>
         *
         * @return a new document with the fields {@code name}, {@code id} and {@code nickname}
         */
        public Document createDocument() {
            Document document = new Document();
            // Tokenized, index-only: searchable but not retrievable from a hit.
            document.add(new TextField("name" ,"张三", Field.Store.NO));
            // The same "id" is added twice on purpose: once as doc values and
            // once as a stored field so the value can be read back.
            document.add(new NumericDocValuesField("id", 1234));
            document.add(new StoredField("id", 1234));
            // Not tokenized, stored.
            document.add(new StringField("nickname", "冷太阳",Field.Store.YES));
            return document;
        }

        /**
         * Opens an {@link IndexWriter} on the given directory, creating the
         * directory first if it does not exist.
         *
         * @param dir path of the folder that holds the index files
         * @return the writer, or {@code null} if it could not be created (the
         *         failure is printed to standard error); callers must check for
         *         {@code null} and must close the returned writer
         */
        public  IndexWriter getIndexWriterInstance(String dir) {
            File file = new File(dir);
            if (!file.exists()) {
                file.mkdirs();
            }
            Directory directory = null;
            try {
                directory = FSDirectory.open(Paths.get(dir));
                // The analyzer decides how TextField values are tokenized.
                IKAnalyzer ikAnalyzer = new IKAnalyzer();
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(ikAnalyzer);
                return new IndexWriter(directory, indexWriterConfig);
            } catch (Exception e) {
                // Do not leak the directory handle when the writer could not be built.
                if (directory != null) {
                    try {
                        directory.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                }
                e.printStackTrace();
            }
            return null;
        }

        /**
         * Demo entry point: indexes the sample document into the configured folder.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            LuceneDemo demo = new LuceneDemo();
            String dir = "索引文件夹的路径";
            IndexWriter indexWriter = demo.getIndexWriterInstance(dir);
            if (indexWriter == null) {
                // getIndexWriterInstance already printed the cause.
                System.err.println("Unable to open an IndexWriter for: " + dir);
                return;
            }
            Document document = demo.createDocument();
            // try-with-resources closes the writer (releasing the index write
            // lock) even when addDocument or commit throws.
            try (IndexWriter writer = indexWriter) {
                writer.addDocument(document);
                writer.commit();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    人生没有彩排,每天都是现场直播!
  • 相关阅读:
    Power OJ 2790.GAUSS 2014(KMP或AC自动机+矩阵快速幂)
    PowerOJ 2789 上决╇ф的战争 (KMP)
    牛客网 The K-th Largest Interval (二分+尺取)
    牛客网 wyh的天鹅 (权值线段树)
    图像边缘计算 canny算子
    图像边缘检测 拉普拉斯算子
    图像边缘检测,sobel,scharr
    卷积的边缘像素填充
    图像的二值化
    图像的上采样和下采样
  • 原文地址:https://www.cnblogs.com/northern-light/p/10498213.html
Copyright © 2011-2022 走看看