  • Basic Lucene usage

    Lucene 7 and above requires at least JDK 1.8.

    Lucene downloads: http://archive.apache.org/dist/lucene/java/

    The Maven dependencies used in this post:
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>6.0.0</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-highlighter</artifactId>
                <version>6.0.0</version>
            </dependency>
    
            <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>6.0.0</version>
            </dependency>
    
            <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-memory -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-memory</artifactId>
                <version>6.0.0</version>
            </dependency>
    
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.9</version>
            </dependency>
    
            <!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>6.0.0</version>
            </dependency>
            <dependency>
                <groupId>commons-io</groupId>
                <artifactId>commons-io</artifactId>
                <version>2.6</version>
            </dependency>
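            <!-- note: 7.3.0 below does not match the 6.0.0 Lucene artifacts above;
                 in practice the analyzers-smartcn version should match lucene-core -->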
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-smartcn</artifactId>
                <version>7.3.0</version>
            </dependency>
            <dependency>
                <groupId>com.janeluo</groupId>
                <artifactId>ikanalyzer</artifactId>
                <version>2012_u6</version>
                <!-- exclude the old Lucene jars bundled with ikanalyzer, because we re-implement its analyzer and tokenizer ourselves -->
                <exclusions>
                    <exclusion>
                        <groupId>org.apache.lucene</groupId>
                        <artifactId>lucene-core</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.apache.lucene</groupId>
                        <artifactId>lucene-queryparser</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.apache.lucene</groupId>
                        <artifactId>lucene-analyzers-common</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
    package com.ytkj.lucene;
    
    import org.apache.commons.io.FileUtils;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.*;
    import org.apache.lucene.search.*;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
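    import org.apache.lucene.queryparser.classic.QueryParser; // needed for the searchByQueryParser sketch below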
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * A first Lucene example program
     */
    public class LuceneFirst {
        /**
         * Create the index
         * @throws Exception
         */
        public static void createIndex() throws Exception {
            //1. Create a Directory object that specifies where the index is stored (here, on disk)
            Directory directory = FSDirectory.open(new File("E:\\lucene\\lucenetemp").toPath());
            //2. Create an IndexWriter object
            //the analyzer to use for indexing
            StandardAnalyzer analyzer = new StandardAnalyzer();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            //3. Read the files on disk and create a document object for each file
            File dir = new File("E:\\lucene\\luceneresource");
            File[] files = dir.listFiles();
            for (File file : files) {
                //file name
                String name = file.getName();
                //file path
                String path = file.getPath();
                //file content
                String content = FileUtils.readFileToString(file, "utf-8");
                //file size
                long size = FileUtils.sizeOf(file);
                //create the fields; parameters: field name, field value, whether to store the value
                Field fieldName = new TextField("name", name, Field.Store.YES);
                Field fieldPath = new TextField("path", path, Field.Store.YES);
                Field fieldContent = new TextField("content", content, Field.Store.YES);
                Field fieldSize = new TextField("size", size + "", Field.Store.YES);
                //4. Create a document object
                Document document = new Document();
                //5. Add the fields to the document
                document.add(fieldName);
                document.add(fieldPath);
                document.add(fieldContent);
                document.add(fieldSize);
                //6. Write the document to the index
                indexWriter.addDocument(document);
            }
            //7. Close the IndexWriter
            indexWriter.close();
        }
    
        /**
         * Search the index
         * @throws Exception
         */
        public static void searchIndex() throws Exception {
            //1. Create a Directory object that specifies where the index is stored
            Directory directory = FSDirectory.open(new File("E:\\lucene\\lucenetemp").toPath());
            //2. Create an IndexReader object
            IndexReader indexReader = DirectoryReader.open(directory);
            //3. Create an IndexSearcher; the constructor takes the IndexReader
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            //4. Create a Query object
            Query query = new TermQuery(new Term("content", "spring"));
            //5. Execute the query to get a TopDocs object; parameters: the query, the maximum number of hits to return
            TopDocs topDocs = indexSearcher.search(query, 10);
            //6. Get the total number of hits
            int totalHits = topDocs.totalHits;
            System.out.println("Total hits: " + totalHits);
            //7. Get the list of matching documents
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            for (ScoreDoc scoreDoc : scoreDocs) {
                //the internal document id
                int docId = scoreDoc.doc;
                //8. Get the document object by its id
                Document document = indexSearcher.doc(docId);
                System.out.println(document.get("name"));
                System.out.println(document.get("path"));
                System.out.println(document.get("content"));
                System.out.println(document.get("size"));
            }
            //close the IndexReader
            indexReader.close();
    
        }
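
        /**
         * Not part of the original post: a minimal sketch showing what the
         * lucene-queryparser dependency in the pom is for. QueryParser parses a
         * query string (e.g. "spring AND mvc") into a Query, analyzing it with
         * the given analyzer; the default field "content" and the index path
         * match createIndex() above.
         */
        public static void searchByQueryParser() throws Exception {
            Directory directory = FSDirectory.open(new File("E:\\lucene\\lucenetemp").toPath());
            IndexReader indexReader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            //QueryParser(default field, analyzer); use the same analyzer as at index time
            QueryParser queryParser = new QueryParser("content", new StandardAnalyzer());
            Query query = queryParser.parse("spring AND mvc");
            TopDocs topDocs = indexSearcher.search(query, 10);
            System.out.println("Total hits: " + topDocs.totalHits);
            indexReader.close();
        }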
    
        /**
         * Inspect the output of an analyzer
         * @throws Exception
         */
        public static void testTokenStream() throws Exception {
            //create the standard analyzer
            StandardAnalyzer analyzer = new StandardAnalyzer();
            //use the analyzer's tokenStream method to get a TokenStream object
            TokenStream tokenStream = analyzer.tokenStream("", "org.springframework.jdbc.datasource.DataSourceTransactionManager");
            //add an attribute to the TokenStream; it acts like a pointer to the current token
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            //call the TokenStream's reset method
            tokenStream.reset();
            //iterate over the tokens
            while (tokenStream.incrementToken()) {
                System.out.println(charTermAttribute.toString());
            }
            //close the TokenStream
            tokenStream.close();
    
        }
    
        /**
         * Chinese analyzer test
         */
        public static void testIKAnalyzer() throws Exception {
            String etext = "Analysis is one of the main causes of slow indexing. Simply put, the more you analyze the slower analyze the indexing (in most cases).";
            String chineseText = "张三说的确实在理。";
            /*
             * IKAnalyzer is a Chinese analyzer. Because the API of Analyzer's
             * createComponents method has changed, we have to implement the
             * analyzer IKAnalyzer4Lucene7 and the tokenizer IKTokenizer4Lucene7
             * ourselves (a sketch follows after this class).
             */
            // IKAnalyzer, fine-grained segmentation
            try (Analyzer ik = new IKAnalyzer()) {
                TokenStream ts = ik.tokenStream("content", etext);
                System.out.println("IKAnalyzer fine-grained segmentation, English text:");
                doToken(ts);
                ts = ik.tokenStream("content", chineseText);
                System.out.println("IKAnalyzer fine-grained segmentation, Chinese text:");
                doToken(ts);
            }

            // IKAnalyzer, smart segmentation
            try (Analyzer ik = new IKAnalyzer(true)) {
                TokenStream ts = ik.tokenStream("content", etext);
                System.out.println("IKAnalyzer smart segmentation, English text:");
                doToken(ts);
                ts = ik.tokenStream("content", chineseText);
                System.out.println("IKAnalyzer smart segmentation, Chinese text:");
                doToken(ts);
            }
        }
        private static void doToken(TokenStream ts) throws IOException {
            ts.reset();
            CharTermAttribute cta = ts.getAttribute(CharTermAttribute.class);
            while (ts.incrementToken()) {
                System.out.print(cta.toString() + "|");
            }
            System.out.println();
            ts.end();
            ts.close();
        }
    
    
        public static void main(String[] args) throws Exception {
            //createIndex();
            //searchIndex();
            //testTokenStream();
            testIKAnalyzer();
        }
    }
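
    As noted in the comment inside testIKAnalyzer(), the ikanalyzer 2012_u6 jar was built against an older Analyzer API, so running IK on Lucene 7 requires small adapter classes. Below is a minimal sketch (not from the original post) of what IKAnalyzer4Lucene7 and IKTokenizer4Lucene7 could look like, assuming the IKSegmenter and Lexeme classes shipped in the ikanalyzer artifact declared above:

    package com.ytkj.lucene;

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.wltea.analyzer.core.IKSegmenter;
    import org.wltea.analyzer.core.Lexeme;

    /**
     * Sketch of an adapter that plugs the IK segmenter into the Lucene 7
     * Analyzer API, where createComponents(String) no longer takes a Reader.
     */
    public class IKAnalyzer4Lucene7 extends Analyzer {
        private final boolean useSmart; // true = smart segmentation, false = fine-grained

        public IKAnalyzer4Lucene7() {
            this(false);
        }

        public IKAnalyzer4Lucene7(boolean useSmart) {
            this.useSmart = useSmart;
        }

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            return new TokenStreamComponents(new IKTokenizer4Lucene7(useSmart));
        }
    }

    class IKTokenizer4Lucene7 extends Tokenizer {
        private final IKSegmenter segmenter;
        private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
        private final OffsetAttribute offsetAttr = addAttribute(OffsetAttribute.class);

        IKTokenizer4Lucene7(boolean useSmart) {
            //"input" is the Reader field inherited from Tokenizer
            segmenter = new IKSegmenter(input, useSmart);
        }

        @Override
        public boolean incrementToken() throws IOException {
            clearAttributes();
            Lexeme lexeme = segmenter.next(); // next token from IK, or null when exhausted
            if (lexeme == null) {
                return false;
            }
            termAttr.append(lexeme.getLexemeText());
            offsetAttr.setOffset(lexeme.getBeginPosition(), lexeme.getEndPosition());
            return true;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            segmenter.reset(input); // re-bind the segmenter to the freshly set Reader
        }
    }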
  • Original post: https://www.cnblogs.com/yscec/p/11946661.html