zoukankan      html  css  js  c++  java
  • Lucene学习笔记1(V7.1)

    Lucene是一个全文检索类库,Solr、Nutch和Elasticsearch都是基于Lucene构建的。个人感觉在学习这些高级搜索引擎应用程序之前,有必要先了解Lucene。

    开发环境:IntelliJ IDEA、Maven、Spring Boot

    开始贴代码:

    maven配置

     <!-- Maven POM fragment: Spring Boot parent plus the Lucene 7.1.0 modules used by the examples -->
     <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>1.4.3.RELEASE</version>
        </parent>
    
        <properties>
            <java.version>1.8</java.version>
        </properties>
    
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-thymeleaf</artifactId>
            </dependency>
            <!-- hot swapping, disable cache for template, enable live reload -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-devtools</artifactId>
                <optional>true</optional>
            </dependency>
    
                <!-- Lucene dependencies start; all modules are pinned to the same version (7.1.0) -->
                <dependency>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-core</artifactId>
                    <version>7.1.0</version>
                </dependency>
    
                <!-- Chinese tokenizer (smartcn); the general-purpose analyzers (common) are aimed at English text -->
                <dependency>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-analyzers-smartcn</artifactId>
                    <version>7.1.0</version>
                </dependency>
                <dependency>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-queryparser</artifactId>
                    <version>7.1.0</version>
                </dependency>
    
                <!-- Highlighting of matched search keywords -->
                <dependency>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-highlighter</artifactId>
                    <version>7.1.0</version>
                </dependency>
                <!-- Lucene dependencies end -->
    
                <dependency>
                    <groupId>junit</groupId>
                    <artifactId>junit</artifactId>
                    <version>4.12</version>
                </dependency>
    
    
        </dependencies>
    
        <build>
            <plugins>
                <!-- Package as an executable jar/war -->
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
            </plugins>
        </build>
    View Code

    辅助类

    /**
     * Constants shared by the Lucene examples: string keys, the search result cap,
     * and the file-system locations of the index and the sample data.
     */
    public class LuceneConstants {

        // ---- File-system locations (Windows-style absolute paths) ----

        /** Directory in which the Lucene index is stored. */
        public static final String IndexDir = "E:\\Lucene\\Index";

        /** Data directory (presumably the raw documents to index; not referenced by the visible code). */
        public static final String DataDir = "E:\\Lucene\\Data";

        /** Path of the sample article text file that is indexed line by line. */
        public static final String ArticleDir = "E:\\Lucene\\Files\\article.txt";

        // ---- String keys (presumably Lucene field names; confirm against callers) ----

        public static final String CONTENTS = "contents";

        public static final String FILE_NAME = "filename";

        public static final String FILE_PATH = "filepath";

        // ---- Search settings ----

        /** Maximum number of hits requested from a search. */
        public static final int MAX_SEARCH = 10;
    }
    View Code

    调用Lucene

    public class Indexer {
    
        public void addEntity() throws IOException {
            Article article = new Article();
            //article.setId(1);
            //article.setTitle("Lucene全文检索");
            //article.setContent("Lucene是apache软件基金会4 jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,部分文本分析引擎(英文与德文两种西方语言)。");
            article.setId(2);
            article.setTitle("Solr搜索引擎");
            article.setContent("Solr是基于Lucene框架的搜索莹莹程序,是一个开放源代码的全文检索引擎。");
    
            final Path path = Paths.get(LuceneConstants.IndexDir);
            Directory directory = FSDirectory.open(path);//索引存放目录 存在磁盘
            //Directory RAMDirectory= new RAMDirectory();// 存在内存
    
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
            //indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);//更新或创建索引
    
            Document document = new Document();
            document.add(new TextField("id", article.getId().toString(), Field.Store.YES));
            document.add(new TextField("title", article.getTitle(), Field.Store.YES));
            document.add(new TextField("content", article.getContent(), Field.Store.YES));
    
            indexWriter.addDocument(document);
            indexWriter.close();
        }
    
        public void addFile() throws IOException {
            final Path path = Paths.get(LuceneConstants.IndexDir);
    
            Directory directory = FSDirectory.open(path);
            Analyzer analyzer=new StandardAnalyzer();
    
            IndexWriterConfig indexWriterConfig=new IndexWriterConfig(analyzer);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    
            IndexWriter indexWriter=new IndexWriter(directory,indexWriterConfig);
            InputStreamReader isr = new InputStreamReader(new FileInputStream(LuceneConstants.ArticleDir), "GBK");//.txt文档,不设置格式会乱码
            BufferedReader bufferedReader=new BufferedReader(isr);
    
            String content="";
            while ((content=bufferedReader.readLine())!=null){
                Document document=new Document();
                document.add(new TextField("content",content,Field.Store.YES) );
                indexWriter.addDocument(document);
            }
            bufferedReader.close();
            indexWriter.close();
        }
    
        public List<String> SearchFiles() throws IOException, ParseException {
            String queryString = "Solr";
    
            final Path path = Paths.get(LuceneConstants.IndexDir);
            Directory directory = FSDirectory.open(path);//索引存储位置
            Analyzer analyzer = new StandardAnalyzer();//分析器
    
            //单条件
            //关键词解析
            //QueryParser queryParser=new QueryParser("content",analyzer);
            //Query query=queryParser.parse(queryString);
    
            //多条件
            Query mQuery = MultiFieldQueryParser.parse(new String[]{"Solr"},new String[]{"content"},new StandardAnalyzer());
    
            IndexReader indexReader = DirectoryReader.open(directory);//索引阅读器
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);//查询
    
            //TopDocs topDocs=indexSearcher.search(query,3);
            TopDocs topDocs=indexSearcher.search(mQuery,10);
            long count = topDocs.totalHits;
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    
    
            List<String> list=new ArrayList<String>();
            list.add(String.valueOf(count));
    
            Integer cnt=0;
    
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
    
                //list.add(cnt.toString()+"-"+"相关度:"+scoreDoc.score+"-----time:"+document.get("time"));
                //list.add("|||");
                //list.add(cnt.toString()+"-"+document.get("content"));
    
                list.add(document.get("content"));
                cnt++;
            }
    
            return  list;
        }
    }
    View Code

    查看运行效果

    /**
     * Web endpoints that trigger the {@link Indexer} examples and render the outcome
     * through the {@code welcome} view, using the {@code message} model attribute.
     */
    @Controller
    public class LuceneController {

        // Fully qualified on purpose: java.util.logging is part of the JDK, so no
        // additional import line is required for this class to compile.
        private static final java.util.logging.Logger LOGGER =
                java.util.logging.Logger.getLogger(LuceneController.class.getName());

        /**
         * Adds the sample article to the index.
         *
         * @param model view model; receives {@code message} = "Success" or "Failure"
         * @return the name of the view to render
         */
        @RequestMapping("/add")
        public String welcomepage(Map<String, Object> model) {
            try {
                Indexer indexer = new Indexer();
                indexer.addEntity();

                model.put("message", "Success");
            } catch (IOException ex) {
                // Keep the user-facing message unchanged, but do not lose the cause.
                LOGGER.log(java.util.logging.Level.SEVERE, "Adding the sample article to the index failed", ex);
                model.put("message", "Failure");
            }

            return "welcome";
        }

        /**
         * Rebuilds the index from the sample text file.
         *
         * @param model view model; receives {@code message} = "SuccessF" or "FailureF"
         * @return the name of the view to render
         */
        @RequestMapping("/file")
        public String fileindex(Map<String, Object> model) {
            try {
                Indexer indexer = new Indexer();
                indexer.addFile();

                model.put("message", "SuccessF");
            } catch (IOException ex) {
                LOGGER.log(java.util.logging.Level.SEVERE, "Indexing the sample text file failed", ex);
                model.put("message", "FailureF");
            }

            return "welcome";
        }

        /**
         * Runs the sample search and shows every returned entry, each followed by a space.
         *
         * @param model view model; receives the joined results, or "FailureF" on error
         * @return the name of the view to render
         */
        @RequestMapping("/search")
        public String searchindex(Map<String, Object> model) {
            try {
                Indexer indexer = new Indexer();
                List<String> results = indexer.SearchFiles();

                StringBuilder message = new StringBuilder();
                for (String result : results) {
                    message.append(result).append(' ');
                }
                model.put("message", message.toString());
            } catch (Exception ex) {
                // Broad catch kept at this boundary: the search can fail with I/O or query-parse errors.
                LOGGER.log(java.util.logging.Level.SEVERE, "Searching the index failed", ex);
                model.put("message", "FailureF");
            }

            return "welcome";
        }

    }
    View Code

  • 相关阅读:
    偏振光相机2
    偏振光相机1
    偏振光工业相机
    Qt QSlider介绍(属性设置、信号、实现滑块移动到鼠标点击位置)
    C++ malloc()和free()函数的理解
    C++调用MATLAB函数
    C/C++ 获取unsigned short的高八位和低八位数值
    Qt QtConcurrent::Run 阻塞方式调用
    Qt QtConcurrent::Run 非阻塞方式调用
    Cognex.VisionPro.QuickBuild命名空间下的一些委托方法
  • 原文地址:https://www.cnblogs.com/brainthink/p/8072289.html
Copyright © 2011-2022 走看看