zoukankan      html  css  js  c++  java
  • Lucene搜索引擎例子demo

    一.导入相应的jar包

    IKAnalyzer3.2.0Stable.jar
    lucene-analyzers-3.0.1.jar
    lucene-core-3.0.1.jar
    lucene-highlighter-3.0.1.jar
    lucene-memory-3.0.1.jar
    二.写一个完整的demo
    1.创建一个实体
    -- Recreate the `article` table; an existing table of that name is dropped first.
    DROP TABLE IF EXISTS `article`;
    CREATE TABLE `article` (
      -- Auto-incremented integer primary key.
      `id` int(11) NOT NULL AUTO_INCREMENT,
      -- Title, at most 20 characters; nullable.
      `title` varchar(20) DEFAULT NULL,
      -- Body text, at most 5000 characters; nullable.
      `content` varchar(5000) DEFAULT NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    public class Article {
    private int id;
    private String title;
    private String content;
    get/set方法省略...
    }

    2.创建一个提供公共方法的类:

    //提供封装分页数据的类:
    /**
     * Carrier for one page of query results: the rows of the page plus the total hit count.
     *
     * <p>The list is kept as a raw {@code List} so callers can cast it to whatever element
     * type they queried for.
     */
    @SuppressWarnings("rawtypes")
    public class QueryResult {
        private List list;
        private int count;

        /** Creates an empty result; {@code list} stays {@code null} and {@code count} is 0. */
        public QueryResult() {
        }

        /**
         * Creates a populated result.
         *
         * @param list  the rows of the requested page
         * @param count the total number of matching records
         */
        public QueryResult(List list, int count) {
            this.list = list;
            this.count = count;
        }

        /** @return the rows of the page, or {@code null} if never set */
        public List getList() {
            return list;
        }

        /** @param list the rows of the page */
        public void setList(List list) {
            this.list = list;
        }

        /** @return the total number of matching records */
        public int getCount() {
            return count;
        }

        /** @param count the total number of matching records */
        public void setCount(int count) {
            this.count = count;
        }
    }

    3.创建提供索引、目录等公共对象的工具类:

    public class LuceneUtils{
    private static Directory directory;// 建立索引库存储目录
    private static Analyzer analyzer;// 创建分词器
    private static IndexWriter indexWriter; // 在程序启动是初始化,建立索引
    private static IndexSearcher indexSearcher;// 查询
    static {
    try {
    // 加载配置文件lucene.properties,该文件中是创建索引库的路径"path=D:\IindexSearch
    Properties prop = new Properties();
    InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties");
    //InputStream inStream = ClassLoader.getSystemResourceAsStream("lucene.properties");
    prop.load(inStream);
    directory = FSDirectory.open(new File(prop.getProperty("path")));
    analyzer = new StandardAnalyzer(Version.LUCENE_30);
    // 在程序启动是初始化,建立索引
    indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    //程序退出时关闭资源
    Runtime.getRuntime().addShutdownHook(new Thread(){
    public void run(){
    try {
    indexWriter.close();
    } catch (Exception e) {
    e.printStackTrace();
    } 
    }
    });
    } catch (Exception e) {
    e.printStackTrace();
    }
    }
    public static Document objectToDocument(Object obj) {
    Article article = (Article) obj;
    // 将文档转为domcment
    Document doc = new Document();
    String idstr = NumericUtils.intToPrefixCoded(article.getId());
    doc.add(new Field("id", idstr, Store.YES, Index.NOT_ANALYZED));
    doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
    doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
    return doc;
    }
    public static Object documentToObject(Document doc) {
    Article article = new Article();
    //将Document转为Article
    //将字符串转化为数字
    int id = NumericUtils.prefixCodedToInt(doc.get("id"));
    article.setId(id);
    article.setTitle(doc.get("title"));
    article.setContent(doc.get("content"));
    return article;
    }
    public static IndexWriter getIndexWriter() {
    return indexWriter;
    }
    public static IndexSearcher getIndexSearch() {
    // 执行查询
    try {
    indexSearcher = new IndexSearcher(directory);
    } catch (Exception e) {
    throw new RuntimeException(e);
    }
    return indexSearcher;
    }
    public static Directory getDirectory() {
    return directory;
    }
    public static Analyzer getAnalyzer() {
    return analyzer;
    }
    }

    4.创建增删改查方法

    public class IndexDao {
    /**
    * 
    * @return
    * @throws Exception
    */
    public void save(Article article) {
    try {
    // 将Aritcle转为Documnet
    Document doc = LuceneUtils.objectToDocument(article);
    // 建立索引
    IndexWriter indexWriter = LuceneUtils.getIndexWriter();
    indexWriter.addDocument(doc);
    indexWriter.commit();
    } catch (Exception e) {
    throw new RuntimeException(e);
    }
    }
    /**
    * 删除索引库 Term 表示制定列中包含的关键字
    * 
    * @return
    * @throws Exception
    */
    public void delete(Article article) {
    String idStr = NumericUtils.intToPrefixCoded(article.getId());
    Term term = new Term("id", idStr);
    try {
    // 建立索引
    IndexWriter indexWriter = LuceneUtils.getIndexWriter();
    indexWriter.deleteDocuments(term);// 删除指定Term总重的documnet数据
    indexWriter.commit();
    } catch (Exception e) {
    throw new RuntimeException(e);
    }
    }
    /**
    * 修改索引库
    * 
    * @return
    * @throws Exception
    */
    public void update(Article article) {
    // 创建Term
    String idStr = NumericUtils.intToPrefixCoded(article.getId());
    Term term = new Term("id", idStr);
    // 准备document
    Document doc = LuceneUtils.objectToDocument(article);
    try {
    // 建立索引
    IndexWriter indexWriter = LuceneUtils.getIndexWriter();
    indexWriter.updateDocument(term, doc);// 删除指定Term总重的documnet数据
    indexWriter.commit();
    // 先删除,在创建
    // indexWriter.deleteDocuments(term);
    // indexWriter.addDocument(doc);
    } catch (Exception e) {
    throw new RuntimeException(e);
    }
    }
    /**
    * 查询索引库
    * 
    * @return
    * @throws Exception
    */
    public QueryResult query(String queryString, int first, int max) {
    IndexSearcher indexSearcher = null;
    try {
    // MultiFieldQueryParser:表示可以根据多个字段查询
    int totail = first + max;
    // 1.把字符串转为Query对象
    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "id", "title", "content" },
    LuceneUtils.getAnalyzer());
    Query query = parser.parse(queryString);
    // 2.执行查询
    indexSearcher = LuceneUtils.getIndexSearch();
    // 指定排序条件
    Sort sort = new Sort(new SortField("id", SortField.INT));// 按照id升序
    TopDocs topDocs = indexSearcher.search(query, null, totail, sort);// 查询并返回最多的前n条数据
    int count = topDocs.totalHits;// 总记录数
    ScoreDoc[] scoreDoc = topDocs.scoreDocs;// 最多前n条结果数据
    // 生成高亮显示器;设置前缀,后缀,摘要的大小
    Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    Scorer scorer = new QueryScorer(query);// 查询条件
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(100));// 设置摘要的大小
    // 3.取出数据
    int endIndex = Math.min(totail, scoreDoc.length);
    List<Article> list = new ArrayList<Article>();
    for (int i = 0; i < endIndex; i++) {
    // float score = scoreDoc[i].score;//平均得分
    int docId = scoreDoc[i].doc;
    Document doc = indexSearcher.doc(docId);
    // 进行高亮操作,当没有找到关键词时,返回为null
    String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", doc.get("title"));
    if (text != null) {
    doc.getField("title").setValue(text);
    }
    // 将Document转为Article
    Article article = (Article) LuceneUtils.documentToObject(doc);
    list.add(article);
    }
    QueryResult queryResult = new QueryResult(list, count);
    return queryResult;
    } catch (Exception e) {
    throw new RuntimeException(e);
    }
    }

    5.测试dao方法:

    /** Indexes twenty articles with ids 0 through 19, all sharing one title and body. */
    @Test
    public void testSave() {
        final String title = "Lucene搜索的方式";
        final String body = "全文检索是计算机程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的次数和位置。";
        // One bean is reused; each save converts its current state into a new document.
        Article sample = new Article();
        int id = 0;
        while (id < 20) {
            sample.setId(id);
            sample.setTitle(title);
            sample.setContent(body);
            indexDao.save(sample);
            id++;
        }
    }
    /** Removes the indexed article whose id is 1. */
    @Test
    public void testDelete() {
        // Only the id is populated on the article passed to delete.
        Article target = new Article();
        target.setId(1);

        indexDao.delete(target);
    }
    /** Replaces the indexed article with id 1 by one carrying new body text. */
    @Test
    public void testUpdate() {
        final int targetId = 1;
        final String title = "Lucene搜索的方式";
        final String body = "跟新索引库测试是否正确";

        Article replacement = new Article();
        replacement.setId(targetId);
        replacement.setTitle(title);
        replacement.setContent(body);

        indexDao.update(replacement);
    }
    /** Searches for "Lucene", requesting the first 10 hits, and prints the total count and each article. */
    @Test
    @SuppressWarnings("unchecked")
    public void testQuery() {
        String queryString = "Lucene";
        // The search method shown on IndexDao is named query.
        QueryResult queryResult = indexDao.query(queryString, 0, 10);
        System.out.println("count---------->" + queryResult.getCount());
        // QueryResult exposes a raw List, hence the unchecked cast.
        List<Article> list = (List<Article>) queryResult.getList();
        for (Article article : list) {
            System.err.println("list--------->" + article.toString());
        }
    }
  • 相关阅读:
    852. Peak Index in a Mountain Array
    841. Keys and Rooms
    832. Flipping an Image
    821. Shortest Distance to a Character
    824. Goat Latin
    如何生成git的公钥和私钥
    学习笔记
    加快JavaScript加载和执行效率
    PO BO VO DTO POJO DAO概念及其作用
    jvm 垃圾回收区的形象说明
  • 原文地址:https://www.cnblogs.com/Jansens520/p/7825764.html
Copyright © 2011-2022 走看看