zoukankan      html  css  js  c++  java
  • Lucene基础(四)-- 结合数据库使用

    需求

    很多时候我们在使用数据库时需要进行模糊查询,一般会使用like语句来做,然而这样做的效率不是很高(很抱歉我没有亲自去测,很多人都这么说),那么使用Lucene来检索的话,效率会高很多。

    lucene结合数据库步骤

    1. 写一段传统的JDBC程序,将每条的用户信息从数据库读取出来
    2. 针对每条用户记录,建立一个lucene document 
      Document doc = new Document(); 
      并根据你的需要,将用户信息的各个字段对应lucene document中的field 进行添加,如: 
      doc.add(new Field("NAME","USERNAME",Field.Store.YES,Field.Index.UN_TOKENIZED)); 
      然后将该条doc加入到索引中, 如: luceneWriter.addDocument(doc); 
      这样就建立了lucene的索引库
    3. 编写对索引库的搜索程序(看lucene文档),通过对lucene的索引库的查找,你可以快速找到对应记录的ID
    4. 通过ID到数据库中查找相关记录

    注意 
    在索引的过程中,可以使用增量的方式建立索引,这样对已经索引过的记录不再重复建立索引。实现思路:保存上次(lasttime)建立索引时最后一条新增记录的id,在下次建立索引的时候,只查询这个id之后的记录进行索引,并更新所保存的id;在数据库数据被修改的时候,针对被修改的数据对索引做相应的更新。

    操作实例

    package lucene_demo05;

    import java.io.IOException;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;

    /**
     * Demonstrates using Lucene together with a database: every row of
     * {@code t_user} is read over JDBC and added to an in-memory Lucene index,
     * which can then be searched by field to print the ids of matching rows.
     *
     * @author YipFun
     */
    public class LuceneDemo05 {

      private static final String driverClassName = "com.mysql.jdbc.Driver";
      private static final String url = "jdbc:mysql://127.0.0.1:3306/test?characterEncoding=utf-8";
      private static final String username = "****";
      private static final String password = "****";

      private static final Version version = Version.LUCENE_4_9;

      /** In-memory directory holding the index. */
      private final Directory directory;
      /** Reader reused across searches; refreshed in {@link #getSearcher()}. */
      private DirectoryReader ireader = null;
      /** Lazily created analyzer shared by indexing and query parsing. */
      private IKAnalyzer analyzer;
      /** Lazily opened JDBC connection; reset to {@code null} once it has been closed. */
      private Connection conn;

      public LuceneDemo05() {
        directory = new RAMDirectory();
      }

      /**
       * Returns a searcher over the current state of the index. The underlying
       * reader is opened on first use and re-opened when the index has changed.
       *
       * @return a searcher, or {@code null} if the index could not be opened
       *         (the error is printed to standard error)
       */
      public IndexSearcher getSearcher() {
        try {
          if (ireader == null) {
            ireader = DirectoryReader.open(directory);
          } else {
            // openIfChanged returns null when the existing reader is still current.
            DirectoryReader changed = DirectoryReader.openIfChanged(ireader);
            if (changed != null) {
              ireader.close();
              ireader = changed;
            }
          }
          return new IndexSearcher(ireader);
        } catch (IOException e) {
          // Also covers CorruptIndexException, which is an IOException.
          e.printStackTrace();
          return null;
        }
      }

      /**
       * Returns the JDBC connection, opening a new one when none exists yet or
       * the previous one has been closed.
       *
       * @return an open connection, or {@code null} if connecting failed
       */
      public Connection getConnection() {
        try {
          if (conn == null || conn.isClosed()) {
            Class.forName(driverClassName);
            conn = DriverManager.getConnection(url, username, password);
          }
        } catch (ClassNotFoundException | SQLException e) {
          e.printStackTrace();
          // Never hand out a stale or half-initialised connection.
          conn = null;
        }
        return conn;
      }

      /** Returns the shared analyzer, creating it on first use. */
      private IKAnalyzer getAnalyzer() {
        if (analyzer == null) {
          analyzer = new IKAnalyzer();
        }
        return analyzer;
      }

      /** Builds the writer configuration used by {@link #createIndex()}. */
      private IndexWriterConfig newWriterConfig() {
        IndexWriterConfig iwConfig = new IndexWriterConfig(version, getAnalyzer());
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return iwConfig;
      }

      /** Closes the JDBC connection (if any) and forgets it so the next call reconnects. */
      private void closeConnection() {
        Connection current = conn;
        conn = null;
        if (current == null) {
          return;
        }
        try {
          if (!current.isClosed()) {
            current.close();
          }
        } catch (SQLException e) {
          e.printStackTrace();
        }
      }

      /**
       * Reads every row of {@code t_user} and adds one Lucene document per row
       * with the stored fields {@code id}, {@code name} and {@code psd}.
       * Errors are printed; the statement, result set, index writer and
       * connection are always released, even when a step fails part-way.
       */
      public void createIndex() {
        Connection connection = getConnection();
        if (connection == null) {
          System.out.println("get the connection error...");
          return;
        }
        String sql = "select * from t_user";
        // Resources are closed in reverse order: writer first, then result set, then statement.
        try (PreparedStatement pstmt = connection.prepareStatement(sql);
            ResultSet rs = pstmt.executeQuery();
            IndexWriter writer = new IndexWriter(directory, newWriterConfig())) {
          while (rs.next()) {
            // Columns are read by position, so this depends on the column order of t_user.
            int id = rs.getInt(1);
            String name = rs.getString(2);
            String psd = rs.getString(3);
            Document doc = new Document();
            doc.add(new TextField("id", id + "", Field.Store.YES));
            doc.add(new TextField("name", name + "", Field.Store.YES));
            // NOTE(review): "psd" looks like a password column; confirm it should be indexed and stored.
            doc.add(new TextField("psd", psd + "", Field.Store.YES));
            writer.addDocument(doc);
          }
        } catch (SQLException e) {
          e.printStackTrace();
        } catch (IOException e) {
          e.printStackTrace();
        } finally {
          closeConnection();
        }
      }

      /**
       * Parses {@code keyword} as a query against {@code field} and prints the
       * {@code id} field of up to {@code num} matching documents.
       *
       * @param field   default field the query is parsed against
       * @param keyword query text in QueryParser syntax
       * @param num     maximum number of hits to fetch
       * @throws InvalidTokenOffsetsException declared for compatibility with existing callers
       */
      public void searchByTerm(String field, String keyword, int num) throws InvalidTokenOffsetsException {
        IndexSearcher isearcher = getSearcher();
        if (isearcher == null) {
          // getSearcher() has already reported why the index could not be opened.
          System.out.println("get the searcher error...");
          return;
        }
        Analyzer queryAnalyzer = getAnalyzer();
        // Build the Query object with the QueryParser.
        QueryParser qp = new QueryParser(version, field, queryAnalyzer);
        // Terms without an explicit operator are combined with OR; this is the
        // parser's default, so the call only makes the choice explicit.
        qp.setDefaultOperator(QueryParser.OR_OPERATOR);
        try {
          Query query = qp.parse(keyword);

          // IndexSearcher offers several search overloads; this one takes an optional filter.
          ScoreDoc[] hits = isearcher.search(query, null, num).scoreDocs;

          System.out.println("the ids is =");
          for (ScoreDoc hit : hits) {
            Document doc = isearcher.doc(hit.doc);
            System.out.print(doc.get("id") + " ");
          }
        } catch (IOException e) {
          e.printStackTrace();
        } catch (ParseException e) {
          e.printStackTrace();
        }
      }

      public static void main(String[] args) throws InvalidTokenOffsetsException {
        LuceneDemo05 ld = new LuceneDemo05();
        ld.createIndex();
        ld.searchByTerm("name", "Bruce", 100);
      }
    }

    检索之后就可以拿到需要的id,这个时候按id查询数据库的记录,就快多了。

    思考

    • 这是对单表的数据进行索引,当我们的业务比较复杂时,需要的数据通常是多个表联合查询的结果,这时我们的索引应该如何建立?

      1. 使用视图,对多表建立视图,在视图上面创建索引?
      2. 还是单表索引,只是把联合查询化解,在lucene的索引中使用多次查询,找到目标,在数据库查询?
    • 和数据库结合使用的时候,索引到底是和数据库中的数据相关联的,还是和查询的结果集相关联的?

    写测试程序发现,索引应该是建立在查询的结果集上面的。

    测试如下: 
    t_user 表 
    这里写图片描述 
    t_user_teacher 表 
    这里写图片描述 

    t_teacher 表 
    这里写图片描述

    package lucene_demo05;

    import java.io.IOException;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;

    /**
     * Demonstrates indexing the result set of a multi-table join with Lucene:
     * users joined to their teachers are read over JDBC, each joined row is
     * added to an in-memory index, and the index is then searched by field to
     * print the matching user ids.
     *
     * @author YipFun
     */
    public class LuceneDemo06
    {

      private static final String driverClassName = "com.mysql.jdbc.Driver";
      private static final String url = "jdbc:mysql://127.0.0.1:3306/test?characterEncoding=utf-8";
      private static final String username = "****";
      private static final String password = "****";

      private static final Version version = Version.LUCENE_4_9;

      /** In-memory directory holding the index. */
      private final Directory directory;
      /** Reader reused across searches; refreshed in {@link #getSearcher()}. */
      private DirectoryReader ireader = null;
      /** Lazily created analyzer shared by indexing and query parsing. */
      private IKAnalyzer analyzer;
      /** Lazily opened JDBC connection; reset to {@code null} once it has been closed. */
      private Connection conn;

      public LuceneDemo06()
      {
        directory = new RAMDirectory();
      }

      /**
       * Returns a searcher over the current state of the index. The underlying
       * reader is opened on first use and re-opened when the index has changed.
       *
       * @return a searcher, or {@code null} if the index could not be opened
       *         (the error is printed to standard error)
       */
      public IndexSearcher getSearcher()
      {
        try
        {
          if (ireader == null)
          {
            ireader = DirectoryReader.open(directory);
          } else
          {
            // openIfChanged returns null when the existing reader is still current.
            DirectoryReader changed = DirectoryReader.openIfChanged(ireader);
            if (changed != null)
            {
              ireader.close();
              ireader = changed;
            }
          }
          return new IndexSearcher(ireader);
        } catch (IOException e)
        {
          // Also covers CorruptIndexException, which is an IOException.
          e.printStackTrace();
          return null;
        }
      }

      /**
       * Returns the JDBC connection, opening a new one when none exists yet or
       * the previous one has been closed.
       *
       * @return an open connection, or {@code null} if connecting failed
       */
      public Connection getConnection()
      {
        try
        {
          if (conn == null || conn.isClosed())
          {
            Class.forName(driverClassName);
            conn = DriverManager.getConnection(url, username, password);
          }
        } catch (ClassNotFoundException | SQLException e)
        {
          e.printStackTrace();
          // Never hand out a stale or half-initialised connection.
          conn = null;
        }
        return conn;
      }

      /** Returns the shared analyzer, creating it on first use. */
      private IKAnalyzer getAnalyzer()
      {
        if (analyzer == null)
        {
          analyzer = new IKAnalyzer();
        }
        return analyzer;
      }

      /** Builds the writer configuration used by {@link #createIndex()}. */
      private IndexWriterConfig newWriterConfig()
      {
        IndexWriterConfig iwConfig = new IndexWriterConfig(version, getAnalyzer());
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        return iwConfig;
      }

      /** Closes the JDBC connection (if any) and forgets it so the next call reconnects. */
      private void closeConnection()
      {
        Connection current = conn;
        conn = null;
        if (current == null)
        {
          return;
        }
        try
        {
          if (!current.isClosed())
          {
            current.close();
          }
        } catch (SQLException e)
        {
          e.printStackTrace();
        }
      }

      /**
       * Runs the user/teacher join and adds one Lucene document per joined row
       * with the stored fields {@code uid}, {@code uname}, {@code upsd},
       * {@code tid} and {@code tname}. Errors are printed; the statement,
       * result set, index writer and connection are always released, even when
       * a step fails part-way.
       */
      public void createIndex()
      {
        Connection connection = getConnection();
        if (connection == null)
        {
          System.out.println("get the connection error...");
          return;
        }
        String sql = "select "
            + "u.id as uid,"
            + "u.name as uname,"
            + "u.psd as upsd,"
            + "u.email as uemail,"
            + "u.tel as utel,"
            + "t.id as tid,"
            + "t.name as tname "
            + "from t_user u , t_user_teacher ut ,t_teacher t "
            + "where u.id=ut.u_id and ut.t_id= t.id ";
        // Resources are closed in reverse order: writer first, then result set, then statement.
        try (PreparedStatement pstmt = connection.prepareStatement(sql);
            ResultSet rs = pstmt.executeQuery();
            IndexWriter writer = new IndexWriter(directory, newWriterConfig()))
        {
          while (rs.next())
          {
            int id = rs.getInt("uid");
            String name = rs.getString("uname");
            String psd = rs.getString("upsd");
            int tid = rs.getInt("tid");
            String tname = rs.getString("tname");
            Document doc = new Document();
            doc.add(new TextField("uid", id + "", Field.Store.YES));
            doc.add(new TextField("uname", name + "", Field.Store.YES));
            // NOTE(review): "upsd" looks like a password column; confirm it should be indexed and stored.
            doc.add(new TextField("upsd", psd + "", Field.Store.YES));
            doc.add(new TextField("tid", tid + "", Field.Store.YES));
            doc.add(new TextField("tname", tname + "", Field.Store.YES));
            writer.addDocument(doc);
          }
        } catch (SQLException e)
        {
          e.printStackTrace();
        } catch (IOException e)
        {
          e.printStackTrace();
        } finally
        {
          closeConnection();
        }
      }

      /**
       * Parses {@code keyword} as a query against {@code field} and prints the
       * {@code uid} field of up to {@code num} matching documents.
       *
       * @param field   default field the query is parsed against
       * @param keyword query text in QueryParser syntax
       * @param num     maximum number of hits to fetch
       * @throws InvalidTokenOffsetsException declared for compatibility with existing callers
       */
      public void searchByTerm(String field, String keyword, int num) throws InvalidTokenOffsetsException
      {
        IndexSearcher isearcher = getSearcher();
        if (isearcher == null)
        {
          // getSearcher() has already reported why the index could not be opened.
          System.out.println("get the searcher error...");
          return;
        }
        Analyzer queryAnalyzer = getAnalyzer();
        // Build the Query object with the QueryParser.
        QueryParser qp = new QueryParser(version, field, queryAnalyzer);
        // Terms without an explicit operator are combined with OR; this is the
        // parser's default, so the call only makes the choice explicit.
        qp.setDefaultOperator(QueryParser.OR_OPERATOR);
        try
        {
          Query query = qp.parse(keyword);

          // IndexSearcher offers several search overloads; this one takes an optional filter.
          ScoreDoc[] hits = isearcher.search(query, null, num).scoreDocs;

          System.out.println("the ids is =");
          for (ScoreDoc hit : hits)
          {
            Document doc = isearcher.doc(hit.doc);
            System.out.print(doc.get("uid") + " ");
          }
        } catch (IOException e)
        {
          e.printStackTrace();
        } catch (ParseException e)
        {
          e.printStackTrace();
        }
      }

      public static void main(String[] args) throws InvalidTokenOffsetsException
      {
        LuceneDemo06 ld = new LuceneDemo06();
        ld.createIndex();
        ld.searchByTerm("tname", "aaa", 100);
      }
    }

    搜索教师为aaa的学生的Id 
    结果:

    加载扩展词典:ext.dic

    加载扩展停止词典:stopword.dic

    the ids is = 1 2

  • 相关阅读:
    关于table表格的一些问题
    leetcode 845. 数组中的最长山脉 做题笔记
    leetcode 845. 数组中的最长山脉 做题小结
    leetcode 925. 长按键入小结
    leetcode 925. 长按键入小结
    java单链表反转 详细讲述
    java单链表反转 详细讲述
    Leetcode 3. 无重复字符的最长子串 做题小结
    Leetcode 3. 无重复字符的最长子串 做题小结
    复变函数的幂函数
  • 原文地址:https://www.cnblogs.com/downey/p/4890781.html
Copyright © 2011-2022 走看看