zoukankan      html  css  js  c++  java
  • lucene构建restful风格的简单搜索引擎服务

    来自于本人博客: lucene构建restful风格的简单搜索引擎服务


    本人的博客现在也要改成使用lucene进行全文检索,因此在这里把代码贴出来与大家分享

    一,文件夹结构:

    二,配置文件:

        总共有四个配置文件:bonecp-config.xml,IKAnalyzer.cfg.xml,log4j.properties,system-config.xml

        1.bonecp-config.xml是配置jdbc连接池用的,不用这个配置也行,bonecp包有默认配置

        2.IKAnalyzer.cfg.xml是IKAnalyzer分词要用的字典配置文件

        

    这里也可以不用配置
    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
    <properties>
        <comment>IK Analyzer 扩展配置</comment>
        <!--用户可以在这里配置自己的扩展字典 -->
        <entry key="ext_dict">/data/lucene/dict/1_dict.txt;/data/lucene/dict/2_dict.txt;/data/lucene/dict/3_dict.txt;/data/lucene/dict/4_dict.txt;/data/lucene/dict/5_dict.txt;/data/lucene/dict/6_dict.txt;/data/lucene/dict/7_dict.txt;/data/lucene/dict/8_dict.txt;</entry>
        <!--用户可以在这里配置自己的扩展停止词字典
        <entry key="ext_stopwords">/data/lucene/dict/stopword.dic</entry>
        -->
    </properties>

        3.log4j.properties这个不用多说了

        4.system-config.xml是一些系统的配置参数

       

    <?xml version="1.0" encoding="UTF-8"?>
    <configs>
            <mysql>
                <port>3306</port>
                <user>test</user>
                <password>test</password>
                <partitionCount>6</partitionCount>
                <maxWait>3600</maxWait>
                <driverClass>com.mysql.jdbc.Driver</driverClass>
                <idleMaxAge>1800</idleMaxAge>
                <idleConnectionTestPeriod>300</idleConnectionTestPeriod>
                <host>jdbc:mysql://localhost/blog?characterEncoding=UTF-8</host>
            </mysql>
            <search>
                <!--这里的路径可以自己修改-->
                <indexPath>/data/lucene/index</indexPath>
                <recommendNetIndexPath>/data/lucene/index/recommendNet</recommendNetIndexPath>
                <searcNum>10</searcNum>
                <resultNum>10000</resultNum>
            </search>
    </configs>

    三,监听器SystemStartupListener,实现了ServletContextListener

        

    package com.blog.listener;
    
    import java.io.File;
    import java.net.URL;
    import java.sql.SQLException;
    import java.util.List;
    
    import javax.servlet.ServletContextEvent;
    import javax.servlet.ServletContextListener;
    
    import org.apache.log4j.Logger;
    import org.dom4j.Document;
    import org.dom4j.DocumentException;
    import org.dom4j.Element;
    import org.dom4j.io.SAXReader;
    
    import com.blog.db.DBFactory;
    import com.blog.search.BlogSearch;
    import com.blog.search.index.BlogIndex;
    
    public class SystemStartupListener implements ServletContextListener {
        private static Logger log = Logger.getLogger(SystemStartupListener.class);
        public void contextDestroyed(ServletContextEvent arg0) {
            DBFactory.shutDown();
        }
    
        public void contextInitialized(ServletContextEvent arg0) {
            SAXReader reader = new SAXReader();
            try {
                URL url = this.getClass().getClassLoader().getResource("system-config.xml");
                String  path = url.getFile();
                Document doc = reader.read(new File(path));
                Element rootEle = doc.getRootElement();
                List list = rootEle.elements("mysql");
                if(list.size() > 0) {
                    Element mysqlEle = (Element) list.get(0);
                    if(null != mysqlEle) {
                        String host = mysqlEle.elementText("host");
                        String port = mysqlEle.elementText("port");
                        String user = mysqlEle.elementText("user");
                        String password = mysqlEle.elementText("password");
                        Integer partitionCount = Integer.parseInt(mysqlEle.elementText("partitionCount"));
                        Integer maxWait = Integer.parseInt(mysqlEle.elementText("maxWait"));
                        String driverClass = mysqlEle.elementText("driverClass");
                        Integer idleMaxAge = Integer.parseInt(mysqlEle.elementText("idleMaxAge"));
                        Integer idleConnectionTestPeriod = Integer.parseInt(mysqlEle.elementText("idleConnectionTestPeriod"));
                        DBFactory.init(driverClass, host, user, password, partitionCount, maxWait, idleMaxAge, idleConnectionTestPeriod);
                    }
                } else {
                    throw new RuntimeException("初始化失败....");
                        
                }
                list = rootEle.elements("search");
                if(list.size() > 0) {
                    Element searchEle = (Element) list.get(0);
                    String indexPath = searchEle.elementText("indexPath");   //索引文件的存放位置
                    String searcNum = searchEle.elementText("searcNum");  //一次搜索结果数
                    String resultNum = searchEle.elementText("resultNum");
                    String recommendNetIndexPath = searchEle.elementText("recommendNetIndexPath");
                    System.setProperty("searcNum", searcNum);
                    System.setProperty("resultNum", resultNum);
                    System.setProperty("indexFilePath", indexPath);
                    System.setProperty("recommendNetIndexPath", recommendNetIndexPath);
                    BlogIndex.buildIndex(recommendNetIndexPath);
                } else {
                    throw new RuntimeException("初始化失败....");
                }
                
                log.info("初始化搜索.....");
                BlogSearch.init();
            } catch (DocumentException e) {
                log.error("解析配置文件出错.....",e);
            } catch(Exception e) {
                log.error("出现未知错误....",e);
            }
        }
    }

    四,util包中的Constant常量类:

        

    package com.blog.util;
    
    /**
     * Search limits read once from JVM system properties when this class is
     * loaded.
     *
     * <p>Both properties must already be set to decimal integers at that point;
     * otherwise class initialisation fails with a {@link NumberFormatException}.
     */
    public class Constant {
        /** Value of the {@code searcNum} system property. */
        public static final Integer searcNum;
        /** Value of the {@code resultNum} system property. */
        public static final Integer resultNum;

        static {
            searcNum = intProperty("searcNum");
            resultNum = intProperty("resultNum");
        }

        /** Parses the named system property as a decimal integer. */
        private static Integer intProperty(String key) {
            return Integer.valueOf(System.getProperty(key));
        }
    }

       util包中的DataToJson类:

        

    package com.blog.util;
    
    import java.util.List;
    
    import com.google.gson.JsonArray;
    import com.google.gson.JsonObject;
    
    public class DataToJson {
        
        public static String parseDataToJson(List<Long> ids, int totalCount) {
            JsonObject json = new JsonObject();
            json.addProperty("totalCount", totalCount);
            JsonArray array = new JsonArray();
            if(ids.size() > 0) {
                for(Long id : ids) {
                    JsonObject obj = new JsonObject();
                    obj.addProperty("id", id);
                    array.add(obj);
                }
            }
            json.add("data", array);
            return json.toString();
        }
        
    }

    五,entity包中的实体类:

        Dashboard:

        

    package com.blog.search.entity;
    
    /**
     * Mutable bean describing one blog post handed to the indexer: its id,
     * title and body text.
     */
    public class Dashboard {
        private Long id;
        private String title;
        private String content;

        /** @return the post id, or {@code null} if not set */
        public Long getId() {
            return this.id;
        }

        /** @param id the post id */
        public void setId(Long id) {
            this.id = id;
        }

        /** @return the post title, or {@code null} if not set */
        public String getTitle() {
            return this.title;
        }

        /** @param title the post title */
        public void setTitle(String title) {
            this.title = title;
        }

        /** @return the post body, or {@code null} if not set */
        public String getContent() {
            return this.content;
        }

        /** @param content the post body */
        public void setContent(String content) {
            this.content = content;
        }
    }

    六,lucene相关的索引和检索类:

        index包中的BlogIndex:

        

    package com.blog.search.index;
    
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.log4j.Logger;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    import com.blog.search.entity.Dashboard;
    
    public class BlogIndex {
        private static final String indexFilePath = System.getProperty("indexFilePath");
        private static Logger log = Logger.getLogger(BlogIndex.class);
        public BlogIndex() {
    
        }
        //这种方法在没有索引的时候须要在初始化时调用
        public static void buildIndex(String path) {
            File file = new File(path);
            if(file.isDirectory() && file.listFiles().length == 0){  
                Directory dir;
                try {
                    dir = FSDirectory.open(new File(path));
                
                    Analyzer analyzer = new IKAnalyzer(true);
                    //配置类
                    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
                    iwc.setOpenMode(OpenMode.CREATE);
                    IndexWriter writer = new IndexWriter(dir, iwc);
                    writer.deleteAll();
                    writer.close();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            }
            
        }
        
        @SuppressWarnings("deprecation")
        private Document getDocument(Dashboard dashboard) throws Exception {
            Document doc = new Document();
            doc.add(new Field("title", dashboard.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("content", dashboard.getContent(),Field.Store.NO,Field.Index.ANALYZED));
            Field idField = new StringField("id",dashboard.getId().toString(), Field.Store.YES);
            doc.add(idField);
            return doc;
        }
        
        public void writeToIndex(Dashboard dashboard) throws Exception {
            Document doc = getDocument(dashboard);
            IndexWriter writer = null;
            try {
                
                Directory dir = FSDirectory.open(new File(indexFilePath));
                //分析器
                Analyzer analyzer = new IKAnalyzer(true);
                //配置类
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
                writer = new IndexWriter(dir, iwc);
            } catch(Exception e) {
                e.printStackTrace();
            }
            writer.addDocument(doc);
            writer.commit();
            writer.close();
        }
        
        public void deleteIndex(Long id) {
            IndexWriter writer = null;
            try {
                Directory dir = FSDirectory.open(new File(indexFilePath));
                Analyzer analyzer = new IKAnalyzer(true);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
                writer = new IndexWriter(dir, iwc);
                writer.deleteDocuments(new Term("id",id.toString()));
                writer.commit();
                
            } catch(Exception e) {
                log.error("删除索引出错.....");
            } finally {
                if(writer != null) {
                    try {
                        writer.close();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
            }
        }
        
        public void updateIndex(Dashboard dashboard) throws Exception {
            Document doc = getDocument(dashboard);
            IndexWriter writer = null;
            try {
                
                Directory dir = FSDirectory.open(new File(indexFilePath));
                //分析器
                Analyzer analyzer = new IKAnalyzer(true);
                //配置类
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
                //iwc.setOpenMode(OpenMode.CREATE);
                writer = new IndexWriter(dir, iwc);
            } catch(Exception e) {
                e.printStackTrace();
            }
            writer.updateDocument(new Term("id", dashboard.getId().toString()), doc);
            writer.commit();
            writer.close();
        }
    }

    七,search包下面的BlogSearch类:

    package com.blog.search;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    
    import org.apache.log4j.Logger;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.queryparser.classic.QueryParser.Operator;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    import com.blog.util.Constant;
    import com.blog.util.DataToJson;
    
    public class BlogSearch {
        private static Logger log = Logger.getLogger(BlogSearch.class);
        
        private static final String indexFilePath = System.getProperty("indexFilePath");
        private static String[] field = {"title","content"};
        private IndexSearcher searcher;
        //存储初始化的IndexReader,节省每次又一次打开索引文件的性能开销
        private static Map<String, IndexReader> readers = new ConcurrentHashMap<String, IndexReader>();
        private static Object lock = new Object();
        public static void init() {
            try {
                IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexFilePath)));
                readers.put("blogsearch", reader);
                log.info(readers.toString());
            } catch (IOException e) {
                log.error("初始化搜索器出错.......",e);
            }
            
        }
        
        public TopDocs search(String keyword) {
            try {
                Analyzer analyzer = new IKAnalyzer(true);
                
                QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_43, field,analyzer);
                parser.setDefaultOperator(Operator.AND);
                // 将关键字包装成Query对象
                Query query = parser.parse(keyword);
                
                //加锁为了防止在一个线程读取IndexReader之后。可是还没有运行查询之前。索引改变了,
                //导致IndexReader对象被关闭后又一次创建,可能导致关闭异常的问题
                synchronized(lock) {  
                    IndexReader reader = readers.get("blogsearch");
                    IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader)reader);
                    if(newReader == null) {  //假设为空。表示索引没有变化
                        newReader = reader;
                    } else {
                        readers.put("blogsearch", newReader);
                        reader.close();
                    }
                    searcher = new IndexSearcher(newReader);
                }
                //newReader = DirectoryReader.open(FSDirectory.open(new File(indexFilePath)));
                TopDocs results = searcher.search(query, Constant.resultNum);
                return results;
            } catch(Exception e) {
                log.error("搜索关键字出错......",e);
                return null;
            }
        }
        
        public String getResult(String keyword, int pageSize) {
            TopDocs td = search(keyword);
            int totalCount = td.totalHits;
            ScoreDoc[] h = td.scoreDocs;
            List<Long> ids = new ArrayList<Long>(h.length);
            if(h.length == 0) {
                log.debug("no result data");
            } else {
                int start = Constant.searcNum*(pageSize - 1);
                int end = Constant.searcNum*pageSize;
                if(start >= totalCount) {
                    start = 0;
                    end = totalCount;
                } 
                if(end > totalCount) {
                    end = totalCount;
                    
                }
                for(int i = start; i < end; i++) {
                    try {
                        Document doc = searcher.doc(h[i].doc);
                        ids.add(Long.parseLong(doc.get("id")));
                        //log.debug("这是第" + (i + 1) + "个检索到的结果,id为:" + doc.get("id")+",  " + doc.get("title"));
                    } catch(Exception e) {
                        e.printStackTrace();
                        log.error("start=" +start + ", end=" + end + ", " + h.length);
                    }
                }
            }
            return DataToJson.parseDataToJson(ids, totalCount);
        }
    }

    八,service包下的BlogSearchService,这是jersey的入口,由这个类向外界提供api:

        

    package com.blog.search.service;
    
    import javax.ws.rs.FormParam;
    import javax.ws.rs.GET;
    import javax.ws.rs.POST;
    import javax.ws.rs.Path;
    import javax.ws.rs.Produces;
    import javax.ws.rs.QueryParam;
    import javax.ws.rs.core.MediaType;
    
    import com.blog.search.BlogSearch;
    import com.blog.search.entity.Dashboard;
    import com.blog.search.index.BlogIndex;
    import com.google.gson.JsonObject;
    
    @Path("/blogSearch/")
    public class BlogSearchService {
    
        @GET
        @Path("/queryByKeyword")
        @Produces(MediaType.APPLICATION_JSON)
        public String queryIdsByKeyword(@QueryParam("keyword") String keyword, @QueryParam("pageSize") Integer pageSize) {
            return new BlogSearch().getResult(keyword, pageSize);
        }
        
        @POST
        @Path("/buildByContent")
        @Produces(MediaType.APPLICATION_JSON)
        public String buildIndexByContent(@FormParam("content") String content,@FormParam("title")String title, @FormParam("id") Long id) {
            BlogIndex bi = new BlogIndex();
            Dashboard dashboard = new Dashboard();
            dashboard.setContent(content);
            dashboard.setTitle(title);
            dashboard.setId(id);
            JsonObject json = new JsonObject();
            try {
                bi.writeToIndex(dashboard);
                json.addProperty("result", "200");
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
                json.addProperty("result", "500");
            } finally {
                //index();
                return json.toString();
            }
            
        }
        
        @POST
        @Path("/deleteById")
        @Produces(MediaType.APPLICATION_JSON)
        public String deleteIndexById(@FormParam("id") Long id) {
            BlogIndex bi = new BlogIndex();
            JsonObject json = new JsonObject();
            try {
                bi.deleteIndex(id);
                json.addProperty("result", 200);
            } catch(Exception e) {
                json.addProperty("result", 500);
            } finally {
                return json.toString();
            }
        }
        
        @POST
        @Path("/update")
        @Produces(MediaType.APPLICATION_JSON)
        public String updateIndex(@FormParam("id") Long id, @FormParam("content") String content, @FormParam("title") String title) {
            BlogIndex bi = new BlogIndex();
            JsonObject json = new JsonObject();
            try {
                Dashboard dashboard = new Dashboard();
                dashboard.setContent(content);
                dashboard.setTitle(title);
                dashboard.setId(id);
                bi.updateIndex(dashboard);
                json.addProperty("result", 200);
            } catch(Exception e) {
                json.addProperty("result", 500);
            } finally {
                return json.toString();
            }
        }
        
    }

    九,web.xml的配置:

    <?xml version="1.0" encoding="UTF-8"?>
    <web-app version="2.5"
        xmlns="http://java.sun.com/xml/ns/javaee"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://java.sun.com/xml/ns/javaee
        http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
      <display-name></display-name>
      <welcome-file-list>
        <welcome-file>index.jsp</welcome-file>
      </welcome-file-list>
      <servlet>
        <servlet-name>JerseyServlet</servlet-name>
        <servlet-class>com.sun.jersey.spi.container.servlet.ServletContainer</servlet-class>
        <init-param>
          <param-name>com.sun.jersey.config.property.packages</param-name>
          <!-- 系统启动时扫描的包的路径-->
          <param-value>com.blog.search.service</param-value>
        </init-param>
        <load-on-startup>1</load-on-startup>
      </servlet>
      <servlet-mapping>
        <servlet-name>JerseyServlet</servlet-name>
        <url-pattern>/search/*</url-pattern>
      </servlet-mapping>
      <listener>
        <listener-class>com.blog.listener.SystemStartupListener</listener-class>
      </listener>
    </web-app>

    十,程序依赖包:

        

           slf4j-nop-1.7.5.jar

    好了,完成之后,把tomcat配置好,如果你是用myeclipse自带的tomcat发布的,则访问http://localhost:port/项目名称/search/blogSearch/buildByContent?后面就是参数传递,查询也跟这个url类似

    就这样,我们创建了一个restful风格的简单搜索引擎,里面的配置大家按照自己的需求改改就好


  • 相关阅读:
    ftp上传下载
    阿里云轻量服务器价格及轻量与ECS服务器区别比较
    找工作
    程序员的精力管理
    应届生如何笔试面试
    java重点总结(一)
    真是面试题汇总(二)
    真实笔试题汇总(一)
    真实面试问题汇总(一)
    Java后端面试准备
  • 原文地址:https://www.cnblogs.com/zhchoutai/p/8450429.html
Copyright © 2011-2022 走看看