zoukankan      html  css  js  c++  java
  • Lucene全文检索

    package cn.richinfo.cmail.basemail.common.tools;
    
    import java.io.File;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.IntField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.WildcardQuery;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    import cn.richinfo.cmail.basemail.addr.model.LuceneEngineModel;
    import cn.richinfo.cmail.common.log.CommonLogger;
    import cn.richinfo.cmail.common.log.Log;
    
    /**
     * Static helpers for maintaining and querying a file-system based Lucene index
     * of {@link LuceneEngineModel} entries.
     *
     * <p>Every public operation opens its own {@link IndexWriter} or
     * {@link IndexReader} on the given index directory and closes it before
     * returning. Failures are logged and never propagated to the caller: write
     * operations simply return, search operations return whatever was collected
     * before the failure (possibly an empty list).
     */
    public class LuceneEngineUtil {

        private static final Log log = CommonLogger.getInstance();

        /** Maximum number of hits returned by {@link #searchDeptIndex(String, String)}. */
        private static final int DEPT_SEARCH_LIMIT = 20;

        /**
         * Recursively deletes everything inside the directory at {@code path}.
         * The directory itself is kept. Does nothing when {@code path} is null,
         * does not exist, is not a directory, is empty, or cannot be listed.
         *
         * @param path directory whose content (old index files) should be removed
         */
        public static void delFiles(String path) {
            if (path == null) {
                return;
            }
            File dir = new File(path);
            if (!dir.isDirectory()) {
                return;
            }
            // listFiles() returns null on I/O errors or missing permissions.
            File[] children = dir.listFiles();
            if (children == null || children.length == 0) {
                return;
            }
            log.info("delete file " + path);
            for (File child : children) {
                if (!child.isFile()) {
                    // Empty the sub-directory first so that it can be deleted below.
                    delFiles(child.getAbsolutePath());
                }
                if (!child.delete()) {
                    log.info("delete file fail: " + child.getAbsolutePath());
                }
            }
        }

        /**
         * Opens an index writer on {@code path}, creating the index if it does not
         * exist yet and appending to it otherwise.
         *
         * @param path index directory
         * @return a new writer; the caller is responsible for closing it
         * @throws Exception if the directory or the writer cannot be opened
         */
        private static IndexWriter getIndexWriter(String path) throws Exception {
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
            conf.setOpenMode(OpenMode.CREATE_OR_APPEND);
            conf.setMaxBufferedDocs(100);
            Directory directory = FSDirectory.open(new File(path));
            return new IndexWriter(directory, conf);
        }

        /**
         * Opens an index reader on {@code path}.
         *
         * @param path index directory
         * @return a new reader; the caller is responsible for closing it
         * @throws Exception if the directory or the reader cannot be opened
         */
        private static IndexReader getIndexReader(String path) throws Exception {
            Directory directory = FSDirectory.open(new File(path));
            return DirectoryReader.open(directory);
        }

        /**
         * Closes {@code writer} if it is not null, logging (not throwing) any failure.
         *
         * @param writer writer to close, may be null
         */
        private static void closeWriter(IndexWriter writer) {
            if (writer == null) {
                return;
            }
            try {
                log.info("writer close……");
                writer.close();
            } catch (Exception e) {
                log.error("writer close fail: ", e);
            }
        }

        /**
         * Closes {@code reader} if it is not null, logging (not throwing) any failure.
         *
         * @param reader reader to close, may be null
         */
        private static void closeReader(IndexReader reader) {
            if (reader == null) {
                return;
            }
            try {
                log.info("reader close……");
                reader.close();
            } catch (Exception e) {
                log.error("reader close fail: ", e);
            }
        }

        /**
         * Adds a single entry to the index.
         *
         * @param path  index directory
         * @param model entry to index; a null model results in an empty document
         */
        public static void createIndex(String path, LuceneEngineModel model) {
            log.info("create index: path=" + path);
            IndexWriter writer = null;
            try {
                writer = getIndexWriter(path);
                writer.addDocument(toDocument(model));
                log.info("create index success.");
            } catch (Exception e) {
                log.error("create index fail: ", e);
            } finally {
                closeWriter(writer);
            }
        }

        /**
         * Adds a batch of entries to the index using a single writer.
         *
         * @param path index directory
         * @param list entries to index; a null list is logged and ignored
         */
        public static void createIndex(String path, List<LuceneEngineModel> list) {
            if (list == null) {
                log.info(String.format("create index batch: path=%s | size=%s", path, 0));
                return;
            }
            log.info(String.format("create index batch: path=%s | size=%s", path, list.size()));
            IndexWriter writer = null;
            long start = System.currentTimeMillis();
            try {
                writer = getIndexWriter(path);
                for (LuceneEngineModel model : list) {
                    writer.addDocument(toDocument(model));
                }
                log.info(String.format("create index batch success : time=%sms", (System.currentTimeMillis() - start)));
            } catch (Exception e) {
                log.error("create index batch fail: ", e);
            } finally {
                closeWriter(writer);
            }
        }

        /**
         * Converts a model into a Lucene document. All fields are stored; string
         * fields are indexed as single un-tokenized terms and null strings are
         * stored as the empty string.
         *
         * @param model entry to convert, may be null
         * @return the document; empty (no fields) when {@code model} is null
         */
        private static Document toDocument(LuceneEngineModel model) {
            Document document = new Document();
            if (model != null) {
                log.info("addDocument: " + model.toString());
                document.add(new IntField("type", model.getType(), Field.Store.YES));
                document.add(new IntField("id", model.getId(), Field.Store.YES));
                document.add(new StringField("email", formatNull(model.getEmail()), Field.Store.YES));
                document.add(new StringField("mobile", formatNull(model.getMobile()), Field.Store.YES));
                document.add(new StringField("first_name", formatNull(model.getFirst_name()), Field.Store.YES));
                document.add(new StringField("second_name", formatNull(model.getSecond_name()), Field.Store.YES));
                document.add(new StringField("position", formatNull(model.getPosition()), Field.Store.YES));
                document.add(new StringField("locate_name_list", formatNull(model.getLocate_name_string()), Field.Store.YES));
                document.add(new StringField("dept_list", formatNull(model.getDept_list()), Field.Store.YES));
            }
            return document;
        }

        /**
         * Replaces null with the empty string.
         *
         * @param value any string, may be null
         * @return {@code value}, or {@code ""} when it is null
         */
        private static String formatNull(String value) {
            return value == null ? "" : value;
        }

        /**
         * Deletes every document whose {@code email} field equals {@code email}.
         *
         * @param path  index directory
         * @param email exact e-mail term of the documents to delete
         */
        public static void deleteIndex(String path, String email) {
            log.info(String.format("delete index: path=%s | email=%s", path, email));
            IndexWriter writer = null;
            try {
                writer = getIndexWriter(path);
                writer.deleteDocuments(new Term("email", email));
                log.info("delete index success.");
            } catch (Exception e) {
                log.error("delete index fail: ", e);
            } finally {
                closeWriter(writer);
            }
        }

        /**
         * Replaces every document whose {@code email} field equals {@code email}
         * with a document built from {@code model}.
         *
         * @param path  index directory
         * @param email exact e-mail term of the documents to replace
         * @param model new content
         */
        public static void updateIndex(String path, String email, LuceneEngineModel model) {
            log.info(String.format("update index: path=%s | email=%s", path, email));
            IndexWriter writer = null;
            try {
                writer = getIndexWriter(path);
                writer.updateDocument(new Term("email", email), toDocument(model));
                log.info("update index success.");
            } catch (Exception e) {
                log.error("update index fail: ", e);
            } finally {
                closeWriter(writer);
            }
        }

        /**
         * Searches the {@code dept_list} field with a wildcard query and returns at
         * most {@value #DEPT_SEARCH_LIMIT} entries.
         *
         * @param path   index directory
         * @param deptId wildcard pattern matched against the whole {@code dept_list}
         *               term; it is used as given, no wildcards are added
         * @return matching entries; empty when nothing matches or the search fails
         */
        public static List<LuceneEngineModel> searchDeptIndex(String path, String deptId) {
            log.info(String.format("search index: path=%s | deptId=[%s]", path, deptId));
            List<LuceneEngineModel> list = new ArrayList<LuceneEngineModel>();
            IndexReader reader = null;
            long start = System.currentTimeMillis();
            try {
                reader = getIndexReader(path);
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new WildcardQuery(new Term("dept_list", deptId));

                TopDocs tds = searcher.search(query, DEPT_SEARCH_LIMIT);
                for (ScoreDoc sd : tds.scoreDocs) {
                    list.add(toEntity(searcher.doc(sd.doc)));
                }

                log.info(String.format("search index success: size=%s | time=%sms", tds.totalHits, (System.currentTimeMillis() - start)));
            } catch (Exception e) {
                log.error("search index fail: ", e);
            } finally {
                closeReader(reader);
            }
            return list;
        }

        /**
         * Searches for entries whose {@code email} field contains {@code content}.
         *
         * @param path    index directory
         * @param content substring to look for; it is wrapped in {@code *...*}, so
         *                wildcard characters inside it are interpreted as wildcards
         * @param total   maximum number of hits, as a decimal integer string; an
         *                unparsable value is logged and yields an empty result
         * @return matching entries; empty when nothing matches or the search fails
         */
        public static List<LuceneEngineModel> searchIndex(String path, String content, String total) {
            log.info(String.format("search index: path=%s | content=[%s] | total=%s", path, content, total));
            List<LuceneEngineModel> list = new ArrayList<LuceneEngineModel>();
            IndexReader reader = null;
            long start = System.currentTimeMillis();
            try {
                reader = getIndexReader(path);
                IndexSearcher searcher = new IndexSearcher(reader);
                Query query = new WildcardQuery(new Term("email", "*" + content + "*"));
                TopDocs tds = searcher.search(query, Integer.parseInt(total));
                log.info(String.format("search index success: size=%s | time=%sms", tds.totalHits, (System.currentTimeMillis() - start)));
                for (ScoreDoc sd : tds.scoreDocs) {
                    list.add(toEntity(searcher.doc(sd.doc)));
                }
            } catch (Exception e) {
                log.error("search index fail: ", e);
            } finally {
                closeReader(reader);
            }
            return list;
        }

        /**
         * Converts a stored document back into a model.
         *
         * <p>NOTE(review): the {@code dept_list} field written by
         * {@link #toDocument(LuceneEngineModel)} is not read back here; confirm
         * whether that is intended.
         *
         * @param doc document loaded from the index
         * @return the populated model
         * @throws NumberFormatException if the stored {@code type} or {@code id}
         *                               field is missing or not an integer
         */
        private static LuceneEngineModel toEntity(Document doc) {
            LuceneEngineModel model = new LuceneEngineModel();
            model.setType(Integer.parseInt(doc.get("type")));
            model.setId(Integer.parseInt(doc.get("id")));
            model.setEmail(doc.get("email"));
            model.setMobile(doc.get("mobile"));
            model.setFirst_name(doc.get("first_name"));
            model.setSecond_name(doc.get("second_name"));
            model.setPosition(doc.get("position"));
            // A missing field is treated like an empty one instead of failing with a
            // NullPointerException.
            String[] locateNames = formatNull(doc.get("locate_name_list")).split(",");
            model.setLocate_name_list(Arrays.asList(locateNames));
            return model;
        }

    }
  • 相关阅读:
    使用FolderBrowserDialog组件选择文件夹
    使用OpenFileDialog组件打开多个文件
    使用OpenFileDialog组件打开对话框
    获取弹出对话框的相关返回值
    PAT 甲级 1139 First Contact (30 分)
    PAT 甲级 1139 First Contact (30 分)
    PAT 甲级 1138 Postorder Traversal (25 分)
    PAT 甲级 1138 Postorder Traversal (25 分)
    PAT 甲级 1137 Final Grading (25 分)
    PAT 甲级 1137 Final Grading (25 分)
  • 原文地址:https://www.cnblogs.com/linying/p/5897341.html
Copyright © 2011-2022 走看看