  • 6. Collecting index statistics for a Solr directory

    package com.main.java.solr.statistics;
    
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexableField;
    import org.apache.lucene.search.ScoreDoc;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Collect statistics for the indexes under a directory.
     */
    public class SolrIndexInfoStatistics {
        public static final Logger LOG= LoggerFactory.getLogger(SolrIndexInfoStatistics.class);
    
        // Relative path of the index data inside each subdirectory
        private static final String DEFAULT_PATH="/data/index";
        private static String upperDir;
    
        private static List<SolrIndexInfo> result;
    
        /**
         * Given a parent directory, collect index statistics for every subdirectory (directory path, index count, field count, and the field names and values of one document).
         * @param args
         */
        public static void main(String[] args) {
    
            if(null==args||0==args.length){
                // Fall back to a default test directory when no argument is given
                args=new String[]{"F:\\indexTest"};
               // return;
            }
            upperDir=args[0];
            statisticsSolrIndexInfo(upperDir);
            displayIndexInfo(result);
        }
    
        // Collect index statistics for each subdirectory
        private static void statisticsSolrIndexInfo(String upperDir){
            List<String> list=fetchSubDirs(upperDir);
            result=new ArrayList<>();
            for (String path : list) {
                IndexReader reader=SolrUtil.getReader(path);
                if(null==reader){
                    continue;
                }
                ScoreDoc [] scoreDoc=SolrUtil.queryDocs(reader,path);
                if(null ==scoreDoc){
                    continue;
                }
                SolrIndexInfo info=new SolrIndexInfo();
                info.setFilePath(path);
                info.setIndexNum(scoreDoc.length);
                // Read the first document as a sample of the schema
                Document doc=null;
                try {
                    doc=reader.document(scoreDoc[0].doc);
                } catch (IOException e) {
                    LOG.error("Failed to read document from {}", path, e);
                }
                if(null==doc){
                    continue;
                }
                List<IndexableField> fieldsList= doc.getFields();
                String [] fields=new String[fieldsList.size()];
                Object [] values=new Object[fieldsList.size()];
                for(int i=0;i<fieldsList.size();i++){
                    fields[i]=fieldsList.get(i).name();
                    values[i]=doc.get(fields[i]);
                }
                info.setFields(fields);
                info.setValues(values);
                info.setFieldNum(fieldsList.size());
                result.add(info);
            }
        }
        // Print the collected index information
        private static void displayIndexInfo(List<SolrIndexInfo> infos){
            for (SolrIndexInfo info : infos) {
                System.out.println(info.toString());
            }
        }
        // Collect the index path of each subdirectory
        private static List<String> fetchSubDirs(String upperDir){
            List<String> list=new ArrayList<>();
            // Make sure the parent directory exists
            if(!new File(upperDir).exists()){
                LOG.error("Directory does not exist: {}",upperDir);
                return list;
            }
            File [] files=new File(upperDir).listFiles();
            if(null==files){
                LOG.error("Path is not a listable directory: {}",upperDir);
                return list;
            }
            for (File file : files) {
                String fileName=file.getName();
                String filePath=upperDir+File.separator+fileName+DEFAULT_PATH;
                if(new File(filePath).exists()){
                    list.add(filePath);
                }else{
                    LOG.error("目录不存在:{}",filePath);
                }
            }
            return list;
        }
    }
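
A minimal sketch of how the tool might be run; the launcher class name and the directory path below are assumptions for illustration, not taken from the original post:

    // Hypothetical launcher in the same package; delegates to the statistics tool with an example path.
    package com.main.java.solr.statistics;

    public class RunStatistics {
        public static void main(String[] args) {
            // Roughly: java -cp <classpath> com.main.java.solr.statistics.SolrIndexInfoStatistics /data/solr/cores
            SolrIndexInfoStatistics.main(new String[]{"/data/solr/cores"});
        }
    }

The SolrIndexInfo class below is the value object that holds the statistics collected for one index directory.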
    package com.main.java.solr.statistics;
    
    /**
     * Index information for one directory (directory path, index count, field count, and the field names and values of one sample document).
     */
    public class SolrIndexInfo {
    
        private String filePath;
    
        private int indexNum;
    
        private int fieldNum;
    
        private String [] fields;
    
        private Object [] values;
    
    
        public String getFilePath() {
            return filePath;
        }
    
        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }
    
        public int getIndexNum() {
            return indexNum;
        }
    
        public void setIndexNum(int indexNum) {
            this.indexNum = indexNum;
        }
    
        public int getFieldNum() {
            return fieldNum;
        }
    
        public void setFieldNum(int fieldNum) {
            this.fieldNum = fieldNum;
        }
    
        public String[] getFields() {
            return fields;
        }
    
        public void setFields(String[] fields) {
            this.fields = fields;
        }
    
        public Object[] getValues() {
            return values;
        }
    
        public void setValues(Object[] values) {
            this.values = values;
        }
        @Override
        public String toString(){
            StringBuilder sb=new StringBuilder();
            sb.append("index directory: ").append(filePath).append(", ")
                    .append("index count: ").append(indexNum).append(", ")
                    .append("field count: ").append(fieldNum).append(", ")
                    .append("field names and values: ");
            for(int i=0;i<fields.length;i++){
                sb.append(fields[i]).append(":").append(values[i]).append(",");
            }
            return sb.toString();
        }
    
    }
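
As a quick illustration of the output format, here is a hypothetical example that fills in a SolrIndexInfo by hand and prints it; all values are made up:

    package com.main.java.solr.statistics;

    // Hypothetical usage of SolrIndexInfo; the path and field values are examples only.
    public class SolrIndexInfoExample {
        public static void main(String[] args) {
            SolrIndexInfo info = new SolrIndexInfo();
            info.setFilePath("/data/solr/cores/core1/data/index"); // example path
            info.setIndexNum(5337424);
            info.setFieldNum(2);
            info.setFields(new String[]{"id", "name"});
            info.setValues(new Object[]{"1", "doc-1"});
            // Prints: index directory: /data/solr/cores/core1/data/index, index count: 5337424,
            //         field count: 2, field names and values: id:1,name:doc-1,
            System.out.println(info);
        }
    }

The SolrUtil class below wraps the Lucene calls: opening an IndexReader from a local or HDFS directory and running a match-all query.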
    package com.main.java.solr.statistics;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.NIOFSDirectory;
    import org.apache.solr.store.hdfs.HdfsDirectory;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.IOException;
    import java.nio.file.Paths;
    
    /**
     * Common Lucene/Solr helper methods.
     */
    public class SolrUtil {
        public static final Logger LOGGER= LoggerFactory.getLogger(SolrUtil.class);
    
        /**
         * Build the query used to fetch documents.
         * @param queryDefaultFieldName default field for the query parser; "*:*" queries all fields
         * @return query
         * @throws org.apache.lucene.queryparser.classic.ParseException
         */
        public static Query getQuery(String queryDefaultFieldName) throws ParseException {
            // Create the analyzer
            Analyzer analyzer=new StandardAnalyzer();
            // Build the query parser: the first argument is the default field to query,
            // the second is the Analyzer
            QueryParser parser=new QueryParser(queryDefaultFieldName, analyzer);
            // Parse a match-all query
            Query query = parser.parse("*:*");
            return query;
        }
    
        /**
         * Open an IndexReader for the given index directory.
         * @param luceneFileDir Lucene index path: either a local path such as "/usr/temp/lucene/index"
         *                      or an HDFS path such as "hdfs://SERVICE-HADOOP-001,hdh184:8020,hdh185:8020/tmp/lucene/index"
         * @return indexReader
         */
        public static IndexReader getReader(String luceneFileDir){
            IndexReader reader=null;
            if(luceneFileDir.contains("hdfs")){
                Configuration conf =new Configuration();
                try {
                    HdfsDirectory directory=new HdfsDirectory(new Path(luceneFileDir),conf);
                    reader= DirectoryReader.open(directory);
                } catch (IOException e) {
                    LOGGER.error(("读取索引目录异常,索引目录:" + luceneFileDir + " 异常信息:" + e.getMessage()));
                }
                return  reader;
            }else{
                try {
                    // Open the local index directory
                    Directory dir = NIOFSDirectory.open(Paths.get(luceneFileDir));
                    // Open a reader over all files in that directory
                    reader=  DirectoryReader.open(dir);
                }catch (IOException e){
                    LOGGER.error("Failed to open index directory {}: {}", luceneFileDir, e.getMessage());
                }
                if(null==reader){
                    LOGGER.warn("初始化indexReader失败,reader is null");
                }
                return reader;
            }
        }
    
        /**
         * Fetch all documents from the index.
         * @param reader IndexReader
         * @param luceneFileDir index directory path, used for logging
         * @return ScoreDoc[] containing the ids of all indexed documents
         */
        public static ScoreDoc[] queryDocs(IndexReader reader,String luceneFileDir){
            ScoreDoc[] docs =null;
            if(null!=reader){
                // Total number of documents in the index
                int numDocs=reader.numDocs();
                if(0==numDocs){
                    return docs;
                }
                // Build the index searcher
                IndexSearcher is=new IndexSearcher(reader);
                // Build the query
                Query query;
                try {
                    query = SolrUtil.getQuery("*:*");
                    if(null!=query){
                        TopDocs hits;
                        // First argument: the query to run;
                        // second argument: the maximum number of hits to return
                        hits = is.search(query, numDocs);
                        if(null!=hits){
                            // Collect the ids of all matching documents
                            docs= hits.scoreDocs;
                            LOGGER.info("Directory {} contains {} indexed documents", luceneFileDir, docs.length);
                        } else{
                            LOGGER.error("Query failed, no documents matching the query were found");
                        }
                    }else{
                        LOGGER.info("IndexSearcher or query initialization failed, IndexSearcher: {} query: {}", is, query);
                    }
                } catch (ParseException e) {
                    LOGGER.error("Failed to build the query: {}", e.getMessage());
                }catch (IOException e) {
                    LOGGER.error("Failed to query the index: {}", e.getMessage());
                }
            }else{
                LOGGER.warn("reader is null, cannot fetch index information");
            }
            return docs;
        }
    
        public static void main(String[] args) {
            // Ad-hoc helper: sum a list of per-directory index counts
            int [] nums=new int[]{5337424,
            8354699,
            5737071,
            6785709,
            5169227,
            4083041,
            4822605,
            4235623,
            3968948,
            3059084,
            3460965,
            6991442};
            int sum=0;
            for (int num : nums) {
                sum=sum+num;
            }
            System.out.println(sum);
        }
    
    }
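
For completeness, a minimal sketch of using SolrUtil directly on a single index directory; the path is an assumption, and the reader is closed explicitly here, which the statistics tool above does not do:

    package com.main.java.solr.statistics;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.ScoreDoc;

    // Hypothetical direct use of SolrUtil; the index path is an example only.
    public class SolrUtilExample {
        public static void main(String[] args) throws Exception {
            String indexDir = "/data/solr/cores/core1/data/index"; // assumed local index path
            IndexReader reader = SolrUtil.getReader(indexDir);
            if (reader != null) {
                ScoreDoc[] docs = SolrUtil.queryDocs(reader, indexDir);
                System.out.println("documents found: " + (docs == null ? 0 : docs.length));
                reader.close(); // release the index files
            }
        }
    }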
  • Original post: https://www.cnblogs.com/yangh2016/p/5888118.html