package com.main.java.solr.statistics; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.ScoreDoc; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; /** * 统计目录下索引信息 */ public class SolrIndexInfoStatistics { public static final Logger LOG= LoggerFactory.getLogger(SolrIndexInfoStatistics.class); private static final String DEFAULT_PATH="/data/index"; private static String upperDir; private static List<SolrIndexInfo> result; /** * 给出一个上层目录,统计所有子目录的索引信息 (目录名称,索引数目,字段个数,一行字段值和字段名称 * @param args */ public static void main(String[] args) { if(null==args||0==args.length){ args=new String[]{"F:\indexTest"}; // return; } upperDir=args[0]; statisticsSolrIndexInfo(upperDir); displayIndexInfo(result); } //计目录下索引信息 private static void statisticsSolrIndexInfo(String upperDir){ List<String> list=fetchSubDirs(upperDir); result=new ArrayList<>(); for (String path : list) { IndexReader reader=SolrUtil.getReader(path); if(null==reader){ continue; } ScoreDoc [] scoreDoc=SolrUtil.queryDocs(reader,path); if(null ==scoreDoc){ continue; } SolrIndexInfo info=new SolrIndexInfo(); info.setFilePath(path); info.setIndexNum(scoreDoc.length); Document doc=null; try { doc=reader.document(scoreDoc[0].doc); } catch (IOException e) { e.printStackTrace(); } List<IndexableField> fieldsList= doc.getFields(); String [] fields=new String[fieldsList.size()]; Object [] values=new Object[fieldsList.size()]; for(int i=0;i<fieldsList.size();i++){ fields[i]=fieldsList.get(i).name(); values[i]=doc.get(fields[i]); } info.setFields(fields); info.setValues(values); info.setFieldNum(fieldsList.size()); result.add(info); } } //展示获取的索引信息 private static void displayIndexInfo(List<SolrIndexInfo> infos){ for (SolrIndexInfo info : infos) { System.out.println(info.toString()); } } //获取子目录 private static List<String> fetchSubDirs(String upperDir){ List<String> list=new ArrayList<>(); //判断目录是否存在 if(!new File(upperDir).exists()){ LOG.error("目录不存在:{}",upperDir); return list; } File [] files=new File(upperDir).listFiles(); for (File file : files) { String fileName=file.getName(); String filePath=upperDir+File.separator+fileName+DEFAULT_PATH; if(new File(filePath).exists()){ list.add(filePath); }else{ LOG.error("目录不存在:{}",filePath); } } return list; } }
package com.main.java.solr.statistics; /** * 索引信息 (目录名称,索引数目,字段个数,三行字段值和字段名称 字段名称s ,一行字段值 */ public class SolrIndexInfo { private String filePath; private int indexNum; private int fieldNum; private String [] fields; private Object [] values; public String getFilePath() { return filePath; } public void setFilePath(String filePath) { this.filePath = filePath; } public int getIndexNum() { return indexNum; } public void setIndexNum(int indexNum) { this.indexNum = indexNum; } public int getFieldNum() { return fieldNum; } public void setFieldNum(int fieldNum) { this.fieldNum = fieldNum; } public String[] getFields() { return fields; } public void setFields(String[] fields) { this.fields = fields; } public Object[] getValues() { return values; } public void setValues(Object[] values) { this.values = values; } public String toString(){ StringBuffer sb=new StringBuffer(); sb.append("索引目录:").append(filePath).append(",") .append("索引数目:").append(indexNum).append(",") .append("索引字段个数:").append(fieldNum).append(",") .append("字段名称和值:"); for(int i=0;i<fields.length;i++){ StringBuilder builder=new StringBuilder(); String fieldInfo=builder.append(fields[i]).append(":").append(values[i]).append(",").toString(); sb.append(fieldInfo); } return sb.toString(); } }
package com.main.java.solr.statistics; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; import org.apache.solr.store.hdfs.HdfsDirectory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Paths; /** * Solr常用方法 */ public class SolrUtil { public static final Logger LOGGER= LoggerFactory.getLogger(SolrUtil.class); /** * 设置查询条件 * @param queryDefaultFieldName 查询的字段信息 *:* 表示查询所有字段 * @return query * @throws org.apache.lucene.queryparser.classic.ParseException */ public static Query getQuery(String queryDefaultFieldName) throws ParseException { //实例化分析器 Analyzer analyzer=new StandardAnalyzer(); //建立查询解析器 /** * 第一个参数是要查询的字段; * 第二个参数是分析器Analyzer * */ QueryParser parser=new QueryParser(queryDefaultFieldName, analyzer); //根据传进来的queryStr查找 Query query = parser.parse("*:*"); return query; } /** * 通过directory得到的路径下的所有的文件 * @param luceneFileDir lucene文件路径,linux 本地路径"/usr/temp/lucene/index"和hdfs路径 "hdfs://SERVICE-HADOOP-001,hdh184:8020,hdh185:8020/tmp/lucene/index" * @return indexReader */ public static IndexReader getReader(String luceneFileDir){ IndexReader reader=null; if(luceneFileDir.contains("hdfs")){ Configuration conf =new Configuration(); try { HdfsDirectory directory=new HdfsDirectory(new Path(luceneFileDir),conf); reader= DirectoryReader.open(directory); } catch (IOException e) { LOGGER.error(("读取索引目录异常,索引目录:" + luceneFileDir + " 异常信息:" + e.getMessage())); } return reader; }else{ try { //得到读取索引文件的路径 Directory dir = NIOFSDirectory.open(Paths.get(luceneFileDir)); //通过dir得到的路径下的所有的文件 reader= DirectoryReader.open(dir); }catch (IOException e){ LOGGER.error(("读取索引目录异常,索引目录:"+luceneFileDir+" 异常信息:"+e.getMessage())); } if(null==reader){ LOGGER.warn("初始化indexReader失败,reader is null"); } return reader; } } /** * 获取索引信息 * @param reader IndexReader * @return ScoreDoc[] 存储的为所有索引数据的id */ public static ScoreDoc[] queryDocs(IndexReader reader,String luceneFileDir){ ScoreDoc[] docs =null; if(null!=reader){ //索引总条数 int numDocs=reader.numDocs(); if(0==numDocs){ return docs; } //建立索引查询器 IndexSearcher is=new IndexSearcher(reader); //设置查询条件 Query query; try { query = SolrUtil.getQuery("*:*"); if(null!=query){ TopDocs hits; /** * 第一个参数是通过传过来的参数来查找得到的query; * 第二个参数是要出查询的行数 * */ hits = is.search(query, numDocs); if(null!=hits){ //获取所有的索引id集合 docs= hits.scoreDocs; LOGGER.info("查询到目录"+luceneFileDir+"索引总条数为:"+docs.length); } else{ LOGGER.error("查询出错,未查询到满足query条件的索引信息"); } }else{ LOGGER.info("indexSearch和query初始化失败,"+"IndexSearch:"+is+" query:"+query); } } catch (ParseException e) { LOGGER.error("获取查询条件异常,异常信息:{}",e.getMessage()); }catch (IOException e) { LOGGER.error("查询索引信息异常,异常信息:{}",e.getMessage()); } }else{ LOGGER.warn("reader is null 无法获取索引信息"); } return docs; } public static void main(String[] args) { int [] nums=new int[]{5337424, 8354699, 5737071, 6785709, 5169227, 4083041, 4822605, 4235623, 3968948, 3059084, 3460965, 6991442}; int max=0; for (int num : nums) { max=max+num; } System.out.println(max); } }