/**
 * Load the Hadoop configuration.
 * @param hadoopConfPath the path to the Hadoop configuration files, which the user must currently supply
 * @return the assembled Configuration
 */
public static Configuration getHadoopConfig(String hadoopConfPath) {
    Configuration conf = new Configuration();
    conf.addResource(new Path(hadoopConfPath + "/core-site.xml"));
    conf.addResource(new Path(hadoopConfPath + "/hdfs-site.xml"));
    return conf;
}

/**
 * Open an HDFS FileSystem connection.
 * @param hadoopConfPath the path to the Hadoop configuration files, which the user must currently supply
 * @return the FileSystem, or null if it could not be obtained
 */
public static FileSystem getFileSystem(String hadoopConfPath) {
    // reuse the configuration loader above instead of duplicating the addResource calls
    Configuration conf = getHadoopConfig(hadoopConfPath);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        LOGGER.error("Failed to obtain the Hadoop configuration from path={}", hadoopConfPath, e);
    }
    return fs;
}
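As a quick illustration of how these helpers might be called, here is a minimal sketch. The configuration directory /etc/hadoop/conf and the /user/solrindex path are assumptions for the example only; use whatever directory holds your core-site.xml and hdfs-site.xml and whatever HDFS path you care about.

// Hypothetical caller: open HDFS through the helper above and probe a path.
FileSystem fs = HdfsUtil.getFileSystem("/etc/hadoop/conf");
if (fs != null) {
    // check that a directory exists before reading from it
    boolean exists = fs.exists(new Path("/user/solrindex"));
    LOGGER.info("/user/solrindex exists: {}", exists);
}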
Methods for matching paths by regular expression:
/**
 * Collect all sub-directories of a local directory whose names match a regular expression.
 * @param luceneFilePathRegular pattern path, e.g. /user/solrindex/&lt;regular expression&gt;
 * @return list of directories that match the regular expression
 */
public static List<String> fetchDirByRegularLinux(String luceneFilePathRegular) {
    List<String> list = new ArrayList<>();
    // split the pattern into the parent directory and the regular expression
    int len = luceneFilePathRegular.lastIndexOf(EtlConstants.LINUX_ROUTE_SEGMENT) + 1;
    String mainDir = luceneFilePathRegular.substring(0, len);
    String regular = luceneFilePathRegular.substring(len);
    File dir = new File(mainDir);
    if (dir.exists() && dir.isDirectory()) {
        File[] arr = dir.listFiles();
        if (arr != null) {
            for (File file : arr) {
                if (file.isDirectory() && matchStr(file.getName(), regular)) {
                    list.add(file.getAbsolutePath() + SolrUtil.INDEX_DIR_SUFFIX);
                }
            }
        }
    }
    if (!list.isEmpty()) {
        LOGGER.info("Solr directories matched by the regular expression:");
        for (String s : list) {
            LOGGER.info(s);
        }
    } else {
        LOGGER.error("No directory under {} matches the regular expression {}", dir, regular);
    }
    return list;
}

/**
 * Collect all sub-directories of an HDFS directory whose names match a regular expression.
 * @param luceneFilePathRegular pattern path, e.g. hdfs:/user/solrindex/&lt;regular expression&gt;
 * @param nameNodeConfigPath    path to the NameNode (Hadoop) configuration files
 * @return list of directories that match the regular expression
 */
public static List<String> fetchDirByRegularHdfs(String luceneFilePathRegular, String nameNodeConfigPath) {
    List<String> list = new ArrayList<>();
    FileSystem fs = HdfsUtil.getFileSystem(nameNodeConfigPath);
    if (fs == null) {
        return list;
    }
    // split only on the first ':' so the regular expression part may itself contain a colon
    String prefixHdfs = luceneFilePathRegular.split(":", 2)[0];
    String hdfsPath = luceneFilePathRegular.split(":", 2)[1];
    // split the pattern into the parent directory and the regular expression
    int len = hdfsPath.lastIndexOf(EtlConstants.LINUX_ROUTE_SEGMENT) + 1;
    String mainDir = hdfsPath.substring(0, len);
    String regular = hdfsPath.substring(len);
    try {
        FileStatus[] fileStatuses = fs.globStatus(new Path(mainDir + "*"));
        if (fileStatuses != null) {
            for (FileStatus fileStatus : fileStatuses) {
                if (fileStatus.isDirectory() && matchStr(fileStatus.getPath().getName(), regular)) {
                    list.add(prefixHdfs + ":" + mainDir + fileStatus.getPath().getName() + SolrUtil.INDEX_DIR_SUFFIX);
                }
            }
        }
    } catch (IOException e) {
        LOGGER.error("Failed to list HDFS directories for path {}", luceneFilePathRegular, e);
    }
    if (!list.isEmpty()) {
        LOGGER.info("Solr directories matched by the regular expression:");
        for (String s : list) {
            LOGGER.info(s);
        }
    } else {
        LOGGER.error("No directory under {} matches the regular expression {}", luceneFilePathRegular, regular);
    }
    return list;
}

/**
 * Check whether a string fully matches a regular expression.
 * @param str     string to test
 * @param regular regular expression
 * @return true if the whole string matches
 * @author libingjie
 */
public static boolean matchStr(String str, String regular) {
    Pattern pattern = Pattern.compile(regular);
    Matcher matcher = pattern.matcher(str);
    return matcher.matches();
}
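To show how the two lookup methods are meant to be driven, here is a minimal usage sketch. The pattern core_.* and the /etc/hadoop/conf configuration directory are purely hypothetical; substitute the directory-name pattern and Hadoop config path of your own deployment.

// Hypothetical caller: find Solr index directories whose names start with "core_",
// first on the local file system, then on HDFS.
List<String> localDirs = fetchDirByRegularLinux("/user/solrindex/core_.*");
List<String> hdfsDirs  = fetchDirByRegularHdfs("hdfs:/user/solrindex/core_.*", "/etc/hadoop/conf");
LOGGER.info("Matched {} local and {} HDFS index directories", localDirs.size(), hdfsDirs.size());

Note that matchStr uses Matcher.matches(), so the regular expression must cover the entire directory name, not just a substring of it.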