1. 加载配置文件
2. 初始化文件系统
// Client-side entry point of the walkthrough.
// Each statement sits on its own line so that the trailing line comment of the
// first statement no longer swallows the second one.
Configuration conf = new Configuration(); // load the configuration
FileSystem fs = FileSystem.get(conf);     // initialize the file system
static{ //print deprecation warning if hadoop-site.xml is found in classpath ClassLoader cL = Thread.currentThread().getContextClassLoader(); if (cL == null) { cL = Configuration.class.getClassLoader(); } if(cL.getResource("hadoop-site.xml")!=null) {//确保在类路径下不存在hadoop-site.xml(已过时) LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " + "Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, " + "mapred-site.xml and hdfs-site.xml to override properties of " + "core-default.xml, mapred-default.xml and hdfs-default.xml " + "respectively"); } addDefaultResource("core-default.xml"); addDefaultResource("core-site.xml"); }
FileSystem 的 get(Configuration conf) 方法调用了它的重载方法 get(getDefaultUri(conf), conf)。该方法会先判断是否启用了缓存机制(由配置项 fs.<scheme>.impl.disable.cache 控制,默认启用缓存):如果启用了缓存,则从缓存中获取 FileSystem;如果禁用了缓存,则每次都创建新的文件系统。
/**
 * Returns the file system for the default URI derived from {@code conf}.
 *
 * <p>Delegates to {@code get(URI, Configuration)} with the URI produced by
 * {@code getDefaultUri(conf)}.
 *
 * @param conf the configuration to read the default URI from
 * @return the file system for the default URI
 * @throws IOException propagated from {@code get(URI, Configuration)}
 */
public static FileSystem get(Configuration conf) throws IOException {
  final URI defaultUri = getDefaultUri(conf);
  return get(defaultUri, conf);
}
public static URI getDefaultUri(Configuration conf) {
return URI.create(fixName(conf.get(FS_DEFAULT_NAME_KEY, DEFAULT_FS)));//通过conf中的fs.defaultFS属性获得URI(hdfs://s101)
public static FileSystem get(URI uri, Configuration conf) throws IOException { String scheme = uri.getScheme();//hdfs String authority = uri.getAuthority();//s101 if (scheme == null && authority == null) { // use default FS :默认为本地文件系统 file:/// return get(conf); }
//判断是否缓存FileSystem String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme); //如果不采用缓存机制,每次都创建新的FileSystem if (conf.getBoolean(disableCacheName, false)) { return createFileSystem(uri, conf); } //如果采用缓存机制,则从CACHE中获取 return CACHE.get(uri, conf);
static class Cache { //省略...... private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();//FileSystem容器 /** FileSystem.Cache.Key */ static class Key { final String scheme;//hdfs final String authority;//s101 final UserGroupInformation ugi; //省略... } }
/**
 * Looks up the file system for {@code uri} and {@code conf}.
 *
 * <p>Builds a {@code Key} from the two arguments and hands off to
 * {@code getInternal}.
 *
 * @param uri the URI identifying the file system
 * @param conf the configuration to use
 * @return the file system returned by {@code getInternal}
 * @throws IOException propagated from {@code getInternal}
 */
FileSystem get(URI uri, Configuration conf) throws IOException {
  return getInternal(uri, conf, new Key(uri, conf));
}
private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ FileSystem fs; synchronized (this) { fs = map.get(key);//由于此时为一次创建FileSystem,所以此时map为null } if (fs != null) { return fs; } //fs不为null,创建文件系统 fs = createFileSystem(uri, conf); synchronized (this) { // refetch the lock again FileSystem oldfs = map.get(key); if (oldfs != null) { // a file system is created while lock is releasing fs.close(); // close the new file system return oldfs; // return the old file system } // now insert the new file system into the map if (map.isEmpty() && !ShutdownHookManager.get().isShutdownInProgress()) { ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); } fs.key = key; //将文件系统存入map容器 map.put(key, fs); if (conf.getBoolean("fs.automatic.close", true)) { toAutoClose.add(key); } return fs; } }
下面我们来看一下 createFileSystem(uri, conf)是如何创建FileSystem的:
private static FileSystem createFileSystem(URI uri, Configuration conf ) throws IOException { //根据conf和Schema获取对应的FileSystemClass,这里指的是DistributedFileSystem.class Class<?> clazz = getFileSystemClass(uri.getScheme(), conf); //通过反射创建文件系统 FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); //初始化文件系统 fs.initialize(uri, conf); return fs; }
public static Class<? extends FileSystem> getFileSystemClass(String scheme, Configuration conf) throws IOException { if (!FILE_SYSTEMS_LOADED) { loadFileSystems();//加载文件系统,加载了hadoop支持的9种文件系统,存放到了SERVICE_FILE_SYSTEMS=new HashMap<String,Class<? extends FileSystem>>
Class<? extends FileSystem> clazz = null; //省略 if (clazz == null) {//根据schema获得对应的文件系统的clazz clazz = SERVICE_FILE_SYSTEMS.get(scheme); } if (clazz == null) { throw new IOException("No FileSystem for scheme: " + scheme); } return clazz; }
public void initialize(URI uri, Configuration conf) throws IOException { super.initialize(uri, conf); this.setConf(conf); String host = uri.getHost(); if (host == null) { throw new IOException("Incomplete HDFS URI, no host: " + uri); } else { this.homeDirPrefix = conf.get("dfs.user.home.dir.prefix", "/user"); //创建DFS客户端(每个文件系统都持有一个dfs客户端对象) this.dfs = new DFSClient(uri, conf, this.statistics); this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); //工作空间 this.workingDir = this.getHomeDirectory(); }
FileSystem 的创建过程:
1. 首先加载配置文件,主要是获得fs.defaultFS的属性值。
2. 创建文件系统: