zoukankan      html  css  js  c++  java
  • Hadoop HDFS文件操作的Java代码

    1、创建目录

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class MakeDir {
    	/**
    	 * Creates a directory on HDFS.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the mkdir fails
    	 */
    	public static void main(String[] args) throws IOException {
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		try {
    			Path path = new Path("/user/hadoop/hdfs/xxxx");
    			// Bug fix: fs.create(path) creates an empty FILE at that path.
    			// mkdirs() creates the directory (and any missing parents),
    			// which is what this "create directory" example intends.
    			fs.mkdirs(path);
    		} finally {
    			fs.close();
    		}
    	}
    }
    

    2、删除目录

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class DeleteDir {
    	/**
    	 * Deletes a directory (and its contents) from HDFS.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the delete fails
    	 */
    	public static void main(String[] args) throws IOException {
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		try {
    			Path path = new Path("/user/hadoop/hdfs/xxxx");
    			// delete(Path) is deprecated; pass the recursive flag explicitly.
    			// true is required to remove a non-empty directory.
    			fs.delete(path, true);
    		} finally {
    			fs.close();
    		}
    	}
    }
    

    3、写文件

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class WriteFile {
    	/**
    	 * Writes a string to a new HDFS file.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the write fails
    	 */
    	public static void main(String[] args) throws IOException {
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
    		FSDataOutputStream out = fs.create(path);
    		try {
    			// Note: writeUTF prefixes the string with a 2-byte length
    			// (DataOutput contract), so the file is not plain text.
    			out.writeUTF("da jia hao,cai shi zhen de hao!");
    		} finally {
    			// Bug fix: the original never closed the stream, so buffered
    			// data could be lost; close() flushes and completes the write.
    			out.close();
    		}
    		fs.close();
    	}
    }
    

    4、读文件

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class ReadFile {
    	/**
    	 * Reads an HDFS file fully into memory and prints its contents.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the read fails
    	 */
    	public static void main(String[] args) throws IOException {
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
    		
    		if (fs.exists(path)) {
    			FileStatus status = fs.getFileStatus(path);
    			// getLen() returns long; a plain cast suffices for files < 2 GB
    			// (the original round-tripped through String for no reason).
    			byte[] buffer = new byte[(int) status.getLen()];
    			FSDataInputStream is = fs.open(path);
    			try {
    				is.readFully(0, buffer);
    			} finally {
    				is.close();
    			}
    			fs.close();
    			// Bug fix: buffer.toString() printed the array's identity hash
    			// (e.g. "[B@1a2b3c"), not the file contents; decode the bytes.
    			System.out.println(new String(buffer, "UTF-8"));
    		}
    	}
    }
    

    5、上传本地文件到HDFS

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class CopyFromLocalFile {
    
    	/**
    	 * Uploads a local file into an HDFS directory.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the copy fails
    	 */
    	public static void main(String[] args) throws IOException {
    		
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		// Source is a path on the local filesystem; destination is on HDFS.
    		Path localSrc = new Path("/home/hadoop/xxxx.txt");
    		Path hdfsDst = new Path("/user/hadoop/hdfs/");
    		fs.copyFromLocalFile(localSrc, hdfsDst);
    		fs.close();
    	}
    }
    

    6、删除文件

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class DeleteFile {
    
    	/**
    	 * Deletes a single file from HDFS.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached or the delete fails
    	 */
    	public static void main(String[] args) throws IOException {
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		try {
    			Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
    			// delete(Path) is deprecated; pass the recursive flag explicitly.
    			// false is correct (and safer) when the target is a file.
    			fs.delete(path, false);
    		} finally {
    			fs.close();
    		}
    	}
    }
    

    7、获取给定目录下的所有子目录以及子文件

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class GetAllChildFile {
    	static Configuration conf = new Configuration();
    	
    	
    	/**
    	 * Recursively prints every file under /user/hadoop on HDFS.
    	 *
    	 * @param args unused
    	 * @throws IOException if the filesystem cannot be reached
    	 */
    	public static void main(String[] args)throws IOException {
    		FileSystem fs = FileSystem.get(conf);
    		Path path = new Path("/user/hadoop");
    		getFile(path,fs);
    		//fs.close();
    	}
    	
    	/**
    	 * Depth-first walk: recurses into directories, prints file paths.
    	 *
    	 * @param path directory to list
    	 * @param fs   open filesystem handle (not closed here)
    	 * @throws IOException if listing fails
    	 */
    	public static void getFile(Path path,FileSystem fs) throws IOException {
    		
    		FileStatus[] fileStatus = fs.listStatus(path);
    		for (FileStatus status : fileStatus) {
    			// isDir() is deprecated; isDirectory() is the modern equivalent.
    			if (status.isDirectory()) {
    				// getPath() already returns a Path; no need to round-trip
    				// through toString()/new Path() as the original did.
    				getFile(status.getPath(), fs);
    			} else {
    				System.out.println(status.getPath().toString());
    			}
    		}
    	}
    
    }
    

    8、查找某个文件在HDFS集群的位置

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
    
    public class FindFile {
    	
    	public static void main(String[] args) throws IOException {	
    		getFileLocal();
    	}
    	
    	/**
    	 * Prints the datanode host(s) storing each block of an HDFS file.
    	 *
    	 * @throws IOException if the filesystem cannot be reached
    	 */
    	public static void getFileLocal() throws IOException{
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    		Path path = new Path("/user/hadoop/hdfs/xxxx.txt");
    		
    		FileStatus status = fs.getFileStatus(path);
    		BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
    		
    		for (int i = 0; i < locations.length; i++) {
    			String[] hosts = locations[i].getHosts();
    			// Bug fix: the original printed hosts[i], indexing the per-block
    			// replica array with the BLOCK index — this throws
    			// ArrayIndexOutOfBoundsException once the block count exceeds the
    			// replication factor. Print every replica host for block i instead.
    			for (String host : hosts) {
    				System.out.println("block_" + i + "_location:" + host);
    			}
    		}
    	}
    	
    }
    

    9、HDFS集群上所有节点名称信息

    package com.hadoop.file;
    
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
    
    public class FindFile {
    	
    	public static void main(String[] args) throws IOException {	
    		getHDFSNode();
    	}
    	
    	/**
    	 * Prints the hostname of every datanode in the HDFS cluster.
    	 *
    	 * @throws IOException if the filesystem cannot be reached
    	 */
    	public static void getHDFSNode() throws IOException{
    		Configuration conf = new Configuration();
    		FileSystem fs = FileSystem.get(conf);
    
    		// Datanode information is only exposed by the HDFS-specific
    		// implementation, so downcast from the generic FileSystem.
    		DistributedFileSystem dfs = (DistributedFileSystem) fs;
    		DatanodeInfo[] nodes = dfs.getDataNodeStats();
    		
    		int idx = 0;
    		for (DatanodeInfo node : nodes) {
    			System.out.println("DataNode_" + idx + "_Node:" + node.getHostName());
    			idx++;
    		}
    		
    	}
    	
    	
    }
    

    伪分布环境下操作FileSystem时候会出现异常:  

    Java代码如下:

        FileSystem fs = FileSystem.get(conf); 
        in = fs.open(new Path("hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in")); 
    

    抛出异常如下:

        Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/wzqSearchEngine/crawl/inject_in, expected: file:/// 
            at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:310) 
            at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:47) 
            at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:357) 
            at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:245) 
            at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:125) 
            at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:283) 
            at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:356) 
            at com.netease.hadoop.HDFSCatWithAPI.main(HDFSCatWithAPI.java:23) 

    解决方案:
    将hadoop的core-site.xml和hdfs-site.xml放到当前工程下(Eclipse工作目录的bin文件夹下面)即可。

    总结:

    因为访问的是远程的 HDFS，`FileSystem.get(conf)` 在缺省配置下会返回本地文件系统（file:///），需要让配置指向集群：要么把集群的 core-site.xml/hdfs-site.xml 放到 classpath 下，要么显式通过 URI 获取，例如 `FileSystem.get(URI.create("hdfs://localhost:9000"), conf)`。

      

      

      

      

      

      

      

  • 相关阅读:
    java移位运算符详解[转]
    Android四大基本组件介绍与生命周期
    oracle中怎么得到日期相减除去周末后的天数
    小计_合计_统计
    Oracle--SQL技巧之一(查询连续的记录)
    游戏中地图的制作(一)
    在别的地方看的<<给程序员介绍一些C++开源库>>,记录给大家共同学习
    C语言调用python代码
    XML文件中怎么写小于号 等特殊符号
    system->copy 和 ShellExecute 用法
  • 原文地址:https://www.cnblogs.com/wuzhenquan/p/3617751.html
Copyright © 2011-2022 走看看