zoukankan      html  css  js  c++  java
  • Maven下从HDFS文件系统读取文件内容

    需要注意以下几点
    1.所有的包都是org.apache.hadoop.XXX
    2.三个配置文件要放到指定文件夹(src/main/resources)中,供文件系统读取:core-site.xml、hdfs-site.xml、log4j.properties
    3.文件路径指向要正确

    package com.cenzhongman.hadoop.hdfs;
    
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    
    /**
     * Minimal example of reading a file from, and uploading a local file to, a
     * Hadoop file system.
     *
     * <p>
     * None of the helper methods throw checked exceptions: I/O failures are
     * reported on standard error and the method returns without completing.
     */
    public class HdfsApp {
    
    	/** Size in bytes of the buffer used when copying between streams. */
    	private static final int BUFFER_SIZE = 4096;
    
    	/**
    	 * Obtains the file system described by the Hadoop configuration.
    	 *
    	 * <p>
    	 * The {@link Configuration} is built with its default constructor, so it
    	 * relies on the configuration resources available to the application
    	 * (for example core-site.xml and hdfs-site.xml under src/main/resources).
    	 *
    	 * @return the file system, or {@code null} if it could not be obtained
    	 *         (the failure is printed to standard error)
    	 */
    	public static FileSystem getFileSystem() {
    		Configuration conf = new Configuration();
    		try {
    			return FileSystem.get(conf);
    		} catch (IOException e) {
    			System.err.println("Failed to obtain the Hadoop FileSystem");
    			e.printStackTrace();
    			return null;
    		}
    	}
    
    	/**
    	 * Reads a file from the file system and copies its content to standard
    	 * output.
    	 *
    	 * @param fileName
    	 *            path of the file to read on the file system
    	 */
    	public static void read(String fileName) {
    		FileSystem fileSystem = getFileSystem();
    		System.out.println(fileSystem);
    
    		// Without a file system there is nothing to open; stop here instead
    		// of running into a NullPointerException further down.
    		if (fileSystem == null) {
    			System.err.println("Cannot read " + fileName + ": no FileSystem available");
    			return;
    		}
    
    		FSDataInputStream inStream = null;
    		try {
    			// Open and copy inside one try block so that a failed open skips
    			// the copy instead of passing a null stream to copyBytes.
    			inStream = fileSystem.open(new Path(fileName));
    			// 'false': System.out must stay open; inStream is closed below.
    			IOUtils.copyBytes(inStream, System.out, BUFFER_SIZE, false);
    		} catch (IOException e) {
    			System.err.println("Failed to read " + fileName);
    			e.printStackTrace();
    		} finally {
    			// closeStream ignores a null argument.
    			IOUtils.closeStream(inStream);
    		}
    	}
    
    	/**
    	 * Uploads a local file to the file system.
    	 *
    	 * @param fromFilePath
    	 *            path of the local source file
    	 * @param putFilePath
    	 *            destination path on the file system
    	 */
    	public static void uploadFile(String fromFilePath, String putFilePath) {
    		FileSystem fileSystem = getFileSystem();
    		if (fileSystem == null) {
    			System.err.println("Cannot upload " + fromFilePath + ": no FileSystem available");
    			return;
    		}
    
    		FileInputStream inStream = null;
    		FSDataOutputStream outStream = null;
    		try {
    			// Open the local source first: if it does not exist, the remote
    			// file is never created, so no empty file is left behind.
    			inStream = new FileInputStream(new File(fromFilePath));
    			outStream = fileSystem.create(new Path(putFilePath));
    
    			IOUtils.copyBytes(inStream, outStream, BUFFER_SIZE, false);
    
    			// Close the output explicitly inside the try block so that a
    			// failure while finishing the write is reported; closeStream in
    			// the finally block would silently discard it.
    			outStream.close();
    		} catch (FileNotFoundException e) {
    			System.err.println("File not found while uploading " + fromFilePath + " to " + putFilePath);
    			e.printStackTrace();
    		} catch (IOException e) {
    			System.err.println("Failed to upload " + fromFilePath + " to " + putFilePath);
    			e.printStackTrace();
    		} finally {
    			// Safety net for the error paths; closeStream ignores null.
    			IOUtils.closeStream(inStream);
    			IOUtils.closeStream(outStream);
    		}
    	}
    
    	/**
    	 * Demo entry point: prints one file from the file system, then uploads
    	 * one local file. Both paths are hard-coded sample values.
    	 *
    	 * @param args
    	 *            unused
    	 */
    	public static void main(String[] args) throws Exception {
    		String fileName = "/tmp/hadoop-yarn/staging/history/done_intermediate/cen/job_1497948413653_0001_conf.xml";
    		read(fileName);
    
    		String putFilePath = "/user/cen/output/file-output-test.xml";
    		String fromFilePath = "/usr/local/hadoop-2.5.0/input/core-site.xml";
    		uploadFile(fromFilePath, putFilePath);
    	}
    }
  • 相关阅读:
    我的WCF之旅(1):创建一个简单的WCF程序
    网页设计中颜色的搭配
    CSS HACK:全面兼容IE6/IE7/IE8/FF的CSS HACK
    UVa 1326 Jurassic Remains
    UVa 10340 All in All
    UVa 673 Parentheses Balance
    UVa 442 Matrix Chain Multiplication
    UVa 10970 Big Chocolate
    UVa 679 Dropping Balls
    UVa 133 The Dole Queue
  • 原文地址:https://www.cnblogs.com/cenzhongman/p/7096028.html
Copyright © 2011-2022 走看看