  • Working with HDFS from Java (Part 2)

    Summary

    Starting from scratch, we create a Maven project and use the HDFS Java API to operate a remote HDFS file system, building practical programming skill along the way.

    Hadoop Java API documentation: http://hadoop.apache.org/docs/current/api/index.html

    1: Project creation

    1: Create the Maven project

    Open Eclipse — File — New — Maven Project — Next     # choose a workspace; I used the default

    2: Configure the project parameters

    Enter the Group Id and Artifact Id — here com.scitc and hdfs — then click "Finish" to create the project.

    3: Check the JDK version    # the default is JavaSE-1.5; change it to your own JDK 1.8

    After the project is created, verify that the JDK version is correct: right-click the project -> Build Path -> Configure Build Path.

    In the dialog that opens, select Java Build Path -> JRE System Library -> Edit.

    In the next dialog, select Workspace default JRE and click Finish.

    Then select Java Compiler, change the compiler compliance level to 1.8, click Apply, then OK to close the window.

    4: Check the Maven configuration

    Go to Window — Preferences — Maven:

    -- Select Installations: click Add on the right and point it to your Maven home directory

    E:\java\maven\apache-maven-3.5.0    # this is my Maven home directory

    -- Select User Settings: on the right, browse to the following

    global settings: E:\java\maven\repository\settings.xml

    user settings: E:\java\maven\repository\settings.xml

    local repository: E:\java\maven\repository\maven_jar

    # Note: set all three paths according to your own Maven installation directory; a sketch of such a settings.xml follows.
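
    For reference, a minimal sketch of what that settings.xml might contain; the localRepository path below is the reconstructed directory from my setup, so substitute your own:

    <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
              xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
      <!-- local repository where Maven caches downloaded jars -->
      <localRepository>E:\java\maven\repository\maven_jar</localRepository>
    </settings>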

    At this point the Maven project hdfs has been created, and the JDK and Maven configuration is complete.

    2: Project development

    1: Write the pom.xml with the jar dependencies

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <groupId>com.scitc</groupId>
      <artifactId>hdfs</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>hdfs</name>
      <url>http://maven.apache.org</url>
    
      <!-- set the Hadoop version to match your cluster; mine is 2.7.5 -->
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.7.5</hadoop.version>
      </properties>

      <dependencies>
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-common</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
          <version>${hadoop.version}</version>
          <scope>provided</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-common</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-core</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <!-- tools.jar from the local JDK -->
        <dependency>
          <groupId>jdk.tools</groupId>
          <artifactId>jdk.tools</artifactId>
          <version>1.8</version>
          <scope>system</scope>
          <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
      </dependencies>

      <build>
        <plugins>
          <!-- compile with JDK 1.8 -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
              <source>1.8</source>
              <target>1.8</target>
            </configuration>
          </plugin>
        </plugins>
      </build>
    </project>

    2: Create the log4j.properties file

    Put this file at the root of the src/main/java source directory, with the following content:

    log4j.rootLogger=INFO, stdout
    log4j.appender.stdout=org.apache.log4j.ConsoleAppender
    log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
    log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
    log4j.appender.logfile=org.apache.log4j.FileAppender
    log4j.appender.logfile.File=target/spring.log
    log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
    log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
    

      

    Note: before running App.java, you must start Hadoop first.
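
    For example, assuming a standard Hadoop 2.x layout under $HADOOP_HOME on the server (192.168.56.110 in this walkthrough):

    $HADOOP_HOME/sbin/start-dfs.sh    # start the NameNode and DataNodes
    jps                               # verify that the NameNode/DataNode processes are running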

    3: Write the App.java class

    package com.scitc.hdfs;
    
    import java.io.File;
    import java.io.IOException;
    import java.io.InputStream;
    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.security.UserGroupInformation;
    
    /**
     * Use Java to operate HDFS remotely: create directories, create files, upload files,
     * download files, read files, rename files, and delete files.
     */
    public class App
    {
       static Configuration conf = new Configuration();
       static FileSystem hdfs;
    
       // initialize the configuration used to access HDFS
       static {
          UserGroupInformation ugi = UserGroupInformation.createRemoteUser("root");
               try {
                   ugi.doAs(new PrivilegedExceptionAction<Void>() {
                       public Void run() throws Exception {
                           Configuration conf = new Configuration();
                           conf.set("fs.defaultFS", "hdfs://192.168.56.110:9000/");
                            // the next two lines enable the HDFS append feature: hdfs.append()
                           conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
                           conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
                           Path path = new Path("hdfs://192.168.56.110:9000/");
                           hdfs = FileSystem.get(path.toUri(), conf);
                            //hdfs = path.getFileSystem(conf); // this also works
                           return null;
                       }
                   });
               } catch (IOException e) {
                   e.printStackTrace();
               } catch (InterruptedException e) {
                   e.printStackTrace();
               }
       }
        /**
         * Method 1: create a directory
         * @param dir
         * @throws IOException
         */
        public static void createDir(String dir) throws IOException {
            //String dir = "/test3/";
            Path path = new Path(dir);
            // if the directory already exists, do not create it again
            if (hdfs.exists(path)) {
                System.out.println("Directory " + dir + " already exists");
                return;
            }
            // create the directory
            hdfs.mkdirs(path);
            System.out.println("Created directory " + dir);
        }

        /**
         * Method 2: create a file
         * @throws IOException
         */
        public static void createFile() throws IOException {
            String fileName = "/test/myfile.txt";
            String fileContent = "this is new file";
            Path dst = new Path(fileName);
            if (hdfs.exists(dst)) {
                System.out.println("Error: file already exists");
            } else {
                // convert the file content into a byte array
                byte[] bytes = fileContent.getBytes();
                FSDataOutputStream output = hdfs.create(dst);
                output.write(bytes);
                output.close();
                System.out.println("Created file " + fileName);
            }
        }

        /**
         * Method 3: upload a local file to HDFS with copyFromLocalFile
         * @param localSrc
         * @param hdfsDst
         * @param fileName
         * @throws IOException
         */
        public static void copyFile(String localSrc, String hdfsDst, String fileName) throws IOException {
            if ("".equals(localSrc)) {
                localSrc = "E:/java/data/myfile.txt";
            }
            if ("".equals(hdfsDst)) {
                hdfsDst = "/test/";
            }
            Path src = new Path(localSrc);
            Path dst = new Path(hdfsDst);
            // the local file does not exist
            if (!(new File(localSrc)).exists()) {
                System.out.println("Error: local file " + localSrc + " does not exist.");
                return;
            }
            // the HDFS path does not exist
            if (!hdfs.exists(dst)) {
                System.out.println("Error: HDFS directory " + dst.toUri() + " does not exist.");
                return;
            }
            if ("".equals(fileName)) {
                fileName = src.getName();
            }
            String dstPath = dst.toUri() + "/" + fileName;
            System.out.println(dstPath); // e.g. "/test2/myfile.txt"
            Path targetPath = new Path(dstPath);
            // check whether the file already exists in the target HDFS directory
            if (hdfs.exists(targetPath)) {
                System.out.println("Warn: file " + dstPath + " already exists.");
            } else {
                // upload the local file to HDFS
                hdfs.copyFromLocalFile(src, targetPath);
                // list the files in the target directory
                FileStatus files[] = hdfs.listStatus(dst);
                System.out.println("Uploaded to " + conf.get("fs.defaultFS") + hdfsDst);
                for (FileStatus file : files) {
                    System.out.println(file.getPath());
                }
            }
        }
        /**
         * Method 4: download an HDFS file to the local file system with copyToLocalFile
         * @throws IllegalArgumentException
         * @throws IOException
         */
        public static void downloadFile() throws IllegalArgumentException, IOException {
            String hdfsDst = "/test/myfile.txt";
            String localSrc = "E:/java";
            Path dst = new Path(hdfsDst);
            Path src = new Path(localSrc);
            String localFile = localSrc + "/" + dst.getName(); // local path + name of the downloaded file
            if (!hdfs.exists(dst.getParent())) { // the HDFS path does not exist
                System.out.println("Error: HDFS path " + dst.getParent() + " does not exist!");
                return;
            }
            if (!new File(localSrc).exists()) { // create the local directory if it does not exist
                new File(localSrc).mkdirs();
                System.out.println("Warn: local directory created!");
            }
            if (new File(localFile).exists()) { // the local file already exists
                System.out.println("Error: local file " + localFile + " already exists.");
                return;
            }
            if (!hdfs.exists(new Path(hdfsDst))) { // the HDFS file does not exist
                System.out.println("Error: HDFS file " + hdfsDst + " does not exist.");
            } else {
                // download the HDFS file to the local file system
                hdfs.copyToLocalFile(false, dst, src, true);
                System.out.println("Success: download finished! Check: " + localSrc);
            }
        }

        /**
         * Method 5: read an HDFS file and print it to the local console
         * @throws IOException
         */
        public static void readFile() throws IOException {
            String uri = "/test/myfile.txt";
            // check whether the file exists
            if (!hdfs.exists(new Path(uri))) {
                System.out.println("Error: file does not exist");
                return;
            }
            InputStream in = null;
            try {
                in = hdfs.open(new Path(uri));
                // copy to the standard output stream
                IOUtils.copyBytes(in, System.out, 4096, false);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                IOUtils.closeStream(in);
            }
        }
        /**
         * Method 6: rename a file on HDFS
         * @throws IOException
         */
        public static void renameFile() throws IOException {
            String oldName = "/test/myfile.txt";
            String newName = "/test/myfile1.txt";
            Path oldPath = new Path(oldName);
            Path newPath = new Path(newName);
            if (hdfs.exists(oldPath)) {
                hdfs.rename(oldPath, newPath);
                System.out.println("rename success");
            } else {
                System.out.println("file does not exist, rename fail");
            }
        }

        /**
         * Method 7: append content to a file on HDFS
         * @throws IOException
         */
        public static void appendFile() throws IOException {
            String fileName = "/test/myfile1.txt";
            String appendContent = "this is the appended content";
            Path dst = new Path(fileName);
            byte[] bytes = appendContent.getBytes();
            // the file does not exist
            if (!hdfs.exists(dst)) {
                System.out.println("Error: file does not exist");
                return;
            }
            FSDataOutputStream output = hdfs.append(dst);
            output.write(bytes);
            output.close();
            System.out.println("Success: appended content to " + fileName);
        }

        /**
         * Method 8: delete a file on HDFS
         * @param fileName
         * @throws IOException
         */
        public static void deleteFile(String fileName) throws IOException {
            if ("".equals(fileName)) {
                fileName = "/test/myfile1.txt";
            }
            Path f = new Path(fileName);
            boolean isExists = hdfs.exists(f);
            if (isExists) {
                boolean isDel = hdfs.delete(f, true);
                System.out.println(fileName + " delete status: " + isDel);
            } else {
                System.out.println(fileName + " does not exist!");
            }
        }

        public static void main(String[] args) throws IOException {
            System.out.println("Hello World!");
            //createDir("/test1");
            //createFile();
            //copyFile("E:/java/data/myfile.txt", "/test/", "myfile.txt");
            //downloadFile();
            //readFile();
            //renameFile();
            //appendFile();
            deleteFile("/test/myfile1.txt");
        }
    }