Operating HDFS from Java (Part 2)

    Abstract

    Starting from scratch, we create a Maven project and use the HDFS Java API to operate a remote HDFS file system, building hands-on programming skills along the way.

    Hadoop Java API documentation: http://hadoop.apache.org/docs/current/api/index.html

    1: Project creation

    1: Create the Maven project

    Open Eclipse: File -> New -> Maven Project -> Next     # choose a workspace; I used the default

    2: Configure the project parameters

    Enter the Group Id and Artifact Id; here I entered com.scitc and hdfs. When done, click "Finish" to complete the creation of the project.

    3: Check the JDK version    # the default is JavaSE-1.5; change it to your own JDK 1.8

    After the project is created, verify that the JDK version is correct: right-click the project -> Build Path -> Configure Build Path.

    In the dialog that opens, select Java Build Path -> JRE System Library -> Edit.

    In the next dialog, select Workspace default JRE and click Finish.

    Then select Java Compiler, change the JDK version to 1.8, click Apply, then OK to close the window.

    4: Check the Maven configuration

    Go to Window -> Preferences -> Maven.

    -- Select Installations: click Add on the right and add

    E:\java\maven\apache-maven-3.5.0    # this is my Maven home directory

    -- Select User Settings: click Browse on the right and fill in the following

    global settings: E:\java\maven\repository\settings.xml

    user settings: E:\java\maven\repository\settings.xml

    local repository: E:\java\maven\repository\maven_jar

    # Note: set the three paths above according to your own Maven installation directory
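
    For reference, the local repository can also be pinned inside settings.xml itself. A minimal sketch (the path is the example from above; substitute your own):

    <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
              xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0
                                  http://maven.apache.org/xsd/settings-1.0.0.xsd">
      <!-- Where Maven caches downloaded artifacts; matches the local repository above -->
      <localRepository>E:\java\maven\repository\maven_jar</localRepository>
    </settings>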

    At this point the Maven project hdfs has been created, and the JDK and Maven configuration is complete.

    2: Project development

    1: Write the pom.xml file with the jar dependencies

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <groupId>com.scitc</groupId>
      <artifactId>hdfs</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <packaging>jar</packaging>
    
      <name>hdfs</name>
      <url>http://maven.apache.org</url>
    
      <!-- Set the Hadoop version; mine is 2.7.5 -->
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.7.5</hadoop.version>
      </properties>

      <dependencies>
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-common</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
          <version>${hadoop.version}</version>
          <scope>provided</scope>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-yarn-common</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-core</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>jdk.tools</groupId>
          <artifactId>jdk.tools</artifactId>
          <version>1.8</version>
          <scope>system</scope>
          <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
      </dependencies>

      <build>
        <plugins>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
              <source>1.8</source>
              <target>1.8</target>
            </configuration>
          </plugin>
        </plugins>
      </build>
    </project>
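
    After saving pom.xml, Eclipse (m2e) should download the declared dependencies automatically; if it does not, right-click the project and choose Maven -> Update Project, or run mvn clean compile from the project root, and check that the jars appear under the local repository configured earlier.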

    2: Create a log4j.properties file

    Place this file at the root of the src/main/java source directory, with the following contents:

    log4j.rootLogger=INFO, stdout
    log4j.appender.stdout=org.apache.log4j.ConsoleAppender
    log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
    log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
    log4j.appender.logfile=org.apache.log4j.FileAppender
    log4j.appender.logfile.File=target/spring.log
    log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
    log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
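
    Note: as written, rootLogger only references the stdout appender, so the logfile appender (target/spring.log) is defined but never used; to also log to the file, change the first line to log4j.rootLogger=INFO, stdout, logfile.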
    


    Note: before running App.java, start Hadoop first.
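
    If you want to verify that the cluster is reachable before running the full demo, a minimal sketch like the hypothetical class below (not part of the original project) simply lists the HDFS root, using the same NameNode address that App.java uses:

    package com.scitc.hdfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Hypothetical helper, for a quick connectivity check only
    public class HdfsPing {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            // Same NameNode address as App.java; adjust to your cluster
            conf.set("fs.defaultFS", "hdfs://192.168.56.110:9000/");
            FileSystem fs = FileSystem.get(conf);
            // Listing "/" only succeeds if the NameNode is up and reachable
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                System.out.println(status.getPath());
            }
            fs.close();
        }
    }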

    3: Edit the App.java class

    package com.scitc.hdfs;
    
    import java.io.File;
    import java.io.IOException;
    import java.io.InputStream;
    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.security.UserGroupInformation;
    
    /**
     * Java remote operations on HDFS: create directories, create files,
     * upload, download, read, rename, append to, and delete files.
     */
    public class App
    {
       static Configuration conf = new Configuration();
       static FileSystem hdfs;
    
   // Initialize the configuration used to access HDFS
       static {
          UserGroupInformation ugi = UserGroupInformation.createRemoteUser("root");
               try {
                   ugi.doAs(new PrivilegedExceptionAction<Void>() {
                        public Void run() throws Exception {
                            // Configure the shared static conf directly; declaring a local
                            // "Configuration conf" here would shadow the static field and
                            // leave it without fs.defaultFS, which copyFile() reads later.
                            conf.set("fs.defaultFS", "hdfs://192.168.56.110:9000/");
                            // The next two lines enable HDFS append support (hdfs.append())
                           conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
                           conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
                           Path path = new Path("hdfs://192.168.56.110:9000/");
                           hdfs = FileSystem.get(path.toUri(), conf);
                            //hdfs = path.getFileSystem(conf); // this works too
                           return null;
                       }
                   });
               } catch (IOException e) {
                   e.printStackTrace();
               } catch (InterruptedException e) {
                   e.printStackTrace();
               }
       }
       /**
        * Method 1: create a directory
        * @param dir
        * @throws IOException
        */
       public static void createDir(String dir) throws IOException {
           //String dir = "/test3/";
           Path path = new Path(dir);
           // If the directory already exists, do not create it again.
           if (hdfs.exists(path)) {
               System.out.println("Directory " + dir + " already exists");
               return;
           }
           // Create the directory
           hdfs.mkdirs(path);
           System.out.println("Created directory " + dir);
       }

       /**
        * Method 2: create a file
        * @throws IOException
        */
       public static void createFile() throws IOException {
           String fileName = "/test/myfile.txt";
           String fileContent = "this is new file";
           Path dst = new Path(fileName);
           if (hdfs.exists(dst)) {
               System.out.println("Error: file already exists");
           } else {
               // Convert the file content to a byte array and write it out
               byte[] bytes = fileContent.getBytes();
               FSDataOutputStream output = hdfs.create(dst);
               output.write(bytes);
               output.close();
               System.out.println("Created file " + fileName);
           }
       }

       /**
        * Method 3: upload a local file to HDFS with copyFromLocalFile
        * @param localSrc
        * @param hdfsDst
        * @param fileName
        * @throws IOException
        */
       public static void copyFile(String localSrc, String hdfsDst, String fileName) throws IOException {
           if ("".equals(localSrc)) {
               localSrc = "E:/java/data/myfile.txt";
           }
           if ("".equals(hdfsDst)) {
               hdfsDst = "/test/";
           }
           Path src = new Path(localSrc);
           Path dst = new Path(hdfsDst);
           // The local file does not exist
           if (!(new File(localSrc)).exists()) {
               System.out.println("Error: local file " + localSrc + " does not exist.");
               return;
           }
           // The HDFS directory does not exist
           if (!hdfs.exists(dst)) {
               System.out.println("Error: HDFS directory " + dst.toUri() + " does not exist.");
               return;
           }
           if ("".equals(fileName)) {
               fileName = src.getName();
           }
           String dstPath = dst.toUri() + "/" + fileName;
           System.out.println(dstPath); // e.g. "/test2/myfile.txt"
           Path targetPath = new Path(dstPath);
           // Check whether the target file already exists in the HDFS directory
           if (hdfs.exists(targetPath)) {
               System.out.println("Warn: file " + dstPath + " already exists.");
           } else {
               // Upload the local file to HDFS
               hdfs.copyFromLocalFile(src, targetPath);
               // List the files in the target directory
               FileStatus files[] = hdfs.listStatus(dst);
               System.out.println("Uploaded to " + conf.get("fs.defaultFS") + hdfsDst);
               for (FileStatus file : files) {
                   System.out.println(file.getPath());
               }
           }
       }

       /**
        * Method 4: download an HDFS file to the local file system with copyToLocalFile
        * @throws IllegalArgumentException
        * @throws IOException
        */
       public static void downloadFile() throws IllegalArgumentException, IOException {
           String hdfsDst = "/test/myfile.txt";
           String localSrc = "E:/java";
           Path dst = new Path(hdfsDst);
           Path src = new Path(localSrc);
           String localFile = localSrc + "/" + dst.getName(); // local path + name of the downloaded file
           if (!hdfs.exists(dst.getParent())) { // the HDFS directory does not exist
               System.out.println("Error: HDFS path " + dst.getParent() + " does not exist!");
               return;
           }
           if (!new File(localSrc).exists()) { // create the local directory if it is missing
               new File(localSrc).mkdirs();
               System.out.println("Warn: local directory created!");
           }
           if (new File(localFile).exists()) { // the local file already exists
               System.out.println("Error: local file " + localFile + " already exists.");
               return;
           }
           if (!hdfs.exists(new Path(hdfsDst))) { // the HDFS file does not exist
               System.out.println("Error: HDFS file " + hdfsDst + " does not exist.");
           } else {
               // Download the HDFS file to the local file system
               hdfs.copyToLocalFile(false, dst, src, true);
               System.out.println("Success: download finished! See: " + localSrc);
           }
       }

       /**
        * Method 5: read an HDFS file and print it to the local console
        * @throws IOException
        */
       public static void readFile() throws IOException {
           String uri = "/test/myfile.txt";
           // Check whether the file exists
           if (!hdfs.exists(new Path(uri))) {
               System.out.println("Error: file does not exist");
               return;
           }
           InputStream in = null;
           try {
               in = hdfs.open(new Path(uri));
               // Copy to standard output
               IOUtils.copyBytes(in, System.out, 4096, false);
           } catch (Exception e) {
               e.printStackTrace();
           } finally {
               IOUtils.closeStream(in);
           }
       }

       /**
        * Method 6: rename a file on HDFS
        * @throws IOException
        */
       public static void renameFile() throws IOException {
           String oldName = "/test/myfile.txt";
           String newName = "/test/myfile1.txt";
           Path oldPath = new Path(oldName);
           Path newPath = new Path(newName);
           if (hdfs.exists(oldPath)) {
               hdfs.rename(oldPath, newPath);
               System.out.println("rename success");
           } else {
               System.out.println("file does not exist, rename fail");
           }
       }

       /**
        * Method 7: append content to a file on HDFS
        * @throws IOException
        */
       public static void appendFile() throws IOException {
           String fileName = "/test/myfile1.txt";
           String appendContent = "this is the appended content";
           Path dst = new Path(fileName);
           byte[] bytes = appendContent.getBytes();
           // The file does not exist
           if (!hdfs.exists(dst)) {
               System.out.println("Error: file does not exist");
               return;
           }
           FSDataOutputStream output = hdfs.append(dst);
           output.write(bytes);
           output.close();
           System.out.println("Success: appended content to " + fileName);
       }

       /**
        * Method 8: delete a file on HDFS
        * @param fileName
        * @throws IOException
        */
       public static void deleteFile(String fileName) throws IOException {
           if ("".equals(fileName)) {
               fileName = "/test/myfile1.txt";
           }
           Path f = new Path(fileName);
           boolean isExists = hdfs.exists(f);
           if (isExists) {
               boolean isDel = hdfs.delete(f, true);
               System.out.println(fileName + " delete status: " + isDel);
           } else {
               System.out.println(fileName + " does not exist!");
           }
       }

       public static void main(String[] args) throws IOException {
           System.out.println("Hello World!");
           //createDir("/test1");
           //createFile();
           //copyFile("E:/java/data/myfile.txt", "/test/", "myfile.txt");
           //downloadFile();
           //readFile();
           //renameFile();
           //appendFile();
           deleteFile("/test/myfile1.txt");
       }
    }
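
    For an end-to-end run, the commented-out calls in main() can be enabled in order. One possible sequence is sketched below (hypothetical, not from the original post; it assumes E:/java/data/myfile.txt exists locally, and it keeps the original comment order because renameFile() moves the file that downloadFile() and readFile() expect):

    public static void main(String[] args) throws IOException {
        createDir("/test");              // create the working directory
        createFile();                    // create /test/myfile.txt
        // upload under a different name so it does not collide with createFile()'s output
        copyFile("E:/java/data/myfile.txt", "/test/", "myfile2.txt");
        downloadFile();                  // download /test/myfile.txt to E:/java
        readFile();                      // print /test/myfile.txt to the console
        renameFile();                    // /test/myfile.txt -> /test/myfile1.txt
        appendFile();                    // append text to /test/myfile1.txt
        deleteFile("/test/myfile1.txt"); // clean up
    }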