Hadoop-1: Introduction to the HDFS API

    HDFS is a highly fault-tolerant distributed file system. To keep data consistent, it uses a write-once, read-many access model.

    1. Uploading a local file

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class CopyFile {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            // Local source file
            Path src = new Path("/home/ja/CADATA/SVD/prediction");
            // Destination path on HDFS
            Path dst = new Path("/");
            hdfs.copyFromLocalFile(src, dst);
            System.out.println("Upload to " + conf.get("fs.default.name"));
            FileStatus[] files = hdfs.listStatus(dst);
            for (FileStatus file : files) {
                System.out.println(file.getPath());
            }
        }
    }

    A quick note: Hadoop trips people up in all sorts of odd ways. Some books omit the path to the configuration file, which leaves the program unable to find files on HDFS, so watch out for that. Some books also leave the [] off the String array in main's signature, which is sloppy.
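    If you would rather not depend on the XML file's location, you can set the NameNode address on the Configuration directly. A minimal sketch; the hdfs://localhost:9000 URI is a placeholder, so substitute your own cluster's host and port:

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    
    
    public class ConfExample {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            // Point directly at the NameNode instead of loading core-site.xml;
            // the host and port here are assumptions -- use your cluster's values.
            conf.set("fs.default.name", "hdfs://localhost:9000");
            FileSystem hdfs = FileSystem.get(conf);
            System.out.println("Connected to " + hdfs.getUri());
            hdfs.close();
        }
    }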

    2. Creating an HDFS file

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class CreateFile {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            byte[] buff = "Hello Hadoop World!\n".getBytes();
            Path dfs = new Path("/Test");
            FSDataOutputStream outputstream = hdfs.create(dfs);
            outputstream.write(buff);
            // Close the stream so the data is flushed to HDFS
            outputstream.close();
        }
    }

    3. Creating an HDFS directory

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class CreateDir {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path dfs = new Path("/TestDir");
            hdfs.mkdirs(dfs);
        }
    }
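    Like mkdir -p, mkdirs() creates any missing parent directories along the path, and it returns true when the directory was created or already exists.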

    4. Renaming an HDFS file

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class Rename {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path frpath = new Path("/Test");
            Path topath = new Path("/T");
            boolean isRename = hdfs.rename(frpath, topath);
            String result = isRename ? "success" : "failure";
            System.out.println("Rename result: " + result);
        }
    
    }

    5. Deleting an HDFS file

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class DeleteFile {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path delef = new Path("/prediction");
            boolean isDelete = hdfs.delete(delef, false);
            System.out.println("Delete ? " + isDelete);
        }
    
    }

    Deleting a directory works much the same way; see the sketch below.
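    A minimal sketch that reuses the /TestDir directory from example 3. The second argument to delete() must be true for a recursive delete; with false, deleting a non-empty directory fails:

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class DeleteDir {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path dir = new Path("/TestDir");
            // true enables recursive deletion of the directory and its contents
            boolean isDelete = hdfs.delete(dir, true);
            System.out.println("Delete ? " + isDelete);
        }
    }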

    6. Checking whether an HDFS file exists

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class CheckFile {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path findf = new Path("/usr/root/input/test.data");
            boolean isExist = hdfs.exists(findf);
            System.out.println("Exists ? " + isExist);
        }
    }

    7. Getting the last modification time of an HDFS file

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class GetLTime {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path fPath = new Path("/T");
            FileStatus fileStatus = hdfs.getFileStatus(fPath);
            long moditime = fileStatus.getModificationTime();
            System.out.println("修改时间:" + moditime);
        }
    
    }
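    getModificationTime() returns milliseconds since the Unix epoch, so the raw number is hard to read. A small sketch that formats it, meant to slot into the main method above after moditime is obtained:

    // Format the epoch-millisecond timestamp as a human-readable date
    java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    System.out.println("Formatted: " + fmt.format(new java.util.Date(moditime)));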

    8. Listing all files under an HDFS directory

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class ListALLFile {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path listf = new Path("/usr/root/");
            FileStatus[] status = hdfs.listStatus(listf);
            for (int i=0;i<status.length;i++) {
                System.out.println(status[i].getPath().toString());
            }
            hdfs.close();
        }
    
    }
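    Note that listStatus() returns only the direct children of the directory; it does not recurse. A small sketch of a recursive walk under the same assumptions as above (the helper name listRecursively is my own, and isDir() is the Hadoop 1.x API):

    // Hypothetical helper: prints every entry under the given path recursively.
    public static void listRecursively(FileSystem hdfs, Path dir) throws IOException {
        for (FileStatus entry : hdfs.listStatus(dir)) {
            System.out.println(entry.getPath().toString());
            if (entry.isDir()) {
                listRecursively(hdfs, entry.getPath());
            }
        }
    }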

    9. Finding a file's block locations in the cluster

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    
    public class FileLoc {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem hdfs = FileSystem.get(conf);
            Path fpath = new Path("/T");
            FileStatus status = hdfs.getFileStatus(fpath);
            
            BlockLocation[] blockLocations = hdfs.getFileBlockLocations(status, 0, status.getLen());
            int blockLen = blockLocations.length;
            
            for (int i = 0; i < blockLen; i++) {
                String[] hosts = blockLocations[i].getHosts();
                System.out.println("block_" + i + "_location: " + hosts[0]);
            }
            hdfs.close();
        }
    
    }
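    Each block is typically replicated across several DataNodes; getHosts() returns one hostname per replica, and the loop above prints only the first.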

    10. Getting the names of all nodes in the HDFS cluster

    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
    
    
    public class GetList {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
            FileSystem fs = FileSystem.get(conf);
            DistributedFileSystem hdfs = (DistributedFileSystem)fs;
            
            DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
            for (int i=0;i<dataNodeStats.length;i++) {
                System.out.println("DataNode_" + i + "_Name:" + dataNodeStats[i].getHostName());
            }
            hdfs.close();
        }
    }
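    The cast to DistributedFileSystem succeeds only when fs.default.name actually points at an HDFS cluster; against the default local file system it throws a ClassCastException.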
Original post: https://www.cnblogs.com/wn19910213/p/3645399.html