HDFS operations: upload, create, delete, and query file information

    1. Upload a local file to HDFS

    //Upload a local file to HDFS
    import java.io.IOException;
    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CopyFile {
        public static void main(String[] args) {
            try {
                Configuration conf = new Configuration();
                String str_src = "/usr/local/myjar/mongo/地图数据/Zhengye_Drive_Testing_Data/solu"
                        + "/solu_Yanming_DriveTesting_09-04.16-17.16-27_True_TA.json";
                String str_dst = "hdfs://node4:9000/user/hadoop/TestFile.json";

                Path src = new Path(str_src); // local path
                Path dst = new Path(str_dst); // HDFS path

                FileSystem hdfs = dst.getFileSystem(conf);
                //FileSystem hdfs = FileSystem.get(URI.create(str_dst), conf); // this also works
                // In pseudo-distributed mode either form works; calling FileSystem.get(conf)
                // directly may fail with a "Wrong FS" error if fs.defaultFS does not match.

                hdfs.copyFromLocalFile(src, dst);
                System.out.println("Upload to " + conf.get("fs.default.name"));

                FileStatus[] files = hdfs.listStatus(dst);
                for (FileStatus file : files) {
                    System.out.println(file.getPath());
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    A possible "Wrong FS" error and how to resolve it:
    http://blog.csdn.net/kurama_sai/article/details/8604640
    http://blog.itpub.net/22846396/viewspace-1119945
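
    In practice the "Wrong FS" error means the path's scheme and authority (hdfs://node4:9000) do not match the filesystem the Configuration points to. A minimal sketch of one common fix, assuming the same NameNode address as in the examples above: set fs.defaultFS explicitly so that FileSystem.get(conf) resolves to HDFS.

    Configuration conf = new Configuration();
    // Point the default filesystem at the NameNode used in the hdfs:// URIs above.
    // With this set, FileSystem.get(conf) and paths such as "/user/hadoop/TestFile.json"
    // resolve against HDFS instead of the local filesystem.
    conf.set("fs.defaultFS", "hdfs://node4:9000");
    FileSystem hdfs = FileSystem.get(conf);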

    2. Create a file in HDFS and write a line of text to it

    //Create a file and write a single line of text to it
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class CreateFile {
        public static void main(String[] args) {
            try {
                Configuration conf = new Configuration();
                byte[] buff = "This is a test line.".getBytes();
                String dsf = "hdfs://node4:9000/user/hadoop/Test";
                Path pathdsf = new Path(dsf);
                FileSystem hdfs = pathdsf.getFileSystem(conf);
                FSDataOutputStream outputStream = hdfs.create(pathdsf);
                outputStream.write(buff, 0, buff.length);
                outputStream.close(); // close the stream so the data is flushed to HDFS
                System.out.println("Finish write!");
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
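
    Note that hdfs.create(pathdsf) overwrites the file if it already exists. A small sketch, using the create(Path, boolean overwrite) overload, of refusing to clobber an existing file (same conf, buff, and path as above):

    FileSystem hdfs = pathdsf.getFileSystem(conf);
    if (!hdfs.exists(pathdsf)) {
        // 'false' means: fail instead of overwriting if the file already exists
        FSDataOutputStream out = hdfs.create(pathdsf, false);
        out.write(buff, 0, buff.length);
        out.close();
    }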

    3. Delete a file

    Configuration conf = new Configuration();
    Path path_del = new Path("hdfs://node4:9000/user/hadoop/Test2");
    FileSystem hdfs = path_del.getFileSystem(conf);
    boolean isDeleted = hdfs.delete(path_del,false);
    //hdfs.delete(path_del,true); // recursive delete: if path_del is a directory, removes it and all files underneath
    System.out.println("delete? " +isDeleted);

    4. Rename a file

    Configuration conf = new Configuration();
    Path path_fr = new Path("hdfs://node4:9000/user/hadoop/Test");
    Path path_to = new Path("hdfs://node4:9000/user/hadoop/Test2");
    FileSystem hdfs = path_fr.getFileSystem(conf);
    boolean isRename = hdfs.rename(path_fr, path_to);  // rename the file
    System.out.println("is rename? "+isRename);

    5. View file and filesystem information

    Configuration conf = new Configuration();
    Path findf = new Path("hdfs://node4:9000/user/hadoop/hadoop.txt");
    FileSystem hdfs = findf.getFileSystem(conf);
    
    //Check whether a given HDFS path exists
    boolean isExists = hdfs.exists(findf); // works for both files and directories
    System.out.println("exists? " + isExists);
    
    //Inspect the attributes of an HDFS file
    FileStatus filestatus = hdfs.getFileStatus(findf);
    long modificationTime = filestatus.getModificationTime(); // last modification time
    System.out.println("Modification time is: "+modificationTime);
    long blocksize = filestatus.getBlockSize(); // block size
    System.out.println("Block size is: "+blocksize);
    
    
    //Find where the blocks of a file are stored in the HDFS cluster
    BlockLocation[] blkLocations = hdfs.getFileBlockLocations(filestatus, 0, filestatus.getLen());
    int blockLen = blkLocations.length;
    for(int i = 0 ; i < blockLen ; i++){
        String[] hosts = blkLocations[i].getHosts(); // hosts holding replicas of block i
        System.out.println("block "+i+" location: "+String.join(",", hosts));
    }
    
    //Inspect various properties of the HDFS filesystem itself
    System.out.println("scheme: "+hdfs.getScheme()); 
    System.out.println("used: "+hdfs.getUsed());
    System.out.println("canonical service name: "+hdfs.getCanonicalServiceName());
    System.out.println("default block size: "+hdfs.getDefaultBlockSize(findf));

    Output:

    exists? true
    Modification time is: 1430225267896
    Block size is: 134217728
    block 0 location: node4
    scheme: hdfs
    used: 0
    canonical service name: 192.168.1.160:9000
    default block size: 134217728
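
    For aggregate numbers over a whole directory tree, FileSystem also provides getContentSummary(). A brief sketch (the directory is the user directory from the examples; reuse of the hdfs handle from step 5 is assumed):

    ContentSummary summary = hdfs.getContentSummary(new Path("hdfs://node4:9000/user/hadoop"));
    System.out.println("total length: " + summary.getLength());   // total bytes of all files
    System.out.println("file count: " + summary.getFileCount());
    System.out.println("directory count: " + summary.getDirectoryCount());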

    6. Read the contents of a file in HDFS

    The following code prints the contents of the Test file to standard output.

    String dsf = "hdfs://node4:9000/user/hadoop/Test";
    Configuration conf = new Configuration();
    
    Path pathdsf = new Path(dsf);
    
    FileSystem fs = FileSystem.get(URI.create(dsf), conf);
    //FileSystem fs = pathdsf.getFileSystem(conf); // this also works
    
    FSDataInputStream hdfsInStream = fs.open(pathdsf);
    
    byte[] ioBuffer = new byte[1024];
    int readLen = hdfsInStream.read(ioBuffer);
    while (readLen != -1) {
        System.out.write(ioBuffer, 0, readLen);
        readLen = hdfsInStream.read(ioBuffer);
    }
    hdfsInStream.close();
    fs.close();
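
    The same read can be written more compactly with Hadoop's IOUtils helper. A sketch, assuming an open FileSystem fs and the same pathdsf as above:

    FSDataInputStream in = fs.open(pathdsf);
    try {
        // copy the stream to stdout in 4 KB chunks; 'false' leaves closing to the caller
        IOUtils.copyBytes(in, System.out, 4096, false);
    } finally {
        IOUtils.closeStream(in);
    }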

    7. Get the names of all nodes in the cluster

    Configuration conf = new Configuration();
    Path path = new Path("hdfs://node4:9000/user/hadoop");
    FileSystem fs = path.getFileSystem(conf);
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
    
    String[] names = new String[dataNodeStats.length];
    for(int i = 0 ; i < dataNodeStats.length ; i++){
        names[i] = dataNodeStats[i].getHostName();
        System.out.println("no."+i+", name:"+names[i]);
    }

    The output is the node names:
    no.0, name:node4
    no.1, name:node3
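
    The cast to DistributedFileSystem only succeeds when the path really lives on HDFS; against the local filesystem it throws a ClassCastException. A small defensive sketch of the same listing:

    FileSystem fs = path.getFileSystem(conf);
    if (fs instanceof DistributedFileSystem) {
        DistributedFileSystem dfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("no." + i + ", name:" + dataNodeStats[i].getHostName());
        }
    } else {
        System.out.println("Not an HDFS filesystem: " + fs.getUri());
    }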
