Operating HDFS from IDEA: creating files, uploading files, getting block information, and downloading files
1. Set up a Maven project
2. POM dependencies
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.5</version>
</dependency>
3. Copy core-site.xml and hdfs-site.xml from the Hadoop conf directory into the project's resources directory (src/main/resources).
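For reference, the only property the client code below actually depends on from core-site.xml is fs.defaultFS. A minimal sketch of such a file, assuming the mycluster nameservice shown in the code comments (your cluster's value may differ):

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <!-- Default FileSystem URI the HDFS client connects to; assumed nameservice -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
    </property>
</configuration>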
4. Code
package com.xiaoke.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;

public class TestHDFS {

    public Configuration conf = null;
    public FileSystem fs = null;

    @Before
    public void conn() throws Exception {
        // true: load the config files under resources into the Configuration
        conf = new Configuration(true);
        // Build the FileSystem from the loaded configuration, i.e.:
        // <property>
        //     <name>fs.defaultFS</name>
        //     <value>hdfs://mycluster</value>
        // </property>
        // The client user is read from the HADOOP_USER_NAME environment variable
        // (e.g. god), which must be configured in the Windows environment variables.
        fs = FileSystem.get(conf);
        // Alternatively, ignore the config files and specify the URI and user directly:
        // fs = FileSystem.get(URI.create("hdfs://mycluster"), conf, "god");
    }

    // Create a directory
    @Test
    public void mkdir() throws Exception {
        Path dir = new Path("/xiaoke002");
        if (fs.exists(dir)) {
            fs.delete(dir, true);
        }
        fs.mkdirs(dir);
    }

    // Upload a file
    @Test
    public void upload() throws Exception {
        BufferedInputStream input = new BufferedInputStream(new FileInputStream(new File("./data/hello.txt")));
        Path outfile = new Path("/xiaoke002/out.txt");
        FSDataOutputStream output = fs.create(outfile);
        // copyBytes keeps reading from the input stream and flushing to the
        // output stream; the final argument true closes both streams when done.
        IOUtils.copyBytes(input, output, conf, true);
    }

    // Get block locations, then read starting at a block boundary
    @Test
    public void blocks() throws Exception {
        Path file = new Path("/user/god/data.txt");
        FileStatus fss = fs.getFileStatus(file);
        BlockLocation[] blks = fs.getFileBlockLocations(fss, 0, fss.getLen());
        for (BlockLocation b : blks) {
            System.out.println(b);
        }
        // Sample output (offset, length, hosts):
        // 0, 1048576, node04,node02     -> block A
        // 1048576, 540319, node04,node03 -> block B

        // "Move computation to the data"! Users and programs read at the file
        // level -- they are not aware of blocks at all.
        FSDataInputStream in = fs.open(file); // The input stream is opened on the file;
                                              // by default reads start at the beginning.
        // blk01: he
        // blk02: llo msb 66231
        // After moving computation to the data, each task divides and conquers:
        // it reads only the part it cares about (via seek), and the framework
        // also has a notion of distance -- by default it prefers fetching data
        // from a local DataNode.
        in.seek(1048576);
        for (int i = 0; i < 12; i++) {
            System.out.println((char) in.readByte());
        }
    }

    @After
    public void close() throws Exception {
        fs.close();
    }
}
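The title also mentions downloading a file, which the class above does not cover. Below is a minimal sketch of the reverse of upload(), assuming the /xiaoke002/out.txt file uploaded earlier exists on HDFS; the local path ./data/download.txt is hypothetical, and java.io.BufferedOutputStream and java.io.FileOutputStream would need to be added to the imports:

    // Download a file: open an HDFS input stream and copy it to a local file
    // (sketch; local target path ./data/download.txt is an assumption)
    @Test
    public void download() throws Exception {
        FSDataInputStream input = fs.open(new Path("/xiaoke002/out.txt"));
        BufferedOutputStream output = new BufferedOutputStream(
                new FileOutputStream(new File("./data/download.txt")));
        // As in upload(), copyBytes closes both streams when the last argument is true
        IOUtils.copyBytes(input, output, conf, true);
    }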
Code repository: https://gitee.com/Xiaokeworksveryhard/big-data.git
Module: hadoop-hdfs