In the previous posts we used a plain Java project for the demos, but managing Hadoop's many dependencies by hand is inconvenient. In this post we use a Maven project to demonstrate the HDFS Java API.
Create a Maven project
Add the required dependencies. The Hadoop artifacts should all use the same version (2.5.0 here), matching the version of the cluster you connect to:
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.5.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.5.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.5.0</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
</dependencies>
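To keep the three Hadoop artifacts from drifting apart, the version can be factored into a single Maven property. A minimal sketch; the property name hadoop.version is just a convention of this example, not something Maven requires:

<properties>
    <hadoop.version>2.5.0</hadoop.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- repeat for hadoop-client and hadoop-common -->
</dependencies>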
CRUD operations
package com.sxt.test;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;

public class HdfsTest {

    FileSystem fs = null;
    Configuration conf = null;

    @Before
    public void setUp() throws Exception {
        // 1. Load the configuration files
        conf = new Configuration(true);
        // 2. Get the FileSystem instance, connecting as user "root"
        fs = FileSystem.get(new URI("hdfs://hadoop-node01:9000"), conf, "root");
    }

    /**
     * Upload a file
     */
    @Test
    public void uploadFile() throws IllegalArgumentException, IOException {
        // delSrc = true (delete the local source), overwrite = true
        fs.copyFromLocalFile(true, true, new Path("c:/unintall.log"), new Path("/"));
        fs.close();
    }

    /**
     * Download a file
     */
    @Test
    public void testDownload() throws Exception {
        fs.copyToLocalFile(new Path("/a.txt"), new Path("c:/tools"));
        fs.close();
    }

    /**
     * Print the configuration parameters
     */
    @Test
    public void testConf() {
        Iterator<Entry<String, String>> it = conf.iterator();
        while (it.hasNext()) {
            Entry<String, String> ent = it.next();
            System.out.println(ent.getKey() + " : " + ent.getValue());
        }
    }

    /**
     * Create directories
     */
    @Test
    public void testMkdir() throws Exception {
        boolean mkdirs = fs.mkdirs(new Path("/testmkdir/aaa/bbb"));
        System.out.println(mkdirs);
    }

    /**
     * Delete a directory
     */
    @Test
    public void testDelete() throws Exception {
        // The second argument controls recursive deletion
        boolean flag = fs.delete(new Path("/testmkdir/aaa"), true);
        System.out.println(flag);
    }

    /**
     * Recursively list the files under a directory and all of its subdirectories
     */
    @Test
    public void testLs() throws Exception {
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus fileStatus = listFiles.next();
            System.out.println("blocksize: " + fileStatus.getBlockSize());
            System.out.println("owner: " + fileStatus.getOwner());
            System.out.println("Replication: " + fileStatus.getReplication());
            System.out.println("Permission: " + fileStatus.getPermission());
            System.out.println("Name: " + fileStatus.getPath().getName());
            System.out.println("------------------");
            BlockLocation[] blockLocations = fileStatus.getBlockLocations();
            for (BlockLocation b : blockLocations) {
                System.out.println("block offset: " + b.getOffset());
                System.out.println("block length: " + b.getLength());
                // The datanodes hosting this block
                String[] datanodes = b.getHosts();
                for (String dn : datanodes) {
                    System.out.println("datanode: " + dn);
                }
            }
        }
    }

    /**
     * Check whether each entry under a directory is a file or a directory
     */
    @Test
    public void testLs2() throws Exception {
        FileStatus[] listStatus = fs.listStatus(new Path("/"));
        for (FileStatus file : listStatus) {
            System.out.println("name: " + file.getPath().getName());
            System.out.println(file.isFile() ? "file" : "directory");
        }
    }

    /**
     * Upload a file to HDFS through streams
     */
    @Test
    public void testStreamUpload() throws Exception {
        // Output stream to HDFS (overwrite if the file already exists)
        FSDataOutputStream out = fs.create(new Path("/dpb.txt"), true);
        // Byte input stream over the local file
        InputStream in = new FileInputStream("c:/tools/aaaaa.txt");
        IOUtils.copy(in, out);
        in.close();
        out.close();
    }

    /**
     * Read data from HDFS through streams
     */
    @Test
    public void testStreamDownload() throws Exception {
        FSDataInputStream in = fs.open(new Path("/dpb.txt"));
        OutputStream out = new FileOutputStream("c:/tools/a1.txt");
        IOUtils.copy(in, out);
        out.close();
        in.close();
    }

    /**
     * Read part of a file on HDFS
     */
    @Test
    public void testRandomAccess() throws Exception {
        FSDataInputStream in = fs.open(new Path("/dpb.txt"));
        // Skip the first 4 bytes
        in.seek(4);
        OutputStream out = new FileOutputStream("c:/tools/a2.txt");
        IOUtils.copy(in, out);
        out.close();
        in.close();
    }

    /**
     * Print the content of a file on HDFS
     */
    @Test
    public void testCat() throws IllegalArgumentException, IOException {
        FSDataInputStream in = fs.open(new Path("/dpb.txt"));
        // Direct the output to the console
        IOUtils.copy(in, System.out);
    }
}
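One inconsistency worth noting: some tests above close the FileSystem themselves while others never close it. Since @Before re-creates the connection for every test, a tidier pattern is a JUnit @After method. A minimal sketch (if you adopt it, remove the fs.close() calls inside the individual tests):

import org.junit.After;

@After
public void tearDown() throws IOException {
    // Release the HDFS connection after each test; @Before re-creates it
    if (fs != null) {
        fs.close();
    }
}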
Only the output of the last method is shown here; run the other methods yourself to see their effects.
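To run a single test method from the command line, the Maven Surefire plugin supports selecting it by name (the Class#method syntax requires Surefire 2.7.3 or later):

mvn test -Dtest=HdfsTest#testCat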