Note
1) Remember to take a snapshot first - snapshot, snapshot, snapshot!
How do you snapshot an ECS server? See the server setup article: ECS服务器搭建hadoop伪分布式 (CBeann's CSDN blog)
2) About the reference material (videos)
The videos in the netdisk linked above cover both the server environment setup and the Eclipse client setup. The server-side walkthrough is well done, but the client-setup videos only cover Win7 and Win10, which means the bundled materials (jar files) are for Win7 and Win10. If you want to set up the environment on Win8, sorry, they won't help you. Everything else is fine.
Server environment setup (Alibaba Cloud ECS + pseudo-distributed mode)
An easy-to-follow, illustrated walkthrough of the environment setup
HDFS shell commands
Prerequisite: this continues from the environment set up above.
List directory contents (like ls in Linux)
hadoop fs -ls <HDFS path>
hadoop fs -ls /
hadoop fs -ls -R <HDFS path>   // list recursively
hadoop fs -ls -R /
Create a directory on HDFS
The -p flag creates parent directories as needed (multi-level directories).
hadoop fs -mkdir -p /sanguo/shuguo
Move (cut) a file from local to HDFS
Cut, cut, cut - the local file is removed.
Move panjinlian.txt from the current local directory to /sanguo/shuguo/ on HDFS:
hadoop fs -moveFromLocal ./panjinlian.txt /sanguo/shuguo/
Copy a file from the local file system to an HDFS path
Copy README.txt from the current local directory to the HDFS root directory:
hadoop fs -copyFromLocal README.txt /
Append a file to the end of an existing HDFS file
HDFS only supports appending; modifying existing content is not supported.
hadoop fs -appendToFile <local file path> <HDFS path>
hadoop fs -appendToFile liubei.txt /sanguo/shuguo/kongming.txt
View the contents of a file on HDFS
hadoop fs -cat <HDFS path>
hadoop fs -cat /sanguo/shuguo/panjinlian.txt
Copy from HDFS to local
hadoop fs -copyToLocal <HDFS path> <local path>
hadoop fs -copyToLocal /sanguo/shuguo/kongming.txt ./
Copy from one HDFS path to another HDFS path
hadoop fs -cp /sanguo/shuguo/kongming.txt /zhuge.txt
Move a file within HDFS
hadoop fs -mv /zhuge.txt /sanguo/shuguo/
get: download a file from HDFS to local, equivalent to copyToLocal
hadoop fs -get <HDFS path> <local path>
hadoop fs -get /sanguo/shuguo/kongming.txt ./
Merge and download multiple files
For example, the HDFS directory /user/atguigu/test contains several files (log.1, log.2, log.3, ...); merge them into zaiyiqi.txt:
hadoop fs -getmerge /user/atguigu/test/* ./zaiyiqi.txt
put: upload from local to HDFS, equivalent to copyFromLocal
hadoop fs -put ./zaiyiqi.txt /user/atguigu/test/
Delete a file on HDFS
hadoop fs -rm /user/test/jinlian2.txt
Recursive delete (-r)
hadoop fs -rm -r /user/test/jinlian2.txt
HDFS client setup (Eclipse, etc.)
This part is genuinely a bit tricky. I searched around online, but my setup follows the video; the video is in the Baidu netdisk linked at the top of this article.
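If you would rather not use the jar files from the video, adding the Hadoop client libraries via Maven is an alternative. This is only a sketch under my own assumptions - the article does not state a Hadoop version, so match <version> to whatever your server runs:

<!-- hypothetical pom.xml snippet (not from the video's materials) -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <!-- assumption: replace with the Hadoop version installed on your ECS server -->
    <version>2.7.2</version>
</dependency>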
HDFS client API operations
Prerequisite (crucially important - skip it and you will hit problems later): edit your local hosts file.
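A minimal sketch of that hosts entry (on Windows the file is C:\Windows\System32\drivers\etc\hosts). The public IP and instance hostname below are the ones that appear in the code later in this article - substitute your own values, and since the hostname is spelled slightly differently in some snippets, use whatever the hostname command prints on your server:

# map the ECS instance hostname to its *public* IP so the HDFS client
# can reach the server (and its DataNode) by name
47.105.133.99  a99qngm2v98asii1aZ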
Client environment test
package com.imooc.hdfs;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClient {

    public static void main(String[] args) throws Exception {
        // 1. Get the file system
        Configuration configuration = new Configuration();
        // Configure it to run against the cluster:
        // the IP in the URI is the IP from Figure 1,
        // the "root" argument corresponds to the arrow in Figure 2
        FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

        // 2. Create a directory
        fs.mkdirs(new Path("/20190307/CBeann"));

        // 3. Close the resource
        fs.close();
        System.out.println("--------over---------");
    }
}
Parameters used in the code (explained below)
configuration.set("fs.defaultFS", "hdfs://a99qngm2v98asii1aZ:8020");
hdfs://<hostname>:8020
where <hostname> is the value shown in the figure below
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");
where the "root" argument is the value shown in the figure below
copyFromLocalFile: file upload
public static void main(String[] args) throws Exception {
    // 1. Get the file system
    Configuration configuration = new Configuration();
    configuration.set("dfs.client.use.datanode.hostname", "true");
    configuration.set("fs.defaultFS", "hdfs://a99qngm2v98asii1aZ:8020");
    FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

    // 2. Upload the file; arguments: source path, destination path
    fs.copyFromLocalFile(new Path("e:/temp/hello.txt"), new Path("/hello.txt"));

    // 3. Close the resource
    fs.close();
    System.out.println("over");
}
copyToLocalFile: file download
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://ea99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Download the file
// boolean delSrc: whether to delete the source file
// Path src: the HDFS path of the file to download
// Path dst: the local path to download the file to
// boolean useRawLocalFileSystem: use the raw local file system (true = no .crc checksum file is written locally)
fs.copyToLocalFile(false, new Path("/hello.txt"), new Path("e:/temp/helloword.txt"), true);

// 3. Close the resource
fs.close();
System.out.println("-----------over--------------");
delete: delete a file
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://a99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Delete
// Path: the path to delete
// boolean: whether to delete recursively
fs.delete(new Path("/hello.txt"), true);

// 3. Close the resource
fs.close();
System.out.println("-----------over--------------");
rename: rename a file
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://a99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Rename the file
fs.rename(new Path("/hello.txt"), new Path("/helloworld.txt"));

// 3. Close the resource
fs.close();
System.out.println("-----------over--------------");
Viewing HDFS file details
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://ea99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Get the file details
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
while (listFiles.hasNext()) {
    LocatedFileStatus status = listFiles.next();

    // File name
    System.out.println("File name: " + status.getPath().getName());
    // Length
    System.out.println("Length: " + status.getLen());
    // Permissions
    System.out.println("Permissions: " + status.getPermission());
    // Group
    System.out.println("Group: " + status.getGroup());

    // Block locations
    BlockLocation[] blockLocations = status.getBlockLocations();
    System.out.println("Block locations:");
    for (BlockLocation blockLocation : blockLocations) {
        // Hosts storing this block
        String[] hosts = blockLocation.getHosts();
        for (String host : hosts) {
            System.out.println(host);
        }
    }
    System.out.println("-----------divider----------");
}

// 3. Close the resource
fs.close();
System.out.println("-----over-----");
Distinguishing HDFS files from directories
// 1. Get the configuration
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://ea99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Check whether each entry is a file or a directory
FileStatus[] listStatus = fs.listStatus(new Path("/"));
for (FileStatus fileStatus : listStatus) {
    if (fileStatus.isFile()) {
        // File
        System.out.println("f:" + fileStatus.getPath().getName());
    } else {
        // Directory
        System.out.println("d:" + fileStatus.getPath().getName());
    }
}

// 3. Close the resource
fs.close();
System.out.println("----over----");
HDFS I/O stream operations
HDFS file upload
Goal
Upload hello.txt from the local E: drive to the HDFS root directory.
Code
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://ea99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Create the input stream (local file)
FileInputStream fis = new FileInputStream(new File("e:/temp/hello.txt"));

// 3. Get the output stream (HDFS file)
FSDataOutputStream fos = fs.create(new Path("/hello.txt"));

// 4. Copy the stream
IOUtils.copyBytes(fis, fos, configuration);

// 5. Close the resources
IOUtils.closeStream(fos);
IOUtils.closeStream(fis);
fs.close();
System.out.println("over");
HDFS file download
Goal
Download a file from HDFS to the local E: drive (the example below downloads /hello.txt).
Code
// 1. Get the file system
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");
configuration.set("fs.defaultFS", "hdfs://ea99qngm2v98asii1aZ:8020");
FileSystem fs = FileSystem.get(new URI("hdfs://47.105.133.99:9000"), configuration, "root");

// 2. Get the input stream (HDFS file)
FSDataInputStream fis = fs.open(new Path("/hello.txt"));

// 3. Get the output stream (local file)
FileOutputStream fos = new FileOutputStream(new File("e:/temp/helloworld.txt"));

// 4. Copy the stream
IOUtils.copyBytes(fis, fos, configuration);

// 5. Close the resources
IOUtils.closeStream(fos);
IOUtils.closeStream(fis);
fs.close();
System.out.println("over");
Common problems
Problem 1: File /hdfsapi/test/a.txt could only be replicated to 0 nodes instead of minReplication (=1)
Solution: edit your hosts file before writing any code (please, please, please - see the prerequisite above).
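My understanding of why this happens (hedged, not spelled out in the original article): the NameNode hands the client the DataNode's private address (172.31.236.96 in the trace below), which is unreachable from outside Alibaba Cloud, so the block write times out and the only DataNode gets excluded. Together with the hosts entry above, the samples in this article therefore set the following client option so DataNodes are addressed by hostname, which the hosts file then maps to the public IP:

// client-side setting already used in the code samples above:
// ask the NameNode for DataNode hostnames instead of their (private) IPs
Configuration configuration = new Configuration();
configuration.set("dfs.client.use.datanode.hostname", "true");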
Exception details:
2019-03-09 16:26:29,406 INFO [org.apache.hadoop.hdfs.DFSClient] - Exception in createBlockOutputStream java.net.ConnectException: Connection timed out: no further information at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) at sun.nio.ch.SocketChannelImpl.finishConnect(Unknown Source) at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531) at org.apache.hadoop.hdfs.DFSOutputStream.createSocketForPipeline(DFSOutputStream.java:1537) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.createBlockOutputStream(DFSOutputStream.java:1313) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1266) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:449) 2019-03-09 16:26:29,470 INFO [org.apache.hadoop.hdfs.DFSClient] - Abandoning BP-109356787-172.31.236.96-1547785821831:blk_1073741830_1006 2019-03-09 16:26:29,626 INFO [org.apache.hadoop.hdfs.DFSClient] - Excluding datanode DatanodeInfoWithStorage[172.31.236.96:50010,DS-96fc6538-dec0-4c3b-a8fb-8f73908a3370,DISK] 2019-03-09 16:26:29,809 WARN [org.apache.hadoop.hdfs.DFSClient] - DataStreamer Exception org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /hello.txt could only be replicated to 0 nodes instead of minReplication (=1). There are 1 datanode(s) running and 1 node(s) are excluded in this operation. at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1547) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3107) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3031) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:724) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:492) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043) at org.apache.hadoop.ipc.Client.call(Client.java:1475) at org.apache.hadoop.ipc.Client.call(Client.java:1412) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229) at com.sun.proxy.$Proxy9.addBlock(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:418) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy10.addBlock(Unknown 
Source) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.locateFollowingBlock(DFSOutputStream.java:1459) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1255) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:449) Exception in thread "main" org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /hello.txt could only be replicated to 0 nodes instead of minReplication (=1). There are 1 datanode(s) running and 1 node(s) are excluded in this operation. at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1547) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getNewBlockTargets(FSNamesystem.java:3107) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3031) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:724) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:492) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049) at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043) at org.apache.hadoop.ipc.Client.call(Client.java:1475) at org.apache.hadoop.ipc.Client.call(Client.java:1412) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229) at com.sun.proxy.$Proxy9.addBlock(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:418) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy10.addBlock(Unknown Source) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.locateFollowingBlock(DFSOutputStream.java:1459) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1255) at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:449)