Create a New Project
- File -> New -> Module -> Maven, then select the `quickstart` archetype
- Set the project name and location
- Click Finish
- The resulting project directory
- Configure `pom.xml`
- Add the `hadoop` dependency:
```xml
<!-- Add the Hadoop dependency -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>${hadoop.version}</version>
</dependency>
```
- Define the Hadoop version globally:
```xml
<!-- Define the Hadoop version globally -->
<hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
```
- The finished `pom.xml`:
```xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.syh</groupId>
  <artifactId>hadoopdemo</artifactId>
  <version>1.0-SNAPSHOT</version>

  <name>hadoopdemo</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <!-- Define the Hadoop version globally -->
    <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
  </properties>

  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
  </repositories>

  <dependencies>
    <!-- Add the Hadoop dependency -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
```
- Click the icon shown in the figure to re-import the Maven project
- The configured Maven directory
Test Whether Hadoop Is Installed Successfully
- Write the following code in the `AppTest` file:
```java
package com.syh;

import static org.junit.Assert.assertTrue;

import org.junit.Test;

import org.apache.hadoop.fs.*;

/**
 * Unit test for simple App.
 */
public class AppTest {
    /**
     * Rigorous Test :-)
     */
    @Test
    public void shouldAnswerWithTrue() {
        assertTrue(true);
    }

    @Test
    public void testHadoop() {
        // Referencing FileSystem is enough to confirm the Hadoop classes
        // are on the classpath
        FileSystem fileSystem = null;
    }
}
```
- Hold `Ctrl` and click `FileSystem`. If the IDE jumps to the class inside the Hadoop package, the dependency was installed successfully.
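- As an alternative to the IDE check, you can add a test like the one below to `AppTest` (my addition, not part of the original walkthrough). It prints which jar `FileSystem` was loaded from; a path containing `2.6.0-cdh5.7.0` confirms the Cloudera artifact resolved correctly.

```java
// Print the jar that provides FileSystem; a hadoop-common-2.6.0-cdh5.7.0
// path means the dependency was resolved from the Cloudera repository.
@Test
public void printFileSystemJar() {
    System.out.println(FileSystem.class
            .getProtectionDomain().getCodeSource().getLocation());
}
```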
Connect to HDFS
- Create a new Java file
- Write the following code in the `HDFSApp` file to create a directory in HDFS:
```java
package com.syh.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.URI;

public class HDFSApp {
    Configuration configuration = null;
    FileSystem fileSystem = null;

    // HDFS path (the NameNode's IP address and port)
    public static final String HDFS_PATH = "hdfs://192.168.207.128:8020";

    // Test: create a directory
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/emperorlawd/test"));
    }

    // Connecting to HDFS from Java requires opening a connection first.
    // Runs before each test method.
    @Before
    public void setUp() throws Exception {
        System.out.println("Opening the connection to HDFS");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop");
    }

    // Runs after each test method.
    @After
    public void tearDown() {
        configuration = null;
        fileSystem = null;
        System.out.println("Closing the connection to HDFS");
    }
}
```
- The output of a successful run
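- To confirm from code that the directory was actually created, a minimal sketch (my addition, reusing the `setUp()`/`tearDown()` fixture above; the method name is hypothetical) is:

```java
// Verify the directory created by mkdir(); exists() returns true once the
// path is visible in HDFS. Add this method to HDFSApp.
@Test
public void mkdirAndVerify() throws Exception {
    Path dir = new Path("/emperorlawd/test");
    fileSystem.mkdirs(dir);
    System.out.println("exists = " + fileSystem.exists(dir));
}
```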
Create a File
- Add the following code to the `HDFSApp` file:
```java
// Create a file (requires: import org.apache.hadoop.fs.FSDataOutputStream;)
@Test
public void create() throws Exception {
    Path path = new Path("/emperorlawd/test/hello.txt");
    FSDataOutputStream outputStream = fileSystem.create(path);
    outputStream.write("hello hadoop".getBytes());
    outputStream.flush();
    outputStream.close();
}
```
- Run `create()`
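- To double-check the file you just wrote, a small sketch (same `HDFSApp` fixture; `status()` is a hypothetical helper, not from the original) reads its metadata with `getFileStatus()`:

```java
// Requires: import org.apache.hadoop.fs.FileStatus;
// Print the size and replication factor of the freshly created file.
@Test
public void status() throws Exception {
    FileStatus status = fileSystem.getFileStatus(new Path("/emperorlawd/test/hello.txt"));
    System.out.println("length = " + status.getLen()
            + ", replication = " + status.getReplication());
}
```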
Rename a File
- Add the following code to the `HDFSApp` file:
```java
// Rename a file
@Test
public void rename() throws Exception {
    Path oldPath = new Path("/emperorlawd/test/hello.txt");
    Path newPath = new Path("/emperorlawd/test/rehello.txt");
    fileSystem.rename(oldPath, newPath);
}
```
- Run `rename()`
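- Note that `rename()` generally reports failure by returning `false` rather than throwing, so a variant that checks the result (my addition) can save debugging time:

```java
// rename() returns false (e.g. when the source path does not exist)
// instead of throwing, so the return value is worth checking.
@Test
public void renameChecked() throws Exception {
    boolean renamed = fileSystem.rename(
            new Path("/emperorlawd/test/hello.txt"),
            new Path("/emperorlawd/test/rehello.txt"));
    System.out.println("renamed = " + renamed);
}
```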
View a File
- Add the following code to the `HDFSApp` file:
```java
// View a file (requires: import org.apache.hadoop.fs.FSDataInputStream;
// and import org.apache.hadoop.io.IOUtils;)
@Test
public void cat() throws Exception {
    Path path = new Path("/emperorlawd/test/rehello.txt");
    FSDataInputStream inputStream = fileSystem.open(path);
    IOUtils.copyBytes(inputStream, System.out, 1024);
    inputStream.close();
}
```
- Run `cat()`
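- If you want the contents as a `String` instead of streaming them to the console, one variant (illustrative; `catToString()` is my own name) copies the bytes into an in-memory buffer:

```java
// Requires: import java.io.ByteArrayOutputStream;
// Copy the file into an in-memory buffer, then decode it as UTF-8.
@Test
public void catToString() throws Exception {
    FSDataInputStream in = fileSystem.open(new Path("/emperorlawd/test/rehello.txt"));
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    IOUtils.copyBytes(in, buffer, 1024);
    in.close();
    System.out.println(buffer.toString("UTF-8"));
}
```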
Upload a File
- Add the following code to the `HDFSApp` file:
```java
// Upload a local file to HDFS
@Test
public void upload() throws Exception {
    Path localPath = new Path("cifar-10-python.tar.gz");
    Path hdfsPath = new Path("/");
    fileSystem.copyFromLocalFile(localPath, hdfsPath);
}
```
- Run `upload()`
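- For a large archive like this one, it helps to see progress while the copy runs. A sketch using the lower-level `create(Path, Progressable)` overload (my addition; prints one dot per progress callback, and uses an anonymous class because the pom targets Java 1.7) is:

```java
// Requires: import java.io.BufferedInputStream; import java.io.FileInputStream;
// import java.io.InputStream; and import org.apache.hadoop.util.Progressable;
// Stream the local file into HDFS, printing a dot whenever the client
// reports progress.
@Test
public void uploadWithProgress() throws Exception {
    InputStream in = new BufferedInputStream(
            new FileInputStream("cifar-10-python.tar.gz"));
    FSDataOutputStream out = fileSystem.create(
            new Path("/cifar-10-python.tar.gz"),
            new Progressable() {
                public void progress() {
                    System.out.print(".");
                }
            });
    IOUtils.copyBytes(in, out, 4096);
    in.close();
    out.close();
}
```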
Download a File
- Add the following code to the `HDFSApp` file:
```java
// Download a file from HDFS; the first argument (false) keeps the source
// in HDFS, and the last (true) uses the raw local file system, which skips
// writing a local .crc checksum file
@Test
public void download() throws Exception {
    Path hdfsPath = new Path("/hadoop-2.6.0-cdh5.7.0.tar.gz");
    Path localPath = new Path("./down/hadoop-2.6.0-cdh5.7.0.tar.gz");
    fileSystem.copyToLocalFile(false, hdfsPath, localPath, true);
}
```
- Run `download()`
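- As a final check, you can list the root of the filesystem to confirm that the operations above took effect (illustrative sketch, same fixture; `listRoot()` is my own name):

```java
// Requires: import org.apache.hadoop.fs.FileStatus;
// Print every entry directly under / together with its size in bytes.
@Test
public void listRoot() throws Exception {
    FileStatus[] entries = fileSystem.listStatus(new Path("/"));
    for (FileStatus entry : entries) {
        System.out.println(entry.getPath() + "\t" + entry.getLen());
    }
}
```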