使用 OSS SDK 存在的问题
在 Spark 或者 Hadoop 作业中无法直接使用 OSS SDK 来操作 OSS 中的文件,这是因为 OSS SDK 依赖的 http-client-4.4.x 版本与 Spark 或者 Hadoop 运行环境中的 http-client 存在版本冲突。如果一定要直接使用 OSS SDK,就必须先解决这个依赖冲突问题。实际上在 E-MapReduce 中,Spark 和 Hadoop 已经对 OSS 做了无缝兼容,可以像使用 HDFS 一样来操作 OSS 文件。
- 当前E-MapReduce环境支持MetaService服务,可以支持在E-MapReduce环境中免AK访问OSS数据。旧的显式写AK的方式依旧支持,请注意在操作OSS的时候优先使用内网的Endpoint。
- 当您需要在本地进行测试的时候,才要用到OSS的外网的Endpoint,这样才能从本地访问到OSS的数据。
所有的 Endpoint 可以参考 OSS Endpoint。
推荐做法(以免AK方式为例)
请您使用如下方法来查询 OSS 目录下的文件:
[Scala]
```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
val dir = "oss://bucket/dir"
val path = new Path(dir)
val conf = new Configuration()
conf.set("fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem")
val fs = FileSystem.get(path.toUri, conf)
val fileList = fs.listStatus(path)
...
```
[Java]
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
String dir = "oss://bucket/dir";
Path path = new Path(dir);
Configuration conf = new Configuration();
conf.set("fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem");
FileSystem fs = FileSystem.get(path.toUri(), conf);
FileStatus[] fileList = fs.listStatus(path);
...
```