MapReduce报错:「MKDirs failed to create file」
0. 写在前面
- Linux:
Ubuntu Kylin16.04
- Hadoop:
Hadoop2.7.2
1. 程序代码及报错信息
输入、输出路径
zhangsan@hadoop01:/$ ll | grep input
drwxr-xr-x 3 zhangsan zhangsan 4096 9月 20 03:35 input/
zhangsan@hadoop01:/$ ll | grep output
zhangsan@hadoop01:/$
程序代码
package com.mr.ch07.maxmin;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that computes, for every key found in the input, the minimum
 * and maximum of the integer values recorded for that key.
 *
 * <p>Each input line is split on a single space: the first field is used as
 * the key (a date string) and the second field is parsed as an {@code int}.
 */
public class MinMaxValueDemo7_2_1 {

    /** Input path used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/input/ch07/minmax";

    /** Output path used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/output/ch07/minmax";

    /**
     * Emits {@code (date, MinMaxWritable)} for every well-formed input line,
     * with the parsed value stored as both the minimum and the maximum.
     */
    public static class MinMaxMapper extends Mapper<Object, Text, Text, MinMaxWritable> {

        // Reused for every record to avoid allocating one object per line.
        private final MinMaxWritable outTuple = new MinMaxWritable();

        /**
         * Parses one input line and writes its value under its date key.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context task context the output pair is written to
         * @throws IOException           if writing to the context fails
         * @throws InterruptedException  if the task is interrupted
         * @throws NumberFormatException if the second field is not an integer
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strs = value.toString().split(" ");

            // Skip blank or incomplete lines. split() never yields a null
            // element, so a null check cannot detect a missing field; the
            // array length and the emptiness of the date field can.
            if (strs.length < 2 || strs[0].isEmpty()) {
                return;
            }

            String strDate = strs[0];
            System.out.println(strs[0] + ",,,," + strs[1]);

            // A single value is both the minimum and the maximum of itself.
            int number = Integer.parseInt(strs[1]);
            outTuple.setMin(number);
            outTuple.setMax(number);

            context.write(new Text(strDate), outTuple);
        }
    }

    /**
     * Folds all {@link MinMaxWritable} values of one key into a single
     * minimum/maximum pair. Registered by {@code main} as both combiner and
     * reducer.
     */
    public static class MinMaxReducer
            extends Reducer<Text, MinMaxWritable, Text, MinMaxWritable> {

        // Reused across reduce() calls; reset at the start of each call.
        private final MinMaxWritable result = new MinMaxWritable();

        /**
         * Writes the overall minimum and maximum found in {@code values}.
         *
         * @param key     the key whose values are being aggregated
         * @param values  partial min/max pairs for {@code key}
         * @param context task context the aggregated pair is written to
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        public void reduce(Text key, Iterable<MinMaxWritable> values, Context context)
                throws IOException, InterruptedException {
            result.setMax(0);
            result.setMin(0);

            // Track "nothing seen yet" explicitly rather than treating 0 as
            // unset: with a 0 sentinel, a genuine minimum or maximum of 0
            // would be overwritten by the next value.
            boolean first = true;
            for (MinMaxWritable val : values) {
                if (first || val.getMin() < result.getMin()) {
                    result.setMin(val.getMin());
                }
                if (first || val.getMax() > result.getMax()) {
                    System.out.println("val.getMax(): " + val.getMax() + ",,,," + result.getMax());
                    result.setMax(val.getMax());
                }
                first = false;
            }

            context.write(key, result);
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1
     * on failure.
     *
     * @param args optional {@code <in> <out>} paths; when exactly two
     *             arguments are given they are used, otherwise the built-in
     *             default paths are used
     * @throws Exception if the job cannot be set up or run
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        String[] otherArgs = (args != null && args.length == 2)
                ? args
                : new String[] { DEFAULT_INPUT_PATH, DEFAULT_OUTPUT_PATH };

        Job job = Job.getInstance(conf);
        job.setJarByClass(MinMaxValueDemo7_2_1.class);
        job.setMapperClass(MinMaxMapper.class);
        job.setCombinerClass(MinMaxReducer.class);
        job.setReducerClass(MinMaxReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(MinMaxWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Writable pair holding a minimum and a maximum {@code int}.
     *
     * <p>The serialized form is the maximum followed by the minimum;
     * {@link #readFields} and {@link #write} must keep the same order.
     */
    public static class MinMaxWritable implements Writable {
        private int min; // smallest value seen
        private int max; // largest value seen

        public int getMin() {
            return min;
        }

        public void setMin(int min) {
            this.min = min;
        }

        public int getMax() {
            return max;
        }

        public void setMax(int max) {
            this.max = max;
        }

        /**
         * Restores both fields from {@code in}.
         *
         * @param in source to read the two ints from
         * @throws IOException if reading fails
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            // Read in exactly the order write() emits (max, then min).
            // Reading in the opposite order swaps the two fields on every
            // round-trip through serialization.
            max = in.readInt();
            min = in.readInt();
        }

        /**
         * Serializes both fields to {@code out}, maximum first.
         *
         * @param out sink to write the two ints to
         * @throws IOException if writing fails
         */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeInt(max);
            out.writeInt(min);
        }

        /** Returns the maximum and the minimum separated by a tab. */
        @Override
        public String toString() {
            return max + "\t" + min;
        }
    }
}
报错信息
java.lang.Exception: java.io.IOException: Mkdirs failed to create file:/output/ch07/minmax/_temporary/0/_temporary/attempt_local391816241_0001_r_000000_0 (exists=false, cwd=file:/home/zhangsan/Java_Eclipse/eclipse-workspace/MapReduce) at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:529) Caused by: java.io.IOException: Mkdirs failed to create file:/output/ch07/minmax/_temporary/0/_temporary/attempt_local391816241_0001_r_000000_0 (exists=false, cwd=file:/home/zhangsan/Java_Eclipse/eclipse-workspace/MapReduce) at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:449) at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:435) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:909) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:890) at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:787) at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:132) at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.<init>(ReduceTask.java:540) at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:614) at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 2022-09-21 20:42:24,059 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local391816241_0001 running in uber mode : false 2022-09-21 20:42:24,062 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - map 100% reduce 0% 2022-09-21 
20:42:24,064 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1380)) - Job job_local391816241_0001 failed with state FAILED due to: NA 2022-09-21 20:42:24,080 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30 File System Counters FILE: Number of bytes read=247 FILE: Number of bytes written=269461 FILE: Number of read operations=0 FILE: Number of large read operations=0 FILE: Number of write operations=0 Map-Reduce Framework Map input records=8 Map output records=8 Map output bytes=128 Map output materialized bytes=60 Input split bytes=97 Combine input records=8 Combine output records=3 Reduce input groups=0 Reduce shuffle bytes=60 Reduce input records=0 Reduce output records=0 Spilled Records=3 Shuffled Maps =1 Failed Shuffles=0 Merged Map outputs=1 GC time elapsed (ms)=0 Total committed heap usage (bytes)=193986560 Shuffle Errors BAD_ID=0 CONNECTION=0 IO_ERROR=0 WRONG_LENGTH=0 WRONG_MAP=0 WRONG_REDUCE=0 File Input Format Counters Bytes Read=96 File Output Format Counters Bytes Written=0
2. 查找资料
网上有一个帖子提到了这个报错信息
https://community.cloudera.com/t5/Support-Questions/MKDirs-failed-to-create-file/td-p/35041
根据 [@snm1523] 的回答,我尝试在 mapred-site.xml 中
添加如下内容
<property> <name>mapreduce.jobtracker.address</name> <value>localhost:9101</value> </property>
3. 原因分析
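从报错信息中的 `file:/output/...` 和 `LocalJobRunner` 可以看出,程序使用的是本地文件系统,输出路径 `/output/ch07/minmax` 位于本地根目录下。输出路径不能 `create`,那就先创建输出路径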
- 创建输出路径目录/output
zhangsan@hadoop01:/$ sudo mkdir /output
[sudo] zhangsan 的密码:
zhangsan@hadoop01:/$ ll | grep output
drwxr-xr-x 2 root root 4096 9月 21 20:43 output/
依旧跟前面一样的报错信息
创建了路径还是报错,那应该是涉及到`权限问题`
- 修改 `/output` 目录的所有者为当前用户「即 hadoop 安装目录的所有者」
zhangsan@hadoop01:/$ sudo chown -R zhangsan:zhangsan output/
zhangsan@hadoop01:/$ ll | grep output
drwxr-xr-x 2 zhangsan zhangsan 4096 9月 21 20:43 output/
重新执行MR程序
- 运行成功
2022-09-21 20:44:53,945 INFO [Thread-15] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(456)) - reduce task executor complete. 2022-09-21 20:44:54,597 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1360)) - Job job_local2141955672_0001 running in uber mode : false 2022-09-21 20:44:54,600 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1367)) - map 100% reduce 100% 2022-09-21 20:44:54,602 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1378)) - Job job_local2141955672_0001 completed successfully 2022-09-21 20:44:54,615 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1385)) - Counters: 30 File System Counters FILE: Number of bytes read=646 FILE: Number of bytes written=541974 FILE: Number of read operations=0 FILE: Number of large read operations=0 FILE: Number of write operations=0 Map-Reduce Framework Map input records=8 Map output records=8 Map output bytes=128 Map output materialized bytes=60 Input split bytes=97 Combine input records=8 Combine output records=3 Reduce input groups=3 Reduce shuffle bytes=60 Reduce input records=3 Reduce output records=3 Spilled Records=6 Shuffled Maps =1 Failed Shuffles=0 Merged Map outputs=1 GC time elapsed (ms)=0 Total committed heap usage (bytes)=387973120 Shuffle Errors BAD_ID=0 CONNECTION=0 IO_ERROR=0 WRONG_LENGTH=0 WRONG_MAP=0 WRONG_REDUCE=0 File Input Format Counters Bytes Read=96 File Output Format Counters Bytes Written=60
- 查看结果
zhangsan@hadoop01:/output/ch07/minmax$ ll
总用量 20
drwxrwxr-x 2 zhangsan zhangsan 4096 9月 21 20:44 ./
drwxrwxr-x 3 zhangsan zhangsan 4096 9月 21 20:44 ../
-rw-r--r-- 1 zhangsan zhangsan   48 9月 21 20:44 part-r-00000
-rw-r--r-- 1 zhangsan zhangsan   12 9月 21 20:44 .part-r-00000.crc
-rw-r--r-- 1 zhangsan zhangsan    0 9月 21 20:44 _SUCCESS
-rw-r--r-- 1 zhangsan zhangsan    8 9月 21 20:44 ._SUCCESS.crc
4. 参考
https://community.cloudera.com/t5/Support-Questions/MKDirs-failed-to-create-file/td-p/35041
顺利结束