开发者学堂课程【Hadoop 分布式计算框架 MapReduce:KeyValueTextInputFormat 案例实现】学习笔记,与课程紧密联系,让用户快速学习知识。
课程地址:https://developer.aliyun.com/learning/course/94/detail/1505
KeyValueTextInputFormat案例实现
目录:
一、编写 Mapper 类
二、编写 Reducer 类
三、编写 Driver 类
一、编写 Mapper 类
package com.atguigu.mapreduce.KeyValueTextInputFormat;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class KVTextMapper extends Mapper<Text, Text, Text,
LongWritable>
// 1 设置 value
LongWritable v = new LongWritable(1);
二、编写 Reducer 类
package com.atguigu.mapreduce.KeyValueTextInputFormat;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class KVTextMapper extends Mapper<Text, Text, Text,
LongWritable>
LongWritable v = new LongWritable(1);
@Override
protected void map(Text key, Text value, Context context)
Reducer<Text, IntWritable, Text, IntWritable> . Context context) throws IOException, InterruptedException
< banzhang,1>
< banzhang,1>
int sum = 0;
// 1 累加求和
for (IntWritable value : values) {
sum += value.get();
}
v.set(sum);
// 2 写出
context.write(key, v);
三、编写 Driver 类
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class KVTextDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
args = new String[]{"e:/input/inputkv", "e:/output1"};
Configuration conf = new Configuration();
conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, " ");
// 1 获取 job 对象
Job job = Job.getInstance(conf);
// 2设置 jar 存储路径
job.setJarByClass(KVTextDriver.class);
// 3 关联 mapper 和 reducer 类
job.setMapperClass(KVTextMapper.class);
job.setReducerClass(KVTextReducer.class);
//4设置 mapper 输出的 key 和 value 类型
job.setMapOutputKeyClass(Text.class);
// 5 设置最终输出的 key 和 value 类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
// 6设置输入输出路径
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 7提交 job
boolean result = job.waitForCompletion(true);