一、✌题目要求
> 统计文本中每个单词的数量
二、✌实现思想
> - Map阶段默认输入格式为TextInputFormat,输入键值对分别为每行的偏移量和该行的文本内容
> - 在map函数中对每行文本进行切分,提取出每个单词
> - 在Reduce阶段对相同Key的值进行累加求和
三、✌代码实现
1.✌Map类
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //获取每一行的文本 String line = value.toString(); //将每行文本进行切分 String[] words = line.split(" "); //循环写入,写出格式为:hello 1 for (String word : words) { context.write(new Text(word), new IntWritable(1)); } } }
2.✌Reduce类
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> { @Override protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { //创建计数变量 int sum = 0; //统计相同key对应的value值进行累加 for (IntWritable value : values) { sum += value.get(); } //写出,写出格式为:hello 3 context.write(key, new IntWritable(sum)); } }
3.✌Driver类
public class WordCountDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { //设置文件输入输出路径 args=new String[]{"D:/input/inputword","D:/output"}; //打印日志信息 BasicConfigurator.configure(); //创建配置信息对象 Configuration conf=new Configuration(); //获取job对象 Job job= Job.getInstance(conf); //关联Driver、Map、Reduce类 job.setJarByClass(WordCountDriver.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); //设置Map阶段的输出格式 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //设置最终的输出格式 job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //配置文件路径 FileInputFormat.setInputPaths(job,new Path(args[0])); FileOutputFormat.setOutputPath(job,new Path(args[1])); //提交任务 boolean result=job.waitForCompletion(true); System.exit(result?0:1); } }
、