MapReduce入门编程-成绩求和排序

2023-01-28 371

版权

本文内容由阿里云实名注册用户自发贡献，版权归原作者所有，阿里云开发者社区不拥有其著作权，亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容，填写侵权投诉表单进行举报，一经查实，本社区将立刻删除涉嫌侵权内容。

简介： MapReduce入门编程-成绩求和排序

MapReduce入门编程

实验目的

1、掌握编程环境的准备方法

2、掌握程序的打包方法

3、掌握MapReduce任务的运行方法、运行状态的查看方法。

实验任务与要求

任务：选择一个任务或自定义其他任务使用MapReduce完成。

要求：完成完整的环境准备、编码、打包、运行、查看状态和查看结果

实验原理（技术）

1、 MapReduce中map和reduce的原理

2、 Linux的基本操作命令

3、 Java编程基础

4、 hdfs基本原理

步骤（算法、程序）、数据记录与处理、结果分析等

环境搭建：

打开VMware开启集群机，准备三份成绩单分别为

Sum_score.txt、sum_score1.txt、sum_score2.txt

打开eclipse创建工程：File->New->Other->Map/Reduce->next

输入工程名称（这里我已经建好了，我只是演示输入xxx）

然后点击完成

点开工程，在src下面新建一个包，专门用来存放要编写的代码

在新建的包下面新建一个class编写代码

接下来编写代码：

1. package Ds_mapreducd;
2. 
3. import java.io.IOException;
4. 
5. import org.apache.hadoop.conf.Configuration;
6. import org.apache.hadoop.fs.Path;
7. import org.apache.hadoop.io.IntWritable;
8. import org.apache.hadoop.io.LongWritable;
9. import org.apache.hadoop.io.Text;
10. import org.apache.hadoop.mapreduce.Job;
11. import org.apache.hadoop.mapreduce.Mapper;
12. import org.apache.hadoop.mapreduce.Reducer;
13. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
14. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
15. 
16. public class sum_out {
17.   static public class TSMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
18.     Text name =new Text();
19.     IntWritable score =new IntWritable();
20.     @Override
21.     protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
22.         throws IOException, InterruptedException {
23.       String[] vals=value.toString().split("\t");
24.       if (vals.length==2) {
25.         score.set(Integer.parseInt(vals[1]));
26.         name.set(vals[0]);
27.         context.write(name, score);
28.       }
29.     }
30.   }
31.   static public class TSReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
32.     Text name =new Text();
33.     IntWritable totalscore=new IntWritable();
34.     @Override
35.     protected void reduce(Text name, Iterable<IntWritable> scores,
36.         Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
37.       int sum=0;
38.       for(IntWritable sc:scores) {
39.         sum=sum+sc.get();
40.       }
41.       totalscore.set(sum);
42.       context.write(name, totalscore);
43.     }
44.   }
45.   public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
46.     // TODO Auto-generated method stub
47.     Configuration conf = new Configuration();           //创建配置类
48.     Job job = Job.getInstance(conf, "任务五:成绩汇总");    //实例化Job类
49. 
50.     job.setJarByClass(sum_out.class);           //设置主类名
51. 
52.     TextInputFormat.setInputPaths(job, new Path(args[0]));  //设置待输入文件的位置
53.     job.setInputFormatClass(TextInputFormat.class);       //指定使用字符串输入格式类
54. 
55.     job.setMapperClass(TSMapper.class);          //指定使用自定义Map类
56.     job.setMapOutputKeyClass(Text.class);      //指定Map类输出的，K类型，(如果同Reduce类的输出可省略)
57.     job.setMapOutputValueClass(IntWritable.class);   //指定Map类输出的，V类型，(如果同Reduce类的输出可省略)
58. 
59.     job.setReducerClass(TSReducer.class);      //指定使用自定义Reduce类
60.     job.setOutputKeyClass(Text.class);         //指定Reduce类输出的,K类型
61.     job.setOutputValueClass(IntWritable.class);    //指定Reduce类输出的,V类型
62.     job.setNumReduceTasks(1); //指定Reduce个数
63. 
64.     job.setOutputFormatClass(TextOutputFormat.class);   //指定使用默认输出格式类
65.     TextOutputFormat.setOutputPath(job, new Path(args[1])); //设置输出结果文件位置
66. 
67.     System.exit(job.waitForCompletion(true)? 0:1);      //提交任务并监控任务状态，等待任务完成
68. 
69.   }
70. 
71. 
72. }