3.4.4 统计每门课程中相同分数分布情况(Css)
package course_score_same;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * Input columns (comma-separated):
 *   stu[0]: course name, stu[1]: student name, stu[2]: score,
 *   stu[3]: gender, stu[4]: age
 * Emits (course + "\t" + score) -> student name so the reducer can group
 * students who received the same score in the same course.
 */
public class CssMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the CSV line into its individual fields.
        String[] fields = value.toString().split(",");
        // Key: course name and score joined by a tab; Value: student name.
        Text outKey = new Text(fields[0] + "\t" + fields[2]);
        Text outValue = new Text(fields[1]);
        context.write(outKey, outValue);
    }
}
package course_score_same;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Receives (course + "\t" + score) -> student names from {@link CssMapper}
 * and writes out only the groups containing more than one student, i.e.
 * students who got the same score in the same course.
 */
public class CssReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // StringBuilder (unsynchronized) instead of StringBuffer: each
        // reduce() call runs on a single thread, so locking is wasted work.
        StringBuilder sb = new StringBuilder();
        // Count the students while concatenating their names.
        int num = 0;
        for (Text value : values) {
            sb.append(value.toString()).append(",");
            num++;
        }
        // Emit only when at least two students share this course/score pair;
        // a count of 1 means the score is unique within the course.
        if (num > 1) {
            String names = "一共有" + num + "名学生,他们的名字是:" + sb.toString();
            System.out.println("*************************************************");
            System.out.println(key.toString() + names);
            context.write(key, new Text(names));
        }
    }
}
package course_score_same;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Driver: counts, per course, the students who received the same score
 * (see {@link CssMapper} / {@link CssReducer}).
 */
public class CssMain {
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
        // Create the job for the "same score per course" task.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(CssMain.class);

        // Mapper and Reducer entry points.
        job.setMapperClass(CssMapper.class);
        job.setReducerClass(CssReducer.class);

        // Mapper output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Input and output locations on HDFS.
        String inputPath = "hdfs://localhost:9000/mapreduce/input/学生成绩.csv";
        String outputPath = "hdfs://localhost:9000/mapreduce/output/该课程中成绩相同的学生姓名.txt";
        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Delete a pre-existing output path, otherwise the job fails with
        // FileAlreadyExistsException and it would have to be removed by hand.
        FileSystem fileSystem = FileSystem.get(new URI(outputPath), conf);
        if (fileSystem.exists(new Path(outputPath))) {
            fileSystem.delete(new Path(outputPath), true);
        }

        // Run the job and propagate success/failure as the process exit code
        // (the original ignored the boolean result, always exiting 0).
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
3.4.5 统计各性别的人数及他们的姓名(Snn)
package sex_number_name;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/*
 * Input columns (comma-separated):
 *   stu[0]: course name, stu[1]: student name, stu[2]: score,
 *   stu[3]: gender, stu[4]: age
 * Emits gender -> student name so the reducer can count students per
 * gender and list their names.
 */
public class SnnMapper extends Mapper<LongWritable, Text, Text, Text> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split the CSV line into its individual fields.
        String[] fields = value.toString().split(",");
        // Key: gender; Value: student name.
        context.write(new Text(fields[3]), new Text(fields[1]));
    }
}
package sex_number_name;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * Receives gender -> student names from {@link SnnMapper}, de-duplicates the
 * names (each student appears once per course taken) and writes the count
 * plus the name list for each gender.
 */
public class SnnReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // De-duplicate directly into a typed set (the original used a raw
        // `new HashSet(names)` built from an intermediate ArrayList).
        // LinkedHashSet keeps first-seen order, making output deterministic.
        Set<String> singleNames = new LinkedHashSet<>();
        for (Text value : values) {
            singleNames.add(value.toString());
        }

        // Concatenate the unique names; the count is simply the set size.
        StringBuilder sb = new StringBuilder();
        for (String singleName : singleNames) {
            sb.append(singleName).append(",");
        }
        int num = singleNames.size();

        // Key is "男"/"女", so key + result reads e.g. "男生一共有N名,...".
        String result = "生一共有" + num + "名,他们的名字是:" + sb.toString();
        System.out.println("********************************************");
        System.out.println(key.toString() + result);
        context.write(key, new Text(result));
    }
}
package sex_number_name; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; public class SnnMain { public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException { //创建job和“统计相同课程相同分数的人数”任务入口 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(SnnMain.class); //设置Mapper和Reducer的入口 job.setMapperClass(SnnMapper.class); job.setReducerClass(SnnReducer.class); //设置Mapper的输入输出类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //设置Reducer的输入输出类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //指定输入输出路径 String inputPath = "hdfs://localhost:9000/mapreduce/input/学生成绩.csv"; String outputPath = "hdfs://localhost:9000/mapreduce/output/各性别人数及他们的姓名.txt"; FileInputFormat.setInputPaths(job,new Path(inputPath)); FileOutputFormat.setOutputPath(job,new Path(outputPath)); //输出路径存在的话就删除,不然就只能手动删除,否则会报该文件已存在的异常 FileSystem fileSystem = FileSystem.get(new URI(outputPath), conf); if (fileSystem.exists(new Path(outputPath))) { fileSystem.delete(new Path(outputPath), true); } //执行job job.waitForCompletion(true); }