package com.mzsx.hadoop;

import java.io.IOException;
import java.util.Random;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MySortWordCount {

    public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1); // roughly an int
        private Text word = new Text();                            // roughly a String

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            System.err.println(key + "," + value); // debug output
            // Replace punctuation with spaces; StringTokenizer then splits on whitespace by default.
            String tmp = value.toString();
            tmp = tmp.replace('\'', ' ');
            tmp = tmp.replace('.', ' ');
            tmp = tmp.replace(',', ' ');
            tmp = tmp.replace(':', ' ');
            tmp = tmp.replace('!', ' ');
            tmp = tmp.replace(';', ' ');
            tmp = tmp.replace('?', ' ');
            tmp = tmp.replace('`', ' ');
            tmp = tmp.replace('"', ' ');
            tmp = tmp.replace('&', ' ');
            tmp = tmp.replace('(', ' ');
            tmp = tmp.replace(')', ' ');
            tmp = tmp.replace('-', ' ');
            StringTokenizer itr = new StringTokenizer(tmp);
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            System.err.println(key + "," + values); // debug output
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result); // final count for this word
        }
    }

    public static class SortMapper extends Mapper<Object, Text, IntWritable, Text> {

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            IntWritable times = new IntWritable(1);
            Text password = new Text();
            // Each line of the first job's output has the form "word<TAB>count".
            String eachline = value.toString();
            String[] eachterm = eachline.split("\t");
            password.set(eachterm[0]);
            times.set(Integer.parseInt(eachterm[1]));
            // Swap key and value so the framework sorts records by count.
            context.write(times, password);
        }
    }

    public static class SortReducer extends Reducer<IntWritable, Text, IntWritable, Text> {

        private Text password = new Text();

        public void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                password.set(val);
                context.write(key, password);
            }
        }
    }

    // Negates IntWritable's natural order so the sort job emits counts in decreasing order.
    private static class IntDecreasingComparator extends IntWritable.Comparator {
        public int compare(WritableComparable a, WritableComparable b) {
            return -super.compare(a, b);
        }

        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            return -super.compare(b1, s1, l1, b2, s2, l2);
        }
    }

    public static void main(String[] args) throws Exception {
        // Job configuration
        Configuration conf = new Configuration();
        // Word-count job
        Job job = new Job(conf, "Word Count");
        job.setJarByClass(MySortWordCount.class);    // job class
        job.setMapperClass(MyMapper.class);          // mapper
        job.setCombinerClass(MyReducer.class);       // optional combiner
        job.setReducerClass(MyReducer.class);        // reducer
        job.setOutputKeyClass(Text.class);           // output key: Text (String-like)
        job.setOutputValueClass(IntWritable.class);  // output value: IntWritable (int-like)
        //job.setInputFormatClass(KeyValueTextInputFormat.class);

        // Single-job version (fixed input/output paths), kept for reference:
        /*FileInputFormat.setInputPaths(job, new Path("/user/root/aoman.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/user/root/r3"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);*/

        // Write the word-count output to a temporary directory;
        // the sort job then uses that directory as its input.
        FileInputFormat.addInputPath(job, new Path("/user/root/aoman.txt"));
        Path tempDir = new Path("MySortWordCount-temp-"
                + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
        FileOutputFormat.setOutputPath(job, tempDir);

        if (job.waitForCompletion(true)) {
            // Sort job: reads "word<TAB>count" and orders the words by decreasing count.
            Job sortJob = new Job(conf, "csdnsort");
            sortJob.setJarByClass(MySortWordCount.class);
            FileInputFormat.addInputPath(sortJob, tempDir);
            sortJob.setMapperClass(SortMapper.class);
            sortJob.setReducerClass(SortReducer.class); // identity pass-through defined above
            FileOutputFormat.setOutputPath(sortJob, new Path("/user/root/sort1"));
            sortJob.setOutputKeyClass(IntWritable.class);
            sortJob.setOutputValueClass(Text.class);
            sortJob.setSortComparatorClass(IntDecreasingComparator.class);
            FileSystem.get(conf).deleteOnExit(tempDir);
            System.exit(sortJob.waitForCompletion(true) ? 0 : 1);
        }
        // Reached only if the word-count job failed.
        System.exit(1);
    }
}
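To try this out, the class needs to be packaged into a jar and submitted to the cluster; assuming the jar is named, say, mysortwordcount.jar (the name is only an example), the chained jobs could be started with `hadoop jar mysortwordcount.jar com.mzsx.hadoop.MySortWordCount`. The first job writes its word counts to the temporary MySortWordCount-temp-* directory, and the second job reads that directory and writes the words, sorted by decreasing count, to /user/root/sort1.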
Copyright notice: this is original work; if you repost it, please credit the source, otherwise legal liability may be pursued.
This article is reposted from the 梦朝思夕 51CTO blog; original link: http://blog.51cto.com/qiangmzsx/1404661