开发者学堂课程【大数据实时计算框架 Spark 快速入门:Spark 算子操作及总结_2】学习笔记,与课程紧密联系,让用户快速学习知识。
课程地址:https://developer.aliyun.com/learning/course/100/detail/1692
Spark 算子操作及总结_2
内容简介:
一、CartesianOperator 相关代码
二、CountByKey Operator 相关代码
三、CogroupOperator 相关代码
一、CartesianOperator 相关代码
17 //中文名笛卡尔乘积
18 //比如说两个RDD, 分别有10条数据,用了cartesian算子以后
19 //两个RDD的每一个数据都会和另外一个RDD的每条数据执行一次JOIN
20 //最终组成一个笛卡尔乘积
21
22 // 小案例
23 //比如说,现在有5件衣服,5条裤子,分别属于两个RDD
24 //就是说,需要对每件衣服都和每条裤子做一次JOIN操作, 尝试进行服装搭配!
25 SparkConf conf = new SparkConf()
26 .setAppName("CartesianOperator").setMaster("local");
27 JavaSparkContext sc = new JavaSparkContext(conf);
28
29 List clothes = Arrays.asList("T恤衫","夹克","皮大衣","衬衫","毛衣");
30 List trousers = Arrays.asList("西裤","内裤","铅笔裤","皮裤","牛仔裤");
31 JavaRDD clothesRDD = sc.parallelize(clothes);
32 JavaRDD trousersRDD = sc.parallelize(trousers);
33
34 JavaPairRDD pairs = clothesRDD.cartesian(trousersRDD);
35 for(Tuple2 pair : pairs.collect()){
36 System.out.println(pair);
37 }
39 sc.close();
40 }
41}
二、CountByKey Operator 相关代码
30 //对RDD应用CountByKey算子,统计每个70s或者80s,人数分别是多少
31 //统计每种Key对应的元素个数
32 Map counts = students.countByKey();
33 for(Map.Entry studentCount : counts.entrySet())
34 System.out.println(studentCount.getKey() + ": " + studentCount.getValue());
35}
36
37 sc.close();
38
39 }
40
三、CogroupOperator 相关代码
12
13 public class CogroupOperator {
14
15 public static void main(String[] args) {
16 SparkConf conf = new SparkConf().setAppName("ReduceOperator")
17 .setMaster("local");
18 JavaSparkContext sc = new JavaSparkContext(conf);
19
20 List studentsList = Arrays.asList(
21 new Tuple2("1","xuruyun"),
22 new Tuple2("2","wangfei"),
23 new Tuple2("3","lixin"));
24 List scoreList = Arrays.asList(
25 new Tuple2("1","100"),
26 new Tuple2("2","90"),
27 new Tuple2("3","80"),
28 new Tuple2("1","70"),
29 new Tuple2("2","60"),
30 new Tuple2("3","50"));
31 JavaPairRDD students = sc.parallelizePairs(studentsList);
32 JavaPairRDD scores = sc.parallelizePairs(scoreList);
33