Spark + ONS
Spark 接入 ONS
下面这个例子演示了 Spark Streaming 如何消费 ONS 中的数据,并统计每个 batch 内的单词个数。
```scala
val Array(cId, topic, subExpression, parallelism, interval) = args

val accessKeyId = "<accessKeyId>"
val accessKeySecret = "<accessKeySecret>"

val numStreams = parallelism.toInt
val batchInterval = Milliseconds(interval.toInt)

val conf = new SparkConf().setAppName("Test ONS Streaming")
val ssc = new StreamingContext(conf, batchInterval)

def func: Message => Array[Byte] = msg => msg.getBody

val onsStreams = (0 until numStreams).map { i =>
  println(s"starting stream $i")
  OnsUtils.createStream(
    ssc,
    cId,
    topic,
    subExpression,
    accessKeyId,
    accessKeySecret,
    StorageLevel.MEMORY_AND_DISK_2,
    func)
}

val unionStreams = ssc.union(onsStreams)
unionStreams.foreachRDD(rdd => {
  rdd.map(bytes => new String(bytes)).flatMap(line => line.split(" "))
    .map(word => (word, 1))
    .reduceByKey(_ + _).collect().foreach(e => println(s"word: ${e._1}, cnt: ${e._2}"))
})

ssc.start()
ssc.awaitTermination()
```
附录
示例代码请看: