开发者学堂课程【大数据实时计算框架 Spark 快速入门:HDFS 数据源、DStream 的持久化存储_ 1】学习笔记,与课程紧密联系,让用户快速学习知识。
课程地址:https://developer.aliyun.com/learning/course/100/detail/1722
HDFS 数据源、DStream 的持久化存储_ 1
具体操作如下:
package com.shsxt.study.streaming;
import java.util.Arrays;
public class HDFSWordcount {
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("HDFSWordcount").setMaster("local[2]");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
    JavaDStream<String> lines = jssc.textFileStream("hdfs://node21:8020/wordcount_dir");
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        private static final long serialVersionUID = 1L;
        @Override
        public Iterable<String> call(String line) throws Exception {
            return Arrays.asList(line.split(" "));
}
    });

    JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        private static final long serialVersionUID = 1L;
        @Override
        public Tuple2<String, Integer> call(String word) throws Exception {
            return new Tuple2<String, Integer>(word, 1);
}
});
    JavaPairDStream<String, Integer> wordcounts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        private static final long serialVersionUID = 1L;
        @Override
        public Integer call(Integer v1, Integer v2) throws Exception {
            return v1 + v2;
}