/** * */ import java.util.Arrays; import java.util.List; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.SQLContext; /** * @author Administrator * */ public class Ahjt { /** * */ public Ahjt() { // TODO Auto-generated constructor stub } /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub //JavaSparkContextsc=...;// An existing JavaSparkContext.SQLContextsqlContext=neworg.apache.spark.sql.SQLContext(sc); SparkConf conf = new SparkConf().setAppName("test").setMaster("spark://192.168.1.251:7077"); JavaSparkContext sc = new JavaSparkContext(conf); // sc is an existing JavaSparkContext. SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc); // A JSON dataset is pointed to by path. // The path can be either a single text file or a directory storing text files. DataFrame people = sqlContext.read().json("/bigdata/spark/examples/src/main/resources/people.json"); // Displays the content of the DataFrame to stdout people.show(); // The inferred schema can be visualized using the printSchema() method. people.printSchema(); // root // |-- age: integer (nullable = true) // |-- name: string (nullable = true) // Register this DataFrame as a table. people.registerTempTable("people"); // Print the schema in a tree format people.printSchema(); // root // |-- age: long (nullable = true) // |-- name: string (nullable = true) // Select only the "name" column people.select("name").show(); // name // Michael // Andy // Justin // Select everybody, but increment the age by 1 people.select(people.col("name"), people.col("age").plus(1)).show(); // name (age + 1) // Michael null // Andy 31 // Justin 20 // Select people older than 21 people.filter(people.col("age").gt(21)).show(); // age name // 30 Andy // Count people by age people.groupBy("age").count().show(); // age count // null 1 // 19 1 // 30 1 // SQL statements can be run by using the sql methods provided by sqlContext. DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19"); // Alternatively, a DataFrame can be created for a JSON dataset represented by // an RDD[String] storing one JSON object per string. List<String> jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData); DataFrame anotherPeople = sqlContext.read().json(anotherPeopleRDD); } }
Connecting to master spark://192.168.1.251:7077...
15/11/17 16:30:06 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[appclient-registration-retry-thread,5,main]请问你解决这个问题了吗?
请问你解决这个问题了吗?
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。
你好,我是AI助理
可以解答问题、推荐解决方案等
评论
全部评论 (0)