/**
*
*/
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
/**
* @author Administrator
*
*/
public class Ahjt {
/**
*
*/
public Ahjt() {
// TODO Auto-generated constructor stub
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
//JavaSparkContextsc=...;// An existing JavaSparkContext.SQLContextsqlContext=neworg.apache.spark.sql.SQLContext(sc);
SparkConf conf = new SparkConf().setAppName("test").setMaster("spark://192.168.1.251:7077");
JavaSparkContext sc = new JavaSparkContext(conf);
// sc is an existing JavaSparkContext.
SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
// A JSON dataset is pointed to by path.
// The path can be either a single text file or a directory storing text files.
DataFrame people = sqlContext.read().json("/bigdata/spark/examples/src/main/resources/people.json");
// Displays the content of the DataFrame to stdout
people.show();
// The inferred schema can be visualized using the printSchema() method.
people.printSchema();
// root
// |-- age: integer (nullable = true)
// |-- name: string (nullable = true)
// Register this DataFrame as a table.
people.registerTempTable("people");
// Print the schema in a tree format
people.printSchema();
// root
// |-- age: long (nullable = true)
// |-- name: string (nullable = true)
// Select only the "name" column
people.select("name").show();
// name
// Michael
// Andy
// Justin
// Select everybody, but increment the age by 1
people.select(people.col("name"), people.col("age").plus(1)).show();
// name (age + 1)
// Michael null
// Andy 31
// Justin 20
// Select people older than 21
people.filter(people.col("age").gt(21)).show();
// age name
// 30 Andy
// Count people by age
people.groupBy("age").count().show();
// age count
// null 1
// 19 1
// 30 1
// SQL statements can be run by using the sql methods provided by sqlContext.
DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");
// Alternatively, a DataFrame can be created for a JSON dataset represented by
// an RDD[String] storing one JSON object per string.
List<String> jsonData = Arrays.asList(
"{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
JavaRDD<String> anotherPeopleRDD = sc.parallelize(jsonData);
DataFrame anotherPeople = sqlContext.read().json(anotherPeopleRDD);
}
}
Connecting to master spark://192.168.1.251:7077...
15/11/17 16:30:06 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[appclient-registration-retry-thread,5,main]版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。
请问你解决这个问题了吗?
请问你解决这个问题了吗?