Today I tried connecting to Hive over JDBC. A plain JDBC connection works fine, for example:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class SampleHiveJdbc {

    public static void main(String[] args) throws SQLException {
        Connection conn = getConnection();
        String sql = "SELECT * FROM log where day='20180828' limit 5";
        PreparedStatement stmt = conn.prepareStatement(sql);
        ResultSet rs = stmt.executeQuery();
        while (rs.next()) {
            System.out.println(rs.getLong(1));
        }
        stmt.close();
        conn.close();
    }

    static Connection getConnection() {
        Connection con = null;
        String JDBC_DB_URL = "jdbc:hive2://192.168.44.141:10001/db";
        try {
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            con = DriverManager.getConnection(JDBC_DB_URL, "hive", "hive");
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        return con;
    }
}
But connecting to Hive over JDBC from inside a Spark program fails. Example:
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;

public class SparkHiveJdbc {

    public static void main(String[] args) {
        String master = "local[1]";
        String appName = "SparkHiveJdbc";
        SparkConf sparkConf = new SparkConf();
        if (StringUtils.isNotBlank(master) && master.startsWith("local")) {
            // local mode
            sparkConf.setAppName(appName).setMaster(master);
        }
        SparkContext sc = SparkContext.getOrCreate(sparkConf);
        SQLContext sqlContext = new SQLContext(sc);

        /* 1. Read the Hive table over JDBC as a DataFrame */
        String readUrl = "jdbc:hive2://192.168.44.141:10001/db";
        String readTable = "log";
        String readUsername = "hive";
        String readPassword = "hive";
        String driverClassName = "org.apache.hive.jdbc.HiveDriver";

        Properties readProperties = new Properties();
        readProperties.setProperty("user", readUsername);
        readProperties.setProperty("password", readPassword);
        readProperties.setProperty("driver", driverClassName);

        sqlContext
                .read().jdbc(readUrl, readTable,
                        new String[] { "day='20180828'" }, readProperties)
                .show();

        sc.stop();
    }
}
With hive-jdbc-0.13.0.jar declared in pom.xml, the Spark job fails with:
java.sql.SQLException: Method not supported
(the 0.13.0 driver throws this for JDBC metadata calls that it never implemented but that Spark's JDBC data source relies on).
Upgrading to hive-jdbc-2.3.3.jar or hive-jdbc-3.1.0.jar only changes the error to:
Required field 'client_protocol' is unset
which typically means the client's Thrift protocol version is newer than what the old HiveServer2 understands.
Even switching the local Spark build to 2.3 does not help. After many attempts the fix turned out to be simple: the cluster runs Hive from CDH 5.8, so use Cloudera's own Hive JDBC driver, which can be downloaded from https://www.cloudera.com/downloads/connectors/hive/jdbc/2-6-2.html.
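Before wiring the Cloudera driver into Spark, it can be worth checking that the downloaded jar loads and connects on its own. A minimal plain-JDBC sketch, assuming the Cloudera driver jar is on the classpath and reusing the same host, database, and credentials as above (the class name ClouderaHiveJdbcCheck is just an illustration):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class ClouderaHiveJdbcCheck {

    public static void main(String[] args) throws Exception {
        // Cloudera HiveServer2 JDBC driver from the downloaded connector package
        Class.forName("com.cloudera.hive.jdbc41.HS2Driver");

        // Same credentials as the Spark example, passed as connection properties
        Properties props = new Properties();
        props.setProperty("user", "hive");
        props.setProperty("password", "hive");

        try (Connection conn = DriverManager.getConnection(
                     "jdbc:hive2://192.168.44.141:10001/db", props);
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                     "SELECT * FROM log where day='20180828' limit 5")) {
            while (rs.next()) {
                System.out.println(rs.getLong(1));
            }
        }
    }
}

If this check passes but the Spark job still fails, the problem is in how the jar reaches Spark's classpath rather than in the driver itself.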
Usage example in Spark:
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SQLContext;

public class SparkHiveJdbc {

    public static void main(String[] args) {
        String master = "local[1]";
        String appName = "SparkHiveJdbc";
        SparkConf sparkConf = new SparkConf();
        if (StringUtils.isNotBlank(master) && master.startsWith("local")) {
            // local mode
            sparkConf.setAppName(appName).setMaster(master);
        }
        SparkContext sc = SparkContext.getOrCreate(sparkConf);
        SQLContext sqlContext = new SQLContext(sc);

        /* 1. Read the Hive table over JDBC as a DataFrame */
        String readUrl = "jdbc:hive2://192.168.44.141:10001/db";
        String readTable = "log";
        String readUsername = "hive";
        String readPassword = "hive";
        // Cloudera HiveServer2 JDBC driver class
        String driverClassName = "com.cloudera.hive.jdbc41.HS2Driver";

        Properties readProperties = new Properties();
        readProperties.setProperty("user", readUsername);
        readProperties.setProperty("password", readPassword);
        readProperties.setProperty("driver", driverClassName);

        sqlContext
                .read().jdbc(readUrl, readTable,
                        new String[] { "day='20180801'" }, readProperties)
                .show();

        sc.stop();
    }
}
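One detail of the read().jdbc(url, table, predicates, connectionProperties) overload used above: each element of the predicates array becomes the WHERE clause of one partition of the resulting DataFrame, fetched over its own JDBC connection, so passing several day filters reads those days in parallel. A small sketch continuing from the variables in the example above (the extra dates are made-up illustration values):

        // Hypothetical example dates; each predicate becomes one DataFrame
        // partition and is pushed down as a WHERE clause on its own connection.
        String[] predicates = new String[] {
                "day='20180801'",
                "day='20180802'",
                "day='20180803'"
        };
        sqlContext
                .read().jdbc(readUrl, readTable, predicates, readProperties)
                .show();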