需求:
通过简单的Scala代码远程连接Hive,查询Hive表数据并将数据转存到本地。另外,用Scala查询到数据后,我们还可以将查询到的ResultSet集合转化为RDD或者DataFrame进行scala的算子运算
第一步:启动服务器以及需要的服务(hiveserver2)远程连接端口默认配置为10000
hive --service hiveserver2 10000
第二步:创建maven项目导入pom.xml依赖
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.cyy.sparkSql</groupId>
    <artifactId>sparkSqlTest</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <java.version>1.8</java.version>
        <!-- Wire the declared Java version into the compiler plugin defaults
             (previously java.version was declared but never used). -->
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <spark.version>2.1.0</spark.version>
    </properties>
    <dependencies>
        <!-- Spark modules: all reference ${spark.version} so a future upgrade
             changes one property instead of three hard-coded versions. -->
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <!-- Hive JDBC driver used by the standalone client code below. -->
        <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.json/json -->
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20160810</version>
        </dependency>
    </dependencies>
</project>
第三步:代码实现:
import java.io.{File, PrintWriter}
import java.sql.{Connection, DriverManager, ResultSet, Statement}
import org.json.{JSONArray, JSONObject}
/**
 * Standalone client: connects to HiveServer2 over JDBC, dumps the table list,
 * then exports every row of p2p_order_info_user as a JSON array into
 * src/order_table.txt.
 *
 * Fixes over the original:
 *  - driver-missing case aborts instead of continuing to a doomed getConnection;
 *  - all JDBC resources and the writer are closed in finally blocks;
 *  - the output file is one valid JSON array (the old per-row "obj," loop left
 *    a trailing comma, producing invalid JSON);
 *  - column names without a "table." prefix no longer crash split(".")(1);
 *  - ResultSet metadata is read once, not on every row/column.
 */
object sparkSqlTest {
  def main(args: Array[String]): Unit = {
    val driverName: String = "org.apache.hive.jdbc.HiveDriver"
    try {
      Class.forName(driverName)
    } catch {
      case e: ClassNotFoundException =>
        // The original printed a tuple and then attempted the connection anyway;
        // without the driver that can only fail, so report properly and stop.
        println(s"Missing Hive JDBC driver class $driverName: $e")
        return
    }
    // HiveServer2 thrift endpoint (default port 10000), database "p2p".
    val con: Connection = DriverManager.getConnection("jdbc:hive2://tdxy-bigdata-04:10000/p2p")
    try {
      val stmt: Statement = con.createStatement()
      try {
        // Diagnostic: list the tables visible in the database.
        val res: ResultSet = stmt.executeQuery("show tables")
        try {
          while (res.next()) {
            println(res.getString(1))
          }
        } finally res.close()

        val jsonArray: JSONArray = new JSONArray()
        val rs: ResultSet = stmt.executeQuery("select * from p2p_order_info_user")
        try {
          // Metadata is invariant across rows — read it once, outside the loop.
          val meta = rs.getMetaData
          val columnCount = meta.getColumnCount
          println(columnCount)
          // Hive usually qualifies columns as "table.column"; strip the prefix
          // when present instead of assuming it always exists.
          val columnNames: IndexedSeq[String] = (1 to columnCount).map { i =>
            val name = meta.getColumnName(i)
            val dot = name.indexOf('.')
            if (dot >= 0) name.substring(dot + 1) else name
          }
          // One JSONObject per row, keyed by unqualified column name.
          // NULLs are exported as the string "null" to match the original output.
          while (rs.next()) {
            val jsonObject: JSONObject = new JSONObject()
            for (i <- 1 to columnCount) {
              val value = rs.getString(i)
              jsonObject.put(columnNames(i - 1), if (value != null) value else "null")
            }
            println("对象串" + jsonObject.toString)
            jsonArray.put(jsonObject)
          }
        } finally rs.close()

        // JSONArray.toString emits a well-formed JSON array in one shot.
        val writer = new PrintWriter(new File("src/" + "order_table.txt"))
        try writer.print(jsonArray.toString)
        finally writer.close()
      } finally stmt.close()
    } finally con.close()
  }
}
运行结果: