I. SQLContext
1. Applicable Spark version: Spark 1.x
2. Add dependencies (the _2.11 suffix of the spark-sql artifact must match the Scala library version):
<dependency>
  <groupId>org.scala-lang</groupId>
  <artifactId>scala-library</artifactId>
  <version>2.11.8</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-sql_2.11</artifactId>
  <version>2.1.0</version>
  <scope>compile</scope>
</dependency>
3. Code
(1) Create the context
(2) Perform processing (load data)
(3) Close the connection
package MoocSparkSQL

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * SQLContext usage
 */
object SQLContextApp {

  def main(args: Array[String]): Unit = {
    val path = args(0)

    // 1) Create the context
    val sparkConf = new SparkConf()
      .setAppName("SQLContextApp").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new SQLContext(sc)

    // 2) Perform processing
    val people = sqlContext.read.format("json").load(path)
    people.printSchema()
    people.show()

    // 3) Release resources: stop the SparkContext
    sc.stop()
  }
}
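Beyond the DataFrame API, the SQLContext can also execute SQL directly. A minimal sketch, assuming the JSON file at path contains name and age fields; these lines would go before sc.stop() in the code above (the view name "people" is arbitrary):

    // Hypothetical follow-up: register the DataFrame as a temporary view, then query it with SQL
    people.createOrReplaceTempView("people")
    sqlContext.sql("SELECT name, age FROM people").show()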
II. HiveContext
1. Applicable Spark version: Spark 1.x
2. Prerequisites:
(1) A Hive installation is not required
(2) hive-site.xml is required
Copy hive-site.xml into the project's resources directory: ...\src\main\resources\hive-site.xml
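A minimal hive-site.xml sketch, assuming a Hive metastore service is reachable at thrift://localhost:9083 (this URI is an assumption; adjust it to your environment):

<configuration>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
  </property>
</configuration>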
3. Add the dependency:
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-hive_2.11</artifactId>
  <version>2.1.0</version>
</dependency>
4. Code
package SparkSQL

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

/**
 * HiveContext usage
 */
object HiveContextApp {

  def main(args: Array[String]): Unit = {
    // 1) Create the context
    val sparkConf = new SparkConf()
      // in production, comment out the line below and pass these settings to spark-submit instead
      .setAppName("HiveContextApp").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)
    val hiveContext = new HiveContext(sc)

    // 2) Perform processing: query the Hive table emp
    hiveContext.table("emp").show()

    // 3) Close the context
    sc.stop()
  }
}
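The table() call above can equivalently be written as SQL; a minimal sketch, assuming the Hive table emp exists (this line would replace the table() call in step 2):

    // equivalent SQL form of hiveContext.table("emp")
    hiveContext.sql("SELECT * FROM emp").show()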
III. SparkSession
1. Applicable Spark version: Spark 2.x
2. Code
package SparkSQL

import org.apache.spark.sql.SparkSession

/**
 * SparkSession usage
 */
object SparkSessionApp {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("SparkSessionApp")
      .master("local[2]")
      .getOrCreate()

    val people = spark.read.json("datas/people.json")
    people.show()

    spark.stop()
  }
}
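In Spark 2.x, SparkSession replaces both SQLContext and HiveContext. To read Hive tables, enable Hive support on the builder; a minimal sketch, assuming hive-site.xml is on the classpath and the emp table from Section II exists (SparkSessionHiveApp is a hypothetical name):

package SparkSQL

import org.apache.spark.sql.SparkSession

object SparkSessionHiveApp {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("SparkSessionHiveApp")
      .master("local[2]")     // comment out in production, as above
      .enableHiveSupport()    // connects the session to the Hive metastore
      .getOrCreate()

    // query the (assumed) Hive table emp via SQL
    spark.sql("SELECT * FROM emp").show()

    spark.stop()
  }
}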