一、普通json格式
// Plain (flat) JSON: build a local SparkSession and load the file;
// each line of ./data/json is parsed as one JSON record.
val session = SparkSession
  .builder()
  .appName("sql")
  .master("local")
  .getOrCreate()

// DataFrameReader.json(path) is shorthand for format("json").load(path).
val df = session.read.json("./data/json")
二、读取嵌套的json格式文件
// Nested JSON: records contain an embedded object (the "infos" struct),
// which Spark infers as a struct column in the schema.
val spark = SparkSession
  .builder()
  .appName("nextjson")
  .master("local")
  .getOrCreate()

// shorthand for format("json").load(path)
val frame = spark.read.json("./data/NestJsonFile")
frame.printSchema()
frame.show(100)

// Register a temp view so nested fields can be addressed
// with dot syntax (infos.age, infos.gender) in SQL.
frame.createOrReplaceTempView("infosView")
spark.sql("select name,infos.age,score,infos.gender from infosView").show(100)
三、读取嵌套的 JSON 数组（jsonArray）文件
// Nested JSON array: each record's "scores" field is an array of objects.
val spark = SparkSession
  .builder()
  .master("local")
  .appName("jsonArray")
  .getOrCreate()

// shorthand for format("json").load(path)
val frame = spark.read.json("./data/jsonArrayFile")
frame.show(false)
frame.printSchema()

import org.apache.spark.sql.functions._
import spark.implicits._

// explode() emits one output row per element of the scores array,
// so every (name, age) pair is repeated once per score struct.
val transDF = frame
  .select(col("name"), col("age"), explode(col("scores")))
  .toDF("name", "age", "allScores")
transDF.show(100, false)
transDF.printSchema()

// Flatten the exploded struct column into one plain column per subject.
val result: DataFrame = transDF.select(
  col("name"),
  col("age"),
  col("allScores.yuwen") as "yuwen",
  col("allScores.shuxue") as "shuxue",
  col("allScores.yingyu") as "yingyu",
  col("allScores.dili") as "dili",
  col("allScores.shengwu") as "shengwu",
  col("allScores.huaxue") as "huaxue")
result.show(100, true)