准备数据源文件 people.json(每行一个 JSON 对象),并上传到 HDFS 上:
{"name":"Michael"}
{"name":"Andy", "age":30}
{"name":"Justin", "age":19}
代码:
/**
 * Reads a line-delimited JSON file from HDFS and prints every parsed record.
 *
 * Each line of the input file is parsed independently with `JSON.parseFull`
 * (presumably `scala.util.parsing.json.JSON` -- confirm against the file's
 * imports), which yields `Some(value)` on success and `None` when the line
 * is not valid JSON.
 */
object jsonTest {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("jsonTest").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // Every line of the file is expected to hold one standalone JSON document.
      val jsonStrs: RDD[String] = sc.textFile("hdfs://hdp-1:9000/people.json")
      val result: RDD[Option[Any]] = jsonStrs.map(s => JSON.parseFull(s))

      // NOTE(review): println inside RDD.foreach runs on the executors; the
      // output shows up on this console only because the master is "local".
      result.foreach {
        // Type arguments are erased at runtime, so only the Map-ness can be
        // checked here; wildcards avoid an unchecked-pattern warning.
        case Some(map: Map[_, _]) => println(map)
        case None => println("Parsing failed")
        case other => println("Unknown data structure:" + other)
      }
    } finally {
      // Always release the Spark context, even if the job throws.
      sc.stop()
    }
  }
}
运行结果: