需求
id | name | age | school |
---|---|---|---|
1 | | 12 | abc |
2 | ww | 12 | abc |
3 | ee | 12 | abc |
===>
id | name | newCol |
---|---|---|
1 | | {"age": "12","school": "abc"} |
2 | ww | {"age": "12","school": "abc"} |
3 | ee | {"age": "12","school": "abc"} |
代码实现
思路:主要是to_json的使用
/**
 * Demonstrates packing several columns into one JSON string column with `to_json`.
 *
 * Reads tab-separated lines from "data.log", parses each line into a [[datainfo]] row and
 * prints the parsed table. It then derives `newCol` as the JSON encoding of the `age` and
 * `school` columns and drops those two source columns, so the printed result has the
 * columns id | name | newCol.
 *
 * @param spark active session used to read the file and to supply the implicit encoders
 * @throws IllegalArgumentException raised while parsing (when an action runs) if a line has
 *                                  fewer than four tab-separated fields
 */
private def testDF(spark: SparkSession): Unit = {
  import spark.implicits._

  val data = spark.sparkContext.textFile("data.log")
  val dataDF = data
    .map { line =>
      // A limit of -1 keeps trailing empty fields. The default split silently drops them,
      // so a line whose last column (school) is empty would lose its fourth field.
      line.split("\t", -1) match {
        case Array(id, name, age, school, _*) =>
          datainfo(id.toInt, name, age.toInt, school)
        case fields =>
          throw new IllegalArgumentException(
            s"Expected at least 4 tab-separated fields but found ${fields.length}: $line")
      }
    }
    .toDF()
  dataDF.show()

  // Convert to JSON: pack age and school into a struct, serialize it into newCol, then
  // remove the original columns so only id, name and newCol remain.
  val finalDF = dataDF
    .withColumn("newCol", to_json(struct("age", "school")))
    .drop("age", "school")
  finalDF.show(false)
}
/**
 * One parsed input record; `testDF` builds an instance from the first four
 * tab-separated fields of each line.
 *
 * @param id     first field, parsed as an integer
 * @param name   second field, kept as text
 * @param age    third field, parsed as an integer
 * @param school fourth field, kept as text
 */
case class datainfo(
  id: Int,
  name: String,
  age: Int,
  school: String
)