package com.ws.sparksql
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql._
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark SQL, version 2.x
 */
object SparkSql2Demo {
  def main(args: Array[String]): Unit = {
    // Spark 2.x entry point: build a SparkSession instead of a SQLContext
    val session = SparkSession.builder().appName("SparkSql2Demo").master("local[*]").getOrCreate()
    // Read the raw student file (comma-separated: id,name,age,score)
    val dataRdd: RDD[String] = session.sparkContext.textFile("hdfs://hadoop-01:9000/student")
    // Parse each line into a Row matching the schema defined below
    val studentRowRdd: RDD[Row] = dataRdd.map(line => {
      val fieldArr: Array[String] = line.split(",")
      val id = fieldArr(0).toLong
      val name = fieldArr(1)
      val age = fieldArr(2).toInt
      val score = fieldArr(3).toInt
      Row(id, name, age, score)
    })
    // Define the table structure: field name, type, and nullability for each column
    val schema: StructType = StructType(List(
      StructField("id", LongType, true),
      StructField("name", StringType, true),
      StructField("age", IntegerType, true),
      StructField("score", IntegerType, true)
    ))
    // Combine the Row RDD with the schema to get a DataFrame
    val dataFrame: DataFrame = session.createDataFrame(studentRowRdd, schema)
    // Import implicits to enable the $"column" syntax
    import session.implicits._
    val result: Dataset[Row] = dataFrame.where($"score" > 100).orderBy($"score".desc, $"age".asc)
    result.show()
    session.stop()
  }
}
Result:
+---+----+---+-----+
| id|name|age|score|
+---+----+---+-----+
| 1| 张三| 18| 150|
| 2| 李四| 19| 150|
+---+----+---+-----+
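
As an alternative to building the StructType by hand, Spark can infer the schema by reflection from a case class. A minimal sketch, assuming the same comma-separated student file; the Student case class and the object name here are illustrative, not part of the original code:

package com.ws.sparksql

import org.apache.spark.sql.{DataFrame, SparkSession}

object SparkSql2CaseClassDemo {

  // The case class fields define both the column names and the column types
  case class Student(id: Long, name: String, age: Int, score: Int)

  def main(args: Array[String]): Unit = {
    val session = SparkSession.builder().appName("SparkSql2CaseClassDemo").master("local[*]").getOrCreate()
    import session.implicits._
    // Same parsing as before, but mapping into the case class instead of Row
    val dataFrame: DataFrame = session.sparkContext
      .textFile("hdfs://hadoop-01:9000/student")
      .map(line => {
        val f = line.split(",")
        Student(f(0).toLong, f(1), f(2).toInt, f(3).toInt)
      })
      .toDF() // schema inferred from Student by reflection
    dataFrame.where($"score" > 100).orderBy($"score".desc, $"age".asc).show()
    session.stop()
  }
}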
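
The same filter-and-sort can also be written as SQL text by registering the DataFrame as a temporary view. A short sketch, reusing the dataFrame and session from the example above:

    // Register the DataFrame as a temporary view so it can be queried with SQL text
    dataFrame.createOrReplaceTempView("student")
    val sqlResult: DataFrame = session.sql(
      "SELECT id, name, age, score FROM student WHERE score > 100 ORDER BY score DESC, age ASC")
    sqlResult.show()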