/**
 * Demonstrates `RDD.sortBy` with a composite (multi-field) key:
 * rows are ordered by class, then by each of the three scores, all descending.
 *
 * Input row format: "seq,studentId,score1,score2,score3,class,department"
 * (some rows carry trailing whitespace in the department field, hence the trims).
 */
object TestSparkSortBy {
  // Imports placed inside the object so this block is self-contained;
  // hoist them to the top of the file if more members are added.
  import org.apache.spark.rdd.RDD
  import org.apache.spark.{SparkConf, SparkContext}

  // Raw CSV lines: seq, id, three scores, class (班), department (系).
  val testData = Seq(
    "1,111,68,69,90,1班,经济系 ",
    "2,112,73,80,96,1班,经济系 ",
    "3,113,90,74,75,1班,经济系 ",
    "4,114,89,94,93,1班,经济系 ",
    "5,115,99,93,89,1班,经济系 ",
    "6,121,96,74,79,2班,经济系 ",
    "7,122,89,86,85,2班,经济系 ",
    "8,123,70,78,61,2班,经济系 ",
    "9,124,76,70,76,2班,经济系 ",
    "10,211,89,93,60,1班,外语系",
    "11,212,76,83,75,1班,外语系",
    "12,213,71,94,90,1班,外语系",
    "13,214,94,94,66,1班,外语系",
    "14,215,84,82,73,1班,外语系",
    "15,216,85,74,93,1班,外语系",
    "16,221,77,99,61,2班,外语系",
    "17,222,80,78,96,2班,外语系",
    "18,223,79,74,96,2班,外语系",
    "19,224,75,80,78,2班,外语系",
    "20,225,82,85,63,2班,外语系",
    "21,226,82,99,63,2班,外语系"
  )

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("TestSparkSortBy").setMaster("local[4]")
    val sc: SparkContext = new SparkContext(conf)
    try {
      // Keep only well-formed rows (exactly 7 comma-separated fields);
      // split with limit -1 so trailing empty fields are not dropped.
      val studentsInfoRDD: RDD[(String, Int, Int, Int, String, String)] =
        sc.parallelize(testData, 4)
          .filter(_.split(",", -1).length == 7)
          .map { studentLine =>
            val fields: Array[String] = studentLine.split(",", -1)
            // Drop the leading sequence number; keep (id, s1, s2, s3, class, dept).
            (fields(1), fields(2).trim.toInt, fields(3).trim.toInt, fields(4).trim.toInt,
              fields(5).trim, fields(6).trim)
          }

      // Composite sort key: (class, score1, score2, score3), all descending —
      // tuple Ordering compares element by element, left to right.
      val afterSortByRDD: RDD[(String, Int, Int, Int, String, String)] =
        studentsInfoRDD.sortBy(t => (t._5, t._2, t._3, t._4), ascending = false)

      afterSortByRDD.collect().foreach(println)
    } finally {
      // Release the SparkContext even if an action above throws.
      sc.stop()
    }
  }
}
/*
 * 结果 (console output):
 * (121,96,74,79,2班,经济系)
 * (122,89,86,85,2班,经济系)
 * (226,82,99,63,2班,外语系)
 * (225,82,85,63,2班,外语系)
 * (222,80,78,96,2班,外语系)
 * (223,79,74,96,2班,外语系)
 * (221,77,99,61,2班,外语系)
 * (124,76,70,76,2班,经济系)
 * (224,75,80,78,2班,外语系)
 * (123,70,78,61,2班,经济系)
 * (115,99,93,89,1班,经济系)
 * (214,94,94,66,1班,外语系)
 * (113,90,74,75,1班,经济系)
 * (114,89,94,93,1班,经济系)
 * (211,89,93,60,1班,外语系)
 * (216,85,74,93,1班,外语系)
 * (215,84,82,73,1班,外语系)
 * (212,76,83,75,1班,外语系)
 * (112,73,80,96,1班,经济系)
 * (213,71,94,90,1班,外语系)
 * (111,68,69,90,1班,经济系)
 */