// 单机版 WordCount (standalone, single-machine word count)
/**
 * Single-machine word count over a fixed, in-memory list of lines.
 *
 * Demonstrates three equivalent ways of reducing grouped `(word, 1)` pairs to
 * per-word totals, then prints the totals ordered by descending count.
 *
 * @author ${user}
 */
object WordCount {

  /**
   * Counts the words of two hard-coded sample lines and prints the results
   * to standard output.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Sample input: each element is one line of space-separated words.
    val lines = List("hello tom hello jerry jerry", "hello kitty jerry kitty hello hello")

    // flatMap is map followed by flatten, i.e. it yields the same elements as
    // lines.map(_.split(" ")).flatten:
    // List(hello, tom, hello, jerry, jerry, hello, kitty, jerry, kitty, hello, hello)
    val words = lines.flatMap(_.split(" "))

    // Pair every word with an initial count of 1:
    // List((hello,1), (tom,1), (hello,1), (jerry,1), (jerry,1), ...)
    val wordsAndOne = words.map((_, 1))

    // Group the pairs by their first element (the word), e.g.
    // tom -> List((tom,1)), kitty -> List((kitty,1), (kitty,1)), ...
    val group = wordsAndOne.groupBy(_._1)

    // Variant 1: the total for a word is the number of pairs in its group.
    // Contains tom -> 1, kitty -> 2, jerry -> 3, hello -> 5 (printed entry order may differ).
    val res = group.map(x => (x._1, x._2.size))
    println(res)

    // Variant 2: same result as variant 1, transforming only the values.
    // A strict `map` is used instead of `mapValues`: `mapValues` is deprecated in
    // Scala 2.13, where it returns a lazy view that prints as "MapView(<not computed>)".
    val res1 = group.map { case (word, pairs) => word -> pairs.size }
    println(res1)

    // Variant 3: sum the second tuple element of every pair instead of counting the pairs.
    val res2 = group.map { case (word, pairs) => word -> pairs.foldLeft(0)(_ + _._2) }
    println("res2结果为:" + res2)

    // Order by descending count: List((hello,5), (jerry,3), (kitty,2), (tom,1))
    val finalRes = res.toList.sortBy { case (_, count) => -count }
    println(finalRes)
  }
}