数据文件（每行格式：用户ID,登录时间 yyyy-MM-dd HH:mm:ss）
a,2020-07-11 10:51:12
a,2020-07-11 11:05:00
a,2020-07-11 11:15:20
a,2020-07-11 11:25:05
a,2020-07-11 11:45:00
a,2020-07-11 11:55:36
a,2020-07-11 11:59:56
a,2020-07-11 12:35:12
a,2020-07-11 12:58:59
b,2020-07-11 14:05:00
b,2020-07-11 14:51:12
b,2020-07-11 15:15:20
b,2020-07-11 15:25:05
b,2020-07-11 16:45:00
b,2020-07-11 16:55:36
b,2020-07-11 16:59:56
b,2020-07-11 17:35:12
b,2020-07-11 17:58:59
Spark 代码实现（统计每个用户在任意一小时窗口内的最大登录次数）
package com.atguigu.homework
import java.text.SimpleDateFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.{
SparkConf, SparkContext}
/**
 * Reads "id,datetime" login records and, for each id, computes the maximum
 * number of logins falling inside any one-hour window that starts at one of
 * that id's own login instants.
 *
 * BUG FIX: the original ended with `.sorted.take(1)`, which keeps the
 * MINIMUM window count — trivially always 1, because the window starting at
 * the last login contains only itself. The intended answer is the maximum,
 * so we use `.max` instead.
 */
object LogInTime {
  // One hour expressed in milliseconds (was the magic number 3600000).
  private val OneHourMillis: Long = 3600000L

  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[4]").setAppName("test"))
    try {
      val rdd = sc.textFile("E:\\小时登录次数.txt")

      // Parse each "id,datetime" line into (id, epochMillis).
      // Split once per line instead of twice as in the original.
      val rdd2 = rdd.map { line =>
        val Array(id, datetime) = line.split(",", 2)
        // SimpleDateFormat is not thread-safe, so a fresh instance per record
        // is the safe choice inside a Spark task; cost is fine at this scale.
        val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
        (id, sdf.parse(datetime).getTime)
      }

      // Gather every user's login instants, sorted ascending.
      val rdd3: RDD[(String, List[Long])] = rdd2.groupByKey().map { case (id, times) =>
        (id, times.toList.sorted)
      }

      // For each login time t, count logins in [t, t + 1h); keep the maximum.
      // `count(pred)` replaces the original `filter(...).count(x => x == x)`,
      // whose predicate was a tautology standing in for `.size`.
      val rdd4 = rdd3.map { case (id, times) =>
        val maxInHour = times.map { start =>
          times.count(t => t >= start && t < start + OneHourMillis)
        }.max
        (id, maxInHour)
      }

      println(rdd4.collect().toList)
    } finally {
      sc.stop() // release Spark resources even if the job fails
    }
  }
}