版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/kaaosidao/article/details/86161875
1. rollup前后对比
前:
|pcode|event_date|duration|
+-----+----------+--------+
| 1|2018-09-02| 3|
| 1|2018-09-03| 2|
| 1|2018-09-04| 1|
| 2|2018-09-01| 4|
| 2|2018-09-02| 3|
+-----+----------+--------+
select pcode,event_date,sum(duration) as sum_duration
from
(
select '1' pcode,'2018-09-04' event_date,1 duration
union
select '1' pcode,'2018-09-03' event_date,2 duration
union
select '1' pcode,'2018-09-02' event_date,3 duration
union
select '2' pcode,'2018-09-02' event_date,3 duration
union
select '2' pcode,'2018-09-01' event_date,4 duration
)
group by pcode,event_date with rollup
order by pcode,event_date
rollup后:
/*+-----+----------+------------+
|pcode|event_date|sum_duration|
+-----+----------+------------+
| null| null| 13|
| 1| null| 6|
| 1|2018-09-02| 3|
| 1|2018-09-03| 2|
| 1|2018-09-04| 1|
| 2| null| 7|
| 2|2018-09-01| 4|
| 2|2018-09-02| 3|
+-----+----------+------------+*/
cube后:
select pcode,event_date,sum(duration) as sum_duration
from
(
select '1' pcode,'2018-09-04' event_date,1 duration
union
select '1' pcode,'2018-09-03' event_date,2 duration
union
select '1' pcode,'2018-09-02' event_date,3 duration
union
select '2' pcode,'2018-09-02' event_date,3 duration
union
select '2' pcode,'2018-09-01' event_date,4 duration
)
group by pcode,event_date with cube
order by pcode,event_date
pcode | event_date | sum_duration |
---|---|---|
null | null | 13 |
null | 2018-09-01 | 4 |
null | 2018-09-02 | 6 |
null | 2018-09-03 | 2 |
null | 2018-09-04 | 1 |
1 | null | 6 |
1 | 2018-09-02 | 3 |
1 | 2018-09-03 | 2 |
1 | 2018-09-04 | 1 |
2 | null | 7 |
2 | 2018-09-01 | 4 |
2 | 2018-09-02 | 3 |
2.代码实现:
object Rollup {

  def main(args: Array[String]): Unit = {
    val session = SparkUtil.getSpark("func", "local")
    f1(session)
    session.stop()
  }

  /**
   * Builds the small demo dataset as a SQL union of literal rows.
   *
   * Resulting frame:
   * {{{
   * +-----+----------+--------+
   * |pcode|event_date|duration|
   * +-----+----------+--------+
   * |    1|2018-09-02|       3|
   * |    1|2018-09-03|       2|
   * |    1|2018-09-04|       1|
   * |    2|2018-09-01|       4|
   * |    2|2018-09-02|       3|
   * +-----+----------+--------+
   * }}}
   *
   * @param spark the active SparkSession
   * @return a DataFrame with columns (pcode, event_date, duration)
   */
  def f(spark: SparkSession): DataFrame = {
    val sampleRowsSql =
      """
        |select '1' pcode,'2018-09-04' event_date,1 duration
        |union
        |select '1' pcode,'2018-09-03' event_date,2 duration
        |union
        |select '1' pcode,'2018-09-02' event_date,3 duration
        |union
        |select '2' pcode,'2018-09-02' event_date,3 duration
        |union
        |select '2' pcode,'2018-09-01' event_date,4 duration
      """.stripMargin
    spark.sql(sampleRowsSql)
  }

  /**
   * Demonstrates `rollup`: per-pcode subtotals plus a grand total row.
   *
   * Expected output:
   * {{{
   * +-----+----------+------------+
   * | null|      null|          13|
   * |    1|      null|           6|
   * |    1|2018-09-02|           3|
   * |    1|2018-09-03|           2|
   * |    1|2018-09-04|           1|
   * |    2|      null|           7|
   * |    2|2018-09-01|           4|
   * |    2|2018-09-02|           3|
   * +-----+----------+------------+
   * }}}
   *
   * @param spark the active SparkSession
   */
  def f1(spark: SparkSession): Unit = {
    import org.apache.spark.sql.functions.{col, sum}

    // Equivalent SQL implementation, kept for reference:
    //   f(spark).createOrReplaceTempView("tmp")
    //   spark.sql(
    //     """
    //       | select pcode,event_date,sum(duration) as sum_duration
    //       | from tmp
    //       | group by pcode,event_date with rollup
    //       | order by pcode,event_date
    //     """.stripMargin).show()

    // DataFrame-API implementation.
    val source = f(spark)
    val rolledUp = source
      .rollup(col("pcode"), col("event_date"))
      .agg(sum(col("duration")))
      .orderBy(col("pcode"), col("event_date"))
    rolledUp.show()
  }
}