Hive窗口函数计算累积值一例

需求描述:
时段:6月14日18点~6月18日
人群A:时段内赠送过【小甜粽,大甜粽,小咸粽,大咸粽】其中任意礼物的观众(请注意,可能小甜粽或者小咸粽会叫:甜粽或咸粽)
人群B:时段内收到过【小甜粽,大甜粽,小咸粽,大咸粽】其中任意礼物的主播(请注意,可能小甜粽或者小咸粽会叫:甜粽或咸粽)
时段内,每个小时的如下数据:
阵营注解:截止统计时刻,主播收到的甜系礼物价值多,则该主播为“甜阵营”,主播收到的咸系礼物价值多,则该主播为“咸阵营”
date,hour,活动开始至统计小时累计甜阵营主播人数,活动开始至统计小时累计甜阵营主播收甜礼物价值,活动开始至统计小时累计咸阵营主播人数,活动开始至统计小时累计咸阵营主播收咸礼物价值
1、起初的代码:
-- Initial version: per-room running totals by gift type, rolled up per hour and camp.
-- gift_type 101 feeds the salty total, gift_type 102 feeds the sweet total.
-- Known limitation (kept on purpose, this is the "before" script): a room only has a
-- row for hours in which it actually received gifts, so its earlier running totals are
-- absent from every hour in which it received nothing.
with hourly_point as (
    -- Points per room, gift type and hour bucket. The bucket is the first 13 characters
    -- of created_time (presumably 'yyyy-MM-dd HH' -- confirm the column format).
    select
        room_id,
        gift_type,
        substr(created_time, 1, 13) as hours,
        sum(point) as point
    from xxx_event_rice_pudding_detail_201806
    group by
        room_id,
        gift_type,
        substr(created_time, 1, 13)
),
running_point as (
    -- hourly_point is already unique per (room_id, gift_type, hours), so the running
    -- sum is taken directly; no extra GROUP BY is needed around the window function.
    select
        room_id,
        gift_type,
        hours,
        sum(point) over (
            partition by room_id, gift_type
            order by hours asc
            rows between unbounded preceding and current row
        ) as accumulate_point
    from hourly_point
),
tab_accumulate_point as (
    -- Pivot the two gift types into one row per room and hour.
    select
        room_id,
        hours,
        sum(case when gift_type = 101 then accumulate_point else 0 end) as accumulate_salty_point,
        sum(case when gift_type = 102 then accumulate_point else 0 end) as accumulate_sweet_point
    from running_point
    group by
        room_id,
        hours
),
room_camp as (
    -- Label each room-hour once: larger salty total -> 'salty', larger sweet total ->
    -- 'sweet', anything else (equal totals) -> 'ss'.
    select
        room_id,
        hours,
        accumulate_salty_point,
        accumulate_sweet_point,
        case
            when accumulate_salty_point > accumulate_sweet_point then 'salty'
            when accumulate_salty_point < accumulate_sweet_point then 'sweet'
            else 'ss'
        end as camp
    from tab_accumulate_point
)
select
    hours,
    camp as type,
    count(distinct room_id) as room_cnt,
    sum(accumulate_salty_point) as accumulate_salty_point,
    sum(accumulate_sweet_point) as accumulate_sweet_point
from room_camp
group by
    hours,
    camp
;
说明:脚本先计算出每个房间的按类型每小时的累积值,然后再根据小时及类型进行汇总。
2、改进后的脚本:
起初的脚本累积值的计算并没有错误,但依此会导致一个问题:当某小时此房间没有进行直播收礼时,其以往的所有累积值将不会在此小时内统计出来。所以,需要在明细层补齐各房间各小时各类型数据,然后再进行累积数据的计算。
为使代码更清晰、便于及时发现问题,采用临时表的形式进行数据的转储和计算。
-- Incomplete cumulative values: one row per room and hour, but only for hours in which
-- the room actually has detail rows. gift_type 101 feeds the salty total, 102 the sweet.
drop table if exists xxxxx_liuyl_accumulate_point;
create table xxxxx_liuyl_accumulate_point as
select
    room_id,
    hours,
    sum(case when gift_type = 101 then accumulate_point else 0 end) as accumulate_salty_point,
    sum(case when gift_type = 102 then accumulate_point else 0 end) as accumulate_sweet_point
from (
    -- Running total per room and gift type. The input is already unique per
    -- (room_id, gift_type, hours), so no GROUP BY is needed around the window function.
    select
        room_id,
        gift_type,
        hours,
        sum(point) over (
            partition by room_id, gift_type
            order by hours asc
            rows between unbounded preceding and current row
        ) as accumulate_point
    from (
        -- Points per room, gift type and hour bucket (first 13 characters of created_time).
        select
            room_id,
            gift_type,
            substr(created_time, 1, 13) as hours,
            sum(point) as point
        from xxx_event_rice_pudding_detail_201806
        group by
            room_id,
            gift_type,
            substr(created_time, 1, 13)
    ) hourly_point
) running_point
group by
    room_id,
    hours;
-- Build the complete room x hour x gift-type grid from xxxxx_liuyl_accumulate_point.
-- The hour list is taken from hours that appear in that table, so an hour in which no
-- room has any row is still absent from the grid.
-- The three-way cartesian product is intentional and is spelled as CROSS JOIN; depending
-- on the cluster's strict-mode settings, cartesian products may need to be permitted.
-- No ORDER BY: a global sort in a CTAS runs through a single reducer, is rejected in
-- strict mode without LIMIT, and the stored table has no guaranteed row order anyway.
drop table if exists xxxxx_ex_room_hours;
create table xxxxx_ex_room_hours as
select
    rooms.room_id,
    hrs.hours,
    gift_types.gift_type
from (
    select distinct room_id
    from xxxxx_liuyl_accumulate_point
) rooms
cross join (
    select hours
    from xxxxx_liuyl_accumulate_point
    group by hours
) hrs
cross join (
    select 101 as gift_type
    union all
    select 102 as gift_type
) gift_types;
-- Hourly points on the complete grid: every (room, hour, gift type) combination from
-- xxxxx_ex_room_hours gets a row, with 0 points where the detail table has nothing.
drop table if exists xxxxx_allhours_roompoint;
create table xxxxx_allhours_roompoint as
select
    grid.room_id,
    grid.hours,
    grid.gift_type,
    coalesce(hourly.point, 0) as point
from xxxxx_ex_room_hours grid
left join (
    -- Points per room, gift type and hour bucket (first 13 characters of created_time).
    select
        room_id,
        gift_type,
        substr(created_time, 1, 13) as hours,
        sum(point) as point
    from xxx_event_rice_pudding_detail_201806
    group by
        room_id,
        gift_type,
        substr(created_time, 1, 13)
) hourly
    on  grid.room_id   = hourly.room_id
    and grid.hours     = hourly.hours
    and grid.gift_type = hourly.gift_type;
-- Cumulative values on the complete grid, pivoted by gift type:
-- gift_type 101 feeds the salty total, gift_type 102 feeds the sweet total.
drop table if exists xxxxx_liuyl_accumulate_point_all;
create table xxxxx_liuyl_accumulate_point_all as
select
    room_id,
    hours,
    sum(case when gift_type = 101 then accumulate_point else 0 end) as accumulate_salty_point,
    sum(case when gift_type = 102 then accumulate_point else 0 end) as accumulate_sweet_point
from (
    -- Running total per room and gift type. xxxxx_allhours_roompoint holds one row per
    -- (room_id, hours, gift_type), so no GROUP BY is needed around the window function.
    select
        room_id,
        gift_type,
        hours,
        sum(point) over (
            partition by room_id, gift_type
            order by hours asc
            rows between unbounded preceding and current row
        ) as accumulate_point
    from xxxxx_allhours_roompoint
) running_point
group by
    room_id,
    hours;
-- Final export: per hour and camp, the number of rooms and the summed cumulative totals.
-- A room-hour is 'salty' when its salty total is larger, 'sweet' when its sweet total is
-- larger, and 'ss' otherwise (equal totals, which includes rooms still at 0 / 0).
select
    hours,
    camp as type,
    count(distinct room_id) as room_cnt,
    sum(accumulate_salty_point) as accumulate_salty_point,
    sum(accumulate_sweet_point) as accumulate_sweet_point
from (
    select
        room_id,
        hours,
        accumulate_salty_point,
        accumulate_sweet_point,
        case
            when accumulate_salty_point > accumulate_sweet_point then 'salty'
            when accumulate_salty_point < accumulate_sweet_point then 'sweet'
            else 'ss'
        end as camp
    from xxxxx_liuyl_accumulate_point_all
) room_camp
group by
    hours,
    camp
;
3、总结
对累积值最后按类型的汇总,要保证所计算出来的累积值在每一个小时及每种类型上都有记录;即使没有,也要添加一条用0表示的记录。所以,涉及到造数据的过程。
累积值计算代码:
-- Running total of point per room and gift type, ordered by hour. The explicit ROWS
-- frame makes each row's total cover everything from the first hour up to that row.
select
    room_id,
    gift_type,
    hours,
    point,
    sum(point) over (
        partition by room_id, gift_type
        order by hours asc
        rows between unbounded preceding and current row
    ) as accumulate_point
from xxxxx_allhours_roompoint
完全数据模板的制造:
-- Complete room x hour x gift-type grid. The cartesian product is intentional and is
-- spelled as CROSS JOIN. No ORDER BY: a global sort runs through a single reducer and is
-- rejected in strict mode without LIMIT; sort in the consuming query if order matters.
select
    rooms.room_id,
    hrs.hours,
    gift_types.gift_type
from (
    select distinct room_id
    from xxxxx_liuyl_accumulate_point
) rooms
cross join (
    select hours
    from xxxxx_liuyl_accumulate_point
    group by hours
) hrs
cross join (
    select 101 as gift_type
    union all
    select 102 as gift_type
) gift_types

猜你喜欢

转载自blog.csdn.net/babyfish13/article/details/80751557