查看每个城市每天完成订单数,取消订单数,下单订单数,下单用户数
select
city_id,
sum(case when order_status=5 then 1 else 0 end) as cnt_ord_succ_d,
sum(case when order_status=3 then 1 else 0 end) as cnt_ord_cacel_d,
sum(1) as cnt_ord_d,
count(distinct CUST_ID) as cnt_ord_user //用户ID去重
FROM
dw.dw_order
WHERE
dt='${day_01}'
group by
city_id;
获取一天所在的自然周
(但是这样比较费时间,如果有时间维表,直接用时间维表的话更方便,但是用时间维表的时候,要清楚每一个字段的含义和用法,这样在使用的时候,可以直接避免冗余的操作)
select
concat(
date_add(
statis_day,
-pmod(
datediff( --ds 是星期几
statis_day,
'20100104'
),
7
)
),
'~'
,date_add(
statis_day,
6-pmod(
datediff( --de 是星期几
statis_day,
'20100104'
),
7
)
)
) as time_stage,
from
t_union_cnt t
近六个月的活跃老用户(近六个月,可以直接根据现在时间,把时间写死)
月份可以用substr函数取,时间数据类型的前六位,即年和月
with target_user as
(
select t1.six_month as six_month
,t1.qimei as qimei
,guid
from
(
select substr(statis_day,0,6) as six_month
,qimei
,guid
from u_wsd::t_od_qidian_cl_daumap_hx_c1
where statis_day>=20200701
) t1
join
(
select substr(statis_day,0,6) as six_month
,qimei
from yw_dh_app::t_ed_ywdh_hx_ai_wide_c1
where is_new = 0 and is_active > 0 and statis_day>=20200701
) t2
on t1.six_month = t2.six_month and t1.qimei = t2.qimei
group by t1.six_month, t1.qimei, guid
)
每日新访客
insert into table etl_user_new_day partition(day="20170101")
select
a.uid,a.commit_time,a.city,a.release_channel,a.app_ver_name
from etl_user_active_day a
left join
etl_history_user b
on a.uid=b.uid
where a.day="20170101" and b.uid is null;
年度销售商品的前十
在select查找的数据中,使用row_number函数,并且使用partition对时间分区,和使用order by对于商品进行排序,使得row_number的值就成为了这个时间区的商品销售名次
select
ref_host,
ref_host_cnts,
concat(month,hour,day),
row_number() over (partition by concat(month,hour,day) order by ref_host_cnts desc) as od
from
dw_ref_host_visit_cnts_h
商品复购率
需求列出的商品的7日,15日,30复购率,目的了解这几款商品的周期. 计算口径:当日购买部分商品的用户数/7日重复购买此商品的用户数。 每天查看每个城市每个商品当日购买用户数,7日15日30日复购率。
SELECT
t3.atdate AS cdate,
t3.city_id,
t3.goods_id,
COUNT(DISTINCT CASE WHEN days=0 THEN t3.cust_id END) AS cnt_buy_cust_d,
COUNT(DISTINCT CASE WHEN days>0 AND days<=7 THEN t3.cust_id END) AS cnt_buy_cust_7_d,
COUNT(DISTINCT CASE WHEN days>0 AND days<=15 THEN t3.cust_id END) AS cnt_buy_cust_15_d,
COUNT(DISTINCT CASE WHEN days>0 AND days<=30 THEN t3.cust_id END) AS cnt_buy_cust_30_d
FROM
(
SELECT
t1.atdate,
t1.city_id,
t1.cust_id,
t1.goods_id,
DATEDIFF(t2.atdate, t1.atdate) days
FROM (
//查出成功订单的。
SELECT
o.order_date AS atdate,
o.city_id,
o.cust_id,
og.goods_id
FROM
dw.dw_order o
INNER JOIN
dw.dw_order_goods og
ON
o.order_id=og.order_id AND
o.ORDER_STATUS = 5 AND
og.source_id=1 AND
o.dt = '20151010'
) t1
INNER JOIN (
//查看n天后成功订单的
SELECT
o.order_date AS atdate,
o.city_id,
o.cust_id,
og.goods_id,
og.goods_name
FROM
dw.dw_order o
INNER JOIN
dw.dw_order_goods og
ON
o.order_id=og.order_id AND
o.ORDER_STATUS = 5 AND
og.source_id=1
) t2
ON
t1.cust_id=t2.cust_id AND
t1.goods_id=t2.goods_id
) t3
GROUP BY
t3.atdate,
t3.city_id,
t3.goods_id;
Tips:
join的时候,可能会有对应的维度表,尽量join维度表这种小标
全量表
select cbid,site,authorid from t
where site in (2,3,22,23) --and (regexp_replace(substr(trim(updatetime),1,10),'-','') between 20200101 and 20200102)
and statis_day = 20201231 and status != 50 --(全量表,一天的数据就含有了以前的历史信息,用当天的数据即可,不要用<=)
and ext1=1 and auditstatus=19
回溯数据任务完成步骤
1先看表中有没有当天数据
2再看这个表是不是可回溯(每天增量还是全量)
3看是否有前置数据,(venus前面的组件)
4venus或者ide insert,加入数据