业务数据方面,DWD层搭建的主要注意点在于维度建模。
评价事实表(事务型事实表)
1)建表语句
-- Review (comment) fact table, partitioned by dt and stored as LZO-compressed Parquet.
-- The table is dropped and recreated each time this script runs. Because it is
-- EXTERNAL, Hive by default leaves the files under LOCATION in place on DROP.
DROP TABLE IF EXISTS dwd_comment_info;

CREATE EXTERNAL TABLE dwd_comment_info
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户ID',
    `sku_id`      STRING COMMENT '商品sku',
    `spu_id`      STRING COMMENT '商品spu',
    `order_id`    STRING COMMENT '订单ID',
    `appraise`    STRING COMMENT '评价(好评、中评、差评、默认评价)',
    `create_time` STRING COMMENT '评价时间'
)
    COMMENT '评价事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS PARQUET
    LOCATION '/warehouse/gmall/dwd/dwd_comment_info/'
    TBLPROPERTIES ("parquet.compression" = "lzo");
2)分区规划
3)数据装载
(1)首日装载
-- First-day load of dwd_comment_info.
-- Reads the ods_comment_info partition dt=2020-06-14 and writes every row into the
-- dwd partition named after the calendar day of its create_time.
-- The partition spec is fully dynamic, which Hive rejects while
-- hive.exec.dynamic.partition.mode is strict (the default in many Hive releases),
-- so the session is switched to nonstrict before the insert.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_comment_info partition (dt)
select
    id,
    user_id,
    sku_id,
    spu_id,
    order_id,
    appraise,
    create_time,
    -- Dynamic partition value: bound to dt by position, so it must stay the last column.
    date_format(create_time,'yyyy-MM-dd')
from ods_comment_info
where dt='2020-06-14';
(2)每日装载
-- Daily load of dwd_comment_info.
-- Copies the ods_comment_info partition dt=2020-06-15 into the dwd partition of the
-- same date. The target partition is static and is overwritten as a whole.
INSERT OVERWRITE TABLE dwd_comment_info PARTITION (dt = '2020-06-15')
SELECT
    ci.id,
    ci.user_id,
    ci.sku_id,
    ci.spu_id,
    ci.order_id,
    ci.appraise,
    ci.create_time
FROM ods_comment_info ci
WHERE ci.dt = '2020-06-15';
订单明细事实表(事务型事实表)
1)建表语句
-- Order detail fact table, partitioned by dt and stored as LZO-compressed Parquet.
-- The table is dropped and recreated each time this script runs. Because it is
-- EXTERNAL, Hive by default leaves the files under LOCATION in place on DROP.
-- NOTE(review): the COMMENT on id reads as an order number, yet the loads in this
-- file fill id from ods_order_detail.id (the detail row), and order_id holds the
-- order reference. Confirm the intended label before changing it.
DROP TABLE IF EXISTS dwd_order_detail;
CREATE EXTERNAL TABLE dwd_order_detail (
    `id` STRING COMMENT '订单编号',
    `order_id` STRING COMMENT '订单号',
    `user_id` STRING COMMENT '用户id',
    `sku_id` STRING COMMENT 'sku商品id',
    `province_id` STRING COMMENT '省份ID',
    `activity_id` STRING COMMENT '活动ID',
    `activity_rule_id` STRING COMMENT '活动规则ID',
    `coupon_id` STRING COMMENT '优惠券ID',
    `create_time` STRING COMMENT '创建时间',
    `source_type` STRING COMMENT '来源类型',
    `source_id` STRING COMMENT '来源编号',
    `sku_num` BIGINT COMMENT '商品数量',
    `original_amount` DECIMAL(16,2) COMMENT '原始价格',
    `split_activity_amount` DECIMAL(16,2) COMMENT '活动优惠分摊',
    `split_coupon_amount` DECIMAL(16,2) COMMENT '优惠券优惠分摊',
    `split_final_amount` DECIMAL(16,2) COMMENT '最终价格分摊'
) COMMENT '订单明细事实表'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dwd/dwd_order_detail/'
TBLPROPERTIES ("parquet.compression"="lzo");
2)分区规划
3)数据装载
(1)首日装载
-- First-day load of dwd_order_detail.
-- Driving set: ods_order_detail partition dt=2020-06-14. Each detail row is enriched with
--   * user_id and province_id from ods_order_info (matched on order id),
--   * activity_id and activity_rule_id from ods_order_detail_activity,
--   * coupon_id from ods_order_detail_coupon,
-- all via left joins, so a detail row without a match is kept with NULLs in those columns.
-- Rows are routed to the dwd partition named after the calendar day of od.create_time.
-- The partition spec is fully dynamic, which Hive rejects while
-- hive.exec.dynamic.partition.mode is strict, so the session is switched to nonstrict first.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_order_detail partition(dt)
select
    od.id,
    od.order_id,
    oi.user_id,
    od.sku_id,
    oi.province_id,
    oda.activity_id,
    oda.activity_rule_id,
    odc.coupon_id,
    od.create_time,
    od.source_type,
    od.source_id,
    od.sku_num,
    -- original_amount: unit price times quantity
    od.order_price*od.sku_num,
    od.split_activity_amount,
    od.split_coupon_amount,
    od.split_final_amount,
    -- Dynamic partition value: bound to dt by position, so it must stay the last column.
    date_format(od.create_time,'yyyy-MM-dd')
from
(
    -- Only the columns consumed by the outer select are read (replaces select *).
    select
        id,
        order_id,
        sku_id,
        create_time,
        source_type,
        source_id,
        sku_num,
        order_price,
        split_activity_amount,
        split_coupon_amount,
        split_final_amount
    from ods_order_detail
    where dt='2020-06-14'
)od
left join
(
    select
        id,
        user_id,
        province_id
    from ods_order_info
    where dt='2020-06-14'
)oi
on od.order_id=oi.id
left join
(
    select
        order_detail_id,
        activity_id,
        activity_rule_id
    from ods_order_detail_activity
    where dt='2020-06-14'
)oda
on od.id=oda.order_detail_id
left join
(
    select
        order_detail_id,
        coupon_id
    from ods_order_detail_coupon
    where dt='2020-06-14'
)odc
on od.id=odc.order_detail_id;
(2)每日装载
-- Daily load of dwd_order_detail.
-- Driving set: ods_order_detail partition dt=2020-06-15. Each detail row is enriched with
--   * user_id and province_id from ods_order_info (matched on order id),
--   * activity_id and activity_rule_id from ods_order_detail_activity,
--   * coupon_id from ods_order_detail_coupon,
-- all via left joins, so a detail row without a match is kept with NULLs in those columns.
-- The result overwrites the static partition dt=2020-06-15.
insert overwrite table dwd_order_detail partition(dt='2020-06-15')
select
    od.id,
    od.order_id,
    oi.user_id,
    od.sku_id,
    oi.province_id,
    oda.activity_id,
    oda.activity_rule_id,
    odc.coupon_id,
    od.create_time,
    od.source_type,
    od.source_id,
    od.sku_num,
    -- original_amount: unit price times quantity
    od.order_price*od.sku_num,
    od.split_activity_amount,
    od.split_coupon_amount,
    od.split_final_amount
from
(
    -- Only the columns consumed by the outer select are read (replaces select *).
    select
        id,
        order_id,
        sku_id,
        create_time,
        source_type,
        source_id,
        sku_num,
        order_price,
        split_activity_amount,
        split_coupon_amount,
        split_final_amount
    from ods_order_detail
    where dt='2020-06-15'
)od
left join
(
    select
        id,
        user_id,
        province_id
    from ods_order_info
    where dt='2020-06-15'
)oi
on od.order_id=oi.id
left join
(
    select
        order_detail_id,
        activity_id,
        activity_rule_id
    from ods_order_detail_activity
    where dt='2020-06-15'
)oda
on od.id=oda.order_detail_id
left join
(
    select
        order_detail_id,
        coupon_id
    from ods_order_detail_coupon
    where dt='2020-06-15'
)odc
on od.id=odc.order_detail_id;
退单事实表(事务型事实表)
1)建表语句
-- Order refund fact table, partitioned by dt and stored as LZO-compressed Parquet.
-- The table is dropped and recreated each time this script runs. Because it is
-- EXTERNAL, Hive by default leaves the files under LOCATION in place on DROP.
DROP TABLE IF EXISTS dwd_order_refund_info;

CREATE EXTERNAL TABLE dwd_order_refund_info
(
    `id`                 STRING         COMMENT '编号',
    `user_id`            STRING         COMMENT '用户ID',
    `order_id`           STRING         COMMENT '订单ID',
    `sku_id`             STRING         COMMENT '商品ID',
    `province_id`        STRING         COMMENT '地区ID',
    `refund_type`        STRING         COMMENT '退单类型',
    `refund_num`         BIGINT         COMMENT '退单件数',
    `refund_amount`      DECIMAL(16,2)  COMMENT '退单金额',
    `refund_reason_type` STRING         COMMENT '退单原因类型',
    `create_time`        STRING         COMMENT '退单时间'
)
    COMMENT '退单事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS PARQUET
    LOCATION '/warehouse/gmall/dwd/dwd_order_refund_info/'
    TBLPROPERTIES ("parquet.compression" = "lzo");
2)分区规划
3)数据装载
(1)首日装载
-- First-day load of dwd_order_refund_info.
-- Driving set: ods_order_refund_info partition dt=2020-06-14, enriched with province_id
-- from ods_order_info via a left join on the order id, so a refund row whose order is
-- not found is kept with a NULL province_id.
-- Rows are routed to the dwd partition named after the calendar day of ri.create_time.
-- The partition spec is fully dynamic, which Hive rejects while
-- hive.exec.dynamic.partition.mode is strict, so the session is switched to nonstrict first.
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_order_refund_info partition(dt)
select
    ri.id,
    ri.user_id,
    ri.order_id,
    ri.sku_id,
    oi.province_id,
    ri.refund_type,
    ri.refund_num,
    ri.refund_amount,
    ri.refund_reason_type,
    ri.create_time,
    -- Dynamic partition value: bound to dt by position, so it must stay the last column.
    date_format(ri.create_time,'yyyy-MM-dd')
from
(
    -- Only the columns consumed by the outer select are read (replaces select *).
    select
        id,
        user_id,
        order_id,
        sku_id,
        refund_type,
        refund_num,
        refund_amount,
        refund_reason_type,
        create_time
    from ods_order_refund_info
    where dt='2020-06-14'
)ri
left join
(
    select id,province_id from ods_order_info where dt='2020-06-14'
)oi
on ri.order_id=oi.id;
(2)每日装载
-- Daily load of dwd_order_refund_info.
-- Driving set: ods_order_refund_info partition dt=2020-06-15, enriched with province_id
-- from ods_order_info via a left join on the order id, so a refund row whose order is
-- not found is kept with a NULL province_id.
-- The result overwrites the static partition dt=2020-06-15.
insert overwrite table dwd_order_refund_info partition(dt='2020-06-15')
select
    ri.id,
    ri.user_id,
    ri.order_id,
    ri.sku_id,
    oi.province_id,
    ri.refund_type,
    ri.refund_num,
    ri.refund_amount,
    ri.refund_reason_type,
    ri.create_time
from
(
    -- Only the columns consumed by the outer select are read (replaces select *).
    select
        id,
        user_id,
        order_id,
        sku_id,
        refund_type,
        refund_num,
        refund_amount,
        refund_reason_type,
        create_time
    from ods_order_refund_info
    where dt='2020-06-15'
)ri
left join
(
    select id,province_id from ods_order_info where dt='2020-06-15'
)oi
on ri.order_id=oi.id;
4)查询加载结果
加购事实表(周期型快照事实表,每日快照)
1)建表语句
-- Cart (add-to-cart) fact table, partitioned by dt and stored as LZO-compressed Parquet.
-- The table is dropped and recreated each time this script runs. Because it is
-- EXTERNAL, Hive by default leaves the files under LOCATION in place on DROP.
DROP TABLE IF EXISTS dwd_cart_info;

CREATE EXTERNAL TABLE dwd_cart_info
(
    `id`           STRING         COMMENT '编号',
    `user_id`      STRING         COMMENT '用户ID',
    `sku_id`       STRING         COMMENT '商品ID',
    `source_type`  STRING         COMMENT '来源类型',
    `source_id`    STRING         COMMENT '来源编号',
    `cart_price`   DECIMAL(16,2)  COMMENT '加入购物车时的价格',
    `is_ordered`   STRING         COMMENT '是否已下单',
    `create_time`  STRING         COMMENT '创建时间',
    `operate_time` STRING         COMMENT '修改时间',
    `order_time`   STRING         COMMENT '下单时间',
    `sku_num`      BIGINT         COMMENT '加购数量'
)
    COMMENT '加购事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS PARQUET
    LOCATION '/warehouse/gmall/dwd/dwd_cart_info/'
    TBLPROPERTIES ("parquet.compression" = "lzo");
2)分区规划
3)数据装载
(1)首日装载
-- First-day load of dwd_cart_info.
-- Copies the ods_cart_info partition dt=2020-06-14 column for column into the dwd
-- partition of the same date. The target partition is static and is overwritten as a whole.
INSERT OVERWRITE TABLE dwd_cart_info PARTITION (dt = '2020-06-14')
SELECT
    c.id,
    c.user_id,
    c.sku_id,
    c.source_type,
    c.source_id,
    c.cart_price,
    c.is_ordered,
    c.create_time,
    c.operate_time,
    c.order_time,
    c.sku_num
FROM ods_cart_info c
WHERE c.dt = '2020-06-14';
(2)每日装载
-- Daily load of dwd_cart_info.
-- Copies the ods_cart_info partition dt=2020-06-15 column for column into the dwd
-- partition of the same date. The target partition is static and is overwritten as a whole.
INSERT OVERWRITE TABLE dwd_cart_info PARTITION (dt = '2020-06-15')
SELECT
    c.id,
    c.user_id,
    c.sku_id,
    c.source_type,
    c.source_id,
    c.cart_price,
    c.is_ordered,
    c.create_time,
    c.operate_time,
    c.order_time,
    c.sku_num
FROM ods_cart_info c
WHERE c.dt = '2020-06-15';
收藏事实表(周期型快照事实表,每日快照)
1)建表语句
-- Favorites fact table, partitioned by dt and stored as LZO-compressed Parquet.
-- The table is dropped and recreated each time this script runs. Because it is
-- EXTERNAL, Hive by default leaves the files under LOCATION in place on DROP.
DROP TABLE IF EXISTS dwd_favor_info;

CREATE EXTERNAL TABLE dwd_favor_info
(
    `id`          STRING COMMENT '编号',
    `user_id`     STRING COMMENT '用户id',
    `sku_id`      STRING COMMENT 'skuid',
    `spu_id`      STRING COMMENT 'spuid',
    `is_cancel`   STRING COMMENT '是否取消',
    `create_time` STRING COMMENT '收藏时间',
    `cancel_time` STRING COMMENT '取消时间'
)
    COMMENT '收藏事实表'
    PARTITIONED BY (`dt` STRING)
    STORED AS PARQUET
    LOCATION '/warehouse/gmall/dwd/dwd_favor_info/'
    TBLPROPERTIES ("parquet.compression" = "lzo");
2)分区规划
3)数据装载
(1)首日装载
-- First-day load of dwd_favor_info.
-- Copies the ods_favor_info partition dt=2020-06-14 column for column into the dwd
-- partition of the same date. The target partition is static and is overwritten as a whole.
INSERT OVERWRITE TABLE dwd_favor_info PARTITION (dt = '2020-06-14')
SELECT
    f.id,
    f.user_id,
    f.sku_id,
    f.spu_id,
    f.is_cancel,
    f.create_time,
    f.cancel_time
FROM ods_favor_info f
WHERE f.dt = '2020-06-14';
(2)每日装载
-- Daily load of dwd_favor_info.
-- Copies the ods_favor_info partition dt=2020-06-15 column for column into the dwd
-- partition of the same date. The target partition is static and is overwritten as a whole.
INSERT OVERWRITE TABLE dwd_favor_info PARTITION (dt = '2020-06-15')
SELECT
    f.id,
    f.user_id,
    f.sku_id,
    f.spu_id,
    f.is_cancel,
    f.create_time,
    f.cancel_time
FROM ods_favor_info f
WHERE f.dt = '2020-06-15';