DWD层构建思路

DWD层构建思路

ODS层数据是近源层,基本上不在这层做过大的格式或数据转换(一般仅对数据进行格式上的转换,比如说对json、URL等格式的日志文件,提取出列数据),因此在DWD层,我们会把数据做一定的整理和优化,并加入一些常用维度,比如说日期、地域等,下面案例简单介绍下DWD层

方向一:

创建分区表

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_myshops.dwd_logs partition(actdate) 
select userid,action,acttime,version,system,goodid,title,
price,shopid,mark,soft,soft_version,
from_unixtime(cast(acttime/1000 as bigint),'yyyyMMdd') as actdate 
from ods_myshops.ods_newlog;

方向二:

使用压缩格式,本例使用orc格式

-- 导入ods_orders dwd_myshops.dwd_orders
create table dwd_myshops.dwd_orders(
orderid int,
orderdate string,
userid int,
orderstatus int,
orderno string,
goodid int,
buynum int
)
row format delimited fields terminated by ','
stored as orc;
insert overwrite table dwd_myshops.dwd_orders select * from ods_myshops.ods_orders;

方向三:

增加常用维度,比如时间、地域、职业、学历等维度

-- 创建日期表并导入一年日期
create table dwd_myshops.dwd_dates(
id int,
fdate string,
years int,
months int,
dates int,
week int,
quarter int
)
row format delimited
fields terminated by ','
stored as orc;

本例完整代码如下:

-- 创建dwd层数据库
drop database if exists dwd_myshops cascade;
create database dwd_myshops;
-- 创建dwd用户行为表,动态分区
create table dwd_myshops.dwd_logs(
userid int,
action string,
acttime string,
version string,
system string,
goodid string,
title string,
price string,
shopid string,
mark string,
soft string,
soft_version string
)
partitioned by (actdate string)
row format delimited fields terminated by ','
stored as orc;

-- 开启动态分区
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_myshops.dwd_logs partition(actdate) 
select userid,action,acttime,version,system,goodid,title,
price,shopid,mark,soft,soft_version,
from_unixtime(cast(acttime/1000 as bigint),'yyyyMM') as actdate 
from ods_myshops.ods_newlog;

-- 导入ods_orders dwd_myshops.dwd_orders
create table dwd_myshops.dwd_orders(
orderid int,
orderdate string,
userid int,
orderstatus int,
orderno string,
goodid int,
buynum int
)
row format delimited fields terminated by ','
stored as orc;
insert overwrite table dwd_myshops.dwd_orders select * from ods_myshops.ods_orders;

--导入商品表
create table dwd_myshops.dwd_goods(
goodid int,
title string,
price double,
typeid int,
issale int,
score int,
shopid int,
paytype int,
delailtabname string
)
row format delimited fields terminated by ','
stored as orc;
insert overwrite table dwd_myshops.dwd_goods select * from ods_myshops.ods_goods;

-- 导入用户表
create table dwd_myshops.dwd_userinfos(
userid int,
username string,
name string,
email string,
password string,
cardno string,
gender int,
localid int,
mobile string,
qq string,
wechat string
)
row format delimited fields terminated by ','
stored as orc;
insert overwrite table dwd_myshops.dwd_userinfos select * from ods_myshops.ods_userinfos;

-- 创建日期表并导入一年日期
create table dwd_myshops.dwd_dates(
id int,
fdate string,
years int,
months int,
dates int,
week int,
quarter int
)
row format delimited
fields terminated by ','
stored as orc;

shell脚本生成日期维度

#!/bin/bash

begin_time=$1
over_time=$2
# 计算两个日期间的天数
#将用户输入的时间转为时间戳
btimestamp=`date -d ${
     
     begin_time} +%s`
otimestamp=`date -d ${
     
     over_time} +%s`
let days=($otimestamp-$btimestamp)/86400
#循环向hive中填充对应数据
int=0
str="insert overwrite table dwd_myshops.dwd_dates values "
while(( $int<=$days ))
do
#  数字‘08’会产生二进制异常,在计算时会被系统判为8进制,需要转为10进制
  fdy=`date -d "${begin_time}+${int} days" "+%Y-%m-%d"`
#  在date基础上提取
#  years=${fdy:0:4}
#  months=${fdy:5:2}
#  dy=${fdy:8:2}
#  week=`date -d ${fdy} +%w`
  years=`date -d "${begin_time}+${int} days" "+%Y"`
  months=`date -d "${begin_time}+${int} days" "+%m"`
  dy=`date -d "${begin_time}+${int} days" "+%d"`
  week=`date -d "${begin_time}+${int} days" "+%w"`
  quarter=1
  if [ $months -lt 4 ];then quarter=1
  elif [ $months -lt 7 ];then quarter=2
  elif [ $months -lt 10 ];then quarter=3
  else
    quarter=4;
  fi
#  echo "${months}"
  let "int++"
#  执行hive命令完成数据填充
  str=$str"(${int},'${fdy}',${years},${months},${dy},${week},${quarter}),"
done

hive -e "${str%?}"

猜你喜欢

转载自blog.csdn.net/xiaoxaoyu/article/details/115030218