1. 热卖品牌Top10数据可视化
1.1 HiveSQL的编写
编写HiveSql
-- Top 10 best-selling brands: brands ranked by the number of log rows that carry an item_id.
-- ORDER BY produces one total ordering before LIMIT is applied
-- (SORT BY alone only orders rows within each reducer).
select
    brand_id,
    count(item_id) as sale_num
from to_user_log
where brand_id is not null
group by brand_id
order by sale_num desc
limit 10;
-- Result table for the Top 10 best-selling brands.
-- Rows are stored as comma-separated text, one record per line.
-- The count column is named sale_num, matching the alias used by the query that fills it.
create table if not exists tm_hot_sale_brand (
    brand_id int comment "品牌id",
    sale_num int comment "销售数量",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store the Top 10 best-selling brands in the result table.
-- '20300101' is the fixed load date written into date_day.
-- ORDER BY produces one total ordering before LIMIT is applied.
insert into table tm_hot_sale_brand
select
    brand_id,
    count(item_id) as sale_num,
    '20300101' as date_day
from to_user_log
where brand_id is not null
group by brand_id
order by sale_num desc
limit 10;
1.2 Hive数据库同步到MySQL数据库
创建MySQL表
编写sqoop导出数据脚本
[root@node3 ~]# cat export_tm_hot_sale_brand.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# number of parallel map tasks
-m
1
# target MySQL table
--table
tm_hot_sale_brand
# columns of the target table, in the order of the exported fields
--columns
brand_id,sale_num,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_hot_sale_brand
使用sqoop导出
[root@node3 ~]# sqoop --options-file export_tm_hot_sale_brand.txt
1.3 superset绘制饼图
添加数据集到superset中
对数据进行设计
准备添加图
选择条件
保存可视化结果
2.购物达人Top10数据可视化
2.1 Hive SQL编写
-- Top 10 shoppers: users ranked by the number of log rows that carry an item_id.
-- '20300101' is the fixed load date.
-- ORDER BY produces one total ordering before LIMIT is applied.
select
    user_id,
    count(item_id) as buy_num,
    '20300101' as date_day
from to_user_log
where user_id is not null
group by user_id
order by buy_num desc
limit 10;
-- Result table for the Top 10 shoppers.
-- Rows are stored as comma-separated text, one record per line.
-- date_day is a string because the insert writes the string literal '20300101';
-- the exported text is the same as with an int column.
create table if not exists tm_shopper_master (
    user_id int comment "用户id",
    buy_num int comment "购买数量",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store the Top 10 shoppers in the result table.
-- '20300101' is the fixed load date written into date_day.
-- ORDER BY produces one total ordering before LIMIT is applied.
insert into table tm_shopper_master
select
    user_id,
    count(item_id) as buy_num,
    '20300101' as date_day
from to_user_log
where user_id is not null
group by user_id
order by buy_num desc
limit 10;
2.2 Hive数据库同步到MySQL数据库
mysql中创建表
编写sqoop脚本导出数据并执行
[root@node3 ~]# cat export_tm_shopper_master.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# a single map task, as in the other export files of this project
-m
1
# target MySQL table
--table
tm_shopper_master
# columns of the target table, in the order of the exported fields
--columns
user_id,buy_num,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_shopper_master
[root@node3 ~]# sqoop --options-file export_tm_shopper_master.txt
2.3 superset绘制漏斗图
导入数据表
编辑表
绘制图像
保存到可视化面板中
扫描二维码关注公众号,回复:
15426140 查看本文章
3.回购商品Top50数据可视化
3.1 HiveSQL的编写
在某个品牌下,同一用户购买多次称之为回购:对用户id和品牌id分组后,统计每组的购买记录数(count(item_id)),再按该次数降序排序即可。
-- Repurchase Top 50: (user, brand) pairs ranked by the number of log rows that carry an item_id.
-- '20300101' is the fixed load date.
-- ORDER BY produces one total ordering before LIMIT is applied.
select
    user_id,
    brand_id,
    count(item_id) as buy_num,
    '20300101' as date_day
from to_user_log
where user_id is not null
  and brand_id is not null
group by user_id, brand_id
order by buy_num desc
limit 50;
-- Result table for the repurchase Top 50 (one row per user/brand pair).
-- Rows are stored as comma-separated text, one record per line.
create table if not exists tm_buy_back (
    user_id  int    comment "用户id",
    brand_id int    comment "品牌id",
    buy_num  int    comment "购买数量",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store the repurchase Top 50 in the result table.
-- '20300101' is the fixed load date written into date_day.
-- ORDER BY produces one total ordering before LIMIT is applied.
insert into table tm_buy_back
select
    user_id,
    brand_id,
    count(item_id) as buy_num,
    '20300101' as date_day
from to_user_log
where user_id is not null
  and brand_id is not null
group by user_id, brand_id
order by buy_num desc
limit 50;
3.2 Hive数据库同步到MySQL数据库
创建MySQL数据表
-- MySQL target table for the repurchase Top 50.
-- The name must be tm_buy_back: that is the table the Sqoop export writes to.
CREATE TABLE IF NOT EXISTS tm_buy_back (
    user_id INT COMMENT "用户id",
    brand_id INT COMMENT "品牌id",
    buy_num INT COMMENT "购买数量",
    date_day VARCHAR(10) COMMENT "添加日期"
);
编写脚本,使用Sqoop同步数据库
[root@node3 ~]# cat export_tm_buy_back.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# number of parallel map tasks
-m
1
# target MySQL table
--table
tm_buy_back
# columns of the target table, in the order of the exported fields
--columns
user_id,brand_id,buy_num,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_buy_back
[root@node3 ~]# sqoop --options-file export_tm_buy_back.txt
3.3 superset绘制表格
在superset添加并设置表
绘制table表
保存到可视化面板中
superset自定义显示颜色
4.各年龄段购物数量数据可视化
4.1 HiveSQL的编写
-- Purchase-log rows per age range.
-- A null age_range is reported as -1 so that it cannot conflict with a real value later.
-- An inner join is used because only users with at least one log row are counted
-- (a left join filtered on "g.user_id is not null" returns exactly the same rows).
select
    if(u.age_range is null, -1, u.age_range) as age_range,
    count(g.item_id) as buy_num
from to_user_info u
inner join to_user_log g
    on u.id = g.user_id
group by u.age_range
order by buy_num desc;
-- Result table for purchase counts per age range.
-- Rows are stored as comma-separated text, one record per line.
-- date_day is a string: the insert writes the string literal '20300101'
-- and the MySQL copy of this table declares the column as varchar(10).
create table if not exists tm_age_range_buy (
    age_range int comment "用户年龄段",
    buy_num int comment "购买数量",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store purchase counts per age range in the Hive result table.
-- A null age_range is written as -1 so that it cannot conflict with a real value later.
-- An inner join is used because only users with at least one log row are counted
-- (a left join filtered on "g.user_id is not null" returns exactly the same rows).
-- '20300101' is the fixed load date written into date_day.
insert into table tm_age_range_buy
select
    if(u.age_range is null, -1, u.age_range) as age_range,
    count(g.item_id) as buy_num,
    '20300101' as date_day
from to_user_info u
inner join to_user_log g
    on u.id = g.user_id
group by u.age_range
order by buy_num desc;
4.2 Hive数据库同步到MySQL数据库
-- MySQL target table that receives the per-age-range purchase counts.
create table if not exists tm_age_range_buy (
    age_range int         comment "年龄段",
    buy_num   int         comment "购买数量",
    date_day  varchar(10) comment "添加日期"
);
编写脚本,使用Sqoop同步数据库
[root@node3 ~]# cat export_tm_age_range_buy.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# number of parallel map tasks
-m
1
# target MySQL table
--table
tm_age_range_buy
# columns of the target table, in the order of the exported fields
--columns
age_range,buy_num,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_age_range_buy
[root@node3 ~]# sqoop --options-file export_tm_age_range_buy.txt
4.3 superset绘制热力图
发现此时的y轴坐标顺序混乱,不直观
5.网站购物行为与性别关系数据可视化
5.1 HiveSQL的编写
性别数据:0表示女、1表示男、2表示保密;缺失(null)的性别在查询中统一记为3,表示未知
-- Purchase-log rows per gender.
-- A null gender is reported as 3.
-- An inner join is used because only users with at least one log row are counted
-- (a left join filtered on "g.user_id is not null" returns exactly the same rows).
-- '20300101' is the fixed load date. Rows come back in ascending buy_num order.
select
    if(u.gender is null, 3, u.gender) as gender,
    count(g.item_id) as buy_num,
    '20300101' as date_day
from to_user_info u
inner join to_user_log g
    on u.id = g.user_id
group by u.gender
order by buy_num;
-- Result table for purchase counts per gender.
-- Rows are stored as comma-separated text, one record per line.
-- The date column is named date_day, the same name the MySQL copy of this table uses.
create table if not exists tm_gender_buy (
    gender int comment "性别",
    buy_num int comment "购买数量",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store purchase counts per gender in the result table.
-- A null gender is written as 3.
-- An inner join is used because only users with at least one log row are counted
-- (a left join filtered on "g.user_id is not null" returns exactly the same rows).
-- '20300101' is the fixed load date written into the third column.
insert into table tm_gender_buy
select
    if(u.gender is null, 3, u.gender) as gender,
    count(g.item_id) as buy_num,
    '20300101' as date_day
from to_user_info u
inner join to_user_log g
    on u.id = g.user_id
group by u.gender
order by buy_num;
5.2 Hive数据库同步到MySQL数据库
-- MySQL target table that receives the per-gender purchase counts.
CREATE TABLE IF NOT EXISTS tm_gender_buy (
    gender   INT         COMMENT "性别 0表示女, 1表示男, 2表示保密, 3表示未知",
    buy_num  INT         COMMENT "购买数量",
    date_day VARCHAR(10) COMMENT "添加日期"
);
同步数据
[root@node3 ~]# cat export_tm_gender_buy.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# number of parallel map tasks
-m
1
# target MySQL table
--table
tm_gender_buy
# columns of the target MySQL table; its date column is date_day
--columns
gender,buy_num,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_gender_buy
[root@node3 ~]# sqoop --options-file export_tm_gender_buy.txt
5.3 superset绘制饼图
6.品牌内热销商品Top3数据可视化
6.1 HiveSQL的编写
求各个品牌的商品销量最高的Top3
-- Top 3 best-selling items inside each brand.
--   item_sales: number of log rows (count of user_id) per (brand_id, item_id).
--   ranked:     items of each brand numbered by descending sales; item_id breaks ties
--               so that repeated runs pick the same rows.
-- The inner query needs no "cluster by brand_id": the window's
-- "partition by brand_id" already brings each brand's rows together.
select
    brand_id,
    item_id,
    sale_num,
    rank
from (
    select
        brand_id,
        item_id,
        sale_num,
        row_number() over (partition by brand_id order by sale_num desc, item_id) as rank
    from (
        select
            brand_id,
            item_id,
            count(user_id) as sale_num
        from to_user_log
        where brand_id is not null
          and item_id is not null
        group by brand_id, item_id
    ) item_sales
) ranked
where rank <= 3;
-- Result table for the Top 3 best-selling items of each brand.
-- Rows are stored as comma-separated text, one record per line.
-- The date column is named date_day, the same name the MySQL copy of this table uses.
create table if not exists tm_brand_item_rank
(
    brand_id int comment "品牌id",
    item_id int comment "商品id",
    sale_num int comment "销售数量",
    rank int comment "商品销量排名",
    date_day string comment "添加日期"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Store the Top 3 best-selling items of each brand in the result table.
--   item_sales: number of log rows (count of user_id) per (brand_id, item_id).
--   ranked:     items of each brand numbered by descending sales; item_id breaks ties
--               so that repeated runs pick the same rows.
-- The inner query needs no "cluster by brand_id": the window's
-- "partition by brand_id" already brings each brand's rows together.
-- '20300101' is the fixed load date, the same value every other result table uses.
insert into table tm_brand_item_rank
select
    brand_id,
    item_id,
    sale_num,
    rank,
    '20300101' as date_day
from (
    select
        brand_id,
        item_id,
        sale_num,
        row_number() over (partition by brand_id order by sale_num desc, item_id) as rank
    from (
        select
            brand_id,
            item_id,
            count(user_id) as sale_num
        from to_user_log
        where brand_id is not null
          and item_id is not null
        group by brand_id, item_id
    ) item_sales
) ranked
where rank <= 3;
6.2 Hive数据库同步到MySQL数据库
# MySQL target table for the per-brand Top 3 items.
# `rank` is backquoted because RANK is a reserved word from MySQL 8.0.2 on.
create table if not exists tm_brand_item_rank (
    brand_id int comment '品牌id',
    item_id int comment '商品id',
    sale_num int comment '销售数量',
    `rank` int comment '销量排名',
    date_day varchar(10) comment '添加日期'
);
[root@node3 ~]# cat export_tm_brand_item_rank.txt
# Sqoop options file: the tool name first, then one option or value per line.
export
# JDBC URL of the target MySQL database
--connect
jdbc:mysql://node1:3306/taobao
--username
root
# NOTE(review): plain-text password; Sqoop also accepts --password-file
--password
123456
# number of parallel map tasks; the documented spellings are -m and --num-mappers
-m
1
# target MySQL table
--table
tm_brand_item_rank
# columns of the target table, in the order of the exported fields
--columns
brand_id,item_id,sale_num,rank,date_day
# HDFS directory holding the Hive table's data files
--export-dir
/user/hive_remote/warehouse/taobao.db/tm_brand_item_rank
[root@node3 ~]# sqoop --options-file export_tm_brand_item_rank.txt
6.3 superset绘制table图表
7.购物记录时间拓宽为年月日数据可视化
7.1 HiveSQL的编写——将数据拓宽为年月日
-- Preview (first 100 rows): split time_stamp into year / month / day columns.
-- The year is the constant 2030; month is floor(time_stamp / 100) and day is time_stamp % 100.
select
    user_id,
    cat_id,
    brand_id,
    item_id,
    seller_id,
    2030 as time_year,
    floor(time_stamp / 100) as time_month,
    time_stamp % 100 as time_day
from to_user_log
where time_stamp is not null
limit 100;
-- Intermediate table holding the log rows with the date split into year / month / day.
-- Rows are stored as comma-separated text, one record per line.
create table if not exists temp_user_log (
    user_id    int comment "买家id",
    cat_id     int comment "分类id",
    brand_id   int comment "品牌id",
    item_id    int comment "产品id",
    seller_id  int comment "卖家id",
    time_year  int comment "年",
    time_month int comment "月",
    time_day   int comment "日"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Rebuild the intermediate table from the raw log (overwrite replaces its previous contents).
-- The year is the constant 2030; month is floor(time_stamp / 100) and day is time_stamp % 100.
insert overwrite table temp_user_log
select
    user_id,
    cat_id,
    brand_id,
    item_id,
    seller_id,
    2030 as time_year,
    floor(time_stamp / 100) as time_month,
    time_stamp % 100 as time_day
from to_user_log
where time_stamp is not null;
7.2 HiveSQL编写——拓宽周数和星期几
-- Scratch checks of the functions used to add week-of-year and day-of-week.
-- Week of the year for a given date.
select weekofyear('2030-01-01');
-- Day of week: 1970-01-01 was a Thursday, so (days since 1970-01-01 - 3) modulo 7
-- gives 0 for Sunday, 1 for Monday, ... 6 for Saturday.
select pmod(datediff("2030-01-01","1970-01-01")-3,7);
-- Check that concat accepts numeric arguments next to strings.
select concat('aa',100,5.5);
-- Epoch seconds for a date given in yyyy-MM-dd form.
select unix_timestamp('2030-01-01','yyyy-MM-dd');
-- Preview (first 50 rows) computed from the intermediate table temp_user_log:
-- adds the epoch-seconds timestamp and the week of the year.
-- The inner query builds the yyyy-MM-dd string once (month and day are zero-padded
-- to two digits) so that both functions read the same value.
select
    user_id, cat_id, brand_id, item_id, seller_id, time_year, time_month, time_day,
    unix_timestamp(date_str, 'yyyy-MM-dd') as time_stamp,
    weekofyear(date_str) as week_year
from (
    select
        user_id, cat_id, brand_id, item_id, seller_id, time_year, time_month, time_day,
        concat(time_year,"-",if(time_month>9,time_month,concat("0",time_month)),"-",
               if(time_day>9,time_day,concat("0",time_day))) as date_str
    from temp_user_log
) dated
limit 50;
-- Result table: log rows widened with year / month / day, epoch seconds,
-- week of the year and day of the week.
-- Rows are stored as comma-separated text, one record per line.
create table if not exists td_userlog_year_month_day_week (
    user_id    int    comment "买家id",
    cat_id     int    comment "分类id",
    brand_id   int    comment "品牌id",
    item_id    int    comment "产品id",
    seller_id  int    comment "卖家id",
    time_year  int    comment "年",
    time_month int    comment "月",
    time_day   int    comment "日",
    time_stamp bigint comment "时间戳 单位秒",
    week_year  int    comment "一年中的第几周",
    week_day   int    comment "星期几"
)
row format delimited
    fields terminated by ","
    lines terminated by "\n";
-- Fill the result table from the intermediate table temp_user_log.
-- The inner query builds the yyyy-MM-dd string once (month and day are zero-padded
-- to two digits); the outer query derives from it:
--   time_stamp: epoch seconds of that date
--   week_year:  week of the year
--   week_day:   (days since 1970-01-01 - 3) modulo 7; 1970-01-01 was a Thursday,
--               so 0 is Sunday, 1 is Monday, ... 6 is Saturday
insert into table td_userlog_year_month_day_week
select
    user_id, cat_id, brand_id, item_id, seller_id, time_year, time_month, time_day,
    unix_timestamp(date_str, 'yyyy-MM-dd') as time_stamp,
    weekofyear(date_str) as week_year,
    pmod(datediff(date_str, "1970-01-01") - 3, 7) as week_day
from (
    select
        user_id, cat_id, brand_id, item_id, seller_id, time_year, time_month, time_day,
        concat(time_year,"-",if(time_month>9,time_month,concat("0",time_month)),"-",
               if(time_day>9,time_day,concat("0",time_day))) as date_str
    from temp_user_log
) dated;
7.3 创建MySQL表存储
-- MySQL table that stores the widened log rows (td_userlog_year_month_day_week).
-- NOTE(review): the original text ended with a stray "taobao..." caption after ");",
-- presumably the database name; confirm the table is created in the taobao database.
CREATE TABLE IF NOT EXISTS td_userlog_year_month_day_week (
    user_id INT COMMENT "买家id",
    cat_id INT COMMENT "分类id",
    brand_id INT COMMENT "品牌id",
    item_id INT COMMENT "产品id",
    seller_id INT COMMENT "卖家id",
    time_year INT COMMENT "年",
    time_month INT COMMENT "月",
    time_day INT COMMENT "日",
    time_stamp BIGINT COMMENT "时间戳 单位秒",
    week_year INT COMMENT "一年中的第几个星期",
    week_day INT COMMENT "星期几"
);