Hive行转列面试题（三种解法）

源数据

1,语文,98.0
2,数学,80.0
2,政治,78.0
5,语文,88.0
5,数学,66.0
5,政治,99.0

-- 建表

-- Table ms: three columns, stored as text with comma-separated fields.
-- "if not exists" makes the statement safe to re-run.
create table if not exists ms (
    grade_id     int,
    subject_name string,
    max_score    double
)
row format delimited
    fields terminated by ",";

-- 导入数据

-- Load the local file /doit16/ms.txt into table ms
-- (no "overwrite" keyword, so existing rows are kept).
load data local inpath "/doit16/ms.txt"
into table ms;

想要的结果：
（原文此处为结果截图，按上面的源数据，期望结果如下；没有成绩的科目以 0 填充）
grade_id 语文 数学 政治
1 98.0 0 0
2 0 80.0 78.0
5 88.0 66.0 99.0

解法1：拼接json，然后利用json函数取值
===> tmp（每个grade_id拼成一条json字符串，科目名先映射为 yw/sx/zz）
1 {"yw":98.0}
2 {"sx":80.0,"zz":78.0}
5 {"yw":88.0,"sx":66.0,"zz":99.0}

-- Solution 1: build one JSON object string per grade_id, then read each
-- subject's score back out of it with get_json_object.
with keyed_scores as (
    -- Swap the Chinese subject names for the ASCII keys (yw / sx / zz) that the
    -- JSON paths in the final select use. Any other subject maps to NULL.
    select
        grade_id,
        case subject_name
            when '语文' then 'yw'
            when '数学' then 'sx'
            when '政治' then 'zz'
        end as subject_key,
        max_score
    from ms
),
tmp as (
    select
        grade_id,
        -- Each row becomes a '"key":score' fragment. concat() returns NULL when
        -- the key or the score is NULL, and collect_list() leaves NULLs out, so
        -- an unmapped subject or a missing score is dropped instead of
        -- producing a malformed fragment that would break the JSON for the
        -- whole grade. A grade with no usable row still yields '{}'.
        concat(
            '{',
            concat_ws(
                ',',
                collect_list(
                    concat('"', subject_key, '":', cast(max_score as string))
                )
            ),
            '}'
        ) as js
    from keyed_scores
    group by grade_id
)
select
    grade_id,
    -- get_json_object returns a string (NULL when the key is absent); cast it
    -- explicitly so the score columns are doubles rather than leaving the
    -- result type to implicit coercion between a string and the literal 0.
    nvl(cast(get_json_object(js, '$.yw') as double), 0.0) as `语文`,
    nvl(cast(get_json_object(js, '$.sx') as double), 0.0) as `数学`,
    nvl(cast(get_json_object(js, '$.zz') as double), 0.0) as `政治`
from tmp
;

解法2：利用str_to_map，生成:课程->成绩的hashmap，然后取值

1 语文 98.0
2 数学 80.0
2 政治 78.0
5 语文 88.0
5 数学 66.0
5 政治 99.0

-- Solution 2: build a subject -> score map per grade_id with str_to_map,
-- then look each subject up by key.
with tmp as (
    select
        grade_id,
        -- Rows become 'subject:score' pairs joined with ','; str_to_map splits
        -- the text back on ',' (between entries) and ':' (key vs. value).
        str_to_map(
            concat_ws(
                ',',
                collect_list(
                    concat_ws(':', subject_name, cast(max_score as string))
                )
            ),
            ',',
            ':'
        ) as cj
    from ms
    group by grade_id
)
select
    grade_id,
    -- Map values are strings (NULL when the subject is absent); cast them
    -- explicitly so the score columns are doubles rather than leaving the
    -- result type to implicit coercion between a string and the literal 0.
    nvl(cast(cj['语文'] as double), 0.0) as `语文`,
    nvl(cast(cj['数学'] as double), 0.0) as `数学`,
    nvl(cast(cj['政治'] as double), 0.0) as `政治`
from tmp
;

解法3：利用case when把各科成绩拆成单独的列，再按grade_id分组取max

-- Solution 3: spread the score into one column per subject, then collapse the
-- rows of each grade_id with max().
select
    grade_id,
    -- max() ignores the NULLs left by the non-matching rows; nvl turns
    -- "no row for this subject" (all NULL) into 0.
    nvl(max(yw), 0) as `语文`,
    nvl(max(sx), 0) as `数学`,
    nvl(max(zz), 0) as `政治`
from (
    -- Each row keeps its score only in the column of its own subject;
    -- the other two columns are NULL (case without else).
    select
        grade_id,
        case when subject_name = '语文' then max_score end as `yw`,
        case when subject_name = '数学' then max_score end as `sx`,
        case when subject_name = '政治' then max_score end as `zz`
    from ms
) t
group by grade_id
;

解法3加强版：把case when直接写进max里，省去子查询

-- Enhanced solution 3: conditional aggregation in a single pass, no subquery.
-- Rows of other subjects contribute 0 through the else branch, so a subject
-- with no row for the grade comes out as 0.
-- NOTE(review): the same 0 would also win over a negative score; presumably
-- scores are never negative - confirm.
select
    grade_id,
    max(case subject_name when '语文' then max_score else 0 end) as `语文`,
    max(case subject_name when '数学' then max_score else 0 end) as `数学`,
    max(case subject_name when '政治' then max_score else 0 end) as `政治`
from ms
group by grade_id
;

Hive行转列面试题（三种解法）

猜你喜欢