在配置文件 hive-site.xml 中添加以下配置:
<!-- Print column headers in CLI query output -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- Show the current database in the CLI -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
上传数据到 HDFS:
# Upload the CSV data file from the local ./data directory to /user/huadian on HDFS.
# NOTE(review): assumes /user/huadian already exists as an HDFS directory; if it does
# not, -put would presumably create a file with that name instead -- confirm before running.
bin/hdfs dfs -put ./data/sw17-top11-dl-sh.anon.csv /user/huadian
-- Load the uploaded CSV into the Hive table db_hive.tb_language_count.
-- There is no LOCAL keyword, so the path refers to HDFS; per the Hive DML manual
-- the file is moved (not copied) into the table's storage location.
LOAD DATA INPATH '/user/huadian/sw17-top11-dl-sh.anon.csv'
INTO TABLE db_hive.tb_language_count;
-- Number of people who use Python for big-data development, i.e. rows where both
-- python and spark_hadoop equal "1". Result recorded in the original notes: 683.
SELECT
    COUNT(*)
FROM db_hive.tb_language_count
WHERE python = "1"
  AND spark_hadoop = "1";
-- Number of people who use R for big-data development, i.e. rows where both
-- r and spark_hadoop equal "1". Result recorded in the original notes: 606.
SELECT
    COUNT(*)
FROM db_hive.tb_language_count
WHERE r = "1"
  AND spark_hadoop = "1";
-- Combine both counts into a single result row:
--   p_c = rows with python = "1", r_c = rows with r = "1",
-- both restricted to spark_hadoop = "1".
-- One scan with conditional COUNTs replaces the original join of two one-row
-- subqueries on a dummy id. COUNT ignores the NULL produced when a CASE does not
-- match, so each column equals the corresponding standalone count, and a single
-- row is still returned when nothing matches (both counts 0).
-- The R predicate now compares against the string "1"; the original used the
-- integer 1 in this one place, unlike every other predicate on this table.
SELECT
    COUNT(CASE WHEN python = "1" THEN 1 END) AS p_c,
    COUNT(CASE WHEN r = "1" THEN 1 END) AS r_c
FROM db_hive.tb_language_count
WHERE spark_hadoop = "1";
以上查询的结果都只是输出到控制台上。
创建表的另一种方式:根据查询结果创建表(CREATE TABLE ... AS SELECT),将有用的数据保存到一张结果表中:
-- Create-table-as-select: store the Python + Spark/Hadoop row count in a result table.
-- IF NOT EXISTS means the statement does nothing when the table is already present.
-- NOTE(review): COUNT(*) has no alias, so Hive auto-generates the column name
-- (typically _c0). Left unchanged so any existing reader of this table keeps working;
-- consider adding an explicit alias if nothing depends on the generated name.
CREATE TABLE IF NOT EXISTS db_hive.tb_language_count_result
AS
SELECT
    COUNT(*)
FROM db_hive.tb_language_count
WHERE python = "1"
  AND spark_hadoop = "1";
创建一张临时用的中间表(注意:这里创建的是普通表,并不是 Hive 的 TEMPORARY 表):
-- Create-table-as-select: copy only the python, r, deep and spark_hadoop columns
-- of db_hive.tb_language_count into a separate working table.
-- IF NOT EXISTS means the statement does nothing when the table is already present.
CREATE TABLE IF NOT EXISTS db_hive.tb_language_count_temp
AS
SELECT
    python,
    r,
    deep,
    spark_hadoop
FROM db_hive.tb_language_count;
hive的运行模式
当前会话设置有效:set hive.exec.mode.local.auto=true;
所有会话都有效:
在配置文件中设置
<!-- With this set to true, Hive decides automatically whether to run in local mode (see the description below). -->
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
<description>Let Hive determine whether to run in local mode automatically</description>
</property>
bin/hive -e "一句sql语句"
bin/hive -f "xx.sql" :xx.sql中存放多条sql语句
# Run a single query from the shell via -e: for each distinct word in
# db_hive.tb_word, output the word and the number of its non-NULL occurrences.
bin/hive -e "SELECT word,count(word) FROM db_hive.tb_word GROUP BY word"