1 Hive
[root@hqc-test-hdp1 ~]# su hdfs
[hdfs@hqc-test-hdp1 root]$ cd
# 准备数据
[hdfs@hqc-test-hdp1 ~]$ vim phone.txt
1 iphone5 2G 5999.0
2 oneplus 3G 2299.0
3 锤子T1 2G 1999.0
4 iphone5 2G 5999.0
5 oneplus 3G 2299.0
6 锤子T1 2G 1999.0
[hdfs@hqc-test-hdp1 ~]$ pwd
/home/hdfs
[hdfs@hqc-test-hdp1 ~]$ hive
Logging initialized using configuration in file:/etc/hive/2.5.3.0-37/0/hive-log4j.properties
hive> show databases;
OK
default
Time taken: 1.785 seconds, Fetched: 1 row(s)
hive> use default;
OK
Time taken: 0.359 seconds
hive> show tables;
OK
Time taken: 0.077 seconds
hive> create database test;
OK
Time taken: 0.35 seconds
hive> use test;
OK
Time taken: 0.064 seconds
# 建表
hive> create table phone(id int, brand string, ram string, price double)
> row format delimited
> fields terminated by '\t';
OK
Time taken: 0.405 seconds
hive> show tables;
OK
phone
Time taken: 0.074 seconds, Fetched: 1 row(s)
# 插入数据
hive> load data local inpath '/home/hdfs/phone.txt' into table phone;
Loading data to table test.phone
Table test.phone stats: [numFiles=1, numRows=0, totalSize=110, rawDataSize=0]
OK
Time taken: 0.591 seconds
hive> select * from phone;
OK
1 iphone5 2G 5999.0
2 oneplus 3G 2299.0
3 锤子T1 2G 1999.0
4 iphone5 2G 5999.0
5 oneplus 3G 2299.0
6 锤子T1 2G 1999.0
Time taken: 0.076 seconds, Fetched: 6 row(s)
### 使用 Hive CLI 执行 count(1):耗时约 11.7 秒(查询会作为作业提交到 YARN 集群执行)
hive> select count(1) from phone;
Query ID = hdfs_20191025151450_153ef2bc-f87f-4c72-a280-aed9e97c8100
Total jobs = 1
Launching Job 1 out of 1
Status: Running (Executing on YARN cluster with App id application_1564035532438_0002)
--------------------------------------------------------------------------------
VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED
--------------------------------------------------------------------------------
Map 1 .......... SUCCEEDED 1 1 0 0 0 0
Reducer 2 ...... SUCCEEDED 1 1 0 0 0 0
--------------------------------------------------------------------------------
VERTICES: 02/02 [==========================>>] 100% ELAPSED TIME: 8.48 s
--------------------------------------------------------------------------------
OK
6
Time taken: 11.722 seconds, Fetched: 1 row(s)
hive> select * from phone where ram = '2G' and price=5999;
OK
1 iphone5 2G 5999.0
4 iphone5 2G 5999.0
Time taken: 1.543 seconds, Fetched: 2 row(s)
hive> select count(1) from phone where ram = '2G' and price=5999;
Query ID = hdfs_20191028171115_9edb4ce4-84be-44ba-a085-9bb3603098df
Total jobs = 1
Launching Job 1 out of 1
Status: Running (Executing on YARN cluster with App id application_1564035532438_0003)
--------------------------------------------------------------------------------
VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED
--------------------------------------------------------------------------------
Map 1 .......... SUCCEEDED 1 1 0 0 0 0
Reducer 2 ...... SUCCEEDED 1 1 0 0 0 0
--------------------------------------------------------------------------------
VERTICES: 02/02 [==========================>>] 100% ELAPSED TIME: 7.78 s
--------------------------------------------------------------------------------
OK
2
Time taken: 10.004 seconds, Fetched: 1 row(s)
2 Beeline
因为没有开启用户认证,连接时用户名和密码可以直接回车跳过;但以匿名(anonymous)用户执行需要启动作业的查询(如 count)时,会因为没有 HDFS 写权限而失败,详见下文:
###### beeline 方式一 使用基于Zookeeper的HiveServer2 HA
[root@hqc-test-hdp1 ~]# su hdfs
[hdfs@hqc-test-hdp1 root]$ beeline
Beeline version 1.2.1000.2.5.3.0-37 by Apache Hive
beeline> !connect jdbc:hive2://hqc-test-hdp1:2181,hqc-test-hdp2:2181,hqc-test-hdp3:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
Connecting to jdbc:hive2://hqc-test-hdp1:2181,hqc-test-hdp2:2181,hqc-test-hdp3:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2
# 因为没有开启用户认证,用户名和密码直接回车即可
Enter username for jdbc:hive2://hqc-test-hdp1:2181,hqc-test-hdp2:2181,hqc-test-hdp3:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2:
Enter password for jdbc:hive2://hqc-test-hdp1:2181,hqc-test-hdp2:2181,hqc-test-hdp3:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2:
Connected to: Apache Hive (version 1.2.1000.2.5.3.0-37)
Driver: Hive JDBC (version 1.2.1000.2.5.3.0-37)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hqc-test-hdp1:2181,hqc-test-h> show databases;
+----------------+--+
| database_name |
+----------------+--+
| default |
| test |
+----------------+--+
2 rows selected (0.117 seconds)
0: jdbc:hive2://hqc-test-hdp1:2181,hqc-test-h> use test;
No rows affected (0.062 seconds)
0: jdbc:hive2://hqc-test-hdp1:2181,hqc-test-h> select * from phone;
+-----------+--------------+------------+--------------+--+
| phone.id | phone.brand | phone.ram | phone.price |
+-----------+--------------+------------+--------------+--+
| 1 | iphone5 | 2G | 5999.0 |
| 2 | oneplus | 3G | 2299.0 |
| 3 | 锤子T1 | 2G | 1999.0 |
| 4 | iphone5 | 2G | 5999.0 |
| 5 | oneplus | 3G | 2299.0 |
| 6 | 锤子T1 | 2G | 1999.0 |
+-----------+--------------+------------+--------------+--+
6 rows selected (0.264 seconds)
0: jdbc:hive2://hqc-test-hdp1:2181,hqc-test-h> select * from phone where ram = '2G' and price=5999;
+-----------+--------------+------------+--------------+--+
| phone.id | phone.brand | phone.ram | phone.price |
+-----------+--------------+------------+--------------+--+
| 1 | iphone5 | 2G | 5999.0 |
| 4 | iphone5 | 2G | 5999.0 |
+-----------+--------------+------------+--------------+--+
2 rows selected (0.144 seconds)
###### beeline 方式二 单点
[hdfs@hqc-test-hdp1 root]$ beeline
Beeline version 1.2.1000.2.5.3.0-37 by Apache Hive
beeline> !connect jdbc:hive2://hqc-test-hdp2:10000
Connecting to jdbc:hive2://hqc-test-hdp2:10000
Enter username for jdbc:hive2://hqc-test-hdp2:10000:
Enter password for jdbc:hive2://hqc-test-hdp2:10000:
Connected to: Apache Hive (version 1.2.1000.2.5.3.0-37)
Driver: Hive JDBC (version 1.2.1000.2.5.3.0-37)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://hqc-test-hdp2:10000> show databases;
+----------------+--+
| database_name |
+----------------+--+
| default |
| test |
+----------------+--+
2 rows selected (0.115 seconds)
0: jdbc:hive2://hqc-test-hdp2:10000> use test;
No rows affected (0.136 seconds)
0: jdbc:hive2://hqc-test-hdp2:10000> select * from phone;
+-----------+--------------+------------+--------------+--+
| phone.id | phone.brand | phone.ram | phone.price |
+-----------+--------------+------------+--------------+--+
| 1 | iphone5 | 2G | 5999.0 |
| 2 | oneplus | 3G | 2299.0 |
| 3 | 锤子T1 | 2G | 1999.0 |
| 4 | iphone5 | 2G | 5999.0 |
| 5 | oneplus | 3G | 2299.0 |
| 6 | 锤子T1 | 2G | 1999.0 |
+-----------+--------------+------------+--------------+--+
6 rows selected (0.13 seconds)
0: jdbc:hive2://hqc-test-hdp2:10000> select * from phone where ram = '2G' and price=5999;
+-----------+--------------+------------+--------------+--+
| phone.id | phone.brand | phone.ram | phone.price |
+-----------+--------------+------------+--------------+--+
| 1 | iphone5 | 2G | 5999.0 |
| 4 | iphone5 | 2G | 5999.0 |
+-----------+--------------+------------+--------------+--+
2 rows selected (0.115 seconds)
###### 之前连接时没有输入用户名,默认以 anonymous 用户登录;该用户没有权限在 HDFS 上创建 /user/anonymous 目录,所以需要启动 Tez 作业的查询会失败。改用 hdfs 用户重新连接即可(密码留空直接回车)
0: jdbc:hive2://hqc-test-hdp2:10000> select count(1) from phone where ram = '2G' and price=5999;
INFO : Tez session hasn't been created yet. Opening session
ERROR : Failed to execute tez graph.
org.apache.hadoop.security.AccessControlException: Permission denied: user=anonymous, access=WRITE, inode="/user/anonymous":hdfs:hdfs:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:319)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:292)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:213)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:190)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1827)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1811)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1794)
at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:71)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:4011)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1102)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:630)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2313)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2309)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2307)
0: jdbc:hive2://hqc-test-hdp2:10000> !connect jdbc:hive2://hqc-test-hdp2:10000
Connecting to jdbc:hive2://hqc-test-hdp2:10000
Enter username for jdbc:hive2://hqc-test-hdp2:10000: hdfs
Enter password for jdbc:hive2://hqc-test-hdp2:10000:
Connected to: Apache Hive (version 1.2.1000.2.5.3.0-37)
Driver: Hive JDBC (version 1.2.1000.2.5.3.0-37)
Transaction isolation: TRANSACTION_REPEATABLE_READ
1: jdbc:hive2://hqc-test-hdp2:10000> select count(1) from phone where ram = '2G' and price=5999;
Error: Error while compiling statement: FAILED: SemanticException [Error 10001]: Line 1:21 Table not found 'phone' (state=42S02,code=10001)
# 重新连接后是一个新的会话,当前数据库是 default,所以需要写成 test.phone(或者先执行 use test;)
1: jdbc:hive2://hqc-test-hdp2:10000> select count(1) from test.phone where ram = '2G' and price=5999;
INFO : Tez session hasn't been created yet. Opening session
INFO : Dag name: select count(1) from test.phone...price=5999(Stage-1)
INFO :
INFO : Status: Running (Executing on YARN cluster with App id application_1564035532438_0004)
INFO : Map 1: -/- Reducer 2: 0/1
INFO : Map 1: 0/1 Reducer 2: 0/1
INFO : Map 1: 0(+1)/1 Reducer 2: 0/1
INFO : Map 1: 0(+1)/1 Reducer 2: 0/1
INFO : Map 1: 1/1 Reducer 2: 0(+1)/1
INFO : Map 1: 1/1 Reducer 2: 1/1
+------+--+
| _c0 |
+------+--+
| 2 |
+------+--+
1 row selected (17.102 seconds)
3 Spark-SQL
### 修改Spark日志输出级别
[root@hqc-test-hdp1 ~]# cd /usr/hdp/2.5.3.0-37/spark2/conf
[root@hqc-test-hdp1 conf]# ls
docker.properties.template fairscheduler.xml.template hive-site.xml log4j.properties log4j.properties.template metrics.properties metrics.properties.template slaves.template spark-defaults.conf spark-defaults.conf.template spark-env.sh spark-env.sh.template
[root@hqc-test-hdp1 conf]# vim log4j.properties
# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
### 使用 Spark SQL 执行 count(1):耗时约 1.6 秒
[hdfs@hqc-test-hdp1 ~]$ spark-sql
19/10/25 15:15:52 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
19/10/25 15:15:52 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
19/10/25 15:15:56 WARN SparkContext: Use an existing SparkContext, some configuration may not take effect.
spark-sql> use test;
Time taken: 5.63 seconds
spark-sql> select count(1) from phone;
6
Time taken: 1.598 seconds, Fetched 1 row(s)
spark-sql> select count(1) from test.phone where ram = '2G' and price=5999;
2
Time taken: 0.847 seconds, Fetched 1 row(s)
4 Spark-Shell
[hdfs@hqc-test-hdp1 root]$ spark-shell
19/10/28 15:09:56 WARN SparkContext: Use an existing SparkContext, some configuration may not take effect.
Spark context Web UI available at http://10.35.3.17:4040
Spark context available as 'sc' (master = local[*], app id = local-1572246594677).
Spark session available as 'spark'.
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 2.0.0.2.5.3.0-37
/_/
Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_101)
Type in expressions to have them evaluated.
Type :help for more information.
###### Spark 读取 Hive 表 方式一:HiveContext(在 Spark 2.x 中已过时,会出现 deprecation 警告)
scala> import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.HiveContext
scala> val sqlContext = new HiveContext(sc)
warning: there was one deprecation warning; re-run with -deprecation for details
sqlContext: org.apache.spark.sql.hive.HiveContext = org.apache.spark.sql.hive.HiveContext@7bbd76b8
scala> sqlContext.sql("select * from test.phone").show()
+---+-------+---+------+
| id| brand|ram| price|
+---+-------+---+------+
| 1|iphone5| 2G|5999.0|
| 2|oneplus| 3G|2299.0|
| 3| 锤子T1| 2G|1999.0|
| 4|iphone5| 2G|5999.0|
| 5|oneplus| 3G|2299.0|
| 6| 锤子T1| 2G|1999.0|
+---+-------+---+------+
###### Spark 读取 Hive 表 方式二:SparkSession
scala> import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.SparkSession
scala> val spark = SparkSession.builder().appName("Spark Hive Example").enableHiveSupport().getOrCreate()
19/10/28 15:45:41 WARN SparkSession$Builder: Use an existing SparkSession, some configuration may not take effect.
spark: org.apache.spark.sql.SparkSession = org.apache.spark.sql.SparkSession@26495639
scala> spark.sql("select * from test.phone").show()
+---+-------+---+------+
| id| brand|ram| price|
+---+-------+---+------+
| 1|iphone5| 2G|5999.0|
| 2|oneplus| 3G|2299.0|
| 3| 锤子T1| 2G|1999.0|
| 4|iphone5| 2G|5999.0|
| 5|oneplus| 3G|2299.0|
| 6| 锤子T1| 2G|1999.0|
+---+-------+---+------+
scala> spark.sql("select * from test.phone where ram = '2G' and price=5999").show()
+---+-------+---+------+
| id| brand|ram| price|
+---+-------+---+------+
| 1|iphone5| 2G|5999.0|
| 4|iphone5| 2G|5999.0|
+---+-------+---+------+
scala> spark.sql("select count(1) from test.phone where ram = '2G' and price=5999").show()
+--------+
|count(1)|
+--------+
| 2|
+--------+