前言
在调试flink table api 查询 hive数据的时候,真的是遇到很多坑,特别是hive存储的数据在腾讯云的cos上,而且我是跨集群查询数据,要解决各种依赖和环境问题。下面的代码和pom.xml已经调试成功,在本地和集群 on yarn都可以运行:本地运行时需要在idea里给程序加运行参数(args)dev,集群 on yarn则不用加。
代码
package com.bigdata.etl
import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment}
import org.apache.flink.table.catalog.hive.HiveCatalog
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
object FlinkTableTest extends App {
  // When running locally from the IDE we must impersonate the "hadoop" user,
  // otherwise HDFS/COS access is denied. Harmless on the cluster.
  System.setProperty("HADOOP_USER_NAME", "hadoop")

  val settings = EnvironmentSettings.newInstance().useBlinkPlanner().build()
  println(settings.isStreamingMode)
  val stenv = TableEnvironment.create(settings)

  // Inspect the built-in (in-memory) catalog first, as a sanity check that the
  // TableEnvironment itself works before touching Hive.
  stenv.executeSql("show catalogs").print()
  stenv.useCatalog("default_catalog")
  stenv.executeSql("show databases").print()
  stenv.executeSql("select 1").print()
  println("-----fengexian--------------")

  val name = "hive"
  val defaultDatabase = "odl"
  var hiveConfDir = ""
  var hadoopConf = ""
  // Must match the Hive version of the target cluster (see hive.version in pom.xml).
  val hiveVersion = "2.3.6"
  // Any program argument (e.g. "dev") selects the local-IDE configuration;
  // with no arguments we assume we are running on the YARN cluster, where the
  // Hadoop configuration is picked up from the environment.
  if (args.nonEmpty) {
    hiveConfDir = "/Users/duzhixin/Documents/flink-hive-conf"
    hadoopConf = "/Users/duzhixin/Documents/flink-hive-conf"
  } else {
    hiveConfDir = "/usr/local/service/hive/conf"
    // NOTE(review): hadoopConf stays "" here on purpose — on the cluster the
    // Hadoop conf dir is resolved from the environment, not from this path.
  }

  // Local IDE runs need both the Hive and Hadoop config dirs; on the cluster
  // only the Hive conf dir is required.
  val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, hadoopConf, hiveVersion)
  // IMPORTANT: the option keys must not contain trailing whitespace, otherwise
  // Flink stores them under a different key and silently ignores them.
  hive.getHiveConf.set("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism.max", "10000")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism", "true")
  stenv.registerCatalog("hive", hive)
  // Set the HiveCatalog as the current catalog of the session.
  stenv.useCatalog("hive")

  // Run a few probe queries against the Hive metastore / data to verify the setup.
  stenv.executeSql("show databases").print()
  stenv.executeSql("show tables").print()
  stenv.executeSql("select 1 from test.app limit 1").print()
  stenv.executeSql("select * from odl.tb_book where dt='2021-06-05' limit 10").print()
  stenv.executeSql("select * from odl.dadian where dt='2021-06-05' limit 10").print()
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>spring-plugin</id>
<url>https://repo.spring.io/plugins-release/</url>
</repository>
</repositories>
<modelVersion>4.0.0</modelVersion>
<groupId>org.jiashu</groupId>
<artifactId>flink-dw</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<spark.version>2.4.3</spark.version>
<!-- Scala *binary* version used as the artifact-id suffix of Scala libraries. -->
<scala.version>2.12</scala.version>
<!-- Versions below are referenced by the dependency declarations; keeping them
     here (instead of hard-coding per dependency) guarantees one consistent
     version per library across the whole classpath. -->
<jedis.version>2.9.0</jedis.version>
<fastjson.version>1.2.47</fastjson.version>
<jetty.version>9.2.5.v20141112</jetty.version>
<container.version>2.17</container.version>
<java.version>1.8</java.version>
<hbase.version>1.2.0</hbase.version>
<hive.version>2.3.6</hive.version>
<!-- All Flink artifacts MUST use the same version: mixing 1.13.0 and 1.13.1
     jars on one classpath leads to NoSuchMethodError at runtime. -->
<flink.version>1.13.1</flink.version>
<hadoop.version>2.8.5</hadoop.version>
</properties>
<build>
<sourceDirectory>src/main/scala</sourceDirectory>
<finalName>flink-dw</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- NOTE(review): org.scala-tools:maven-scala-plugin is the legacy plugin;
     version "2.12" looks like a copy of the Scala version rather than a real
     plugin version — verify it resolves, or migrate to
     net.alchim31.maven:scala-maven-plugin. -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.12</version>
<executions>
<execution>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Build an executable jar whose manifest Class-Path points at "." so the
     jars copied into lib/ by maven-dependency-plugin are found at runtime. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<classesDirectory>target/classes/</classesDirectory>
<archive>
<manifestEntries>
<Class-Path>.</Class-Path>
</manifestEntries>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<type>jar</type>
<includeTypes>jar</includeTypes>
<!--<useUniqueVersions>false</useUniqueVersions> -->
<outputDirectory>
${project.build.directory}/lib
</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>${jedis.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.9.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- Flink <-> Hive integration -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hive_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Hive Dependency -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<!-- Hadoop client stack — all pinned to the same version via hadoop.version. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/hadoop-util/hadoop-util -->
<dependency>
<groupId>hadoop-util</groupId>
<artifactId>hadoop-util</artifactId>
<version>0.3.0</version>
</dependency>
<!-- Tencent Cloud COS SDKs (the Hive data lives on COS). -->
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>cos_api</artifactId>
<version>5.6.42</version>
</dependency>
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>qcloud-java-sdk</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.11</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.19</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.11</version>
</dependency>
<!-- NOTE(review): the original pom also declared maven-resources-plugin,
     maven-compiler-plugin, maven-dependency-plugin and maven-jar-plugin
     under <dependencies>. Maven plugins belong in <build><plugins>, not in
     <dependencies>; declaring them here only makes copy-dependencies ship
     plugin jars into lib/, so they were removed. -->
</dependencies>
</project>
如果还不行就试试加入hadoop-cos-2.8.5-5.9.22.jar 这个依赖