版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
1、首先获取相应的krb5.conf与keytab文件
2、代码示例:
package com.zhbr.hbase.test
import java.io.IOException
import com.google.protobuf.ServiceException
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HBaseAdmin, HTable}
import org.apache.hadoop.hbase.mapreduce.{TableInputFormat}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by drguo on 2018/7/18.
*/
object SparkExecHBase {
  /**
   * Reads an HBase table on a Kerberos-secured cluster into a Spark DataFrame
   * and shows it via a temporary SQL view.
   *
   * Side effects: sets the `java.security.krb5.conf` JVM property, performs a
   * Kerberos keytab login, and creates/stops a SparkSession.
   */
  def main(args: Array[String]): Unit = {
    // Must be set before any Hadoop security code initializes, otherwise the
    // realm cannot be resolved ("Can't get Kerberos realm", see trace below).
    System.setProperty("java.security.krb5.conf", "/home/sgbigdata/keytab/krb5.conf")
    // Build the SparkSession; strip the trailing '$' Scala appends to object class names.
    val sparkSession = SparkSession.builder().appName(this.getClass.getSimpleName.filter(!_.equals('$'))).getOrCreate()
    // FIX: required for the .toDF conversion further down; was missing in the original.
    import sparkSession.implicits._
    val sparkContext = sparkSession.sparkContext
    sparkContext.setLogLevel("WARN")
    // HBase table to scan.
    val tableName = "DWD_AMR_GS_METER-E-CURVE_201902"
    val hbaseConf = HBaseConfiguration.create()
    // ZooKeeper quorum and client port of the HBase cluster.
    hbaseConf.set("hbase.zookeeper.quorum","10.213.111.XXX,10.213.111.XXX,10.213.111.XXX")
    hbaseConf.set("hbase.zookeeper.property.clientPort","2181")
    // Table consumed by TableInputFormat.
    hbaseConf.set(TableInputFormat.INPUT_TABLE,tableName)
    hbaseConf.set("hadoop.security.authentication", "Kerberos")
    // Kerberos login from keytab; checkHBaseAvailable fails fast if the cluster is unreachable.
    UserGroupInformation.setConfiguration(hbaseConf)
    try {
      UserGroupInformation.loginUserFromKeytab("YJ00004", "/home/sgbigdata/keytab/YJ00004.keytab")
      HBaseAdmin.checkHBaseAvailable(hbaseConf)
    } catch {
      case e: IOException =>
        e.printStackTrace()
      case e: ServiceException =>
        e.printStackTrace()
    }
    // Scan the table as an RDD[(ImmutableBytesWritable, Result)].
    // FIX: the original called `sc.newAPIHadoopRDD`, but `sc` was never defined.
    val hbaseRdd = sparkContext.newAPIHadoopRDD(hbaseConf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    // Extract the two DATA-family columns as doubles and build a DataFrame.
    // FIX: the original mapped over `hbaseRDD`, which does not match the
    // declared name `hbaseRdd` (Scala identifiers are case-sensitive).
    val dataDF = hbaseRdd.map(result => (
      Bytes.toDouble(result._2.getValue(Bytes.toBytes("DATA"),Bytes.toBytes("meterID-1"))),
      Bytes.toDouble(result._2.getValue(Bytes.toBytes("DATA"),Bytes.toBytes("meterID-2")))
    )).toDF("meterID-1","meterID-2")
    // Register a temp view and dump the contents.
    dataDF.createTempView("DLZZ")
    sparkSession.sql("select * from DLZZ").show()
    sparkSession.stop()
  }
}
如出现下列 bug,往往是因为 System.setProperty("java.security.krb5.conf", "/home/sgbigdata/keytab/krb5.conf") 中指定的 krb5.conf 文件没有找到(比如路径写错),或是该文件里配置的 kdc、admin_server 地址错误。
Exception in thread “main” java.lang.IllegalArgumentException: Can’t get Kerberos realm
at org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:65)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:319)
at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:374)
at drguo.test.SparkExecHBase$.main(SparkExecHBase.scala:32)
at drguo.test.SparkExecHBase.main(SparkExecHBase.scala)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.security.authentication.util.KerberosUtil.getDefaultRealm(KerberosUtil.java:84)
at org.apache.hadoop.security.HadoopKerberosName.setConfiguration(HadoopKerberosName.java:63)
… 4 more
Caused by: KrbException: Cannot locate default realm
at sun.security.krb5.Config.getDefaultRealm(Config.java:1029)
… 10 more