package cn.jkjf.bigdata.utils.hbase
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner
class HFilePartitioner(conf: Configuration, splits: Array[Array[Byte]], numFilesPerRegion: Int) extends Partitioner {
/* override def numPartitions: Int = startkeyArr.length
override def getPartition(key: Any): Int = {
val domain = key.asInstanceOf[String]
for (i <- 0 until startkeyArr.length) {
if (domain.toString().compare(startkeyArr(i)) < 0) {
return i - 1
}
}
//default return 1
return startkeyArr.length - 1
}
override def equals(other: Any): Boolean = other match {
case h: HFilePartitioner =>
h.numPartitions == numPartitions
case _ =>
false
}*/
val fraction = 1 max numFilesPerRegion min 128
override def getPartition(key: Any): Int = {
def bytes(n: Any) = n match {
case s: String => Bytes.toBytes(s)
case s: Long => Bytes.toBytes(s)
case s:Int=>Bytes.toBytes(s)
}
val h = (key.hashCode() & Int.MaxValue) % fraction
for (i <- 1 until splits.length)
if (Bytes.compareTo(bytes(key), splits(i)) < 0) return (i - 1) * fraction + h
(splits.length - 1) * fraction + h
}
override def numPartitions: Int = splits.length * fraction
}
Scala 生成一个Hfile
猜你喜欢
转载自blog.csdn.net/qq_34635236/article/details/106259933
今日推荐
周排行