Generating an HFile with Scala
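The class below is a custom Spark Partitioner for bulk-writing HFiles into HBase. Given the table's region start keys (splits), it routes each row key to the region whose key range contains it, and can additionally fan one region's data out over several sub-partitions (numFilesPerRegion) so that a single region is written as multiple HFiles in parallel.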

package cn.jkjf.bigdata.utils.hbase

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner


class HFilePartitioner(conf: Configuration, splits: Array[Array[Byte]], numFilesPerRegion: Int) extends Partitioner {

  // Each region's data is fanned out over "fraction" sub-partitions; clamp the
  // requested files-per-region to the range [1, 128].
  val fraction: Int = 1 max numFilesPerRegion min 128

  // Route a key to the partition of the region that contains it, plus a
  // stable hash offset selecting one of that region's sub-partitions.
  override def getPartition(key: Any): Int = {
    // Convert the key to its HBase byte representation for comparison
    // against the region split keys.
    def bytes(n: Any): Array[Byte] = n match {
      case b: Array[Byte] => b
      case s: String      => Bytes.toBytes(s)
      case l: Long        => Bytes.toBytes(l)
      case i: Int         => Bytes.toBytes(i)
      case other          =>
        throw new IllegalArgumentException(s"Unsupported key type: ${other.getClass.getName}")
    }

    // Stable sub-partition index in [0, fraction).
    val h = (key.hashCode() & Int.MaxValue) % fraction

    // splits(i) is the start key of region i; the first start key greater
    // than our key marks the end of the containing region.
    for (i <- 1 until splits.length)
      if (Bytes.compareTo(bytes(key), splits(i)) < 0) return (i - 1) * fraction + h

    // The key falls into the last region.
    (splits.length - 1) * fraction + h
  }

  // One Spark partition per (region, sub-partition) pair.
  override def numPartitions: Int = splits.length * fraction

}
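For context, here is a minimal sketch of how this partitioner is typically wired into a Spark bulk-load job. The table name demo_table, column family cf, qualifier v, output directory /tmp/hfile-out, and the object name HFilePartitionerDemo are all hypothetical; depending on your HBase version you may also want HFileOutputFormat2.configureIncrementalLoad to set compression and table metadata, and the generated HFiles still have to be loaded afterwards (for example with LoadIncrementalHFiles / completebulkload).

import org.apache.hadoop.hbase.{HBaseConfiguration, KeyValue, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object HFilePartitionerDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("hfile-demo"))
    val hbaseConf = HBaseConfiguration.create()

    // Hypothetical table; splits(0) is the empty start key of the first region.
    val tableName = TableName.valueOf("demo_table")
    val connection = ConnectionFactory.createConnection(hbaseConf)
    val splits = connection.getRegionLocator(tableName).getStartKeys

    val partitioner = new HFilePartitioner(hbaseConf, splits, numFilesPerRegion = 1)

    // Sample (rowKey, value) pairs; ASCII row keys, so String ordering matches
    // the HBase byte ordering that HFileOutputFormat2 requires per partition.
    val rows = sc.parallelize(Seq("row001" -> "alice", "row002" -> "bob"))

    val hfileRdd = rows
      .repartitionAndSortWithinPartitions(partitioner)
      .map { case (rowKey, value) =>
        val row = Bytes.toBytes(rowKey)
        val kv = new KeyValue(row, Bytes.toBytes("cf"), Bytes.toBytes("v"), Bytes.toBytes(value))
        (new ImmutableBytesWritable(row), kv)
      }

    hfileRdd.saveAsNewAPIHadoopFile(
      "/tmp/hfile-out",                  // hypothetical HDFS output directory
      classOf[ImmutableBytesWritable],
      classOf[KeyValue],
      classOf[HFileOutputFormat2],
      hbaseConf)

    connection.close()
    sc.stop()
  }
}

Note that repartitionAndSortWithinPartitions is used instead of a plain repartition: HFileOutputFormat2 rejects out-of-order keys, so each partition must be sorted before the record writer sees it.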

Reposted from blog.csdn.net/qq_34635236/article/details/106259933