反射构建DataFrame样例类参数大于22

这个错误出现在case class参数超出22个的时候。

编译器报错信息如下:

error: Implementation restriction: case classes cannot have more than 22 parameters.
在scala 2.11.x版本以下时case class 的参数最多为22个,如果超过这个参数又不能升级时(比如目前spark依赖于2.10.x)要怎么办?

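解决方法:不使用 case class,而是自定义一个普通类,继承 Product 特质(trait)并混入 Serializable,手动实现 productElement、productArity 和 canEqual 三个方法。
这里以参数个数超过22个的情况为例(示例中传入了27个字段,下标为 0~26),其余情况依此类推。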

/**
 * Spark job that reads a comma-separated text file, turns every row that has
 * at least 27 fields into an `ss` record and writes the result as a single
 * Snappy-compressed Parquet file.
 *
 * Usage: `Bz2Parquet2 <inputPath> <outputPath>`
 *
 * NOTE(review): `SparkConf`, `SparkContext`, `SQLContext` and the `NBF`
 * parsing helper are not defined in this snippet; they are assumed to be
 * imported / provided by the enclosing project.
 */
object Bz2Parquet2 {

  /**
   * Entry point.
   *
   * @param args exactly two elements: the input path followed by the output path
   */
  def main(args: Array[String]): Unit = {
    // Exactly two arguments are required; anything else is a usage error, so
    // report it and terminate with a non-zero status.
    if (args.length != 2) {
      println("目录不正确,退出程序")
      sys.exit(1)
    }
    // Bind the input and output locations from the command line.
    val Array(inputPath, outputPath) = args

    val conf = new SparkConf()
      .setAppName(s"${this.getClass.getName}")
      .setMaster("local[*]")
      // Use Kryo instead of the default Java serialization.
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(conf)

    try {
      val sQLContext = new SQLContext(sc)
      // Snappy is not yet the default Parquet codec in Spark 1.6 (it became
      // the default in 2.0), so request it explicitly.
      sQLContext.setConf("spark.sql.parquet.compression.codec", "snappy")

      val lines = sc.textFile(inputPath)

      // Split each line on commas. A positive limit makes String.split keep
      // trailing empty fields instead of dropping them. Rows with fewer than
      // 27 fields are discarded because the record below reads arr(0)..arr(26).
      val rowRDD = lines
        .map(t => t.split(",", t.length))
        .filter(_.length >= 27)
        .map { arr =>
          new ss(
            arr(0),
            NBF.toInt(arr(1)),
            NBF.toInt(arr(2)),
            NBF.toInt(arr(3)),
            NBF.toInt(arr(4)),
            arr(5),
            arr(6),
            NBF.toInt(arr(7)),
            NBF.toInt(arr(8)),
            NBF.toDouble(arr(9)),
            NBF.toDouble(arr(10)),
            arr(11),
            arr(12),
            arr(13),
            arr(14),
            arr(15),
            arr(16),
            NBF.toInt(arr(17)),
            arr(18),
            arr(19),
            NBF.toInt(arr(20)),
            NBF.toInt(arr(21)),
            arr(22),
            arr(23),
            arr(24),
            arr(25),
            NBF.toInt(arr(26))
          )
        }

      // Brings the RDD-to-DataFrame conversion (`toDF`) into scope.
      import sQLContext.implicits._
      val df = rowRDD.toDF()

      // Collapse to one partition so a single Parquet file is written.
      df.coalesce(1).write.parquet(outputPath)
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}

/**
 * Hand-written record type that stands in for a case class with more than 22
 * fields (case classes are limited to 22 parameters before Scala 2.11).
 *
 * It extends the `Product` trait and mixes in `Serializable`, implementing
 * `productElement`, `productArity` and `canEqual` by hand. The product view
 * exposes every constructor parameter, in declaration order, so the arity
 * always matches the constructor.
 *
 * The first 27 parameters (`sessionid` .. `ispid`) are required. The remaining
 * 13 are optional and default to an empty string / zero, so the class can be
 * constructed from either 27 or all 40 values.
 */
class ss(
  sessionid: String,
  advertisersid: Int,
  adorderid: Int,
  adcreativeid: Int,
  adplatformproviderid: Int,
  sdkversion: String,
  adplatformkey: String,
  putinmodeltype: Int,
  requestmode: Int,
  adprice: Double,
  adppprice: Double,
  requestdate: String,
  ip: String,
  appid: String,
  appname: String,
  uuid: String,
  device: String,
  client: Int,
  osversion: String,
  density: String,
  pw: Int,
  ph: Int,
  long: String,
  lat: String,
  provincename: String,
  cityname: String,
  ispid: Int,
  // Optional trailing fields: defaults let callers supply only the first 27.
  ispname: String = "",
  networkmannerid: Int = 0,
  networkmannername: String = "",
  iseffective: Int = 0,
  isbilling: Int = 0,
  adspacetype: Int = 0,
  adspacetypename: String = "",
  devicetype: Int = 0,
  processnode: Int = 0,
  apptype: Int = 0,
  district: String = "",
  paymode: Int = 0,
  isbid: Int = 0
) extends Product with Serializable {

  /**
   * Returns the n-th field (zero-based, in constructor order).
   *
   * @param n index in the range 0 until productArity
   * @throws IndexOutOfBoundsException if `n` is outside that range
   */
  def productElement(n: Int): Any = n match {
    case 0  => sessionid
    case 1  => advertisersid
    case 2  => adorderid
    case 3  => adcreativeid
    case 4  => adplatformproviderid
    case 5  => sdkversion
    case 6  => adplatformkey
    case 7  => putinmodeltype
    case 8  => requestmode
    case 9  => adprice
    case 10 => adppprice
    case 11 => requestdate
    case 12 => ip
    case 13 => appid
    case 14 => appname
    case 15 => uuid
    case 16 => device
    case 17 => client
    case 18 => osversion
    case 19 => density
    case 20 => pw
    case 21 => ph
    case 22 => long
    case 23 => lat
    case 24 => provincename
    case 25 => cityname
    case 26 => ispid
    case 27 => ispname
    case 28 => networkmannerid
    case 29 => networkmannername
    case 30 => iseffective
    case 31 => isbilling
    case 32 => adspacetype
    case 33 => adspacetypename
    case 34 => devicetype
    case 35 => processnode
    case 36 => apptype
    case 37 => district
    case 38 => paymode
    case 39 => isbid
    // The Product contract specifies IndexOutOfBoundsException, not MatchError.
    case _  => throw new IndexOutOfBoundsException(n.toString)
  }

  /** True when `that` is also an `ss` instance. */
  def canEqual(that: Any): Boolean = that.isInstanceOf[ss]

  /** Number of fields; equals the number of constructor parameters. */
  def productArity: Int = 40
}

猜你喜欢

转载自blog.csdn.net/weixin_38842096/article/details/84260158