贝叶斯分类器是各种分类器中分类错误概率最小、或者在预先给定代价的情况下平均风险最小的分类器。下面是 Spark 官网 (http://spark.apache.org/docs/latest/mllib-naive-bayes.html) 给出的例子:
package alg
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
/**
 * Naive Bayes example on the MLlib sample LIBSVM data set: trains a multinomial
 * model, prints its accuracy on a held-out split, then saves and reloads the model.
 */
object naiveBayes {

  /**
   * Runs the example end to end against a local Spark master.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)
    // Always stop the context, even when loading, training or saving fails,
    // so the local Spark runtime is not left running.
    try {
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
      // 60% of the rows go to training, 40% to testing; no seed is passed here.
      val Array(training, test) = data.randomSplit(Array(0.6, 0.4))
      val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
      // Pair every prediction with the true label of the same test point.
      val predictAndLabel = test.map(p => (model.predict(p.features), p.label))
      // Fraction of test points whose predicted label equals the true label.
      val accuracy = 1.0 * predictAndLabel.filter(x => x._1 == x._2).count() / test.count()
      println("accuracy:" + accuracy)
      // Save and load model.
      // NOTE(review): save presumably fails if the target directory already
      // exists from a previous run -- confirm and clean up beforehand if needed.
      model.save(sc, "target/tmp/myNaiveBayesModel")
      val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    } finally {
      sc.stop()
    }
  }
}