Spark Action Operator Examples in Scala (take, saveAsTextFile, countByKey)

The example below demonstrates three Spark actions: take, saveAsTextFile, and countByKey.

package day06

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

import scala.collection.Map

object MyAction_Scala_2 {

  // take(n): an action that returns the first n elements of the RDD to the driver as an Array
  def myTake(sc: SparkContext): Unit = {
    val RDD1 = sc.parallelize(List("aa", "bb", "cc"))
    val array: Array[String] = RDD1.take(2)
    println(array.toList.toString) // List(aa, bb)
  }

  // saveAsTextFile: an action that writes each element of the RDD as a line of text under the given path
  def mySaveASTextFile(sc: SparkContext): String = {
    val parallelizeRDD: RDD[String] = sc.parallelize(List("hello world", "hello scala"))
    val flatMapRDD: RDD[String] = parallelizeRDD.flatMap(_.split(" "))
    // flatMapRDD.saveAsTextFile("D://3.txt")
    flatMapRDD.saveAsTextFile("hdfs://hadoop-1707-001:9000/save/test000")
    "saved successfully"
  }

  // countByKey: an action that counts how many times each key occurs in a pair RDD
  def myCountByKey(sc: SparkContext): Unit = {
    val array: Array[(String, String)] =
      Array(("class2", "liao"), ("class2", "ao"), ("class2", "li"), ("class1", "lao"))
    val parallelizeRDD: RDD[(String, String)] = sc.parallelize(array)
    val countByKeyRDD: Map[String, Long] = parallelizeRDD.countByKey()
    for ((k, v) <- countByKeyRDD) {
      println("key=" + k + ", value=" + v)
    }
  }

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local").setAppName("MyAction_scala_2")
    val sc: SparkContext = new SparkContext(conf)
    myCountByKey(sc)
    // myTake(sc)
    // mySaveASTextFile(sc)
    sc.stop()
  }
}
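For reference, running main as written calls myCountByKey, which should print output along these lines (Map iteration order is not guaranteed, so the two lines may appear in either order):

key=class2, value=3
key=class1, value=1

class2 appears three times in the input array and class1 once. Similarly, myTake returns the first two elements, Array("aa", "bb"), to the driver.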
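One way to sanity-check mySaveASTextFile is to read the output directory back with sc.textFile and count the lines. This is a minimal sketch, assuming the HDFS path from the example is reachable and that mySaveASTextFile(sc) has already run:

// Sketch only: the HDFS path below reuses the one from the example and is assumed reachable
val saved: RDD[String] = sc.textFile("hdfs://hadoop-1707-001:9000/save/test000")
// flatMap split "hello world" and "hello scala" into 4 words, written one per line
println(saved.count()) // expected: 4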
Reposted from blog.csdn.net/wjn19921104/article/details/80230283