Spark Java Operators

package scala.spark.Day3;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
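// For the pair/double RDD sketch below:
import org.apache.spark.api.java.JavaDoubleRDD;
import org.apache.spark.api.java.JavaPairRDD;
import scala.Tuple2;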

import java.util.Arrays;
import java.util.List;

/**
 * Created by Administrator on 2019/10/16.
 */
public class JavaRDDTest {
    public static void main(String[] args) {
        System.setProperty("hadoop.home.dir", "E:\\hadoop-2.6.0-cdh5.15.0\\hadoop-2.6.0-cdh5.15.0");

        // JavaRDD       -> standard RDD
        // JavaPairRDD   -> pair RDD
        // JavaDoubleRDD -> double (numeric) RDD
        // Java has no Scala-style implicit conversions, so when creating an RDD
        // you must state explicitly which kind of RDD it is.

        // Instantiate the driver configuration
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local");
        sparkConf.setAppName("Java RDD");

        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);

        // javaSparkContext.parallelize()        -> standard RDD
        // javaSparkContext.parallelizePairs()   -> pair RDD
        // javaSparkContext.parallelizeDoubles() -> numeric RDD
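
        // A minimal sketch of the other two factory methods (an illustrative
        // addition, not part of the original post). parallelizePairs takes a
        // List of scala.Tuple2 and yields a JavaPairRDD; parallelizeDoubles
        // takes a List<Double> and yields a JavaDoubleRDD with numeric
        // helpers such as sum() and mean().
        JavaPairRDD<String, Integer> pairRDD = javaSparkContext.parallelizePairs(
                Arrays.asList(new Tuple2<String, Integer>("a", 1),
                              new Tuple2<String, Integer>("b", 2)));
        JavaDoubleRDD doubleRDD = javaSparkContext.parallelizeDoubles(
                Arrays.asList(1.0, 2.0, 3.0));
        System.out.println(doubleRDD.sum()); // prints 6.0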

        List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);

        JavaRDD<Integer> sourceRDD = javaSparkContext.parallelize(list);

        // map operator, implemented with an anonymous subclass
        /*
        public interface Function<T1, R> extends Serializable {
            R call(T1 v1) throws Exception;
        }

        T1 - the type of the RDD's elements (the argument v1)
        R  - the type returned by call()
        */

        JavaRDD<Integer> mapRDD = sourceRDD.map(
                new Function<Integer, Integer>() {
                    @Override
                    public Integer call(Integer v1) throws Exception {
                        return v1 * v1;
                    }
                }
        );
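
        // The same transformation as a Java 8 lambda (an illustrative
        // addition): Function has a single abstract method, so a lambda can
        // replace the anonymous class.
        JavaRDD<Integer> lambdaMapRDD = sourceRDD.map(v1 -> v1 * v1);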

        /*
        public interface VoidFunction<T> extends Serializable {
            void call(T t) throws Exception;
        }
        */
        mapRDD.foreach(
                new VoidFunction<Integer>() {
                    @Override
                    public void call(Integer integer) throws Exception {
                        System.out.println(integer);
                    }
                }
        );
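
        // Note (an addition to the original): foreach is an action that runs
        // on the executors, so on a real cluster the println above appears in
        // the executor logs rather than the driver console. collect() brings
        // the results back to the driver:
        List<Integer> collected = mapRDD.collect();
        System.out.println(collected); // [1, 4, 9, 16, 25]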


        javaSparkContext.close();
    }
}

Reposted from www.cnblogs.com/alpha-cat/p/11684915.html