1、新建工程,引入一下spark2.4.0的lib依赖
2、测试代码:
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
public class SparkWordCount {
private static final Pattern SPACE = Pattern.compile(" ");
public static void main(String[] args) throws Exception {
SparkConf conf = new SparkConf().setMaster("local");
SparkSession spark = SparkSession.builder().appName("JavaWordCount").config(conf).getOrCreate();
JavaRDD<String> lines = spark.read().textFile("f:/wordCount.txt").javaRDD();
JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));
JavaPairRDD<String, Integer> counts = ones.reduceByKey((i1, i2) -> i1 + i2);
List<Tuple2<String, Integer>> output = counts.collect();
for (Tuple2<?, ?> tuple : output) {
System.out.println(tuple._1() + ": " + tuple._2());
}
Thread.sleep(500000);// 可以访问http://localhost:4040 查看UI界面
spark.stop();
}
}