List、JavaRDD 和 JavaPairRDD 的相互转换和打印输出
/**
 * Demonstrates turning a {@code List<String>} into a {@code JavaRDD<String>}, deriving a
 * {@code JavaRDD<Row>} and a {@code JavaPairRDD<String, Integer>} from it, and printing the
 * elements of each RDD.
 *
 * <p>Every RDD is printed with {@code foreach}. An alternative is to call {@code collect()} and
 * iterate over the returned list with a plain {@code for} loop, which prints from the driver in
 * list order. With master {@code local[2]} the {@code foreach} calls may process partitions on
 * more than one thread, so lines belonging to different partitions may interleave.
 */
public class ReadTextToRDD {

    /**
     * Runs the demo against a local Spark master.
     *
     * <p>Each input element has the form {@code "<key>-<number>"}. An element without a
     * {@code '-'} makes the mapping functions fail with {@link ArrayIndexOutOfBoundsException},
     * and a non-numeric second part makes the pair conversion fail with
     * {@link NumberFormatException}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // NOTE(review): spark.executor.memory is presumably irrelevant with a local master,
        // where the work runs inside the driver JVM -- kept unchanged from the original.
        SparkConf sparkConf = new SparkConf().setAppName("RDD的打印")
                .setMaster("local[2]").set("spark.executor.memory", "2g");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        try {
            // Build the source List<String>.
            List<String> list = Arrays.asList("A-1", "A-2", "B-1", "B-1", "C-1", "E-1");

            // List<String> -> JavaRDD<String>
            JavaRDD<String> javaRDD = jsc.parallelize(list);

            // Print the JavaRDD<String> with foreach.
            javaRDD.foreach(new VoidFunction<String>() {
                @Override
                public void call(String str) throws Exception {
                    System.out.println(String.format("JavaRDD<String>打印:%s", str));
                }
            });

            // JavaRDD<String> -> JavaRDD<Row>: the two parts around '-' become the two columns.
            JavaRDD<Row> javaRddRow = javaRDD.map(new Function<String, Row>() {
                @Override
                public Row call(String s) throws Exception {
                    String[] sp = s.split("-");
                    return RowFactory.create(sp[0], sp[1]);
                }
            });

            // Print the JavaRDD<Row> with foreach.
            javaRddRow.foreach(new VoidFunction<Row>() {
                @Override
                public void call(Row row) throws Exception {
                    System.out.println(String.format("JavaRDD<Row>打印:%s", row.toString()));
                }
            });

            // JavaRDD<String> -> JavaPairRDD<String, Integer>: the part before '-' is the key,
            // the part after it is parsed as the integer value.
            JavaPairRDD<String, Integer> javaPairRDD = javaRDD.mapToPair(
                    new PairFunction<String, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String s) throws Exception {
                            String[] sp = s.split("-");
                            return new Tuple2<String, Integer>(sp[0], Integer.parseInt(sp[1]));
                        }
                    });

            // Print the JavaPairRDD with foreach.
            javaPairRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
                @Override
                public void call(Tuple2<String, Integer> tuple) throws Exception {
                    System.out.println(String.format("JavaPairRDD打印:%s", tuple.toString()));
                }
            });
        } finally {
            // Always shut the context down, even if one of the jobs above throws; the
            // original never stopped it.
            jsc.stop();
        }
    }
}
打印结果(注意:在 local[2] 下,foreach 会并行处理多个分区,不同分区的输出可能交错,实际打印顺序不一定与下面完全一致;如需按原顺序输出,可先 collect() 再遍历打印):
JavaRDD<String>打印:A-1
JavaRDD<String>打印:A-2
JavaRDD<String>打印:B-1
JavaRDD<String>打印:B-1
JavaRDD<String>打印:C-1
JavaRDD<String>打印:E-1
JavaRDD<Row>打印:[A,1]
JavaRDD<Row>打印:[A,2]
JavaRDD<Row>打印:[B,1]
JavaRDD<Row>打印:[B,1]
JavaRDD<Row>打印:[C,1]
JavaRDD<Row>打印:[E,1]
JavaPairRDD打印:(A,1)
JavaPairRDD打印:(A,2)
JavaPairRDD打印:(B,1)
JavaPairRDD打印:(B,1)
JavaPairRDD打印:(C,1)
JavaPairRDD打印:(E,1)