import java.util.ArrayList; import java.util.List; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFlatMapFunction; import scala.Tuple2; public class SparkMain { @SuppressWarnings("serial") public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("Spark"); /*独立模式 conf.setMaster("spark://master56:7077"); conf.set("spark.cores.max", "48"); */ /*yarn-client模式*/ conf.setMaster("yarn-client"); //设置程序包 conf.setJars(new String[]{"/home/hadoop/Spark-0.0.1-SNAPSHOT/lib/Spark-0.0.1-SNAPSHOT.jar"}); //设置SparkHOME conf.setSparkHome("/home/hadoop/spark-1.2.0-cdh5.3.2"); //设置运行资源参数 conf.set("spark.executor.instances", "30"); conf.set("spark.executor.cores", "3"); conf.set("spark.executor.memory", "5G"); conf.set("spark.driver.memory", "3G"); conf.set("spark.driver.maxResultSize", "10G"); JavaSparkContext context = new JavaSparkContext(conf); //设置运行资源参数 JavaRDD<String> rdd = context.textFile("hdfs://nujhadoop/spark.txt"); List<Tuple2<String, Integer>> result = rdd.flatMapToPair(new PairFlatMapFunction<String, String, Integer>(){ @Override public Iterable<Tuple2<String, Integer>> call(String arg0) throws Exception { ArrayList<Tuple2<String, Integer>> list = new ArrayList<Tuple2<String, Integer>>(); String[] array = arg0.split(" "); for (String temper : array) { list.add(new Tuple2<String, Integer>(temper, 1)); } return list; } }).reduceByKey(new Function2<Integer, Integer, Integer>(){ @Override public Integer call(Integer arg0, Integer arg1) throws Exception { // TODO Auto-generated method stub return arg0 + arg1; } }).collect(); //打印结果 for (Tuple2<String, Integer> temper : result) { System.out.println(temper._1+","+temper._2); } context.stop(); } }说明: 一:上传输入文件到hadoop,本例上传的文件名为spark.txt 二 :打包程序,打包名为: Spark-0.0.1-SNAPSHOT.jar 三 :上传文件到Spark集群进行部署,如:
四:启动程序 sh ./run.sh
日志结果:
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties 15/07/15 16:45:10 INFO SecurityManager: Changing view acls to: hadoop 15/07/15 16:45:10 INFO SecurityManager: Changing modify acls to: hadoop 15/07/15 16:45:10 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); users with modify permissions: Set(hadoop) 15/07/15 16:45:11 INFO Slf4jLogger: Slf4jLogger started 15/07/15 16:45:11 INFO Remoting: Starting remoting 15/07/15 16:45:11 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@slave63:22597] 15/07/15 16:45:11 INFO Remoting: Remoting now listens on addresses: [akka.tcp://sparkDriver@slave63:22597] 15/07/15 16:45:11 INFO Utils: Successfully started service 'sparkDriver' on port 22597. 15/07/15 16:45:11 INFO SparkEnv: Registering MapOutputTracker 15/07/15 16:45:11 INFO SparkEnv: Registering BlockManagerMaster 15/07/15 16:45:11 INFO DiskBlockManager: Created local directory at /tmp/spark-local-20150715164511-17b9 15/07/15 16:45:11 INFO MemoryStore: MemoryStore started with capacity 1635.9 MB 15/07/15 16:45:12 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 15/07/15 16:45:12 INFO HttpFileServer: HTTP File server directory is /tmp/spark-bd6a9445-0d51-4d1b-9fc5-b4dcbcdd4cd0 15/07/15 16:45:12 INFO HttpServer: Starting HTTP Server 15/07/15 16:45:12 INFO Utils: Successfully started service 'HTTP file server' on port 54673. 15/07/15 16:45:12 INFO Utils: Successfully started service 'SparkUI' on port 4040. 15/07/15 16:45:12 INFO SparkUI: Started SparkUI at http://slave63:4040 15/07/15 16:45:13 INFO SparkContext: Added JAR /home/hadoop/Spark-0.0.1-SNAPSHOT/lib/Spark-0.0.1-SNAPSHOT.jar at http://172.20.10.63:54673/jars/Spark-0.0.1-SNAPSHOT.jar with timestamp 1436949913052 15/07/15 16:45:13 INFO RMProxy: Connecting to ResourceManager at master46/172.20.10.46:8032 15/07/15 16:45:13 INFO Client: Requesting a new application from cluster with 30 NodeManagers 15/07/15 16:45:13 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (8192 MB per container) 15/07/15 16:45:13 INFO Client: Will allocate AM container, with 3456 MB memory including 384 MB overhead 15/07/15 16:45:13 INFO Client: Setting up container launch context for our AM 15/07/15 16:45:13 INFO Client: Preparing resources for our AM container 15/07/15 16:45:14 INFO Client: Uploading resource file:/home/hadoop/Spark-0.0.1-SNAPSHOT/lib/spark-assembly-1.2.0-cdh5.3.2.jar -> hdfs://nujhadoop/user/hadoop/.sparkStaging/application_1434338096593_8055/spark-assembly-1.2.0-cdh5.3.2.jar 15/07/15 16:45:15 INFO Client: Setting up the launch environment for our AM container 15/07/15 16:45:16 INFO SecurityManager: Changing view acls to: hadoop 15/07/15 16:45:16 INFO SecurityManager: Changing modify acls to: hadoop 15/07/15 16:45:16 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); users with modify permissions: Set(hadoop) 15/07/15 16:45:16 INFO Client: Submitting application 8055 to ResourceManager 15/07/15 16:45:16 INFO YarnClientImpl: Submitted application application_1434338096593_8055 15/07/15 16:45:17 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:17 INFO Client: client token: N/A diagnostics: N/A ApplicationMaster host: N/A ApplicationMaster RPC port: -1 queue: root.hadoop start time: 1436949916087 final status: UNDEFINED tracking URL: http://master46:8088/proxy/application_1434338096593_8055/ user: hadoop 15/07/15 16:45:18 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:19 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:20 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:21 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:22 INFO Client: Application report for application_1434338096593_8055 (state: ACCEPTED) 15/07/15 16:45:22 INFO YarnClientSchedulerBackend: ApplicationMaster registered as Actor[akka.tcp://sparkYarnAM@slave28:55325/user/YarnAM#945036977] 15/07/15 16:45:22 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, Map(PROXY_HOSTS -> master46, PROXY_URI_BASES -> http://master46:8088/proxy/application_1434338096593_8055), /proxy/application_1434338096593_8055 15/07/15 16:45:22 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter 15/07/15 16:45:23 INFO Client: Application report for application_1434338096593_8055 (state: RUNNING) 15/07/15 16:45:23 INFO Client: client token: N/A diagnostics: N/A ApplicationMaster host: slave28 ApplicationMaster RPC port: 0 queue: root.hadoop start time: 1436949916087 final status: UNDEFINED tracking URL: http://master46:8088/proxy/application_1434338096593_8055/ user: hadoop 15/07/15 16:45:23 INFO YarnClientSchedulerBackend: Application application_1434338096593_8055 has started running. 15/07/15 16:45:23 INFO NettyBlockTransferService: Server created on 50871 15/07/15 16:45:23 INFO BlockManagerMaster: Trying to register BlockManager 15/07/15 16:45:23 INFO BlockManagerMasterActor: Registering block manager slave63:50871 with 1635.9 MB RAM, BlockManagerId(<driver>, slave63, 50871) 15/07/15 16:45:23 INFO BlockManagerMaster: Registered BlockManager 15/07/15 16:45:28 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave52:23892/user/Executor#469935313] with ID 1 15/07/15 16:45:28 INFO RackResolver: Resolved slave52 to /rack2 15/07/15 16:45:29 INFO BlockManagerMasterActor: Registering block manager slave52:36246 with 2.6 GB RAM, BlockManagerId(1, slave52, 36246) 15/07/15 16:45:33 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave63:19749/user/Executor#-1474529488] with ID 4 15/07/15 16:45:33 INFO RackResolver: Resolved slave63 to /rack2 15/07/15 16:45:34 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave28:51624/user/Executor#1260742083] with ID 28 15/07/15 16:45:34 INFO RackResolver: Resolved slave28 to /rack3 15/07/15 16:45:34 INFO BlockManagerMasterActor: Registering block manager slave63:64068 with 2.6 GB RAM, BlockManagerId(4, slave63, 64068) 15/07/15 16:45:35 INFO BlockManagerMasterActor: Registering block manager slave28:17967 with 2.6 GB RAM, BlockManagerId(28, slave28, 17967) 15/07/15 16:45:36 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave23:57756/user/Executor#-1426187042] with ID 16 15/07/15 16:45:36 INFO RackResolver: Resolved slave23 to /rack3 15/07/15 16:45:37 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave36:35348/user/Executor#-1773874771] with ID 3 15/07/15 16:45:37 INFO RackResolver: Resolved slave36 to /rack1 15/07/15 16:45:37 INFO BlockManagerMasterActor: Registering block manager slave23:62605 with 2.6 GB RAM, BlockManagerId(16, slave23, 62605) 15/07/15 16:45:38 INFO BlockManagerMasterActor: Registering block manager slave36:23663 with 2.6 GB RAM, BlockManagerId(3, slave36, 23663) 15/07/15 16:45:39 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave15:43551/user/Executor#-576231312] with ID 11 15/07/15 16:45:39 INFO RackResolver: Resolved slave15 to /rack3 15/07/15 16:45:40 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave37:40681/user/Executor#-1501756719] with ID 29 15/07/15 16:45:40 INFO RackResolver: Resolved slave37 to /rack1 15/07/15 16:45:40 INFO BlockManagerMasterActor: Registering block manager slave15:55745 with 2.6 GB RAM, BlockManagerId(11, slave15, 55745) 15/07/15 16:45:41 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave26:28665/user/Executor#1165917342] with ID 21 15/07/15 16:45:41 INFO RackResolver: Resolved slave26 to /rack3 15/07/15 16:45:41 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave54:37653/user/Executor#587407704] with ID 2 15/07/15 16:45:41 INFO RackResolver: Resolved slave54 to /rack2 15/07/15 16:45:41 INFO BlockManagerMasterActor: Registering block manager slave37:38747 with 2.6 GB RAM, BlockManagerId(29, slave37, 38747) 15/07/15 16:45:42 INFO BlockManagerMasterActor: Registering block manager slave26:46197 with 2.6 GB RAM, BlockManagerId(21, slave26, 46197) 15/07/15 16:45:42 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave17:64410/user/Executor#-1365579611] with ID 19 15/07/15 16:45:42 INFO RackResolver: Resolved slave17 to /rack3 15/07/15 16:45:42 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave35:15510/user/Executor#972094812] with ID 15 15/07/15 16:45:42 INFO RackResolver: Resolved slave35 to /rack1 15/07/15 16:45:42 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave55:36974/user/Executor#-597250789] with ID 26 15/07/15 16:45:42 INFO RackResolver: Resolved slave55 to /rack2 15/07/15 16:45:42 INFO BlockManagerMasterActor: Registering block manager slave54:18807 with 2.6 GB RAM, BlockManagerId(2, slave54, 18807) 15/07/15 16:45:43 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after waiting maxRegisteredResourcesWaitingTime: 30000(ms) 15/07/15 16:45:43 INFO BlockManagerMasterActor: Registering block manager slave17:58808 with 2.6 GB RAM, BlockManagerId(19, slave17, 58808) 15/07/15 16:45:43 INFO BlockManagerMasterActor: Registering block manager slave35:29737 with 2.6 GB RAM, BlockManagerId(15, slave35, 29737) 15/07/15 16:45:43 INFO MemoryStore: ensureFreeSpace(261904) called with curMem=0, maxMem=1715396935 15/07/15 16:45:43 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 255.8 KB, free 1635.7 MB) 15/07/15 16:45:43 INFO BlockManagerMasterActor: Registering block manager slave55:29257 with 2.6 GB RAM, BlockManagerId(26, slave55, 29257) 15/07/15 16:45:43 INFO MemoryStore: ensureFreeSpace(21065) called with curMem=261904, maxMem=1715396935 15/07/15 16:45:43 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 20.6 KB, free 1635.7 MB) 15/07/15 16:45:43 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on slave63:50871 (size: 20.6 KB, free: 1635.9 MB) 15/07/15 16:45:43 INFO BlockManagerMaster: Updated info of block broadcast_0_piece0 15/07/15 16:45:43 INFO SparkContext: Created broadcast 0 from textFile at SparkMain.java:31 15/07/15 16:45:44 INFO FileInputFormat: Total input paths to process : 1 15/07/15 16:45:44 INFO SparkContext: Starting job: collect at SparkMain.java:53 15/07/15 16:45:44 INFO DAGScheduler: Registering RDD 2 (flatMapToPair at SparkMain.java:32) 15/07/15 16:45:44 INFO DAGScheduler: Got job 0 (collect at SparkMain.java:53) with 2 output partitions (allowLocal=false) 15/07/15 16:45:44 INFO DAGScheduler: Final stage: Stage 1(collect at SparkMain.java:53) 15/07/15 16:45:44 INFO DAGScheduler: Parents of final stage: List(Stage 0) 15/07/15 16:45:44 INFO DAGScheduler: Missing parents: List(Stage 0) 15/07/15 16:45:44 INFO DAGScheduler: Submitting Stage 0 (FlatMappedRDD[2] at flatMapToPair at SparkMain.java:32), which has no missing parents 15/07/15 16:45:44 INFO MemoryStore: ensureFreeSpace(3672) called with curMem=282969, maxMem=1715396935 15/07/15 16:45:44 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 3.6 KB, free 1635.7 MB) 15/07/15 16:45:44 INFO MemoryStore: ensureFreeSpace(2190) called with curMem=286641, maxMem=1715396935 15/07/15 16:45:44 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 2.1 KB, free 1635.7 MB) 15/07/15 16:45:44 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on slave63:50871 (size: 2.1 KB, free: 1635.9 MB) 15/07/15 16:45:44 INFO BlockManagerMaster: Updated info of block broadcast_1_piece0 15/07/15 16:45:44 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:838 15/07/15 16:45:44 INFO DAGScheduler: Submitting 2 missing tasks from Stage 0 (FlatMappedRDD[2] at flatMapToPair at SparkMain.java:32) 15/07/15 16:45:44 INFO YarnClientClusterScheduler: Adding task set 0.0 with 2 tasks 15/07/15 16:45:44 INFO RackResolver: Resolved slave38 to /rack1 15/07/15 16:45:44 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, slave63, NODE_LOCAL, 1340 bytes) 15/07/15 16:45:44 INFO TaskSetManager: Starting task 1.0 in stage 0.0 (TID 1, slave63, NODE_LOCAL, 1340 bytes) 15/07/15 16:45:45 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on slave63:64068 (size: 2.1 KB, free: 2.6 GB) 15/07/15 16:45:45 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave65:57998/user/Executor#-1382810865] with ID 12 15/07/15 16:45:45 INFO RackResolver: Resolved slave65 to /rack2 15/07/15 16:45:45 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on slave63:64068 (size: 20.6 KB, free: 2.6 GB) 15/07/15 16:45:46 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave53:59085/user/Executor#-1064055348] with ID 13 15/07/15 16:45:46 INFO RackResolver: Resolved slave53 to /rack2 15/07/15 16:45:46 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave62:24319/user/Executor#-139207262] with ID 14 15/07/15 16:45:46 INFO RackResolver: Resolved slave62 to /rack2 15/07/15 16:45:46 INFO BlockManagerMasterActor: Registering block manager slave65:64372 with 2.6 GB RAM, BlockManagerId(12, slave65, 64372) 15/07/15 16:45:47 INFO BlockManagerMasterActor: Registering block manager slave62:53823 with 2.6 GB RAM, BlockManagerId(14, slave62, 53823) 15/07/15 16:45:47 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave32:28461/user/Executor#-2071109973] with ID 20 15/07/15 16:45:47 INFO RackResolver: Resolved slave32 to /rack1 15/07/15 16:45:47 INFO BlockManagerMasterActor: Registering block manager slave53:60055 with 2.6 GB RAM, BlockManagerId(13, slave53, 60055) 15/07/15 16:45:47 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave14:35963/user/Executor#148583350] with ID 22 15/07/15 16:45:47 INFO RackResolver: Resolved slave14 to /rack3 15/07/15 16:45:48 INFO BlockManagerMasterActor: Registering block manager slave32:35445 with 2.6 GB RAM, BlockManagerId(20, slave32, 35445) 15/07/15 16:45:48 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave43:63661/user/Executor#1541284948] with ID 24 15/07/15 16:45:48 INFO RackResolver: Resolved slave43 to /rack1 15/07/15 16:45:48 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave33:46267/user/Executor#-1437439698] with ID 10 15/07/15 16:45:48 INFO RackResolver: Resolved slave33 to /rack1 15/07/15 16:45:48 INFO BlockManagerMasterActor: Registering block manager slave43:34953 with 2.6 GB RAM, BlockManagerId(24, slave43, 34953) 15/07/15 16:45:49 INFO BlockManagerMasterActor: Registering block manager slave14:53473 with 2.6 GB RAM, BlockManagerId(22, slave14, 53473) 15/07/15 16:45:49 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave42:26170/user/Executor#794862330] with ID 5 15/07/15 16:45:49 INFO RackResolver: Resolved slave42 to /rack1 15/07/15 16:45:49 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave44:35394/user/Executor#1035079905] with ID 18 15/07/15 16:45:49 INFO RackResolver: Resolved slave44 to /rack1 15/07/15 16:45:49 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave16:52328/user/Executor#1181615525] with ID 30 15/07/15 16:45:49 INFO RackResolver: Resolved slave16 to /rack3 15/07/15 16:45:49 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave13:48403/user/Executor#-1103053012] with ID 27 15/07/15 16:45:49 INFO RackResolver: Resolved slave13 to /rack3 15/07/15 16:45:49 INFO BlockManagerMasterActor: Registering block manager slave42:60923 with 2.6 GB RAM, BlockManagerId(5, slave42, 60923) 15/07/15 16:45:50 INFO BlockManagerMasterActor: Registering block manager slave44:30133 with 2.6 GB RAM, BlockManagerId(18, slave44, 30133) 15/07/15 16:45:50 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave45:63922/user/Executor#-917535710] with ID 6 15/07/15 16:45:50 INFO RackResolver: Resolved slave45 to /rack1 15/07/15 16:45:50 INFO BlockManagerMasterActor: Registering block manager slave16:21970 with 2.6 GB RAM, BlockManagerId(30, slave16, 21970) 15/07/15 16:45:50 INFO BlockManagerMasterActor: Registering block manager slave13:57504 with 2.6 GB RAM, BlockManagerId(27, slave13, 57504) 15/07/15 16:45:50 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave25:18514/user/Executor#-799832935] with ID 25 15/07/15 16:45:50 INFO RackResolver: Resolved slave25 to /rack3 15/07/15 16:45:51 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave27:64380/user/Executor#-520443684] with ID 9 15/07/15 16:45:51 INFO RackResolver: Resolved slave27 to /rack3 15/07/15 16:45:51 INFO BlockManagerMasterActor: Registering block manager slave25:16330 with 2.6 GB RAM, BlockManagerId(25, slave25, 16330) 15/07/15 16:45:51 INFO BlockManagerMasterActor: Registering block manager slave45:63841 with 2.6 GB RAM, BlockManagerId(6, slave45, 63841) 15/07/15 16:45:51 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave24:46357/user/Executor#1463308812] with ID 8 15/07/15 16:45:51 INFO RackResolver: Resolved slave24 to /rack3 15/07/15 16:45:51 INFO TaskSetManager: Finished task 1.0 in stage 0.0 (TID 1) in 7633 ms on slave63 (1/2) 15/07/15 16:45:51 INFO BlockManagerMasterActor: Registering block manager slave33:50916 with 2.6 GB RAM, BlockManagerId(10, slave33, 50916) 15/07/15 16:45:52 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 7804 ms on slave63 (2/2) 15/07/15 16:45:52 INFO DAGScheduler: Stage 0 (flatMapToPair at SparkMain.java:32) finished in 7.810 s 15/07/15 16:45:52 INFO YarnClientClusterScheduler: Removed TaskSet 0.0, whose tasks have all completed, from pool 15/07/15 16:45:52 INFO DAGScheduler: looking for newly runnable stages 15/07/15 16:45:52 INFO DAGScheduler: running: Set() 15/07/15 16:45:52 INFO DAGScheduler: waiting: Set(Stage 1) 15/07/15 16:45:52 INFO DAGScheduler: failed: Set() 15/07/15 16:45:52 INFO DAGScheduler: Missing parents for Stage 1: List() 15/07/15 16:45:52 INFO DAGScheduler: Submitting Stage 1 (ShuffledRDD[3] at reduceByKey at SparkMain.java:44), which is now runnable 15/07/15 16:45:52 INFO MemoryStore: ensureFreeSpace(2232) called with curMem=288831, maxMem=1715396935 15/07/15 16:45:52 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 2.2 KB, free 1635.7 MB) 15/07/15 16:45:52 INFO MemoryStore: ensureFreeSpace(1403) called with curMem=291063, maxMem=1715396935 15/07/15 16:45:52 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1403.0 B, free 1635.7 MB) 15/07/15 16:45:52 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on slave63:50871 (size: 1403.0 B, free: 1635.9 MB) 15/07/15 16:45:52 INFO BlockManagerMaster: Updated info of block broadcast_2_piece0 15/07/15 16:45:52 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:838 15/07/15 16:45:52 INFO DAGScheduler: Submitting 2 missing tasks from Stage 1 (ShuffledRDD[3] at reduceByKey at SparkMain.java:44) 15/07/15 16:45:52 INFO YarnClientClusterScheduler: Adding task set 1.0 with 2 tasks 15/07/15 16:45:52 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 2, slave63, PROCESS_LOCAL, 1121 bytes) 15/07/15 16:45:52 INFO TaskSetManager: Starting task 1.0 in stage 1.0 (TID 3, slave26, PROCESS_LOCAL, 1121 bytes) 15/07/15 16:45:52 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on slave63:64068 (size: 1403.0 B, free: 2.6 GB) 15/07/15 16:45:52 INFO MapOutputTrackerMasterActor: Asked to send map output locations for shuffle 0 to sparkExecutor@slave63:19749 15/07/15 16:45:52 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 0 is 147 bytes 15/07/15 16:45:52 INFO BlockManagerMasterActor: Registering block manager slave27:35965 with 2.6 GB RAM, BlockManagerId(9, slave27, 35965) 15/07/15 16:45:52 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 2) in 159 ms on slave63 (1/2) 15/07/15 16:45:52 INFO YarnClientSchedulerBackend: Registered executor: Actor[akka.tcp://sparkExecutor@slave18:54423/user/Executor#495118309] with ID 7 15/07/15 16:45:52 INFO RackResolver: Resolved slave18 to /rack3 15/07/15 16:45:52 INFO BlockManagerMasterActor: Registering block manager slave24:57590 with 2.6 GB RAM, BlockManagerId(8, slave24, 57590) 15/07/15 16:45:53 INFO BlockManagerMasterActor: Registering block manager slave18:51244 with 2.6 GB RAM, BlockManagerId(7, slave18, 51244) 15/07/15 16:45:53 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on slave26:46197 (size: 1403.0 B, free: 2.6 GB) 15/07/15 16:45:53 INFO MapOutputTrackerMasterActor: Asked to send map output locations for shuffle 0 to sparkExecutor@slave26:28665 15/07/15 16:45:53 INFO TaskSetManager: Finished task 1.0 in stage 1.0 (TID 3) in 1605 ms on slave26 (2/2) 15/07/15 16:45:53 INFO DAGScheduler: Stage 1 (collect at SparkMain.java:53) finished in 1.612 s 15/07/15 16:45:53 INFO YarnClientClusterScheduler: Removed TaskSet 1.0, whose tasks have all completed, from pool 15/07/15 16:45:53 INFO DAGScheduler: Job 0 finished: collect at SparkMain.java:53, took 9.550722 s So,1 up.He,1 are,1 got,1 decided,1 bunch,1 his,1 few,1 away,1 backed,1 said��I,1 They,1 air,,1 ripe,1 am,1 never,1 One,1 tried,1 last,1 feeling,1 with,1 day,1 start,,1 One,,1 again,,2 paces,,1 three,,1 they,1 just,1 again,1 still,,1 two,,1 grapes.,1 walked,2 summer,1 walking,1 running,1 up,2 not,1 it,1 He,1 fox,2 orchard.,1 succeeded.,1 was,1 sour.��,1 grapes.The,1 a,4 stopped,1 nose,1 At,1 missed,1 before,1 to,1 back.,1 sure,1 he,5 through,1 thirsty,",1 in,1 could,1 grapes.He,1 of,1 hot,1 juicy."I'm,1 were,1 reach,1 an,1 but,3 jumped,2 and,3 up,,1 give,1 thought.,1 the,3 15/07/15 16:45:53 INFO SparkUI: Stopped Spark web UI at http://slave63:4040 15/07/15 16:45:53 INFO DAGScheduler: Stopping DAGScheduler 15/07/15 16:45:53 INFO YarnClientSchedulerBackend: Shutting down all executors 15/07/15 16:45:53 INFO YarnClientSchedulerBackend: Asking each executor to shut down 15/07/15 16:45:53 INFO YarnClientSchedulerBackend: Stopped 15/07/15 16:45:54 INFO MapOutputTrackerMasterActor: MapOutputTrackerActor stopped! 15/07/15 16:45:54 INFO MemoryStore: MemoryStore cleared 15/07/15 16:45:54 INFO BlockManager: BlockManager stopped 15/07/15 16:45:54 INFO BlockManagerMaster: BlockManagerMaster stopped 15/07/15 16:45:54 INFO SparkContext: Successfully stopped SparkContext 15/07/15 16:45:54 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.