很久没写 Spark 代码了,提交作业的时候遇到了不少问题,先记录下来,方便以后查看。
先启动 Spark 集群,这里是三台机器(dfs-yarn-zk-spark)。
启动完毕之后,需要先关闭 master 服务:sbin/stop-master.sh
然后以指定 IP 的方式重新启动 master:
./sbin/start-master.sh -h 192.168.199.120
Spark 这边就准备完毕了。下面是提交 jar 包的命令:
./bin/spark-submit --class sparkstreaming_action.wordfreq.main.WordFreq --num-executors 4 --driver-memory 512M --executor-memory 512M --executor-cores 1 --conf spark.default.parallelism=1000 /root/spark/spark/smart.jar
hdfs dfs -put input.txt /root/  (上传代码中读取的输入文件 /root/input.txt;需要在提交作业之前执行)
代码准备
1. 先准备好 IDEA 开发环境(IDE)
2. 新建 Maven 工程
然后设置好 Maven 仓库
3. 删除工程自动建好的 java、test、resource 目录
4. 新增 main.scala,并把 scala 目录设置为源码主目录(IDEA 中显示为蓝色的目录)
新建一个 Scala object:
package sparkstreaming_action.wordfreq.main

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Word-frequency job: reads a text file, splits every line on single spaces,
 * counts how often each word occurs and prints the (word, count) pairs on the
 * driver.
 *
 * Usage: WordFreq [inputPath]
 *   inputPath  optional; path handed to SparkContext.textFile.
 *              Defaults to "/root/input.txt".
 */
object WordFreq {

  /** Master URL used only when no `spark.master` has been configured from outside. */
  private val DefaultMaster = "spark://192.168.199.120:7077"

  /** Input path used when no command-line argument is supplied. */
  private val DefaultInput = "/root/input.txt"

  /**
   * Entry point.
   *
   * @param args optional single argument: the input path to read
   */
  def main(args: Array[String]): Unit = {
    // Create spark context.
    // setIfMissing keeps the built-in master as the default but lets a value
    // supplied externally (e.g. spark-submit --master) take effect instead of
    // being silently overridden by the code.
    val conf = new SparkConf()
      .setAppName("WordFreq_Spark")
      .setIfMissing("spark.master", DefaultMaster)
    val sc = new SparkContext(conf)

    try {
      val txtFile = if (args.nonEmpty) args(0) else DefaultInput
      val txtData = sc.textFile(txtFile)

      val wcData = txtData
        .flatMap(line => line.split(" "))
        // Consecutive or leading spaces make split(" ") emit empty strings;
        // drop them so "" is not reported as a word.
        .filter(word => word.nonEmpty)
        .map(word => (word, 1))
        .reduceByKey(_ + _)

      wcData.collect().foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this project's artifact. -->
    <groupId>com.spark_smart</groupId>
    <artifactId>smart</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <!-- Spark core, built for Scala 2.11. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.0.0</version>
        </dependency>
        <!-- Log -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <!-- SLF4J binding that routes to log4j. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.12</version>
        </dependency>
    </dependencies>

    <!--
    DISABLED build and repository configuration, kept for reference only.
    Nothing inside this comment is active.

    NOTE(review): the mainClass below (com.sparkstreaming.action.main.WordFreq)
    differs from the class passed to spark-submit in these notes
    (sparkstreaming_action.wordfreq.main.WordFreq); reconcile before re-enabling.

    <build>
        <plugins>
            [mixed scala/java compile]
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <executions>
                    <execution>
                        <id>compile</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <phase>compile</phase>
                    </execution>
                    <execution>
                        <id>test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                        <phase>test-compile</phase>
                    </execution>
                    <execution>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
            [for fatjar]
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.2</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>assemble-all</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>com.sparkstreaming.action.main.WordFreq</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <id>aliyunmaven</id>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
    </repositories>
    -->
</project>