springboot + spark + hadoop

可以在一个Controller里面实现spark抓取的代码:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.servlet.ModelAndView;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.SparkConf;
import org.springframework.web.bind.annotation.ResponseBody;

/**
 * @author flash胜龙
 */
@RestController
public class DataFigureController {

    @RequestMapping("/dataimportlocal.html")
    public ModelAndView dataimportlocal() {
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.2test");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        SparkSession spark = SparkSession.builder().master("local[*]").appName("Word Count").config("spark.sql.warehouse.dir", "file:///d:/tmp").getOrCreate();
        Dataset<Row> df = spark.read().option("header", true).csv("D:\\book.csv");
        df.show();
        return new ModelAndView("dataimport");
    }}

Maven配置如下(英 ['meɪv(ə)n]  美 ['mevn])

其中有不少坑:一个是包冲突问题,hadoop、spark和springboot体系里面每个都自己引用了一系列logger实现的包,一起编译运行会有冲突,对部分包的依赖要exclusions掉;二个是版本问题,对于要使用的版本,必须整个工程前后一致。如果一个引用的是A版本,另一个引用的是B版本,就会出问题,要么把A给exclusion掉,只用B版本,要不想其它办法:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>my.groud.id</groupId>
    <artifactId>sparkuitest</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>sparkuitest</name>
    <url>http://maven.apache.org</url>

    <repositories>
        <repository>
            <id>central</id>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
            <!--<url>https://mvnrepository.com/artifact</url>-->
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>maven2</id>
            <url>http://repo1.maven.org/maven2</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>
    
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.11.8</scala.version>
        <spark.version>2.0.0</spark.version>
        <hadoop.version>2.6.0</hadoop.version>
        <junit.version>4.12</junit.version>
        <jackson.version>2.6.5</jackson.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>

    </properties>

    <dependencies>
        
        <!-- https://mvnrepository.com/artifact/com.opencsv/opencsv -->
		<dependency>
			<groupId>com.opencsv</groupId>
			<artifactId>opencsv</artifactId>
			<version>4.1</version>
		</dependency>
        
        <!-- JsonMappingException: Incompatible Jackson version: 2.8.4 -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        
        
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <version>1.4.2.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
            <version>1.4.2.RELEASE</version>
            <exclusions><!-- 排除 IllegalArgumentException: LoggerFactory is not
             a Logback LoggerContext but Logback is on the classpath. Either 
             remove Logback or the competing implementation问题 -->
<!--                <exclusion>
                    <groupId>ch.qos.logback</groupId>
                   <artifactId>logback-classic</artifactId>
                </exclusion>-->
           </exclusions>
        </dependency>
        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <version>1.3.156</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>1.4.2.RELEASE</version>
            
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.0.11</version>
        </dependency>





    
    
    
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>

        <dependency><!-- 用来处理配置文件 -->
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.2.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <exclusions>  
                <exclusion>   
                    <groupId>org.slf4j</groupId>  
                    <artifactId>slf4j-log4j12</artifactId>  
                </exclusion>  
                <exclusion>   
                    <groupId>log4j</groupId>  
                    <artifactId>log4j</artifactId>  
                </exclusion>  
            </exclusions> 
            <!-- <scope>provided</scope>-->
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-yarn 
        解决ClassNotFoundException: org.apache.spark.deploy.yarn.YarnSparkHadoopUtil问题
        -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>  
                <exclusion>   
                    <groupId>org.slf4j</groupId>  
                    <artifactId>slf4j-log4j12</artifactId>  
                </exclusion>  
                <exclusion>   
                    <groupId>log4j</groupId>  
                    <artifactId>log4j</artifactId>  
                </exclusion>
                <exclusion>
                    <groupId>org.mortbay.jetty</groupId>
                    <artifactId>jetty-util</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions> 
            <!--<scope>provided</scope>-->
        </dependency>
        
        <dependency>  
            <groupId>javax.servlet</groupId>  
            <artifactId>javax.servlet-api</artifactId>  
            <version>3.1.0</version>  
        </dependency>  



        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <!--  <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <!--<artifactId>spark-streaming-kafka_2.11</artifactId>-->
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <!--<version>1.6.1</version>-->
            <version>${spark.version}</version>
            <scope>provided</scope>

        </dependency>


        <dependency><!-- 数据库连接池 -->
            <groupId>com.mchange</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.5.2</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
	
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>1.4.2.RELEASE</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.4.3</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
        </plugins>
        <defaultGoal>compile</defaultGoal>
    </build>
	
</project>

猜你喜欢

转载自blog.csdn.net/u010770993/article/details/79137072