1. Hadoop-related configuration
By default, Spark uses HDFS as its distributed file system, so SparkContext needs to obtain the Hadoop-related configuration during initialization:
private var _hadoopConfiguration: Configuration = _

// Build a Hadoop Configuration from the SparkConf
_hadoopConfiguration = SparkHadoopUtil.get.newConfiguration(_conf)
/**
* A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse.
*
* @note As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
* plan to set some global configurations for all Hadoop RDDs.
*/
def hadoopConfiguration: Configuration = _hadoopConfiguration
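One practical consequence: any key on the SparkConf prefixed with spark.hadoop. is copied into this Hadoop Configuration (with the prefix stripped) when SparkHadoopUtil.get.newConfiguration(_conf) runs. A minimal sketch of that behavior, assuming a hypothetical HDFS namenode address:

import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setMaster("local[*]")
  .setAppName("hadoop-conf-demo")
  // "spark.hadoop.*" keys are forwarded to the Hadoop Configuration
  .set("spark.hadoop.fs.defaultFS", "hdfs://namenode:8020") // hypothetical address

val sc = new SparkContext(conf)
// The prefix is stripped; the value is visible on sc.hadoopConfiguration
println(sc.hadoopConfiguration.get("fs.defaultFS")) // hdfs://namenode:8020
sc.stop()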
2. Executor environment variables
// Resolve the executor memory size in MB. Priority: spark.executor.memory,
// then the SPARK_EXECUTOR_MEMORY env var, then the deprecated SPARK_MEM env var
// (which triggers a warning), and finally a default of 1024 MB.
_executorMemory = _conf.getOption("spark.executor.memory")
  .orElse(Option(System.getenv("SPARK_EXECUTOR_MEMORY")))
  .orElse(Option(System.getenv("SPARK_MEM"))
  .map(warnSparkMem))
  .map(Utils.memoryStringToMb)
  .getOrElse(1024)
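To make the precedence concrete, here is a standalone sketch of the same fallback chain in plain Scala. memoryStringToMb below is a simplified stand-in for Spark's Utils.memoryStringToMb (which treats suffix-less numbers as bytes), and the sketch omits the SPARK_MEM deprecation warning:

def memoryStringToMb(s: String): Int = {
  val lower = s.toLowerCase
  if (lower.endsWith("g")) lower.dropRight(1).toInt * 1024
  else if (lower.endsWith("m")) lower.dropRight(1).toInt
  else (lower.toLong / (1024 * 1024)).toInt // no suffix: interpret as bytes
}

def resolveExecutorMemory(confValue: Option[String],
                          executorMemoryEnv: Option[String],
                          sparkMemEnv: Option[String]): Int =
  confValue
    .orElse(executorMemoryEnv)
    .orElse(sparkMemEnv)   // deprecated SPARK_MEM has the lowest priority
    .map(memoryStringToMb)
    .getOrElse(1024)       // default: 1024 MB

// spark.executor.memory wins over both environment variables:
assert(resolveExecutorMemory(Some("2g"), Some("512m"), None) == 2048)
// With nothing set anywhere, the default is 1 GB:
assert(resolveExecutorMemory(None, None, None) == 1024)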
// Convert java options to env vars as a work around
// since we can't set env vars directly in sbt.
for { (envKey, propKey) <- Seq(("SPARK_TESTING", "spark.testing"))
      value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey))) } {
  executorEnvs(envKey) = value
}
Option(System.getenv("SPARK_PREPEND_CLASSES")).foreach { v =>
executorEnvs("SPARK_PREPEND_CLASSES") = v
}
// The Mesos scheduler backend relies on this environment variable to set executor memory.
// TODO: Set this only in the Mesos scheduler.
executorEnvs("SPARK_EXECUTOR_MEMORY") = executorMemory + "m"
// Merge in user-supplied executor variables (spark.executorEnv.* on the SparkConf)
executorEnvs ++= _conf.getExecutorEnv
// Record which user submitted the application
executorEnvs("SPARK_USER") = sparkUser