Installation
Switch the Ubuntu 16.04 package sources
$ sudo cp /etc/apt/sources.list /etc/apt/sources.list.bak
$ sudo vim /etc/apt/sources.list
""
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial main restricted
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates main restricted
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial universe
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates universe
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial multiverse
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates multiverse
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-backports main restricted universe multiverse
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security main restricted
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security universe
deb http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security multiverse
""
$ sudo apt-get update
$ sudo apt-get update --fix-missing
Install the required packages
$ sudo apt-get install -y tar wget git
$ sudo apt-get install -y openjdk-8-jdk
$ sudo apt-get -y install build-essential python-dev python-pip python-six python-virtualenv libcurl4-nss-dev libsasl2-dev libsasl2-modules maven libapr1-dev libsvn-dev zlib1g-dev
# If openjdk-8-jdk cannot be found, add the PPA below, update, and retry the install
$ sudo add-apt-repository ppa:openjdk-r/ppa
$ sudo apt-get update
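A quick sanity check that the right JDK is now active (the exact build number will differ on your system):
$ java -version
# should report something like: openjdk version "1.8.0_..."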
Configure /etc/hosts
Assume there are three servers: one master and two slaves. Add the following entries to the hosts file on every node:
$ sudo vim /etc/hosts
""
127.0.0.1 localhost ubuntu
192.168.71.153 spark-master
192.168.71.154 spark-slave1
192.168.71.155 spark-slave2
""
Set up passwordless SSH between the servers
Generate an RSA key pair on each server:
$ ssh-keygen -t rsa
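If this step needs to be scripted, a non-interactive variant works too (assuming an empty passphrase is acceptable in your environment):
$ ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa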
Send each slave's public key to the master
# on spark-slave1:
$ scp /home/hdgs/.ssh/id_rsa.pub hdgs@spark-master:~/.ssh/id_rsa.pub.spark-slave1
# on spark-slave2:
$ scp /home/hdgs/.ssh/id_rsa.pub hdgs@spark-master:~/.ssh/id_rsa.pub.spark-slave2
On the master, build the authorized_keys file
$ cat ~/.ssh/id_rsa.pub* >> ~/.ssh/authorized_keys
Distribute the authorized_keys file back to the slaves
$ scp /home/hdgs/.ssh/authorized_keys hdgs@spark-slave1:~/.ssh/
$ scp /home/hdgs/.ssh/authorized_keys hdgs@spark-slave2:~/.ssh/
Test passwordless login from the master
$ ssh spark-slave1
$ ssh spark-slave2
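If a password prompt still appears, a common culprit is loose permissions on the .ssh directory; tightening them on every node usually resolves it:
$ chmod 700 ~/.ssh
$ chmod 600 ~/.ssh/authorized_keys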
Download Spark 2.3 and Hadoop 2.9
$ cd /opt
$ sudo wget http://mirror.bit.edu.cn/apache/spark/spark-2.3.0/spark-2.3.0-bin-hadoop2.7.tgz
$ sudo tar -zxvf spark-2.3.0-bin-hadoop2.7.tgz
$ sudo mv spark-2.3.0-bin-hadoop2.7 spark
$ sudo wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.9.0/hadoop-2.9.0.tar.gz
$ sudo tar -zxvf hadoop-2.9.0.tar.gz
$ sudo mv hadoop-2.9.0 hadoop
# Change the owner to the current user
$ sudo chown -R $(whoami):$(whoami) spark
$ sudo chown -R $(whoami):$(whoami) hadoop
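Optionally, exporting the two paths in ~/.bashrc saves typing in the later steps (a sketch; run on every node):
$ echo 'export SPARK_HOME=/opt/spark' >> ~/.bashrc
$ echo 'export HADOOP_HOME=/opt/hadoop' >> ~/.bashrc
$ echo 'export PATH=$PATH:$SPARK_HOME/bin:$HADOOP_HOME/bin' >> ~/.bashrc
$ source ~/.bashrc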
Modify the configuration
Edit the Spark environment configuration
$ cd /opt/spark
$ cp conf/spark-env.sh.template conf/spark-env.sh
$ vim conf/spark-env.sh
""
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_CONF_DIR=/opt/hadoop/etc/hadoop
export SPARK_HOME=/opt/spark
export SPARK_MASTER_HOST=192.168.71.153
export SPARK_LOCAL_IP=192.168.71.153  # set this to each node's own IP; it differs between the master and each slave
export SPARK_DRIVER_MEMORY=4g
export SPARK_WORKER_CORES=2
export SPARK_WORKER_MEMORY=2g
""
After completing the configuration above, check that Spark runs locally:
$ ./bin/run-example SparkPi 10
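On success, the job output should contain a line similar to the following (the digits after 3.14 vary between runs):
""
Pi is roughly 3.141...
""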
Configure the Spark slave list
$ cp conf/slaves.template conf/slaves
$ vim conf/slaves
""
192.168.71.154
192.168.71.155
""
Distribute the configured Spark to the slaves
# On each slave, create the target directories first and make them writable
$ sudo mkdir /opt/spark
$ sudo mkdir /opt/hadoop
$ sudo chown -R $(whoami):$(whoami) /opt/spark
$ sudo chown -R $(whoami):$(whoami) /opt/hadoop
# Copy the folders from the master to each slave
$ scp -r /opt/spark hdgs@spark-slave1:/opt/
$ scp -r /opt/spark hdgs@spark-slave2:/opt/
$ scp -r /opt/hadoop hdgs@spark-slave1:/opt/
$ scp -r /opt/hadoop hdgs@spark-slave2:/opt/
Note: remember to update SPARK_LOCAL_IP in each slave's conf/spark-env.sh to that slave's own IP.
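One way to make that edit from the master without logging in to each slave (a sketch, using the IPs from /etc/hosts above):
$ ssh spark-slave1 "sed -i 's/^export SPARK_LOCAL_IP=.*/export SPARK_LOCAL_IP=192.168.71.154/' /opt/spark/conf/spark-env.sh"
$ ssh spark-slave2 "sed -i 's/^export SPARK_LOCAL_IP=.*/export SPARK_LOCAL_IP=192.168.71.155/' /opt/spark/conf/spark-env.sh"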
Start the services
On the master, from /opt/spark:
$ ./sbin/start-all.sh
Once the services are up, the web UI is available at http://192.168.71.153:8080/
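To verify that jobs really run on the cluster rather than locally, SparkPi can be submitted against the master URL (a sketch; the examples jar name matches the Spark 2.3.0 / Scala 2.11 build downloaded earlier):
$ ./bin/spark-submit --master spark://spark-master:7077 --class org.apache.spark.examples.SparkPi examples/jars/spark-examples_2.11-2.3.0.jar 10
Running jps on each node is another quick check: the master should show a Master process and every slave a Worker process.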
Troubleshooting
Utils:66 - Service 'sparkWorker' could not bind on port
Set SPARK_LOCAL_IP in that node's conf/spark-env.sh to an address the node can actually bind, e.g.:
export SPARK_LOCAL_IP=localhost
Error: A JNI error has occurred, please check your installation and try again Exception in thread "main" java.lang.NoClassDefFoundError: org/slf4j/Logger
Copy the logging jars from the Hadoop distribution into Spark:
$ cp /opt/hadoop/share/hadoop/common/lib/slf4j-api-1.7.25.jar /opt/spark/jars/
$ cp /opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar /opt/spark/jars/
$ cp /opt/hadoop/share/hadoop/common/lib/commons-logging-1.1.3.jar /opt/spark/jars/
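An alternative to copying individual jars is to put the whole Hadoop classpath on Spark's classpath, the approach Spark's documentation suggests for builds that don't bundle Hadoop; append this to conf/spark-env.sh (the $(...) is evaluated when the file is sourced):
$ echo 'export SPARK_DIST_CLASSPATH=$(/opt/hadoop/bin/hadoop classpath)' >> /opt/spark/conf/spark-env.sh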