需准备东西:
1、3个或3个以上Linux服务器;
Linux安装安装教程见:https://blog.csdn.net/Hjchidaozhe/article/details/103434690
2、Linux jdk 8 ;
官网下载方式见:https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
3、ftp传输工具;
下载Filezilla 百度网盘:https://pan.baidu.com/s/149i0H94bVuC0n9QbphFegw (fsvb) (使用时,Linux默认端口22)
4、Hadoop安装包;
cloudera 版本镜像地址:http://archive.cloudera.com/cdh5/cdh/5/
现在默认跳转官网,可在最后一步跳转的网址后添加 .tar.gz 例如:http://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.9.3.tar.gz
Apache Hadoop下载地址:http://mirror.bit.edu.cn/apache/hadoop/common/
方式采用新建hadoop用户,将集群整个权限赋给hadoop操作,安装时,在主节点安装,拷贝至从节点
在无说明的情况下,均在root权限下操作,本文中所有密码均设置为123456
本文默认下载文件通过ftp传输至虚拟机:/tmp 下面,且所有文件安装在 /usr/local/
下面开始准备系统环境:
# 修改主机名(3台)
# 临时修改主机名
hostname centos-a
# 永久修改主机名,将文件中的名字替换
vi /etc/hostname
centos-a
# 修改hosts文件(3台)
# 编辑文件,在里面添加以下内容
vi /etc/hosts
192.168.190.128 centos-a
192.168.190.129 centos-b
192.168.190.130 centos-c
# 新建hadoop用户,并设置密码(3台)
useradd hadoop
passwd hadoop
# 输入两遍密码
# 卸载centos自带jdk,并重新安装下载版本(3台)
# 查看已存在的jdk信息:(3台)
java -version
# 查看jdk详细信息:(找出jdk文件,并将其卸载)(3台)
rpm -qa|grep jdk 或者 rpm -qa|grep java
# 执行卸载命令(3台)
rpm -e --nodeps java-1.7.0-openjdk-1.7.0.191-2.6.15.5.el7.x86_64
rpm -e --nodeps java-1.7.0-openjdk-headless-1.7.0.191-2.6.15.5.el7.x86_64
rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.181-7.b13.el7.x86_64
rpm -e --nodeps java-1.8.0-openjdk-1.8.0.181-7.b13.el7.x86_64
# 再次查看是否卸载完成(3台)
java -version
# 移动下载好的jdk压缩包到指定目录(3台)
mv jdk-8u191-linux-x64.tar.gz /usr/local/
# 切换到安装目录并解压安装(3台)
cd /usr/local/
tar -zxvf jdk-8u191-linux-x64.tar.gz
# 修改环境变量,编辑 /etc/profile ,在末尾添加以下内容(3台)
vi /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_191
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
# 使环境变量立即生效(3台)
source /etc/profile
# 再次查看Java版本,并确认是否安装完成(3台)
java -version
# 创建3台服务器间访问密钥,免密登陆,切换到hadoop用户下,且3台都需要操作(3台)
su hadoop
# 创建密钥(hadoop用户下)(3台)
ssh-keygen -t rsa (这里一直回车默认即可)
# 复制密钥到3台,自己也需要(hadoop用户下)(3台)
ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub centos-a (输入yes,然后输入密码:123456)
ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub centos-b (输入yes,然后输入密码:123456)
ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub centos-c (输入yes,然后输入密码:123456)
# 下面开始安装hadoop,只需在主节点配置
# 切换到安装目录,并移动下载好的hadoop压缩包
mv /tmp/hadoop-2.6.0-cdh5.9.3.tar.gz /usr/local/
cd /usr/local/
# 解压安装
tar -zxvf hadoop-2.6.0-cdh5.9.3.tar.gz
# 将整个目录权限授给hadoop用户
chown -R hadoop.hadoop /usr/local/hadoop-2.6.0-cdh5.9.3
# 下面配置hadoop相关文件,切换到配置文件目录(hadoop用户下)
cd /usr/local/hadoop-2.6.0-cdh5.9.3/etc/hadoop/
# 创建文件夹,供后续使用(hadoop用户下)
mkdir /usr/local/hadoop-2.6.0-cdh5.9.3/tmp
mkdir /usr/local/hadoop-2.6.0-cdh5.9.3/var
mkdir /usr/local/hadoop-2.6.0-cdh5.9.3/dfs
mkdir /usr/local/hadoop-2.6.0-cdh5.9.3/dfs/name
mkdir /usr/local/hadoop-2.6.0-cdh5.9.3/dfs/data
# 修改文件 slaves ,里面只放如下信息(从节点信息)(hadoop用户下)
vi slaves
centos-b
centos-c
# 修改文件 hadoop-env.sh ,找到并修改指定内容(hadoop用户下)
vi hadoop-env.sh
export JAVA_HOME=/usr/local/jdk1.8.0_191
# 修改文件 core-site.xml ,在 <configuration> </configuration>文件之间添加以下内容,注意不要重复 <configuration> </configuration>,这里只是说明添加位置(hadoop用户下)
vi core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop-2.6.0-cdh5.9.3/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://centos-a:9000</value>
</property>
</configuration>
# 修改文件 hdfs-site.xml ,添加以下内容(hadoop用户下)
vi hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>centos-a:9091</value>
<description>The secondary namenode http server address and port.</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///usr/local/hadoop-2.6.0-cdh5.9.3/dfs/name</value>
<description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///usr/local/hadoop-2.6.0-cdh5.9.3/dfs/data</value>
<description>Comma separated list of paths on the local filesystem of a DataNode where it should store its blocks.</description>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
# 拷贝指定文件,并修改拷贝之后的文件(hadoop用户下)
cp mapred-site.xml.template mapred-site.xml
# 修改文件 mapred-site.xml ,添加以下内容(hadoop用户下)
vim mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>The runtime framework for executing MapReduce jobs. Can be one of local, classic or yarn.</description>
</property>
</configuration>
# 修改文件 yarn-site.xml ,添加以下内容(hadoop用户下)
vi yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>centos-a</value>
<description>The hostname of the RM.</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>Shuffle service that needs to be set for Map Reduce applications.</description>
</property>
<property>
<description>The address of the applications manager interface in the RM.</description>
<name>yarn.resourcemanager.address</name>
<value>centos-a:8032</value>
</property>
<property>
<description>The address of the scheduler interface.</description>
<name>yarn.resourcemanager.scheduler.address</name>
<value>centos-a:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>centos-a:8031</value>
</property>
<property>
<description>The address of the RM admin interface.</description>
<name>yarn.resourcemanager.admin.address</name>
<value>centos-a:8033</value>
</property>
<property>
<description>The http address of the RM web application.</description>
<name>yarn.resourcemanager.webapp.address</name>
<value>centos-a:8088</value>
</property>
<property>
<description>The https address of the RM web application.</description>
<name>yarn.resourcemanager.webapp.https.address</name>
<value>centos-a:8090</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
<description>每个容器可申请的最大内存,单位MB,默认8192MB</description>
</property>
</configuration>
# hadoop文件配置完毕,下面在主节点拷贝至另外两个节点,root用户下
scp -r /usr/local/hadoop-2.6.0-cdh5.9.3/ root@centos-b:/usr/local/
scp -r /usr/local/hadoop-2.6.0-cdh5.9.3/ root@centos-c:/usr/local/
# 在另外两个节点分别授权hadoop目录权限(另外两台)
chown -R hadoop.hadoop /usr/local/hadoop-2.6.0-cdh5.9.3
# 下面再声明hadoop命令的环境变量,编辑文件,添加以下内容(3台)
vi /etc/profile
export HADOOP_HOME=/usr/local/hadoop-2.6.0-cdh5.9.3/
export PATH=${HADOOP_HOME}/bin:$PATH
# 使环境变量立即生效(3台)
source /etc/profile
# 为防止意外,建议重启三台机器(3台)
reboot
# 重启完成后,在主节点,切换至hadoop用户下
su hadoop
# 切换目录,初始化hadoop(hadoop用户下)
cd /usr/local/hadoop-2.6.0-cdh5.9.3/
./bin/hadoop namenode -format
# 下面开启集群,如果有需要确认,则输入yes再回车(一般第一次开启需要)(hadoop用户下)
./sbin/start-all.sh
# 然后3台服务器查看后台进程,主节点显示如下1,从节点显示如下2,则安装完成(hadoop用户下)(3台)
jps
12628 NameNode
13158 ResourceManager
16648 Jps
13274 NodeManager
12750 DataNode
12974 SecondaryNameNode
8976 NodeManager
8858 DataNode
11627 Jps
# 下面可以用文件再次测试确认安装完成(hadoop用户下)
cd /home/hadoop/
vi test.txt
hello world
hadoop fs -put test.txt /
hadoop fs -ls /
hadoop fs -cat /test.txt
# 关闭集群(hadoop用户下)
./sbin/stop-all.sh
错误提示:
一般开启时会报如下警告:WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
解决方案参考:https://blog.csdn.net/Hjchidaozhe/article/details/103430157