1.安装Hadoop
单机模式安装Hadoop
安装JAVA环境
设置环境变量,启动运行
1.1 环境准备
1)配置主机名为nn01,ip为192.168.1.21,配置yum源(系统源)
备注:由于在之前的案例中这些都已经做过,这里不再重复.
2)安装java环境
nn01 ~]# yum -y install java-1.8.0-openjdk-devel
nn01 ~]# java -version
openjdk version "1.8.0_131"
OpenJDK Runtime Environment (build 1.8.0_131-b12)
OpenJDK 64-Bit Server VM (build 25.131-b12, mixed mode)
[root@nn01 ~]# jps
1322 Jps
3)安装hadoop
nn01 ~]# tar -xf hadoop-2.7.6.tar.gz
nn01 ~]# mv hadoop-2.7.6 /usr/local/hadoop
nn01 ~]# cd /usr/local/hadoop/
hadoop]# ls
bin include libexec NOTICE.txt sbin
etc lib LICENSE.txt README.txt share
hadoop]# ./bin/hadoop //报错,JAVA_HOME没有找到
4)解决报错问题
hadoop]# rpm -ql java-1.8.0-openjdk
hadoop]# cd ./etc/hadoop/
hadoop]# vim hadoop-env.sh
25 export \
JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.131-11.b12.el7.x86_64/jre"
33 export HADOOP_CONF_DIR="/usr/local/hadoop/etc/hadoop"
nn01 ~]# cd /usr/local/hadoop/
hadoop]# ./bin/hadoop
Usage:...
hadoop]# mkdir /usr/local/hadoop/aa
hadoop]# cp *.txt /usr/local/hadoop/aa
hadoop]# ./bin/hadoop jar \
share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.6.jar wordcount aa bb
//wordcount为参数,统计aa这个文件夹,结果存到bb这个文件夹里面(bb这个文件夹不能事先存在,要是存在会报错,是为了防止数据覆盖)
hadoop]# cat bb/part-r-00000 //查看
2. 安装配置Hadoop
另备三台虚拟机,安装Hadoop
使所有节点能够ping通,配置SSH信任关系
节点验证
node1 192.168.1.22
node2 192.168.1.23
node3 192.168.1.24
2.1 环境准备
1)三台机器配置主机名为node1、node2、node3,配置ip地址
2)编辑/etc/hosts(四台主机同样操作,以nn01为例)
[root@nn01 ~]# vim /etc/hosts
192.168.1.21 nn01
192.168.1.22 node1
192.168.1.23 node2
192.168.1.24 node3
3)安装java环境,在node1,node2,node3上面操作(以node1为例)
node1 ~]# yum -y install java-1.8.0-openjdk-devel
4)布置SSH信任关系
//第一次登录不需要输入yes
nn01 ~]# vim /etc/ssh/ssh_config
Host *
GSSAPIAuthentication yes
StrictHostKeyChecking no
nn01 ~]# ssh-keygen(一路回车)
nn01 ~]# for i in 21 22 23 24 ; do ssh-copy-id 192.168.1.$i; done
//部署公钥给nn01,node1,node2,node3
5)测试信任关系
nn01 ~]# ssh node1
node1 ~]# exit
2.2 配置hadoop
1)修改slaves文件
[root@nn01 ~]# cd /usr/local/hadoop/etc/hadoop
hadoop]# vim slaves
node1
node2
node3
2)hadoop的核心配置文件core-site
hadoop]# vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://nn01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/var/hadoop</value>
</property>
</configuration>
hadoop]# mkdir /var/hadoop //hadoop的数据根目录
hadoop]# ssh node1 mkdir /var/hadoop
hadoop]# ssh node2 mkdir /var/hadoop
hadoop]# ssh node3 mkdir /var/hadoop
3)配置hdfs-site文件
hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.http-address</name>
<value>nn01:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>nn01:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value> <!-- 存两份 -->
</property>
</configuration>
4)同步配置到node1,node2,node3
//同步的主机都要安装rsync
hadoop]# ssh node1 yum -y install rsync
hadoop]# ssh node2 yum -y install rsync
hadoop]# ssh node3 yum -y install rsync
hadoop]# for i in 22 23 24 ; do rsync -aSH --delete /usr/local/hadoop/ root@192.168.1.$i:/usr/local/hadoop/ -e 'ssh' & done
[1] 23260
[2] 23261
[3] 23262
5)查看是否同步成功
hadoop]# ssh node1 ls /usr/local/hadoop/
bin
etc
include
lib
libexec
LICENSE.txt
NOTICE.txt
bb
README.txt
sbin
share
aa
...
2.3 格式化
hadoop]# cd /usr/local/hadoop/
hadoop]# ./bin/hdfs namenode -format //格式化 namenode
hadoop]# ./sbin/start-dfs.sh //启动
hadoop]# jps //验证角色
11009 Jps
10707 NameNode
10894 SecondaryNameNode
hadoop]# ./bin/hdfs dfsadmin -report //查看集群是否组建成功
Live datanodes (3): //有三个DataNode节点在线,说明集群组建成功