一些总结:
0. 安装Java和Homebrew
Java安装方式请问度娘。
Homebrew的安装方式:执行如下命令:
ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
1. 配置SSH
为确保在远程管理Hadoop以及Hadoop节点在用户共享时候的安全性,Hadoop需要使用SSH协议。
在Mac上执行:
ssh localhost
如果执行失败,则需要修改一下系统设置:
系统偏好设置 -> 共享 -> 打开远程登录 -> 右侧选择允许所有用户访问。
生成密钥对:
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
执行该命令后,会在当前用户目录的.ssh文件夹下生成id_dsa(私钥)和id_dsa.pub(公钥)文件。然后将公钥追加到该目录下的authorized_keys文件中。命令如下:
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
2. 安装Hadoop
安装方式为执行以下命令:
brew install hadoop
命令执行完以后,Hadoop会被安装在/usr/local/Cellar/hadoop目录下。
2.1 配置Hadoop
在目录/usr/local/Cellar/hadoop/2.7.2/libexec/etc/hadoop下
1) 修改hadoop-env.sh文件
将其中的
# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
修改为:
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
2) 修改core-site.xml文件为:
<configuration> <property> <name>hadoop.tmp.dir</name> <value>/usr/local/Cellar/hadoop/hdfs/tmp</value> </property> <property> <name>fs.default.name</name> <value>hdfs://localhost:9000</value> </property> </configuration>
3)修改mapred-site.xml文件为:(如果没有该文件,就把mapred-site.xml.template文件复制为该文件)
<configuration> <property> <name>mapred.job.tracker</name> <value>localhost:9010</value> </property> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration>
4)修改hdfs-site.xml文件为:
<configuration> <property> <name>dfs.replication</name> <value>1</value> </property> </configuration>
运行后台程序之前,需要格式化hdfs。执行命令如下:
hadoop namenode -format
2.2 启动Hadoop
在/usr/local/Cellar/hadoop/2.7.2/sbin目录下,执行如下命令:
#启动Hadoop
./start-dfs.sh
#停止Hadoop
./stop-dfs.sh
启动以后,可以通过 http://localhost:50070/ 访问Hadoop的Web页面,查看HDFS的运行状态。
3. 使用Maven开发Hadoop示例
3.1 创建Maven工程
使用maven-archetype-quickstart创建项目,jar包依赖如下:
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.7.2</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.7.2</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.7.2</version> </dependency>
3.2 测试hdfs
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * HDFS smoke test: writes a small file to {@code hdfs://localhost:9000},
 * reads it back to standard output, then lists the containing directory.
 */
public class Test {

    /**
     * Runs the write / read / list round trip against the local HDFS instance.
     *
     * @param args unused
     * @throws Exception if the file system cannot be reached or any I/O step fails
     */
    public static void main(String[] args) throws Exception {
        String uri = "hdfs://localhost:9000";
        Configuration conf = new Configuration();
        Path dir = new Path("/user/lxlong");
        Path file = new Path("/user/lxlong/test.log");

        // try-with-resources guarantees every handle is released even when a step throws.
        try (FileSystem fs = FileSystem.get(new URI(uri), conf)) {
            try (FSDataOutputStream os = fs.create(file)) {
                // Explicit charset: the bytes written must not depend on the platform default.
                os.write("Hello lxlong".getBytes(StandardCharsets.UTF_8));
                os.flush();
            }

            try (InputStream is = fs.open(file)) {
                // close=false: passing true would also close System.out, which would
                // silently swallow everything printed after this point.
                IOUtils.copyBytes(is, System.out, 1024, false);
            }
            // The file content has no trailing newline; end the line before the listing.
            System.out.println();

            FileStatus[] statuses = fs.listStatus(dir);
            for (FileStatus status : statuses) {
                System.out.println(status);
            }
        }
    }
}
3.3 测试MapReduce
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class WordCount { public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text event = new Text(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException { int idx = value.toString().indexOf(" "); if(idx > 0) { String e = value.toString().substring(0, idx); event.set(e); context.write(event, one); } } } public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for(IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key, result); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if(otherArgs.length < 2) { System.err.println("Usage: WordCount <in> <out>"); System.exit(2); } Job job = Job.getInstance(conf, "Word Count"); job.setJarByClass(WordCount.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); 
System.exit(job.waitForCompletion(true) ? 0 : 1); } }
1) 在该工程下使用mvn clean package命令生成jar包HadoopTest-0.0.1-SNAPSHOT.jar。
2)通过:
cp HadoopTest-0.0.1-SNAPSHOT.jar /usr/local/Cellar/hadoop/2.7.2/bin
命令将jar包拷贝到Hadoop命令目录下。(只是为了方便一些而已)
3)复制几个文件到hdfs下:
./hdfs dfs -put /tmp/input /user/lxlong/input
4)执行Hadoop任务(mapred-site.xml中将mapreduce.framework.name配置为yarn,因此需要先在sbin目录下执行./start-yarn.sh启动YARN):
./hadoop jar HadoopTest-0.0.1-SNAPSHOT.jar com.test.HadoopTest.WordCount /user/lxlong/input /user/lxlong/output
5)在hdfs上查看输出:
hdfs dfs -cat /user/lxlong/output/part-r-00000