1.pom准备
2.定义一个mapper类
3.定义一个reducer类
-----------------------------------------------------------------------------------------------------------------
以上三步与本地运行的 MapReduce 没有任何区别,可参照:
https://blog.csdn.net/weixin_44036154/article/details/103053844
大数据篇(17)--分布式计算框架MapReduce初体验(本地计算)
4.定义一个主类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver that configures and submits the word-count MapReduce job.
 *
 * <p>The job reads text with {@link TextInputFormat}, runs {@code WordCountMap} and
 * {@code WordCountreduce}, and writes {@code Text}/{@code LongWritable} pairs with
 * {@link TextOutputFormat} using two reduce tasks.
 *
 * <p>Usage: {@code hadoop jar <jar> WordCountDriver [genericOptions] [inputPath [outputPath]]}.
 * When a path argument is omitted, the corresponding built-in default HDFS path is used.
 *
 * @author kismet
 * @date 2019-11-13 8:33
 */
public class WordCountDriver extends Configured implements Tool {

    /** Input path used when no input path argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "hdfs://192.168.100.88:8020/abc";

    /** Output path used when no output path argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "hdfs://192.168.100.88:8020/abb";

    /**
     * Builds the word-count job, submits it and waits for it to finish.
     *
     * @param args optional positional arguments: {@code args[0]} is the input path and
     *             {@code args[1]} is the output path; missing entries fall back to the defaults
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration injected by ToolRunner so that generic options
        // (-D, -conf, -fs, ...) given on the command line are honoured.
        Configuration conf = getConf();
        if (conf == null) {
            // run() was invoked directly, without ToolRunner having called setConf().
            conf = new Configuration();
        }

        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Job job = Job.getInstance(conf, "name");
        job.setJarByClass(WordCountreduce.class);

        // Input side.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(inputPath));

        // Map phase and its intermediate key/value types.
        job.setMapperClass(WordCountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reduce phase and the final key/value types.
        job.setReducerClass(WordCountreduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Output side.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setNumReduceTasks(2);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point: runs the driver through {@link ToolRunner} and exits the JVM
     * with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, passed to {@link ToolRunner}
     * @throws Exception if the job fails with an exception
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new WordCountDriver(), args);
        // Propagate the job result to the calling shell; otherwise a failed job still exits 0.
        System.exit(exitCode);
    }
}
5.重启hdfs集群,重新运行
代码编写完毕后,将代码打成 jar 包并上传到服务器上运行。
运行命令如下:
hadoop jar original-demo02-1.0-SNAPSHOT.jar com.czxy2.WordCountDriver