public class MySuper { public static void main(String[] args) throws Exception { final String INPUT_PATHs = "hdfs://chaoren:9000/seq100w.txt"; final String OUT_PATHs = "hdfs://chaoren:9000/out"; Configuration conf = new Configuration(); final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATHs), conf); final Path outPath = new Path(OUT_PATHs); if(fileSystem.exists(outPath)){ fileSystem.delete(outPath, true); } final Job job = new Job(conf , MySuper.class.getSimpleName()); FileInputFormat.setInputPaths(job, INPUT_PATHs); job.setMapperClass(MyMapper2.class); job.setReducerClass(MyReducer2.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(NullWritable.class); FileOutputFormat.setOutputPath(job, outPath); job.waitForCompletion(true); } } class MyMapper2 extends Mapper<LongWritable, Text, LongWritable, NullWritable>{ long max = Long.MIN_VALUE; protected void map(LongWritable k1, Text v1, Context context) throws java.io.IOException ,InterruptedException { final long temp = Long.parseLong(v1.toString()); if(temp>max){ max = temp; } }; protected void cleanup(org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,LongWritable, NullWritable>.Context context) throws java.io.IOException ,InterruptedException { context.write(new LongWritable(max), NullWritable.get()); }; } class MyReducer2 extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable>{ long max = Long.MIN_VALUE; protected void reduce(LongWritable k2, java.lang.Iterable<NullWritable> arg1, org.apache.hadoop.mapreduce.Reducer<LongWritable,NullWritable,LongWritable,NullWritable>.Context arg2) throws java.io.IOException ,InterruptedException { final long temp = k2.get(); if(temp>max){ max = temp; } }; protected void cleanup(org.apache.hadoop.mapreduce.Reducer<LongWritable,NullWritable,LongWritable,NullWritable>.Context context) throws java.io.IOException ,InterruptedException { context.write(new LongWritable(max), NullWritable.get()); }; }
Hadoop 求最大值方法
Hadoop 求最大值问题的代码比求前 N 个最值要简单一些:因为直接使用 LongWritable 类型,不需要自定义 Hadoop 对象进行比较。做法是覆盖 map 和 reduce 方法,在其中只记录当前见到的最大值;同时覆盖 cleanup 方法——该方法在每个任务的所有 map(或 reduce)调用都执行完成之后才会执行一次——只需要在其中把最大值写入即可。
猜你喜欢
转载自hanyingjun318.iteye.com/blog/2195979
今日推荐
周排行