import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class ModuleMapReduce extends Configured implements Tool { static class ModuleMapper extends Mapper<LongWritable, Text, Text, IntWritable> { @Override protected void setup( Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.setup(context); } @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.map(key, value, context); } @Override protected void cleanup( Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.cleanup(context); } @Override public void run( Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.run(context); } } static class ModuleReducer extends Reducer<Text, IntWritable, Text, IntWritable> { @Override protected void setup( Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.setup(context); } @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.reduce(key, values, context); } @Override protected void cleanup( Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.cleanup(context); } @Override public void run( Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { // TODO Auto-generated method stub super.run(context); } } @Override public int run(String[] args) throws Exception { if (checkArgs(args) != 0) return checkArgs(args); Job job = getJob(); //set input/output path FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // set run jar class when package need job.setJarByClass(ModuleMapper.class); //set map info job.setMapperClass(ModuleMapper.class); job.setInputFormatClass(TextInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); //set partition job.setPartitionerClass(HashPartitioner.class); job.setNumReduceTasks(1); //set reduce info job.setReducerClass(ModuleReducer.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); return job.waitForCompletion(true) ? 0 : 1; } private Configuration getConfiguration() { Configuration conf = new Configuration(); return conf; } private Job getJob() throws IOException { Configuration conf = getConfiguration(); Job job = new Job(conf, ModuleMapReduce.class.getSimpleName()); return job; } private int checkArgs(String[] args) { if (args.length != 2) { System.err.println("args error"); return 1; } return 0; } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new ModuleMapReduce(), args); System.exit(exitCode); } }
MapReduce编程模板
猜你喜欢
转载自mvplee.iteye.com/blog/2232959
今日推荐
周排行