求该成绩表当中出现相同分数的分数，以及每个分数出现的次数和对应的人名

/**
 * MapReduce job that groups a score sheet by (subject, score) and reports, for each
 * such pair, how many records share it and which students they belong to.
 *
 * <p>Input: text lines with exactly three comma-separated fields. Field 0 and field 2
 * form the grouping key and field 1 is collected as the value; judging by the sample in
 * the reducer these are subject, student name and score. Lines with any other number of
 * fields are skipped.
 *
 * <p>Output key: {@code subject TAB score}. Output value: {@code count TAB name1,name2,...}.
 * Every pair is emitted, including pairs that occur only once.
 *
 * <p>Usage: run without arguments to use the default HDFS paths, or pass
 * {@code <inputPath> <outputPath>} to override both.
 *
 * @author Administrator
 */
public class HomeWork03 {

	/** Input path used when no paths are given on the command line. */
	private static final String DEFAULT_INPUT_PATH="hdfs://master:9000/stuout_01";

	/** Output path used when no paths are given on the command line. */
	private static final String DEFAULT_OUTPUT_PATH="hdfs://master:9000/stuout_03";

	/**
	 * Emits one record per well-formed input line.
	 * Output key: field 0 + TAB + field 2 (subject and score).
	 * Output value: field 1 (student name).
	 */
	static class MyMapper extends Mapper<LongWritable, Text, Text, Text>{
		// Writable instances are fields so the same objects are reused for every map() call.
		private final Text mk=new Text();
		private final Text mv=new Text();

		/**
		 * Splits the line on commas and writes (field0 TAB field2, field1).
		 *
		 * @param key     input key supplied by the framework (not used)
		 * @param value   one line of the input file
		 * @param context sink for the emitted key/value pair
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String[] datas = value.toString().split(",");
			// Anything that is not exactly three fields is treated as malformed and dropped.
			if(datas.length!=3){
				return;
			}
			mk.set(datas[0]+"\t"+datas[2]);
			mv.set(datas[1]);
			context.write(mk, mv);
		}
	}

	/**
	 * Aggregates all values that share one (subject, score) key.
	 * Output key: the incoming key unchanged.
	 * Output value: count + TAB + comma-separated values in iteration order.
	 */
	static class MyReducer extends Reducer<Text, Text, Text, Text>{
		private final Text rv=new Text();

		/**
		 * Example: key = "computer TAB 85", values = huangzitao, liujialing, huangxiaoming
		 * produces value "3 TAB huangzitao,liujialing,huangxiaoming".
		 *
		 * @param key     subject and score joined by a tab
		 * @param values  the values emitted by the mapper for this key
		 * @param context sink for the aggregated record
		 */
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			// values can only be traversed once, so the count and the joined list
			// are both built in this single pass.
			int count=0;
			StringBuilder names=new StringBuilder();
			for(Text v:values){
				// Separator goes before every element except the first, so there is
				// no trailing comma to strip afterwards.
				if(count>0){
					names.append(',');
				}
				names.append(v.toString());
				count++;
			}
			rv.set(count+"\t"+names);
			context.write(key, rv);
		}
	}

	/**
	 * Configures the job, submits it and waits for it to finish.
	 * The JVM exits with status 1 if the job fails or cannot be set up.
	 *
	 * @param args optional; when at least two arguments are present, {@code args[0]} is the
	 *             input path and {@code args[1]} the output path. Otherwise the default
	 *             HDFS paths are used.
	 */
	public static void main(String[] args) {
		// NOTE(review): presumably makes the client act as Hadoop user "hadoop" - confirm
		// against the cluster's authentication setup.
		System.setProperty("HADOOP_USER_NAME", "hadoop");

		boolean pathsGiven=args!=null && args.length>=2;
		String inputLocation=pathsGiven ? args[0] : DEFAULT_INPUT_PATH;
		String outputLocation=pathsGiven ? args[1] : DEFAULT_OUTPUT_PATH;

		// Load the configuration and wrap mapper + reducer into a single job.
		Configuration conf=new Configuration();
		boolean succeeded=false;
		try {
			Job job=Job.getInstance(conf);

			// Class used to locate the jar that contains the job.
			job.setJarByClass(HomeWork03.class);

			job.setMapperClass(MyMapper.class);
			job.setReducerClass(MyReducer.class);

			// The generic parameters on the mapper are erased at runtime, so the
			// map output types have to be declared explicitly as well.
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(Text.class);

			// Types of the final (reducer) output.
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(Text.class);

			// Input to be analysed.
			Path inpath=new Path(inputLocation);
			FileInputFormat.addInputPath(job, inpath);

			// The output directory must not exist yet; the original author notes the job
			// reports an error otherwise.
			Path outpath=new Path(outputLocation);
			FileOutputFormat.setOutputPath(job, outpath);

			// Everything above only configures the job; this call actually submits it
			// and blocks until it finishes. The return value tells whether it succeeded.
			succeeded=job.waitForCompletion(true);

		} catch (Exception e) {
			e.printStackTrace();
		}

		// Surface failure to the calling shell/scheduler instead of exiting with 0.
		if(!succeeded){
			System.exit(1);
		}
	}

}
求该成绩表当中出现相同分数的分数，以及每个分数出现的次数和对应的人名

猜你喜欢