1 用户ID 试卷编号 考试时间 分数 2 user001 test1 2016-01-01 80 3 user001 test2 2016-01-03 23 4 user001 test3 2016-01-05 56 5 user001 test4 2016-01-07 55
1 package com.month10b; 2 3 import org.apache.hadoop.io.LongWritable; 4 import org.apache.hadoop.io.NullWritable; 5 import org.apache.hadoop.io.Text; 6 import org.apache.hadoop.mapreduce.Mapper; 7 8 import java.io.IOException; 9 10 public class CarMapper extends Mapper<LongWritable, Text,Text, NullWritable> { 11 12 @Override 13 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 14 // super.map(key, value, context); 15 16 String[] split = value.toString().split(","); 17 18 int i = Integer.parseInt(split[1].substring(split[1].length() - 1)); 19 20 if (i>=3&& i<=5){ 21 context.write(value,NullWritable.get()); 22 } 23 24
1 package com.month10b; 2 3 import org.apache.hadoop.io.NullWritable; 4 import org.apache.hadoop.io.Text; 5 import org.apache.hadoop.mapreduce.Reducer; 6
package com.month10b;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the exam-record filter job: CarMapper keeps only test3..test5
 * records, CarReduce emits each distinct record once.
 */
public class CarDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        job.setJarByClass(CarDriver.class);
        job.setMapperClass(CarMapper.class);
        // Bug fix: CarReduce exists in this project but was never wired into
        // the job, so the default identity reducer ran instead (keeping
        // duplicate records). Setting it makes the output de-duplicated as
        // the reducer intends.
        job.setReducerClass(CarReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // NOTE(review): the assignment asks for HDFS output under /car/logs;
        // these are local Windows paths used for development — confirm before
        // the cluster run. The output directory must not already exist.
        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\Dell\\Desktop\\4.8\\input\\hive.txt"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Dell\\Desktop\\4.8\\output"));

        boolean b = job.waitForCompletion(true);
        System.out.println(b);
        // Propagate job success/failure as the process exit code.
        System.exit(b ? 0 : 1);
    }
}
7 import java.io.IOException; 8 9 public class CarReduce extends Reducer<Text,NullWritable,Text,NullWritable> { 10 11 @Override 12 protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { 13 context.write(key,NullWritable.get()); 14 } 15 }
25 } 26 }
6 user001 test5 2016-01-09 67 7 user002 test1 2016-01-01 86 8 user002 test1 2016-01-02 45 9 user002 test2 2016-01-03 89 10 user002 test3 2016-01-05 12 11 user002 test4 2016-01-07 78 12 user002 test5 2016-01-09 76 13 user002 test5 2016-01-09 87 14 user002 test5 2016-01-11 71 15 user003 test6 2016-01-01 35 16 user003 test7 2016-01-02 97 17 user003 test1 2016-01-03 27 18 user003 test1 2016-01-04 98 19 user003 test8 2016-01-05 23 20 user003 test3 2016-01-05 88 21 user003 test3 2016-01-06 91 22 user003 test8 2016-01-07 37 23 user003 test1 2016-01-08 23 24 user003 test2 2016-01-09 97 25 user003 test3 2016-01-10 18 26 user003 test3 2016-01-12 45 27 user003 test4 2016-01-13 23 28 user003 test5 2016-01-15 78 29 user003 test5 2016-01-16 23 30 user003 test5 2016-01-18 12 31 user004 test6 2016-01-01 89 32 user004 test5 2016-01-01 12 33 user004 test7 2016-01-02 98 34 user004 test1 2016-01-03 22 35 user004 test9 2016-01-03 67 36 user004 test5 2016-01-04 29 37 user004 test8 2016-01-05 87 38 user004 test3 2016-01-06 65 39 user004 test8 2016-01-07 34 40 user004 test1 2016-01-08 12 41 user004 test2 2016-01-09 89 42 user004 test3 2016-01-10 23
1 create table hive (userid string,shijuanid string, examtime string,socre int ) 2 row format delimited fields terminated by ',' ; 3 4 4) 使用SQL按userID(用户ID)分组统计每个ID的模拟考试次数,并按照次数倒序排序。(6分) 5 select userid,count(*) as aaa from hive group by userid order by aaa desc; 6 7 5) 使用SQL统计出考试次数最多的试卷编号。(6分) 8 9 select shijuanid,count(*) bianhao from hive group by shijuanid order by bianhao desc limit 1; 10 11 6) 使用SQL统计出各试卷的考试成绩平均分,并按照分数正序排序。(6分) 12 select shijuanid,avg(socre) pjf from hive group by shijuanid order by pjf; 13 14 7) 使用SQL统计出分数最高的五条记录并按照分数倒序排序(6分) 15 select * from hive order by socre desc limit 5;
43 user004 test3 2016-01-12 77 44 user004 test4 2016-01-13 85 45 user004 test5 2016-01-15 34 46 user004 test1 2016-01-16 76 47 user004 test4 2016-01-17 57 48 49 1) 使用MapReduce对以上数据进行过滤,只留下编号为test3至test5之间的记录,并将其保存到HDFS的/car/logs目录下。其中,编写MR过滤代码逻辑(6分),运行程序并输出结果到HDFS(6分) 50 2)编写Linux脚本,内容为:将/car/logs目录中内容拷贝到hive目录中。并执行脚本,检查结果(6分) 51 3) 创建Hive表,分析拷贝过来的数据。(6分) 52 create table hive (userid string,shijuanid string, examtime string,socre int ) 53 row format delimited fields terminated by ',' ; 54 4) 使用SQL按userID(用户ID)分组统计每个ID的模拟考试次数,并按照次数倒序排序。(6分) 55 5) 使用SQL统计出考试次数最多的试卷编号。(6分) 56 6) 使用SQL统计出各试卷的考试成绩平均分,并按照分数正序排序。(6分) 57 7) 使用SQL统计出分数最高的五条记录并按照分数倒序排序(6分)