日期:2019.11.13
博客期:115
星期三
Result文件数据说明:
Ip:106.39.41.166,(城市)
Date:10/Nov/2016:00:01:02 +0800,(日期)
Day:10,(天数)
Traffic: 54 ,(流量)
Type: video,(类型:视频video或文章article)
Id: 8701(视频或者文章的id)
测试要求:
1、 数据清洗:按照以下两阶段要求进行数据清洗,并将清洗后的数据导入hive数据库中。
两阶段数据清洗:
(1)第一阶段:把需要的信息从原始日志中提取出来
ip: 199.30.25.88
time: 10/Nov/2016:00:01:03 +0800
traffic: 62
文章: article/11325
视频: video/3235
(2)第二阶段:根据提取出来的信息做精细化操作
ip--->城市 city(IP)
date--> time:2016-11-10 00:01:03
day: 10
traffic:62
type:article/video
id:11325
(3)hive数据库表结构:
create table data( ip string, time string , day string, traffic bigint,type string, id string )
2、数据处理:
·统计最受欢迎的视频/文章的Top10访问次数 (video/article)
·按照地市统计最受欢迎的Top10课程 (ip)
·按照流量统计最受欢迎的Top10课程 (traffic)
3、数据可视化:将统计结果导入MySQL数据库中,通过图形化展示的方式展现出来。
制作:
A、基础数据Bean类
1 package com.hive.basic; 2 3 import com.hive.format.IPUtil; 4 import com.hive.format.TimeUtil; 5 6 public class Bean { 7 protected String ip; 8 protected String time; 9 protected String day; 10 protected int traffic; 11 protected String type; 12 protected String id; 13 public String getIp() { 14 return ip; 15 } 16 public void setIp(String ip) { 17 this.ip = ip; 18 } 19 public String getTime() { 20 return time; 21 } 22 public String getDay() { 23 return day; 24 } 25 public void setDay(String day) { 26 this.day = day; 27 } 28 public void setTime(String time) { 29 this.time = time; 30 } 31 public int getTraffic() { 32 return traffic; 33 } 34 public void setTraffic(int traffic) { 35 this.traffic = traffic; 36 } 37 public String getType() { 38 return type; 39 } 40 public void setType(String type) { 41 this.type = type; 42 } 43 public String getId() { 44 return id; 45 } 46 public void setId(String id) { 47 this.id = id; 48 } 49 public Bean(String ip, String time, String day , int traffic, String type, String id) { 50 super(); 51 this.ip = ip; 52 this.time = time; 53 this.day = day; 54 this.traffic = traffic; 55 this.type = type; 56 this.id = id; 57 } 58 public Bean() { 59 super(); 60 // TODO 自动生成的构造函数存根 61 } 62 /*格式转换*/ 63 public void format(){ 64 this.ip = IPUtil.getCityInfo("106.39.41.166").split("\\|")[3].replace("市",""); 65 this.time = TimeUtil.deal(this.time); 66 } 67 public void display(){ 68 System.out.println(ip+","+time+","+day+","+traffic+","+type+","+id); 69 } 70 }
B、日期格式转化类
1 package com.hive.format; 2 3 import java.text.ParseException; 4 import java.text.SimpleDateFormat; 5 import java.util.Date; 6 import java.util.Locale; 7 8 public class TimeUtil { 9 public static String deal(String time){ 10 11 SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH); 12 Date dd = null; 13 try { 14 dd = sdf.parse(time); 15 } catch (ParseException e) { 16 // TODO 自动生成的 catch 块 17 e.printStackTrace(); 18 } //将字符串改为date的格式 19 20 String resDate = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(dd); 21 22 return resDate; 23 } 24 public static void main(String[] args) throws ParseException { 25 26 String dateString = "10/Nov/2016:00:01:02 +0800"; 27 SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH); 28 Date dd = sdf.parse(dateString); //将字符串改为date的格式 29 String resDate= new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(dd); 30 System.out.println(resDate); 31 } 32 }