第十二章 结合flume+mapreduce+hive+sqoop+mysql的综合实战练习


        我们要做的小项目是关于黑马训练营的日志分析项目,用到的日志文件大家可以到 http://download.csdn.net/detail/u012453843/9680664 这个地址下载。日志文件中的内容如下(仅拿出来两行内容,示例中的IP已被略去,以<IP>表示),可以看到一共有5列,每列代表的意思是:第一列是IP,第二列是时间,第三列是请求资源路径,第四列是访问状态(200代表访问成功),第五列是本次访问产生的流量。

<IP> - - [30/May/2013:17:38:20 +0800] "GET /static/image/common/faq.gif HTTP/1.1" 200 1127
<IP> - - [30/May/2013:17:38:20 +0800] "GET /data/cache/style_1_widthauto.css?y7a HTTP/1.1" 200 1292




         定义:页面浏览量即为PV(Page View),是指所有用户浏览页面的总和,一个独立用户每打开一个页面就被记录一次。



       定义:访客数(UV)即唯一访客数,一天之内网站的独立访客数(以Cookie 为依据),一天内同一访客多次访问网站只计算1 个访客。

        定义:独立IP数是指一天之内,访问网站的不同独立IP个数加和。其中同一IP无论访问了几个页面,独立IP 数均为1。





















xiaoye@ubuntu:~$ ./hive/bin/hive
18/04/07 23:06:14 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead

Logging initialized using configuration in jar:file:/home/xiaoye/hive/lib/hive-common-0.13.1-cdh5.2.0.jar!/hive-log4j.properties
hive> show tables;
Time taken: 1.42 seconds, Fetched: 2 row(s)
hive> drop table people;
Time taken: 3.937 seconds
hive> drop table student;

hive> show tables;

Time taken: 0.051 seconds


hive> create external table fmhsm(ip string,logtime string,url string)    
    > partitioned by (logdate string)
    > row format delimited fields terminated by '\t' location '/cleaned';
Time taken: 1.352 seconds
hive> show tables;

Time taken: 0.07 seconds, Fetched: 1 row(s)







xiaoye@ubuntu:~$ vim daily.sh
命令:chmod +x daily.sh


package com.mapreduce.clean;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Locale;

import org.apache.commons.lang.ObjectUtils.Null;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.v2.LogParams;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.lf5.util.LogFileParser;

import com.mapreduce.clean.Cleaner.MyMapper.MyReducer;

/*
 * Cleaner类为何要继承Configured类呢?因为我们要使用Tool接口,而Tool接口继承了Configurable接口,
 * Configurable接口有void setConf(Configuration conf);和Configuration
 * getConf();两个方法需要实现。
 * 这就是说,如果我们直接实现Tool接口,那么我们将不得不实现三个方法。但是Configured类已经帮我们实现了Configurable
 * 接口的两个方法,因此我们为了方便,继承Configured类,仅实现Tool接口的run方法就可以了。
 * @author liuxin
 * @date   2018年4月8日
 */
public class Cleaner extends Configured implements Tool{

* 该方法是Tool接口类的一个接口,类似于一个线程,我们需要实现它的run方法。
public int run(String[] arg0) throws Exception {
// 因为这是单独起的一个线程,因此变量必须是final类型的,第一个参数是输入路径,也就是我们的原始数据的路径
final String inputPath=arg0[0];
final String outputPath=arg0[1];
final Configuration conf=new Configuration();
final Job job=Job.getInstance(conf);
FileInputFormat.setInputPaths(job, inputPath);
   FileOutputFormat.setOutputPath(job, new Path(outputPath));
return 0;
public static void main(String[] args) throws Exception {
ToolRunner.run(new Cleaner(), args);


     * 自定义一个Mapper类,Mapper类的输入是<k1,v1>,输出是<k2,v2>

static class MyMapper extends Mapper<LongWritable,Text,LongWritable,Text>{
LogParser parser=new LogParser();
Text v2=new Text();
protected void map(LongWritable key,Text value,Mapper<LongWritable,Text,LongWritable,Text>.Context context) {
final String line=value.toString();
final String[] parsed=parser.parse(line);
final String ip=parsed[0];
final String logtime=parsed[1];
String url=parsed[2];
//我们要过滤掉以"GET /static"或"GET /uc_server"开头的数据(我们姑且认为这两个开头的数据是坏数据)
if(url.startsWith("GET /static") || url.startsWith("GET /uc_server")){
//如果是GET请求,我们截取"GET"和" HTTP/1.1"之间的数据,比如"GET /static/image/common/faq.gif HTTP/1.1"
url=url.substring("GET ".length()+1, url.length()-" HTTP/1.1".length());
//如果是POST请求,我们截取"POST"和" HTTP/1.1"之间的数据,比如"POST /api/manyou/my.php HTTP/1.0"
//我们要得到的是"/api/manyou/my.php HTTP/1.0"
url=url.substring("POST ".length()+1,url.length()-" HTTP/1.1".length());
try {
context.write(key, v2);
} catch (IOException e) {
// TODO Auto-generated catch block
} catch (InterruptedException e) {
// TODO Auto-generated catch block
    * 自定义一个Reducer类,输入是<k2,v2>,输出是<k3,v3>
    * @author wanghaijie
static class MyReducer extends Reducer<LongWritable,Text,Text,NullWritable>{
protected void reduce(LongWritable k2,Iterable<Text> v2s,Reducer<LongWritable,Text, Text, NullWritable>.Context context) {
for(Text v2:v2s){
try {
context.write(v2, NullWritable.get());
} catch (IOException e) {
// TODO Auto-generated catch block
} catch (InterruptedException e) {
// TODO Auto-generated catch block



package com.mapreduce.clean;

import java.text.Format;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;

import junit.awtui.Logo;

// @author liuxin
// @date   2018年4月8日
/**
 * Parses one access-log line into its five columns: ip, time, url, status
 * and traffic.
 *
 * <p>A line is expected to look like
 * {@code <ip> - - [30/May/2013:17:38:20 +0800] "GET /a/b.gif HTTP/1.1" 200 1127}.
 *
 * <p>The two shared {@link SimpleDateFormat} instances are not thread-safe;
 * do not call {@link #parse(String)} from several threads at once.
 */
public class LogParser {
    /** Reads the English timestamp used in the log, e.g. {@code 30/May/2013:17:38:20}. */
    public static final SimpleDateFormat format = new SimpleDateFormat("d/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
    /** Writes a timestamp as {@code yyyyMMddHHmmss}, e.g. {@code 20130530173820}. */
    public static final SimpleDateFormat DATEFORMAT = new SimpleDateFormat("yyyyMMddHHmmss");

    /** Small manual check: parses one sample line and prints each column. */
    public static void main(String[] args) {
        final String s1 = "192.0.2.1 - - [30/May/2013:17:38:20 +0800] "
                + "\"GET /static/image/common/faq.gif HTTP/1.1\" 200 1127";
        LogParser parser = new LogParser();
        final String[] array = parser.parse(s1);
        for (String a : array) {
            System.out.println(a);
        }
    }

    /**
     * Parses one log record.
     *
     * @param line raw log line
     * @return array of 5 elements: ip, time, url, status, traffic; the time
     *         element is {@code null} when the timestamp cannot be parsed
     */
    public String[] parse(String line) {
        String ip = parseIP(line);
        String time = parseTime(line);
        String url = parseURL(line);
        String status = parseStatus(line);
        String traffic = parseTraffic(line);
        return new String[] {ip, time, url, status, traffic};
    }

    /**
     * Returns the whitespace-separated fields that follow the last double
     * quote, i.e. {@code ["200", "1127"]} for {@code ..." 200 1127}.
     */
    private String[] splitStatusAndTraffic(String line) {
        final String tail = line.substring(line.lastIndexOf("\"") + 1).trim();
        // Split on runs of whitespace: the columns may be separated by
        // several spaces or tabs, not exactly one space.
        return tail.isEmpty() ? new String[0] : tail.split("\\s+");
    }

    /**
     * Extracts the traffic (bytes sent) of the request, e.g. {@code 1127}.
     *
     * @param line raw log line
     * @return the second field after the quoted request, or "" if absent
     */
    private String parseTraffic(String line) {
        final String[] fields = splitStatusAndTraffic(line);
        return fields.length > 1 ? fields[1] : "";
    }

    /**
     * Extracts the response status, e.g. {@code 200}.
     *
     * @param line raw log line
     * @return the first field after the quoted request, or "" if absent
     */
    private String parseStatus(String line) {
        final String[] fields = splitStatusAndTraffic(line);
        return fields.length > 0 ? fields[0] : "";
    }

    /**
     * Extracts the quoted request, e.g. {@code GET /static/image/common/faq.gif HTTP/1.1}.
     *
     * @param line raw log line
     * @return the text between the first and the last double quote, or ""
     *         when the line does not contain a quoted section
     */
    private String parseURL(String line) {
        final int first = line.indexOf("\"");
        final int last = line.lastIndexOf("\"");
        if (first < 0 || last <= first) {
            return "";
        }
        return line.substring(first + 1, last);
    }

    /**
     * Converts the bracketed English timestamp into {@code yyyyMMddHHmmss}.
     *
     * <p>For {@code [30/May/2013:17:38:20 +0800]} the result is
     * {@code 20130530173820}. The zone offset is dropped, not applied.
     *
     * @param line raw log line
     * @return the reformatted time, or {@code null} if the line has no
     *         parsable bracketed timestamp
     */
    private String parseTime(String line) {
        final int first = line.indexOf('[');
        final int last = line.indexOf(']', first + 1);
        if (first < 0 || last < 0) {
            return null;
        }
        final String stamp = line.substring(first + 1, last).trim();
        // Keep only the date-time part; whatever follows the first space is
        // the zone offset (not necessarily +0800).
        final int space = stamp.indexOf(' ');
        final String time = space < 0 ? stamp : stamp.substring(0, space);
        try {
            return DATEFORMAT.format(format.parse(time));
        } catch (ParseException ignored) {
            // A malformed timestamp is reported as null instead of aborting
            // the whole record.
            return null;
        }
    }

    /**
     * Extracts the client IP, i.e. everything before the {@code "- -"}
     * separator.
     *
     * @param line raw log line
     * @return the trimmed IP column; the whole trimmed line when the
     *         separator is missing
     */
    private String parseIP(String line) {
        final int separator = line.indexOf("- -");
        final String ip = separator < 0 ? line : line.substring(0, separator);
        return ip.trim();
    }
}



先启动hadoop集群,再开启flume ,可参照小编的上一篇博文

xiaoye@ubuntu:~$ ./flume/bin/flume-ng agent -n a4 -c flume/conf -f flume/conf/a4.conf -Dflume.root.logger=INFO,console




xiaoye@ubuntu:~/flume/conf$ vim a4.conf






xiaoye@ubuntu:~$ rz
rz waiting to receive.
Starting zmodem transfer.  Press Ctrl+C to cancel.
Transferring Cleaner.jar...

  100%      15 KB      15 KB/sec    00:00:01       0 Errors  


命令:xiaoye@ubuntu:~$ ./hadoop/bin/hadoop jar Cleaner.jar  /flume/20180409/events-.1523259787334  /flume/out040904



下面我们来用脚本调用Cleaner.jar,我们需要定义一个时间变量,定义变量的好处是以后就不用手动改变它了,每隔一段时间它会自动执行,我们定义的时间变量名是CURRENT,如下所示,其中echo $CURRENT只是用来测试该时间变量是否可用。%y显示的是2016的后两位16,%Y则显示的是全部。

xiaoye@ubuntu:~$ vim daily.sh 

CURRENT=`date +%Y%m%d`



xiaoye@ubuntu:~$ ./daily.sh   



CURRENT=`date -d "1 day ago" +%Y%m%d`


xiaoye@ubuntu:~$ ./daily.sh     




xiaoye@ubuntu:~$ vim daily.sh 

#CURRENT=`date -d "1 day ago" +%Y%m%d`
CURRENT=`date +%Y%m%d`
#echo $CURRENT

/home/xiaoye/hadoop/bin/hadoop jar /home/xiaoye/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT



xiaoye@ubuntu:~$ ./daily.sh   






xiaoye@ubuntu:~$ hive -e "show tables"
18/04/09 02:05:31 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead

Logging initialized using configuration in jar:file:/home/xiaoye/hive/lib/hive-common-0.13.1-cdh5.2.0.jar!/hive-log4j.properties

Time taken: 1.849 seconds, Fetched: 1 row(s)




xiaoye@ubuntu:~$ cat daily.sh 
#CURRENT=`date -d "1 day ago" +%Y%m%d`
CURRENT=`date +%Y%m%d`
#echo $CURRENT
#/home/xiaoye/hadoop/bin/hadoop jar /home/xiaoye/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT
/home/xiaoye/hive/bin/hive -e "alter table fmhsm add  partition (logdate='$CURRENT') location '/cleaned/$CURRENT'"


xiaoye@ubuntu:~$ ./daily.sh 

18/04/09 02:43:59 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead

Logging initialized using configuration in jar:file:/home/xiaoye/hive/lib/hive-common-0.13.1-cdh5.2.0.jar!/hive-log4j.properties
FAILED: SemanticException Partition spec {logdata=20180409} contains non-partition columns


xiaoye@ubuntu:~$ hive -e "select * from fmhsm limit 10"; 
18/04/09 03:16:19 WARN conf.HiveConf: DEPRECATED: hive.metastore.ds.retry.* no longer has any effect.  Use hive.hmshandler.retry.* instead

Logging initialized using configuration in jar:file:/home/xiaoye/hive/lib/hive-common-0.13.1-cdh5.2.0.jar!/hive-log4j.properties
OK - - [30/May/2013:17:38:20 +0800] "GET /data/cache/style_1_widthauto.css?y7a HTTP/1.1" 200 1292   20130530173820  data/cache/style_1_widthauto.css?y720180409 - - [30/May/2013:17:38:20 +0800] "GET /source/plugin/wsh_wx/img/wsh_zk.css HTTP/1.1" 200 1482    20130530173820  source/plugin/wsh_wx/img/wsh_zk.css20180409



#CURRENT=`date -d "1 day ago" +%Y%m%d`
CURRENT=`date +%Y%m%d`
#echo $CURRENT
#/home/xiaoye/hadoop/bin/hadoop jar /home/xiaoye/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT
#/home/xiaoye/hive/bin/hive -e "alter table fmhsm add  partition (logdate='$CURRENT') location '/cleaned/$CURRENT'"

/home/xiaoye/hive/bin/hive -e "select count(*) from fmhsm where logdate=$CURRENT"



Total MapReduce CPU Time Spent: 32 seconds 140 msec




#CURRENT=`date -d "1 day ago" +%Y%m%d`
CURRENT=`date +%Y%m%d`
#echo $CURRENT
#/home/xiaoye/hadoop/bin/hadoop jar /home/xiaoye/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT
#/home/xiaoye/hive/bin/hive -e "alter table fmhsm add  partition (logdate='$CURRENT') location '/cleaned/$CURRENT'"

#/home/xiaoye/hive/bin/hive -e "select count(*) from fmhsm where logdate=$CURRENT"

/home/xiaoye/hive/bin/hive -e "create table pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from fmhsm where logdate=$CURRENT"

xiaoye@ubuntu:~$ ./daily.sh

[root@itcast03 ~]# hive -e "show tables;";
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 00:26:30 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Time taken: 0.661 seconds, Fetched: 3 row(s)

[root@itcast03 ~]# hive -e "select * from pv_20161109";
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 00:27:11 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Time taken: 0.999 seconds, Fetched: 1 row(s)
[root@itcast03 ~]#

       下面我们来查询浏览次数最多的前20名客户(VIP客户),我们还在我们的daily.sh脚本中写shell命令,如下。需要说明的是,在sql语句中如果使用了group by分组,select之后一般只能出现分组字段以及聚合函数(如count),但是常量除外,也就是说,我们可以在select语句之后加任意的常量值,我们这里便把变量$CURRENT加到了select语句当中。

[root@itcast03 ~]# vim daily.sh 
CURRENT=`date -d "4 day ago" +%Y%m%d`

#/itcast/hadoop-2.2.0/bin/hadoop jar /root/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "alter table hmbbs add partition (logdate=$CURRENT) location '/cleaned/$CURRENT'"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate=$CURRENT"

/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table vip_$CURRENT row format delimited fields terminated by '\t' as select $CURRENT,ip,count(*) as hits from hmbbs where logdate=$CURRENT group by ip having hits>20 order by hits desc limit 20"


[root@itcast03 ~]# ./daily.sh 
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 00:53:22 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1478920720232_0014, Tracking URL = http://itcast03:8088/proxy/application_1478920720232_0014/
Kill Command = /itcast/hadoop-2.2.0/bin/hadoop job  -kill job_1478920720232_0014
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-11-13 00:53:37,402 Stage-1 map = 0%,  reduce = 0%
2016-11-13 00:53:43,755 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 2.32 sec
2016-11-13 00:53:51,120 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 4.97 sec
MapReduce Total cumulative CPU time: 4 seconds 970 msec
Ended Job = job_1478920720232_0014
Launching Job 2 out of 2
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1478920720232_0015, Tracking URL = http://itcast03:8088/proxy/application_1478920720232_0015/
Kill Command = /itcast/hadoop-2.2.0/bin/hadoop job  -kill job_1478920720232_0015
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2016-11-13 00:53:59,723 Stage-2 map = 0%,  reduce = 0%
2016-11-13 00:54:06,019 Stage-2 map = 100%,  reduce = 0%, Cumulative CPU 1.34 sec
2016-11-13 00:54:12,282 Stage-2 map = 100%,  reduce = 100%, Cumulative CPU 2.5 sec
MapReduce Total cumulative CPU time: 2 seconds 500 msec
Ended Job = job_1478920720232_0015
Moving data to: hdfs://ns1/user/hive/warehouse/vip_20161109
Table default.vip_20161109 stats: [numFiles=1, numRows=20, totalSize=553, rawDataSize=533]
MapReduce Jobs Launched: 
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 4.97 sec   HDFS Read: 12756871 HDFS Write: 60477 SUCCESS
Job 1: Map: 1  Reduce: 1   Cumulative CPU: 2.5 sec   HDFS Read: 60829 HDFS Write: 630 SUCCESS
Total MapReduce CPU Time Spent: 7 seconds 470 msec
Time taken: 47.862 seconds

[root@itcast03 ~]# hive -e "show tables;";
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 00:54:31 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Time taken: 0.582 seconds, Fetched: 4 row(s)

[root@itcast03 ~]# hive -e "select * from vip_20161109";
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 00:55:09 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
20161109     4855
20161109 3942
20161109      1889
20161109  1877
20161109  1571
20161109  1378
20161109     1160
20161109 969
20161109  805
20161109   735
20161109 672
20161109   632
20161109   575
20161109 575
20161109    561
20161109   533
20161109    520
20161109  512
20161109  508
20161109  503

Time taken: 1.056 seconds, Fetched: 20 row(s)
[root@itcast03 ~]# 


[root@itcast03 ~]# vim daily.sh 
CURRENT=`date -d "4 day ago" +%Y%m%d`

#/itcast/hadoop-2.2.0/bin/hadoop jar /root/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "alter table hmbbs add partition (logdate=$CURRENT) location '/cleaned/$CURRENT'"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate=$CURRENT"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table vip_$CURRENT row format delimited fields terminated by '\t' as select $CURRENT,ip,count(*) as hits from hmbbs where logdate=$CURRENT group by ip having hits>20 order by hits desc limit 20"

/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table uv_$CURRENT row format delimited fields terminated by '\t' as select count(distinct ip) from hmbbs where logdate=$CURRENT"


[root@itcast03 ~]# ./daily.sh 
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 01:08:59 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1478920720232_0016, Tracking URL = http://itcast03:8088/proxy/application_1478920720232_0016/
Kill Command = /itcast/hadoop-2.2.0/bin/hadoop job  -kill job_1478920720232_0016
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-11-13 01:09:12,694 Stage-1 map = 0%,  reduce = 0%
2016-11-13 01:09:19,008 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 2.41 sec
2016-11-13 01:09:25,294 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 4.9 sec
MapReduce Total cumulative CPU time: 4 seconds 900 msec
Ended Job = job_1478920720232_0016
Moving data to: hdfs://ns1/user/hive/warehouse/uv_20161109
Table default.uv_20161109 stats: [numFiles=1, numRows=1, totalSize=6, rawDataSize=5]
MapReduce Jobs Launched: 
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 4.9 sec   HDFS Read: 12756871 HDFS Write: 81 SUCCESS
Total MapReduce CPU Time Spent: 4 seconds 900 msec
Time taken: 24.247 seconds

[root@itcast03 ~]# hive -e "show tables";
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 01:09:44 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Time taken: 0.588 seconds, Fetched: 5 row(s)

[root@itcast03 ~]# hive -e "select * from uv_20161109";
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 01:10:10 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Time taken: 0.956 seconds, Fetched: 1 row(s)


[root@itcast03 ~]# vim daily.sh

CURRENT=`date -d "4 day ago" +%Y%m%d`

#/itcast/hadoop-2.2.0/bin/hadoop jar /root/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "alter table hmbbs add partition (logdate=$CURRENT) location '/cleaned/$CURRENT'"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate=$CURRENT"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table vip_$CURRENT row format delimited fields terminated by '\t' as select $CURRENT,ip,count(*) as hits from hmbbs where logdate=$CURRENT group by ip having hits>20 order by hits desc limit 20"

#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table uv_$CURRENT row format delimited fields terminated by '\t' as select count(distinct ip) from hmbbs where logdate=$CURRENT"

/itcast/apache-hive-0.13.0-bin/bin/hive -e "select count(*) from hmbbs where logdate=$CURRENT and instr(url,'member.php?mod=register')>0"


[root@itcast03 ~]# ./daily.sh 
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
16/11/13 01:22:28 INFO Configuration.deprecation: mapred.committer.job.setup.cleanup.needed is deprecated. Instead, use mapreduce.job.committer.setup.cleanup.needed

Logging initialized using configuration in jar:file:/itcast/apache-hive-0.13.0-bin/lib/hive-common-0.13.0.jar!/hive-log4j.properties
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1478920720232_0017, Tracking URL = http://itcast03:8088/proxy/application_1478920720232_0017/
Kill Command = /itcast/hadoop-2.2.0/bin/hadoop job  -kill job_1478920720232_0017
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2016-11-13 01:22:41,748 Stage-1 map = 0%,  reduce = 0%
2016-11-13 01:22:48,155 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 2.23 sec
2016-11-13 01:22:54,417 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 3.41 sec
MapReduce Total cumulative CPU time: 3 seconds 410 msec
Ended Job = job_1478920720232_0017
MapReduce Jobs Launched: 
Job 0: Map: 1  Reduce: 1   Cumulative CPU: 3.41 sec   HDFS Read: 12756871 HDFS Write: 3 SUCCESS
Total MapReduce CPU Time Spent: 3 seconds 410 msec
Time taken: 22.633 seconds, Fetched: 1 row(s)
[root@itcast03 ~]# 




[root@itcast03 ~]# vim daily.sh 
# daily.sh - daily log-analysis pipeline for the hmbbs Hive table.
# Only the final Sqoop export is active; every earlier step is commented out
# and can be re-enabled by removing its leading '#'.

# Date stamp (YYYYMMDD) of the day being processed, fixed at four days back.
# It is reused as the HDFS sub-directory name, the Hive partition value and
# the suffix of the per-day result tables.
CURRENT=$(date -d "4 day ago" +%Y%m%d)

# (disabled) Run Cleaner.jar with /flume/$CURRENT and /cleaned/$CURRENT as its arguments.
#/itcast/hadoop-2.2.0/bin/hadoop jar /root/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT

# (disabled) Attach /cleaned/$CURRENT to hmbbs as the partition for that day.
#/itcast/apache-hive-0.13.0-bin/bin/hive -e "alter table hmbbs add partition (logdate=$CURRENT) location '/cleaned/$CURRENT'"

# (disabled) PV: total number of rows for the day, stored in pv_$CURRENT.
#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from hmbbs where logdate=$CURRENT"

# (disabled) VIP: up to 20 IPs with more than 20 hits, most hits first, stored in vip_$CURRENT.
#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table vip_$CURRENT row format delimited fields terminated by '\t' as select $CURRENT,ip,count(*) as hits from hmbbs where logdate=$CURRENT group by ip having hits>20 order by hits desc limit 20"

# (disabled) UV: number of distinct IPs for the day, stored in uv_$CURRENT.
#/itcast/apache-hive-0.13.0-bin/bin/hive -e "create table uv_$CURRENT row format delimited fields terminated by '\t' as select count(distinct ip) from hmbbs where logdate=$CURRENT"

# (disabled) Count requests whose url contains 'member.php?mod=register'.
#/itcast/apache-hive-0.13.0-bin/bin/hive -e "select count(*) from hmbbs where logdate=$CURRENT and instr(url,'member.php?mod=register')>0"

# Export the tab-separated files under the vip_$CURRENT warehouse directory
# into the MySQL table `vip`.
# NOTE(review): the JDBC URL below carries no host or database name - it looks
# truncated; confirm the real connection string before running.
/itcast/sqoop-1.4.6/bin/sqoop export \
    --connect jdbc:mysql:// \
    --username root \
    --password root \
    --export-dir "/user/hive/warehouse/vip_$CURRENT" \
    --table vip \
    --fields-terminated-by '\t'


[root@itcast03 ~]# ./daily.sh 
Warning: /itcast/sqoop-1.4.6/../hbase does not exist! HBase imports will fail.
Please set $HBASE_HOME to the root of your HBase installation.
Warning: /itcast/sqoop-1.4.6/../hcatalog does not exist! HCatalog jobs will fail.
Please set $HCAT_HOME to the root of your HCatalog installation.
Warning: /itcast/sqoop-1.4.6/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
Warning: /itcast/sqoop-1.4.6/../zookeeper does not exist! Accumulo imports will fail.
Please set $ZOOKEEPER_HOME to the root of your Zookeeper installation.
16/11/13 01:47:25 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6
16/11/13 01:47:25 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
16/11/13 01:47:25 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
16/11/13 01:47:25 INFO tool.CodeGenTool: Beginning code generation
Sun Nov 13 01:47:25 CST 2016 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
16/11/13 01:47:25 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `vip` AS t LIMIT 1
16/11/13 01:47:26 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `vip` AS t LIMIT 1
16/11/13 01:47:26 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /itcast/hadoop-2.2.0
Note: /tmp/sqoop-root/compile/3307c34445798be97edc02e8d0d14b08/vip.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
16/11/13 01:47:27 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-root/compile/3307c34445798be97edc02e8d0d14b08/vip.jar
16/11/13 01:47:27 INFO mapreduce.ExportJobBase: Beginning export of vip
16/11/13 01:47:27 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
16/11/13 01:47:28 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
16/11/13 01:47:28 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
16/11/13 01:47:28 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
16/11/13 01:47:28 INFO client.RMProxy: Connecting to ResourceManager at itcast03/
16/11/13 01:47:31 INFO input.FileInputFormat: Total input paths to process : 1
16/11/13 01:47:31 INFO input.FileInputFormat: Total input paths to process : 1
16/11/13 01:47:31 INFO mapreduce.JobSubmitter: number of splits:4
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.job.classpath.files is deprecated. Instead, use mapreduce.job.classpath.files
16/11/13 01:47:31 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.cache.files.filesizes is deprecated. Instead, use mapreduce.job.cache.files.filesizes
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.cache.files is deprecated. Instead, use mapreduce.job.cache.files
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.mapoutput.value.class is deprecated. Instead, use mapreduce.map.output.value.class
16/11/13 01:47:31 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
16/11/13 01:47:31 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir
16/11/13 01:47:31 INFO Configuration.deprecation: mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.cache.files.timestamps is deprecated. Instead, use mapreduce.job.cache.files.timestamps
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.mapoutput.key.class is deprecated. Instead, use mapreduce.map.output.key.class
16/11/13 01:47:31 INFO Configuration.deprecation: mapred.working.dir is deprecated. Instead, use mapreduce.job.working.dir
16/11/13 01:47:31 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1478920720232_0019
16/11/13 01:47:32 INFO impl.YarnClientImpl: Submitted application application_1478920720232_0019 to ResourceManager at itcast03/
16/11/13 01:47:32 INFO mapreduce.Job: The url to track the job: http://itcast03:8088/proxy/application_1478920720232_0019/
16/11/13 01:47:32 INFO mapreduce.Job: Running job: job_1478920720232_0019
16/11/13 01:47:38 INFO mapreduce.Job: Job job_1478920720232_0019 running in uber mode : false
16/11/13 01:47:38 INFO mapreduce.Job:  map 0% reduce 0%
16/11/13 01:47:52 INFO mapreduce.Job:  map 100% reduce 0%
16/11/13 01:47:54 INFO mapreduce.Job: Job job_1478920720232_0019 completed successfully
16/11/13 01:47:55 INFO mapreduce.Job: Counters: 27
        File System Counters
                FILE: Number of bytes read=0
                FILE: Number of bytes written=413456
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=2070
                HDFS: Number of bytes written=0
                HDFS: Number of read operations=19
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=0
        Job Counters 
                Launched map tasks=4
                Rack-local map tasks=4
                Total time spent by all maps in occupied slots (ms)=48728
                Total time spent by all reduces in occupied slots (ms)=0
        Map-Reduce Framework
                Map input records=20
                Map output records=20
                Input split bytes=601
                Spilled Records=0
                Failed Shuffles=0
                Merged Map outputs=0
                GC time elapsed (ms)=234
                CPU time spent (ms)=3470
                Physical memory (bytes) snapshot=371515392
                Virtual memory (bytes) snapshot=3365302272
                Total committed heap usage (bytes)=62390272
        File Input Format Counters 
                Bytes Read=0
        File Output Format Counters 
                Bytes Written=0
16/11/13 01:47:55 INFO mapreduce.ExportJobBase: Transferred 2.0215 KB in 26.7272 seconds (77.4492 bytes/sec)
16/11/13 01:47:55 INFO mapreduce.ExportJobBase: Exported 20 records



# daily.sh - daily log-analysis pipeline for the fmhsm Hive table.
# Only the last query is active; every earlier step is commented out and can
# be re-enabled by removing its leading '#'.

# Date stamp (YYYYMMDD) of yesterday; reused as the HDFS sub-directory name,
# the Hive partition value and the suffix of the per-day result tables.
CURRENT=`date -d "1 day ago" +%Y%m%d`
#CURRENT=`date +%Y%m%d`
#echo $CURRENT
# (disabled) Run Cleaner.jar with /flume/$CURRENT and /cleaned/$CURRENT as its arguments.
#/home/xiaoye/hadoop/bin/hadoop jar /home/xiaoye/Cleaner.jar /flume/$CURRENT /cleaned/$CURRENT
# (disabled) Attach /cleaned/$CURRENT to fmhsm as the partition for that day.
#/home/xiaoye/hive/bin/hive -e "alter table fmhsm add  partition (logdate='$CURRENT') location '/cleaned/$CURRENT'"

# (disabled) Print the number of rows for the day.
#/home/xiaoye/hive/bin/hive -e "select count(*) from fmhsm where logdate=$CURRENT"

# (disabled) PV: total number of rows for the day, stored in pv_$CURRENT.
#/home/xiaoye/hive/bin/hive -e "create table if not exists pv_$CURRENT row format delimited fields terminated by '\t' as select count(*) from fmhsm where logdate=$CURRENT"

# (disabled) VIP: up to 20 IPs with more than 20 hits, most hits first, stored in vip_$CURRENT.
#/home/xiaoye/hive/bin/hive -e "create table if not exists vip_$CURRENT row format delimited fields terminated by '\t' as select $CURRENT,ip,count(*) as hits from fmhsm where logdate=$CURRENT group by ip having hits>20 order by hits desc limit 20"

# (disabled) UV: number of distinct IPs for the day, stored in uv_$CURRENT.
#/home/xiaoye/hive/bin/hive -e "create table if not exists uv_$CURRENT row format delimited fields terminated by '\t' as select count(distinct ip) from fmhsm where logdate=$CURRENT"

# Count requests whose url contains 'member.php?mod=register'.
# The FROM keyword is required: without it Hive reads `fmhsm` as a column
# alias and rejects the statement because the query has no table source.
/home/xiaoye/hive/bin/hive -e "select count(*) from fmhsm where logdate=$CURRENT and instr(url,'member.php?mod=register')>0"

