这个项目所使用到的技术主要有hadoop的mapreduce,redis,ajax,json,struts2等等内容,前端框架使用的bootstrap。
首先是mapreduce的主程序:
package org.hq.mr;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCount extends Configured implements Tool{

/**
 * Mapper: extracts the search keyword from each quoted request in the
 * access log ("... sug.jsp?query=... ...") and emits (keyword, 1).
 */
public static class Map extends Mapper<LongWritable,Text,Text,IntWritable>{
private final static IntWritable one=new IntWritable(1);
@Override
protected void map(LongWritable key,Text value,Context context)
		throws IOException, InterruptedException {
	String line=value.toString();
	String[] quoted=line.split("\"");
	// Skip malformed lines with no quoted request, and lines that are
	// not suggestion queries. (>= 0: the original used > 0, which would
	// drop a match at position 0.)
	if(quoted.length<2||quoted[1].indexOf("sug.jsp?query")<0){
		return;
	}
	String[] fields=quoted[1].split("query=| ");
	if(fields.length<3){
		return; // request did not carry a query parameter
	}
	String word;
	try {
		// BUG FIX: the original decoded into a temp variable but then
		// emitted the raw, still-URL-encoded token.
		word=URLDecoder.decode(fields[2], "UTF-8");
	} catch (UnsupportedEncodingException e) {
		// UTF-8 is guaranteed by the JVM spec; fall back to the raw token.
		word=fields[2];
	}
	// Let IOException/InterruptedException propagate to the framework
	// instead of swallowing them (the original printStackTrace'd them,
	// silently losing records).
	context.write(new Text(word), one);
}
}

/**
 * Reducer: sums the per-keyword counts.
 */
public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{
@Override
protected void reduce(Text key,Iterable<IntWritable> values,Context context)
		throws IOException, InterruptedException {
	// BUG FIX: the original declared reduce(Text, Iterator, Context),
	// which does NOT override Reducer.reduce (new API passes Iterable),
	// so the identity reducer ran and counts were never summed.
	int sum=0;
	for(IntWritable v:values){
		sum+=v.get();
	}
	context.write(key, new IntWritable(sum));
}
}

/**
 * Configures and runs the job: text input from args[0], output written
 * to Redis via RedisOutputFormat (args[1] is only the nominal FS path).
 *
 * @param args args[0] = HDFS input dir, args[1] = output path
 * @return 0 on success, 1 on failure
 */
public int run(String[] args) throws Exception {
	Configuration conf=getConf();
	Job job=new Job(conf,"Load Redis");
	job.setJarByClass(WordCount.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(IntWritable.class);
	job.setMapperClass(Map.class);
	job.setReducerClass(Reduce.class);
	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(RedisOutputFormat.class);
	FileInputFormat.setInputPaths(job, new Path(args[0]));
	RedisOutputFormat.setOutputPath(job, new Path(args[1]));
	return job.waitForCompletion(true)?0:1;
}

public static void main(String args[]) throws Exception{
	int ret=ToolRunner.run(new WordCount(), args);
	System.exit(ret);
}
}
然后是由redis来对用户请求的数据进行键值对的存储:
package org.hq.mr;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.TaskAttemptContext;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import redis.clients.jedis.Jedis;
@SuppressWarnings("deprecation")
public class RedisOutputFormat<K,V> extends FileOutputFormat<K,V>{

/**
 * RecordWriter that loads each (keyword, count) pair into Redis sorted
 * sets: every prefix of the keyword is used as a key and the full keyword
 * is scored by its count, so a prefix lookup returns suggestions ranked
 * by search frequency.
 */
public static class RedisRecordWriter<K,V> extends org.apache.hadoop.mapreduce.RecordWriter<K, V>{
private final Jedis jedis;

RedisRecordWriter(Jedis jedis) {
	this.jedis=jedis;
}

@Override
public void write(K key, V value) throws IOException, InterruptedException {
	boolean nullKey=key==null||key instanceof NullWritable;
	boolean nullValue=value==null;
	if(nullKey||nullValue) return;
	String word=key.toString();
	int score;
	try {
		// hoisted out of the loop — the value is constant per record
		score=Integer.parseInt(value.toString());
	} catch (NumberFormatException e) {
		return; // skip records whose value is not an integer count
	}
	// Index every prefix of the word so partial user input can match it.
	for(int i=0;i<word.length();i++){
		jedis.zincrby(word.substring(0,i+1), score, word);
	}
}

@Override
public void close(org.apache.hadoop.mapreduce.TaskAttemptContext context)
		throws IOException, InterruptedException {
	jedis.disconnect();
}
}

/**
 * Old-API (org.apache.hadoop.mapred) overload. The original cast a
 * mapreduce.RecordWriter to the unrelated mapred.RecordWriter interface,
 * which could only fail with ClassCastException at runtime — fail fast
 * with a clear message instead. The job in this project uses the new API
 * and never calls this.
 */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext arg0) throws IOException {
	throw new UnsupportedOperationException(
			"RedisOutputFormat only supports the org.apache.hadoop.mapreduce API");
}

@Override
public org.apache.hadoop.mapreduce.RecordWriter<K, V> getRecordWriter(
		org.apache.hadoop.mapreduce.TaskAttemptContext context)
		throws IOException, InterruptedException {
	// BUG FIX: the original returned null here — and this is the overload
	// the new-API framework actually invokes — causing an NPE as soon as
	// the job tried to write output.
	// TODO(review): make the Redis host configurable via the job Configuration
	// instead of hard-coding it.
	return new RedisRecordWriter<K,V>(new Jedis("192.168.178.130"));
}
}
注意在Java环境下,访问redis所使用的客户端是jedis,所以需要将jedis的jar包导入到工程中。
接下来就是对于前端的开发,我们使用了ajax和json来对用户输入的信息进行处理,使用了bootstrap作为前端框架,用struts2处理业务逻辑
sug.java:
package org.robby.suggestion;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import redis.clients.jedis.Jedis;
import com.opensymphony.xwork2.ActionSupport;
public class Sug extends ActionSupport{
	private static final long serialVersionUID = 1L;

	// Raw user input, e.g. "word1 word2"; injected by Struts2 from the request.
	String query;
	// Suggestion keywords, highest score first; serialized to JSON by Struts2.
	Set<String> result;

	public String getQuery() {
		return query;
	}

	public void setQuery(String query) {
		this.query = query;
	}

	/**
	 * Looks up suggestion keywords in Redis for the current {@link #query}.
	 * Single word: read its prefix sorted set directly. Multiple words:
	 * intersect their sorted sets into a temporary key, then read the top
	 * entries.
	 *
	 * @return up to 6 keywords ordered by descending score; empty set for a
	 *         null/blank query
	 */
	public Set<String> getResult() {
		// BUG FIX: the original NPE'd on a null query and threw
		// ArrayIndexOutOfBoundsException for single-word input (it
		// unconditionally read a[1]).
		if (query == null || query.trim().isEmpty()) {
			return new HashSet<String>();
		}
		Jedis jedis = new Jedis("192.168.178.130");
		try {
			String[] words = query.trim().split("\\s+");
			if (words.length == 1) {
				result = jedis.zrevrange(words[0], 0, 5);
			} else {
				// Unique temp key per request: the original's fixed
				// "_tmpwords" raced between concurrent users. Generalized
				// to intersect ALL words, not just the first two.
				String tmpKey = "_tmpwords:" + Thread.currentThread().getId()
						+ ":" + System.nanoTime();
				jedis.zinterstore(tmpKey, words);
				result = jedis.zrevrange(tmpKey, 0, 5);
				jedis.del(tmpKey);
			}
		} finally {
			// BUG FIX: the original leaked the connection on every request.
			jedis.disconnect();
		}
		return result;
	}

	public void setResult(Set<String> result) {
		this.result = result;
	}

	public String execute() throws Exception{
		return SUCCESS;
	}
}
struts.xml:
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE struts PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 2.1//EN" "http://struts.apache.org/dtds/struts-2.1.dtd">
<struts>
<!-- devMode: reload config and show detailed errors; disable in production -->
<constant name="struts.devMode" value="true" />
<!-- map *.do URLs to actions (so the front end calls sug.do) -->
<constant name="struts.action.extension" value="do"/>
<!-- extends json-default so the action result can be serialized as JSON -->
<package name="ajax" extends="json-default">
<!-- sug.do -> Sug action; all bean properties (query, result) are
     serialized into the JSON response read by the ajax autocomplete -->
<action name="sug" class="org.robby.suggestion.Sug">
<result type="json"></result>
</action>
</package>
</struts>
以上所依赖的jar包:
bootstrap:
前端:
在linux上我们需要首先安装redis,版本是3.0:
在Linux上所使用的服务器是tomcat6,同时使用了一个java文件打包插件fatjar:
同时为了在Linux上实现自动处理,写了一个脚本文件:
# Merge rotated Tomcat access logs into one timestamped file, push it to
# HDFS and run the WordCount job that loads the counts into Redis.
tmpfile=`date '+%Y%m%d%H%M%S'`.txt
echo "$tmpfile"
# Quote the pattern so the shell does not expand localhost* in the cwd
# before find sees it.
for i in `find /home/hq/tmp/log -name 'localhost*'`
do
	echo "$i"
	# only merge non-empty log files
	# BUG FIX: original had "if[" (missing space -> syntax error)
	if [ -s "$i" ]
	then
		# BUG FIX: original wrote to a literal file named "tmpfile"
		# (the $ was missing)
		cat "$i" >> "$tmpfile"
		rm -f "$i"
	fi
done
# BUG FIX: original had "$tmpfile]" (missing space before ])
if [ -s "$tmpfile" ]
then
	hadoop fs -put "$tmpfile" /input
	rm "$tmpfile"
	hadoop fs -rmr /output
	hadoop jar count.jar org.hq.mr.WordCount /input /output
	hadoop fs -rm /input/*
fi
整个完成之后,前端就可以根据用户词语搜索频率提示下拉菜单了:
注意要学会hadoop的常用命令,以及redis的一些常用命令,例如怎样给成员打分、怎样加入键值对。