Developing a transaction-log search engine based on Hadoop

Copyright notice: This is an original article by the author and may not be reproduced without permission. https://blog.csdn.net/huangqing510/article/details/44996719

The main technologies used in this project are Hadoop MapReduce, Redis, Ajax, JSON, Struts2 and so on, with Bootstrap as the front-end framework.

First, the main MapReduce program:

package org.hq.mr;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line is a Tomcat access-log record; the request part
            // sits between the first pair of double quotes.
            String line = value.toString();
            String[] a = line.split("\"");
            if (a.length > 1 && a[1].indexOf("sug.jsp?query") > 0) {
                // "GET /xxx/sug.jsp?query=term HTTP/1.1" -> split on "query=" or a
                // space, so the search term is the third token.
                String[] b = a[1].split("query=| ");
                String term = b[2];
                try {
                    // the term arrives URL-encoded, decode it before counting
                    term = URLDecoder.decode(term, "UTF-8");
                } catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
                context.write(new Text(term), one);
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // sum up the occurrences of each search term
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "Load Redis");
        job.setJarByClass(WordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        // the custom output format writes the counts into Redis instead of HDFS
        job.setOutputFormatClass(RedisOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        RedisOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int ret = ToolRunner.run(new WordCount(), args);
        System.exit(ret);
    }
}
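The parsing in the mapper is easiest to see with a concrete line. The following stand-alone sketch is not part of the project; it simply replays the two split() calls on a made-up Tomcat access-log line (host, path and term are hypothetical) to show which token ends up as the search term:

import java.net.URLDecoder;

public class MapSplitDemo {
    public static void main(String[] args) throws Exception {
        // hypothetical access-log line of the kind the mapper receives
        String line = "192.168.178.1 - - [10/Apr/2015:10:00:00 +0800] "
                + "\"GET /suggestion/sug.jsp?query=hadoop HTTP/1.1\" 200 56";
        String[] a = line.split("\"");          // a[1] is the quoted request part
        String[] b = a[1].split("query=| ");    // split on "query=" or a space
        System.out.println(URLDecoder.decode(b[2], "UTF-8"));  // prints: hadoop
    }
}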

Next, Redis stores the user-request data as key-value pairs; a custom output format writes the MapReduce results straight into Redis:

package org.hq.mr;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import redis.clients.jedis.Jedis;

public class RedisOutputFormat<K, V> extends FileOutputFormat<K, V> {

    public static class RedisRecordWriter<K, V> extends RecordWriter<K, V> {
        private Jedis jedis;

        RedisRecordWriter(Jedis jedis) {
            this.jedis = jedis;
        }

        @Override
        public void write(K key, V value) throws IOException, InterruptedException {
            boolean nullkey = key == null || key instanceof NullWritable;
            boolean nullvalue = value == null;
            if (nullkey || nullvalue) return;

            // key is the search term and value is its count: for every prefix of
            // the term, add the full term to the sorted set named after that
            // prefix and raise its score by the count, so that each prefix maps
            // to a ranked list of suggestions
            String s = key.toString();
            for (int i = 0; i < s.length(); i++) {
                String k = s.substring(0, i + 1);
                int score = Integer.parseInt(value.toString());
                jedis.zincrby(k, score, s);
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            jedis.disconnect();
        }
    }

    @Override
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // every output record of the job is written straight into Redis on this host
        return new RedisRecordWriter<K, V>(new Jedis("192.168.178.130"));
    }
}

Note that in a Java environment Redis is accessed through Jedis, so the jedis jar has to be added to the project.
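To make the resulting data model concrete, here is a small stand-alone sketch (not part of the project; it assumes a Redis instance reachable on localhost and the jedis jar on the classpath). It indexes two hypothetical terms the same way RedisRecordWriter.write() does, then runs the kind of prefix lookup the front end performs:

import redis.clients.jedis.Jedis;

public class SuggestionDemo {
    public static void main(String[] args) {
        Jedis jedis = new Jedis("localhost");  // hypothetical local Redis

        // index side: for every prefix of a term, add the full term to the
        // sorted set named after that prefix, with the term's count as score
        index(jedis, "hadoop", 12);
        index(jedis, "hbase", 5);

        // lookup side: the top completions stored under the prefix "ha"
        // (here just "hadoop")
        System.out.println(jedis.zrevrange("ha", 0, 5));

        jedis.disconnect();
    }

    private static void index(Jedis jedis, String term, int count) {
        for (int i = 0; i < term.length(); i++) {
            jedis.zincrby(term.substring(0, i + 1), count, term);
        }
    }
}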

Next comes the front-end development: Ajax and JSON are used to handle the user's input, Bootstrap is used as the front-end framework, and Struts2 handles the business logic.

Sug.java:

package org.robby.suggestion;

import java.util.HashSet;
import java.util.Set;

import redis.clients.jedis.Jedis;

import com.opensymphony.xwork2.ActionSupport;

public class Sug extends ActionSupport {
    String query;
    Set<String> result;

    public String getQuery() {
        return query;
    }

    public void setQuery(String query) {
        this.query = query;
    }

    public Set<String> getResult() {
        // called by the Struts2 JSON result: look up the suggestions for the
        // prefixes the user has typed and return the top entries by score
        if (query == null || query.trim().length() == 0) {
            return new HashSet<String>();
        }
        Jedis jedis = new Jedis("192.168.178.130");
        String[] a = query.trim().split(" ");

        // intersect the sorted sets of all typed prefixes into a temporary key,
        // then read back the six highest-scoring suggestions
        jedis.zinterstore("_tmpwords", a);
        result = jedis.zrevrange("_tmpwords", 0, 5);
        jedis.del("_tmpwords");
        jedis.disconnect();

        return result;
    }

    public void setResult(Set<String> result) {
        this.result = result;
    }

    public String execute() throws Exception {
        return SUCCESS;
    }
}

struts.xml:

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE struts PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 2.1//EN" "http://struts.apache.org/dtds/struts-2.1.dtd">
<struts>
  <constant name="struts.devMode" value="true" /> 
  <constant name="struts.action.extension" value="do"/> 
  <package name="ajax" extends="json-default">
  <action name="sug" class="org.robby.suggestion.Sug">
  <result type="json"></result>
  </action>
  </package>
</struts> 
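With the package extending json-default and a result of type json, Struts2 serializes the action's properties into the HTTP response. For a request such as sug.do?query=ha (hypothetical prefix), the body contains the suggestion set roughly in the form {"query":"ha","result":["hadoop", ...]} (illustrative values; the output also includes the other ActionSupport properties unless they are excluded), so the Bootstrap page can fetch suggestions with a plain Ajax GET and fill the drop-down from the result array.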

The jar packages that the code above depends on, the Bootstrap assets, and the front-end page are shown in the accompanying screenshots.

On Linux, Redis needs to be installed first; the version used here is 3.0.

The server used on Linux is Tomcat 6, together with the fatjar packaging plugin for bundling the Java classes into a single jar.

To automate this processing on Linux, a shell script was also written:

tmpfile=`date '+%Y%m%d%H%M%S'`.txt
echo $tmpfile

# collect all new Tomcat access logs into one temporary file
for i in `find /home/hq/tmp/log -name 'localhost*'`
do
  echo $i
  if [ -s $i ]
  then
    cat $i >> $tmpfile
    rm -f $i
  fi
done

# if anything was collected, upload it to HDFS and run the MapReduce job
if [ -s $tmpfile ]
then
  hadoop fs -put $tmpfile /input
  rm $tmpfile
  hadoop fs -rmr /output
  hadoop jar count.jar org.hq.mr.WordCount /input /output
  hadoop fs -rm /input/*
fi

Once everything is in place, the front end can show a drop-down of suggestions based on how frequently each term has been searched.

Note that you need to know the common Hadoop commands, as well as the common Redis sorted-set commands: how to add key-value pairs (members) and how scores are assigned and updated.
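For reference, a minimal sketch of those Redis operations through Jedis (assuming a local Redis instance; the key and member names are made up):

import redis.clients.jedis.Jedis;

public class RedisBasicsDemo {
    public static void main(String[] args) {
        Jedis jedis = new Jedis("localhost");      // hypothetical local instance
        // add a member to a sorted set with an initial score ("add a key-value pair")
        jedis.zadd("demo", 1, "hadoop");
        // raise the member's score by 5 ("scoring")
        jedis.zincrby("demo", 5, "hadoop");
        // read the score back and list members from highest to lowest score
        System.out.println(jedis.zscore("demo", "hadoop"));   // 6.0
        System.out.println(jedis.zrevrange("demo", 0, -1));   // [hadoop]
        jedis.disconnect();
    }
}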

