版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/PYXLY1314/article/details/52803796
再看本文前,初学者需要先去了解一下,全文检索的基本原理: 将原始的文档数据(或者自己业务系统中的各种业务数据)通过lucene的API进行转换,生成对应的索引文件(这里的索引文件其实就是可以理解成系统文件,它只能被lucene解析),至于文件的格式和命名等,在网上都有介绍;有兴趣的同学可以深入研究一下;生成索引以后我们就可以通过lucene的API进行搜索操作了,调用API,lucene会根据索引文件的路径查找到索引文件,然后解析它们,做一系列的筛选匹配,打分,等操作;最终返回搜索关键字相关的结果;
1、lucene开发涉及到的jar包
lucene-analyzers-3.6.0.jar,lucene-core-3.6.1.jar,lucene-core-3.6.1-javadoc.jar
2、生成索引的代码,简单示例:(其中有一个需要注意的点,就是每次建索引前都先删除原先的索引)
// Base directory (on Linux) where the Lucene index files are written.
// NOTE(review): the path mixes a hard-coded '/' with File.separator; both are
// the same on Linux, but this constant is not portable to Windows — confirm
// deployment is Linux-only before reusing it elsewhere.
public static final String FILE_PATH_LINUX = "/www/luceneIndex/public"+File.separator+"indexs";
/**
 * Builds the Lucene index for courses: indexes the course name and tag as
 * analyzed, boosted fields (name boosted above tag so name matches rank
 * higher) plus several stored-only metadata fields used for rendering
 * search results.
 *
 * When an index already exists, stale course documents are removed first
 * (via deleteIndex) so rebuilding does not accumulate duplicates.
 *
 * @param vo carries the start/end time window used to select the courses
 *           to index
 */
@SuppressWarnings("deprecation")
private void createCourseLucene(LuceneVO vo){
IndexWriter writer = null;
try {
File file = new File(LuceneTimeTask.FILE_PATH_LINUX);
if(!file.exists()){
file.mkdirs();
}
Directory dir = FSDirectory.open(file);
List<Course> list = courseService.getCourseByBetweenTime(vo);
// Nothing to index: return early. The original version iterated `list`
// unconditionally, which threw a NullPointerException when the query
// returned null, and an NPE on the still-null `writer` when the list
// was empty.
if(null == list || list.isEmpty()){
return;
}
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
// listFiles() may return null (e.g. on an I/O error); treat that like
// an empty directory instead of risking an NPE.
File[] existing = file.listFiles();
if(file.isDirectory() && (existing == null || existing.length == 0)){
// Empty directory: create a brand-new index (create=true).
writer = new IndexWriter(dir, analyzer,
true, IndexWriter.MaxFieldLength.UNLIMITED);
} else {
// Existing index: drop the old course documents, then append.
deleteIndex(2,dir);
writer = new IndexWriter(dir, analyzer,
false, IndexWriter.MaxFieldLength.UNLIMITED);
}
// NOTE(review): SimpleDateFormat is not thread-safe; keep these as
// locals — do not hoist them to static fields.
SimpleDateFormat sd1 = new SimpleDateFormat("MM-dd");
SimpleDateFormat sd = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
for(Course course : list) {
Document doc = new Document();
// Searchable fields: name outranks tag via the larger boost.
Field f5 = new Field("courseName", course.getName(), Store.YES,Index.ANALYZED);
Field f4 = new Field("courseTag", course.getTag(), Store.YES,Index.ANALYZED);
f5.setBoost(5f);
f4.setBoost(4f);
doc.add(f5);
doc.add(f4);
// Discriminator so the search side can tell result types apart.
doc.add(new Field("luceneType",Constant.LUCENE_COURSE, Store.YES,Index.ANALYZED));
// Stored-only metadata for rendering result rows.
doc.add(new Field("courseId", course.getId()+"", Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseClickNum", course.getHits()+"", Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseCreateTime", sd1.format(course.getCreatetime()), Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseCreateTimeNum", course.getCreatetime().getTime()+"", Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseSname", course.getSphotoname(), Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseIntroduce", course.getCourseDetail().getIntroduction(), Store.YES,Index.NOT_ANALYZED));
doc.add(new Field("courseCreateTime_YMD", sd.format(course.getCreatetime()), Store.YES,Index.NOT_ANALYZED));
writer.addDocument(doc);
}
} catch (Exception e) {
System.out.println("生成课程索引失败!");
e.printStackTrace();
}finally{
if(null!=writer){
try {
writer.close();
// CorruptIndexException extends IOException, so one catch suffices.
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
3、针对以上的索引文件,来完成快速搜索,简单代码如下:
/**
 * Runs the full-text search for vo.getSearchStr() across every indexed
 * entity type (equipment, news, courses, chapters, documents), renders one
 * HTML &lt;li&gt; per hit on the current page with matched terms highlighted
 * in &lt;b&gt; tags, and writes the page back to the response as JSON.
 */
public void listmodel(){
StringBuffer sb = new StringBuffer();
IndexReader reader=null;
try {
// Lazily initialise paging/search state so the action is safe to re-enter.
pgResult = pgResult == null ? new PageResult<LuceneVO>()
: pgResult;
vo = vo == null ? new LuceneVO() : vo;
// Only query the index when a non-empty search string was supplied.
if(null!=vo.getSearchStr() && !"".equals(vo.getSearchStr())){
Directory dir= init();
reader=IndexReader.open(dir);
int termNo = getTermNo();
IndexSearcher searcher=new IndexSearcher(reader);
String[] serchArray=null;
Occur[] occArray=null;
SortField[] sf = null;
int totalRecord=0;
ScoreDoc[] scoreDocs; // holds the hits returned by the search
Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_36);
// One entry per searchable field across all entity types; occArray must
// stay parallel to serchArray (SHOULD = match any field).
serchArray = new String[]{"equipName","equipFlag","equipIntroduce","newsTitle","newsTag","newsDescription","courseName","courseTag",
"chapterName","chapterTeacherName","datumName","datumTag"};
occArray = new Occur[]{BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD
,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};
Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, vo.getSearchStr(), serchArray,occArray, analyzer);
// Fetch enough hits to cover pages up to termNo+1.
TopDocs topdocs = searcher.search(query, (termNo+1)*Constants.TERM_NUM);
totalRecord += topdocs.totalHits;
scoreDocs = topdocs.scoreDocs;
// Page window [begin, end) over the returned hits.
int begin = pgResult.getPageSize() * (pgResult.getCurrentPage() - 1) ;
int end = Math.min(begin + pgResult.getPageSize(), totalRecord);
// Highlighter wraps query matches in <b>…</b> inside the snippets.
SimpleHTMLFormatter shf = new SimpleHTMLFormatter("<b>","</b>");
Highlighter hl = new Highlighter(shf,new QueryScorer(query));
// NOTE(review): relist is never populated below, so pgResult.setList
// stores an empty list — presumably the HTML in `sb` is the real
// payload; confirm before relying on pgResult.getList().
List<LuceneVO> relist = new ArrayList<LuceneVO>();
for(int i=begin; i < end; i++) {
int doc = scoreDocs[i].doc;
Document document = searcher.doc(doc);
// Stored discriminator written at index time decides how to render.
// NOTE(review): the indexing snippet writes it via Constant.* while
// this reads Constants.* — verify both refer to the same values.
String type = document.get("luceneType");
if(type.equals(Constants.LUCENE_COURSE)){
// Course hit: highlight name and introduction, link to the course page.
String courseName = "<span class='color-ff0'>[网校]</span>"+document.get("courseName");
String courseIntroduce = document.get("courseIntroduce");
// getBestFragment returns null when nothing matched in this field,
// hence the `!=null ? highlighted : raw` fallbacks below.
TokenStream ts = analyzer.tokenStream("courseName", new StringReader(courseName));
String courseNameHe =hl.getBestFragment(ts, courseName);
TokenStream ts2 = analyzer.tokenStream("courseIntroduce", new StringReader(courseIntroduce));
String courseIntroduceHe = hl.getBestFragment(ts2, courseIntroduce);
sb.append("<li><a name='listlink' href='"+ServletActionContext.getRequest().getContextPath()+"/course_sub/teaching/").append(document.get("courseId")).append(".html'")
.append(" title='").append(null!=courseIntroduceHe?courseIntroduceHe:courseIntroduce)
.append("' target='_blank' >").append(null!=courseNameHe?courseNameHe:courseName)
.append(" </a><span>")
.append(document.get("courseCreateTime"))
.append("</span></li>");
}else if(type.equals(Constants.LUCENE_EQUIP)){
// Equipment hit: same pattern, links by class code + id.
String equipName = "<span class='color-ff0'>[装备]</span>"+document.get("equipName");
String equipIntroduce = document.get("equipIntroduce");
TokenStream ts = analyzer.tokenStream("equipName", new StringReader(equipName));
String equipNameHe = hl.getBestFragment(ts, equipName);
TokenStream ts2 = analyzer.tokenStream("equipIntroduce", new StringReader(equipIntroduce));
String equipIntroduceHe = hl.getBestFragment(ts2, equipIntroduce);
sb.append("<li><a name='listlink' href='"+ServletActionContext.getRequest().getContextPath()+"/equip_sub/detail/").append(document.get("equipClassCode")).
append("_").append(document.get("equipId")).append(".html'")
.append(" title='").append(null!=equipIntroduceHe?equipIntroduceHe:equipIntroduce)
.append("' target='_blank' >")
.append(null!=equipNameHe?equipNameHe:equipName)
.append(" </a><span>")
.append(document.get("equipCreateTime"))
.append("</span></li>");
}else if(type.equals(Constants.LUCENE_CHAPTER)){
// Chapter hit: links by courseId_chapterId.
String chapterName = "<span class='color-ff0'>[网校]</span>"+document.get("chapterName");
String chapterTeacherName = document.get("chapterTeacherName");
TokenStream ts = analyzer.tokenStream("chapterName", new StringReader(chapterName));
String chapterNameHe = hl.getBestFragment(ts, chapterName);
TokenStream ts1 = analyzer.tokenStream("chapterTeacherName", new StringReader(chapterTeacherName));
String chapterTeacherNameHe = hl.getBestFragment(ts1, chapterTeacherName);
sb.append("<li><a name='listlink' href='"+ServletActionContext.getRequest().getContextPath()+"/course_sub/teaching/").append(document.get("courseId")).append("_").append(document.get("chapterId")).append(".html'")
.append(" title='")
.append(null!=chapterTeacherNameHe?chapterTeacherNameHe:chapterTeacherName)
.append("' target='_blank' >")
.append(null!=chapterNameHe?chapterNameHe:chapterName)
.append(" </a><span>")
.append(document.get("chapterCreateTime"))
.append("</span></li>");
}else if(type.equals(Constants.LUCENE_NEWS)){
// News hit: links by newsId_newsClassCode.
String newsTitle = "<span class='color-ff0'>[资讯]</span>"+document.get("newsTitle");
String newsDescription = document.get("newsDescription");
TokenStream ts = analyzer.tokenStream("newsTitle", new StringReader(newsTitle));
String newsTitleHe = hl.getBestFragment(ts, newsTitle);
TokenStream ts1 = analyzer.tokenStream("newsDescription", new StringReader(newsDescription));
String newsDescriptionHe = hl.getBestFragment(ts1, newsDescription);
sb.append("<li><a name='listlink' href='"+ServletActionContext.getRequest().getContextPath()+"/news_sub/items/").append(document.get("newsId"))
.append("_").append(document.get("newsClassCode")).append(".html'")
.append(" title='").append(null!=newsDescriptionHe?newsDescriptionHe:newsDescription)
.append("' target='_blank' >")
.append(null!=newsTitleHe?newsTitleHe:newsTitle)
.append(" </a><span>")
.append(document.get("newsCreateTime"))
.append("</span></li>");
}else{// fallback: document/material (datum) hit
String datumName = "<span class='color-ff0'>[文档]</span>"+document.get("datumName");
TokenStream ts = analyzer.tokenStream("datumName", new StringReader(datumName));
String datumNameHe = hl.getBestFragment(ts, datumName);
sb.append("<li><a name='listlink' href='"+ServletActionContext.getRequest().getContextPath()+"/docview_sub/").append(document.get("datumId")).append(".html'")
.append(" target='_blank' >")
.append(null!=datumNameHe?datumNameHe:datumName)
.append(" </a><span>")
.append(document.get("datumCreateTime"))
.append("</span></li>");
}
}
pgResult.setList(relist);
pgResult.setTotalRecord(totalRecord);
}
} catch (Exception e) {
e.printStackTrace();
}finally{
// Closing the reader releases the index files; the searcher only wraps it.
if(null!=reader){
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
// Serialize the rendered HTML plus paging info as JSON for the client.
Map remap = new HashMap();
remap.put("sb", sb);
remap.put("currentPage", pgResult.getCurrentPage());
remap.put("totalPage", pgResult.getTotalPage());
remap.put("searchStr", vo.getSearchStr());
remap.put("searchType", vo.getSearchType());
String restr = JsonHelper.getGson().toJson(remap);
this.writeTextToResponse(restr);
}
至于复杂的搜索操作,以后有时间了再深入研究。