1.下载需要的jar包和配置,github资源链接:https://github.com/zhangliqingyun/jarlist/tree/master/lucene
2.创建一个目录索引
/**
 * Adds a single {@code People} document to the file-system index at {@code ./myindex6}.
 *
 * <p>The document carries the stored, analyzed fields {@code id}, {@code name} and
 * {@code title}, is analyzed with {@code IKAnalyzer}, and gets a document boost of 100
 * so that it ranks higher in search results.
 *
 * @throws Exception if the index cannot be opened or the document cannot be written
 */
@Test
public void addFileDirectory() throws Exception {
    // Entity whose values are copied into the document fields.
    People people = new People();
    people.setId(9);
    people.setName("美女真多啊fjhtjhtfgsjtsj方式都会认同一天我要提问");
    people.setTitle("这是标题");

    // Build the document before touching the index so nothing is opened if this fails.
    Document document = new Document();
    document.add(new Field("id", people.getId().toString(), Store.YES, Index.ANALYZED));
    document.add(new Field("name", people.getName(), Store.YES, Index.ANALYZED));
    document.add(new Field("title", people.getTitle(), Store.YES, Index.ANALYZED));
    // Raise the relevance of this document.
    document.setBoost(100f);

    Directory directory = FSDirectory.open(new File("./myindex6"));
    IKAnalyzer analyzer = new IKAnalyzer();
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    try {
        indexWriter.addDocument(document);
    } finally {
        // Close even when addDocument throws; otherwise the writer (and its
        // write lock on the index directory) would stay open.
        indexWriter.close();
    }
}
3.查询目录索引:
/**
 * Searches the index at {@code ./myindex} for the keyword {@code lucene} in the
 * {@code title} and {@code name} fields and prints up to 20 matching people.
 *
 * @throws Exception if the query cannot be parsed or the index cannot be read
 */
@Test
public void searchIndex() throws Exception {
    // Parse the query first so that a parse failure cannot leak an open searcher.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    QueryParser queryParser =
            new MultiFieldQueryParser(Version.LUCENE_30, new String[]{"title", "name"}, analyzer);
    Query query = queryParser.parse("lucene");

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        // Fetch at most the 20 best hits.
        TopDocs topDocs = indexSearcher.search(query, 20);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // scoreDoc.doc is the internal document number used to load the stored fields.
            Document document = indexSearcher.doc(scoreDoc.doc);
            People people = new People();
            people.setId(Integer.parseInt(document.get("id")));
            people.setName(document.get("name"));
            people.setTitle(document.get("title"));
            list.add(people);
        }
    } finally {
        // The original never closed the searcher; release it on every path.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
4.使用term查询目录索引:
/**
 * Searches the index at {@code ./myindex} with an exact {@code TermQuery} for the term
 * {@code lucene} in the {@code name} field and prints up to 20 matching people.
 *
 * <p>The method name is identical to the Lucene class it demonstrates; it is kept
 * unchanged so existing references to this test keep working.
 *
 * @throws Exception if the index cannot be read
 */
@Test
public void TermQuery() throws Exception {
    // The term is matched as-is; no analyzer is applied to it.
    Term term = new Term("name", "lucene");
    Query query = new TermQuery(term);

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        // Fetch at most the 20 best hits.
        TopDocs topDocs = indexSearcher.search(query, 20);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            People people = new People();
            people.setId(Integer.parseInt(document.get("id")));
            people.setName(document.get("name"));
            people.setTitle(document.get("title"));
            list.add(people);
        }
    } finally {
        // The original never closed the searcher; release it on every path.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
5.查询所有的目录索引
/**
 * Runs a {@code MatchAllDocsQuery} against the index at {@code ./myindex} and prints
 * the first 20 documents as people.
 *
 * @throws Exception if the index cannot be read
 */
@Test
public void queryAllDocs() throws Exception {
    // Query that matches every document in the index.
    Query query = new MatchAllDocsQuery();

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        // Fetch at most 20 hits.
        TopDocs topDocs = indexSearcher.search(query, 20);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            People people = new People();
            people.setId(Integer.parseInt(document.get("id")));
            people.setName(document.get("name"));
            people.setTitle(document.get("title"));
            list.add(people);
        }
    } finally {
        // The original never closed the searcher; release it on every path.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
6.按照范围查询目录索引:
/**
 * Runs a numeric range query for {@code id} in the inclusive range [1, 3] against the
 * index at {@code ./myindex2} and prints up to 20 matching people.
 *
 * @throws Exception if the index cannot be read or a stored id cannot be decoded
 */
@Test
public void queryRange() throws Exception {
    // Inclusive on both ends: 1 <= id <= 3.
    // NOTE(review): the query uses a long range while the stored value below is decoded
    // with prefixCodedToInt. How ./myindex2 was written is not visible here -- confirm
    // that the query type and the decoding both match the way "id" was indexed.
    Query query = NumericRangeQuery.newLongRange("id", 1L, 3L, true, true);

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex2"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        // Fetch at most the 20 best hits.
        TopDocs topDocs = indexSearcher.search(query, 20);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            People people = new People();
            // The stored id is decoded from Lucene's prefix-coded form, not parsed as decimal text.
            people.setId(NumericUtils.prefixCodedToInt(document.get("id")));
            people.setName(document.get("name"));
            people.setTitle(document.get("title"));
            list.add(people);
        }
    } finally {
        // The original never closed the searcher; release it on every path.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
7.使用通配符查询目录索引:
/**
 * Runs a wildcard query for {@code name} terms matching the pattern {@code l*} against
 * the index at {@code ./myindex3} and prints up to 20 matching people.
 *
 * @throws Exception if the index cannot be read
 */
@Test
public void wildcardQuery() throws Exception {
    // Wildcard pattern applied to the terms of the "name" field.
    Term term = new Term("name", "l*");
    Query query = new WildcardQuery(term);

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex3"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        // Fetch at most the 20 best hits.
        TopDocs topDocs = indexSearcher.search(query, 20);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = indexSearcher.doc(scoreDoc.doc);
            People people = new People();
            people.setId(Integer.parseInt(document.get("id")));
            people.setName(document.get("name"));
            people.setTitle(document.get("title"));
            list.add(people);
        }
    } finally {
        // The original never closed the searcher; release it on every path.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
8.高亮显示查询:
/**
 * Searches the index at {@code ./myindex6} for documents whose {@code name} contains any
 * of three terms and prints up to 20 hits with the matched text wrapped in a red
 * {@code <font>} tag.
 *
 * <p>If the highlighter produces no fragment for a hit, the stored name is printed
 * unchanged instead of {@code null}.
 *
 * @throws Exception if the index cannot be read or highlighting fails
 */
@Test
public void highLighterQuery() throws Exception {
    // Three optional (SHOULD) term clauses: a document matching any one of them is a hit.
    BooleanQuery booleanQuery = new BooleanQuery();
    booleanQuery.add(new TermQuery(new Term("name", "北京")), Occur.SHOULD);
    booleanQuery.add(new TermQuery(new Term("name", "美女")), Occur.SHOULD);
    booleanQuery.add(new TermQuery(new Term("name", "北京美女")), Occur.SHOULD);

    // Highlighter set-up: markup placed around matches, scoring by the query,
    // and a fragmenter size of 10 for the summary.
    Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    Scorer scorer = new QueryScorer(booleanQuery);
    Highlighter highLighter = new Highlighter(formatter, scorer);
    Fragmenter fragmenter = new SimpleFragmenter(10);
    highLighter.setTextFragmenter(fragmenter);

    // One analyzer is enough for all hits; the original created one per loop iteration.
    Analyzer analyzer = new IKAnalyzer();

    List<People> list = new ArrayList<People>();
    Directory directory = FSDirectory.open(new File("./myindex6"));
    IndexSearcher indexSearcher = new IndexSearcher(directory);
    try {
        TopDocs topDocs = indexSearcher.search(booleanQuery, 20);
        System.out.println(topDocs.totalHits);

        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            System.out.println("相关度得分:" + scoreDoc.score);
            Document document = indexSearcher.doc(scoreDoc.doc);

            People people = new People();
            people.setId(Integer.parseInt(document.get("id")));
            people.setTitle(document.get("title"));

            String storedName = document.get("name");
            // getBestFragment returns null when no query term occurs in the text;
            // fall back to the stored value so the name is not lost.
            String highlighted = storedName == null
                    ? null
                    : highLighter.getBestFragment(analyzer, "name", storedName);
            people.setName(highlighted != null ? highlighted : storedName);
            list.add(people);
        }
    } finally {
        // Close on every path, not only after a fully successful run.
        indexSearcher.close();
    }

    // Print the collected results.
    for (People people : list) {
        System.out.println(people.getId());
        System.out.println(people.getName());
        System.out.println(people.getTitle());
    }
}
9.创建内存索引
/**
 * Builds an in-memory index ({@code RAMDirectory}) containing one {@code People}
 * document and then searches it via {@code searchIndex(Directory)}.
 *
 * <p>NOTE(review): the {@code searchIndex(Directory)} overload is not defined in this
 * section; it is assumed to exist elsewhere in the class.
 *
 * @throws Exception if the document cannot be written or the search fails
 */
@Test
public void addMemoryIndex() throws Exception {
    // Entity whose values are copied into the document fields.
    People people = new People();
    people.setId(1);
    people.setAge(24);
    people.setName("lucene是一个搜搜服务器");

    Document document = new Document();
    document.add(new Field("id", people.getId().toString(), Store.YES, Index.ANALYZED));
    document.add(new Field("age", people.getAge().toString(), Store.YES, Index.ANALYZED));
    document.add(new Field("name", people.getName(), Store.YES, Index.ANALYZED));

    // The index lives in memory only.
    Directory ramDirectory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexWriter indexWriter = new IndexWriter(ramDirectory, analyzer, MaxFieldLength.LIMITED);
    try {
        indexWriter.addDocument(document);
    } finally {
        // Close even when addDocument throws so the writer is never left open.
        indexWriter.close();
    }

    // Search the freshly written in-memory index.
    searchIndex(ramDirectory);
}
10.删除目录索引:
/**
 * Deletes from the index at {@code ./myindex5} every document whose {@code name} field
 * contains the term {@code hello}.
 *
 * @throws Exception if the index cannot be opened or modified
 */
@Test
public void deleteIndex() throws Exception {
    // Documents containing this exact term are removed.
    Term term = new Term("name", "hello");

    Directory directory = FSDirectory.open(new File("./myindex5"));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    try {
        indexWriter.deleteDocuments(term);
    } finally {
        // Close even when the delete throws; otherwise the writer (and its
        // write lock on the index directory) would stay open.
        indexWriter.close();
    }
}
11.更新目录索引(先删除,后增加):
/**
 * Replaces, in the index at {@code ./myindex5}, the documents whose {@code name} field
 * contains the term {@code hello} with one new {@code People} document.
 *
 * <p>An update is a delete of the matching documents followed by an add of the new one.
 *
 * @throws Exception if the index cannot be opened or modified
 */
@Test
public void updateIndex() throws Exception {
    // Documents containing this term are the ones being replaced.
    Term term = new Term("name", "hello");

    // Entity holding the replacement values.
    People people = new People();
    people.setAge(24);
    people.setId(2);
    people.setName("这是更新后的hello");

    // Replacement document.
    Document document = new Document();
    document.add(new Field("id", people.getId().toString(), Store.YES, Index.ANALYZED));
    document.add(new Field("age", people.getAge().toString(), Store.YES, Index.ANALYZED));
    document.add(new Field("name", people.getName(), Store.YES, Index.ANALYZED));

    Directory directory = FSDirectory.open(new File("./myindex5"));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    try {
        indexWriter.updateDocument(term, document);
    } finally {
        // Close even when the update throws; otherwise the writer (and its
        // write lock on the index directory) would stay open.
        indexWriter.close();
    }
}
12.优化目录索引(合并索引段):
/**
 * Optimizes the index at {@code ./myindex5} by calling {@code IndexWriter.optimize()},
 * which in Lucene merges the index segments; it does not deduplicate documents.
 *
 * @throws Exception if the index cannot be opened or optimized
 */
@Test
public void optimize() throws Exception {
    Directory directory = FSDirectory.open(new File("./myindex5"));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
    try {
        indexWriter.optimize();
    } finally {
        // Close even when optimize throws; otherwise the writer (and its
        // write lock on the index directory) would stay open.
        indexWriter.close();
    }
}
13.中文分词器:
①导jar包:
②导入配置文件:
③配置IKAnalyzer.cfg.xml内容