版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/am45337908/article/details/50115407
程序源码为Myeclipse10建立的,具体项目在http://download.csdn.net/download/am45337908/9312911
工程的结构如下:
程序实例:
1.建立索引,Indexer.java 其中args[0]存放生成的Lucene索引的路径、args[0]存放被索引文件的路径
在run configurations 里设置
<span style="font-size:24px;">package lucenemeet;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.FileReader;
// From chapter 1
/**
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Indexer {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Indexer.class.getName()
+ " <index dir> <data dir>");
}
String indexDir = args[0]; //1指定目录创建索引
String dataDir = args[1]; //2制定目录中的txt文件进行索引
long start = System.currentTimeMillis();
Indexer indexer = new Indexer(indexDir);
int numIndexed;
try {
numIndexed = indexer.index(dataDir, new TextFilesFilter());//被索引的文件数
} finally {
indexer.close();
}
long end = System.currentTimeMillis();
System.out.println("Indexing " + numIndexed + " files took "
+ (end - start) + " milliseconds");
}
private IndexWriter writer;
public Indexer(String indexDir) throws IOException {
Directory dir = FSDirectory.open(new File(indexDir));
writer = new IndexWriter(dir, //3 Directory
new StandardAnalyzer( //3
Version.LUCENE_30),//3
true, //3
IndexWriter.MaxFieldLength.UNLIMITED); //创建lucene index writer
}
public void close() throws IOException { //关闭indexer writer
writer.close(); //4
}
public int index(String dataDir, FileFilter filter)
throws Exception {
File[] files = new File(dataDir).listFiles();
for (File f: files) {
if (!f.isDirectory() &&
!f.isHidden() &&
f.exists() &&
f.canRead() &&
(filter == null || filter.accept(f))) {
indexFile(f);
}
}
return writer.numDocs(); //5
}
private static class TextFilesFilter implements FileFilter {
public boolean accept(File path) {
return path.getName().toLowerCase() //6判断是否以他结尾
.endsWith(".txt"); //6
}
}
protected Document getDocument(File f) throws Exception {
Document doc = new Document();
doc.add(new Field("contents", new FileReader(f))); //7索引文件内容
doc.add(new Field("filename", f.getName(), //8
Field.Store.YES, Field.Index.NOT_ANALYZED));//8索引文件名字
doc.add(new Field("fullpath", f.getCanonicalPath(), //9
Field.Store.YES, Field.Index.NOT_ANALYZED));//9索引完整的路径
return doc;
}
private void indexFile(File f) throws Exception {
System.out.println("Indexing " + f.getCanonicalPath());
Document doc = getDocument(f);
writer.addDocument(doc); //10向lucene索引中添加文档
}
}
/*
#1 Create index in this directory
#2 Index *.txt files from this directory
#3 Create Lucene IndexWriter
#4 Close IndexWriter
#5 Return number of documents indexed
#6 Index .txt files only, using FileFilter
#7 Index file content
#8 Index file name
#9 Index file full path
#10 Add document to Lucene index
*/</span>
设置成功后运行程序如下
2.搜索索引,Searcher.java
其中args[0]存放Indexer生成的Lucene索引文件路径、args[1]待查询的文字
在run configurations 里设置,此处我们检索"patent"这个单词
package lucenemeet;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
// From chapter 1
/**
* This code was originally written for
* Erik's Lucene intro java.net article
*/
public class Searcher {
public static void main(String[] args) throws IllegalArgumentException,
IOException, ParseException {
if (args.length != 2) {
throw new IllegalArgumentException("Usage: java " + Searcher.class.getName()
+ " <index dir> <query>");
}
String indexDir = args[0]; //1 已经创建好的索引
String q = args[1]; //2 待索引的字段
search(indexDir, q);
}
public static void search(String indexDir, String q)
throws IOException, ParseException {
Directory dir = FSDirectory.open(new File(indexDir)); //3
IndexSearcher is = new IndexSearcher(dir); //3 打来索引文件
QueryParser parser = new QueryParser(Version.LUCENE_30, // 4
"contents", //4
new StandardAnalyzer( //4
Version.LUCENE_30)); //4创建一个解析器
Query query = parser.parse(q); //4 解析查询字符串
long start = System.currentTimeMillis();
TopDocs hits = is.search(query, 10); //5搜索索引
long end = System.currentTimeMillis();
System.err.println("Found " + hits.totalHits + //6
" document(s) (in " + (end - start) + // 6
" milliseconds) that matched query '" + // 6
q + "':"); // 6记录搜索状态
for(ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc); //7 返回匹配文本
System.out.println(doc.get("fullpath")); //8
}
is.close(); //9
}
}
/*
#1 Parse provided index directory
#2 Parse provided query string
#3 Open index
#4 Parse query
#5 Search index
#6 Write search stats
#7 Retrieve matching document
#8 Display filename
#9 Close IndexSearcher
*/
设置成功后程序运行结果如下:
关于索引过程的核心类,会在接下来深入学习分析