第一步:下载lucene的核心包
lucene-core-3.6.1-javadoc.jar (3.5 MB)
lucene-core-3.6.1.jar (1.5 MB)
拷贝到项目的lib 文件夹里
第二步:
在C盘下建立source文件夹 (C:\source)
source文件夹存放待索引的文件。例如,建立两个文件,名称分别为 test1.txt 和 test2.txt(请以GBK编码保存,因为下面的索引程序按GBK编码读取文件内容)。
test1.txt文件内容为:欢迎来到绝对秋香的博客。
test2.txt文件内容为:绝对秋香引领你走向潮流。
在C盘下再建立index文件夹,存放索引文件 (C:\index)
扫描二维码关注公众号,回复:
806157 查看本文章
第三步,建立索引类 TextFileIndexer ,并运行主函数
package com.newtouchone.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Indexes every {@code .txt} file found directly inside the source folder
 * into a Lucene index stored in the index folder.
 *
 * <p>Each file becomes one document with two fields: {@code path} (stored,
 * not indexed) and {@code body} (stored, analyzed, with term vectors that
 * carry positions and offsets).
 */
public class TextFileIndexer {

    /**
     * Rebuilds the index from scratch and prints the elapsed time.
     *
     * @param args unused
     * @throws Exception if the source folder cannot be listed, a file cannot
     *                   be read, or the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        /* Folder holding the files to index: the "source" folder on drive C. */
        File fileDir = new File("C:\\source");
        /* Folder that receives the index files. */
        File indexDir = new File("C:\\index");

        // listFiles() returns null when the path is not a directory or cannot
        // be read. Check before opening the writer so that a bad source folder
        // does not wipe an existing index (OpenMode.CREATE overwrites it).
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in source directory: " + fileDir.getPath());
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_36);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, luceneAnalyzer);
        iwc.setOpenMode(OpenMode.CREATE);

        Directory dir = FSDirectory.open(indexDir);
        try {
            IndexWriter indexWriter = new IndexWriter(dir, iwc);
            long startTime = new Date().getTime();
            try {
                // Add one document per text file to the index.
                for (File textFile : textFiles) {
                    if (textFile.isFile() && textFile.getName().endsWith(".txt")) {
                        indexWriter.addDocument(buildDocument(textFile));
                    }
                }
            } finally {
                // Always release the index write lock, even if indexing failed.
                indexWriter.close();
            }

            // Report how long indexing took (closing the writer included).
            long endTime = new Date().getTime();
            System.out.println("这花费了" + (endTime - startTime)
                    + " 毫秒来把文档增加到索引里面去!" + fileDir.getPath());
        } finally {
            dir.close();
        }
    }

    /** Reads one text file (as GBK), echoes it, and wraps it in a Lucene document. */
    private static Document buildDocument(File textFile) throws IOException {
        String canonicalPath = textFile.getCanonicalPath();
        System.out.println("File " + canonicalPath + "正在被索引....");
        String content = FileReaderAll(canonicalPath, "GBK");
        System.out.println(content);

        Document document = new Document();
        document.add(new Field("path", textFile.getPath(),
                Field.Store.YES, Field.Index.NO));
        document.add(new Field("body", content,
                Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        return document;
    }

    /**
     * Reads a whole text file into a single string.
     *
     * <p>The file is read line by line and the lines are concatenated with
     * nothing between them, so line terminators are not part of the result.
     *
     * @param FileName path of the file to read
     * @param charset  name of the charset used to decode the file
     * @return the concatenated lines of the file; empty if the file is empty
     * @throws IOException if the file cannot be opened or read, or the
     *                     charset name is not supported
     */
    public static String FileReaderAll(String FileName, String charset) throws IOException {
        FileInputStream in = new FileInputStream(FileName);
        try {
            // The reader is created inside the try so the stream is still
            // closed when the charset name is rejected.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return content.toString();
        } finally {
            in.close();
        }
    }
}
输出结果为:
File C:\source\test1.txt正在被索引....
欢迎来到绝对秋香的博客。
File C:\source\test2.txt正在被索引....
绝对秋香引领你走向潮流。
这花费了641 毫秒来把文档增加到索引里面去!C:\source
第四步,建立测试类TestQuery,并运行主函数,输出测试结果
package com.newtouchone.lucene;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs one keyword query against the {@code body} field of the index and
 * prints how many hits were returned.
 */
public class TestQuery {

    /**
     * Searches the index for a fixed keyword and reports the hit count.
     *
     * @param args unused
     * @throws IOException    if the index cannot be opened or searched
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String index = "C:\\index";      // location of the index to search
        String queryString = "绝对秋香";  // keyword to search for

        // Parse first and let ParseException propagate: swallowing it would
        // leave the query null and fail later with an unrelated error.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        QueryParser qp = new QueryParser(Version.LUCENE_36, "body", analyzer);
        Query query = qp.parse(queryString);

        FSDirectory directory = FSDirectory.open(new File(index));
        try {
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    TopDocs results = searcher.search(query, 10); // at most 10 hits
                    ScoreDoc[] hits = results.scoreDocs;
                    if (hits.length > 0) {
                        System.out.println("找到:" + hits.length + " 个结果!");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that
                // reader, so it has to be closed here.
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}
测试输出结果为:
找到:2 个结果!
附件homework.rar为项目文件,解压部署则可运行该lucene案例