一:将文件夹下面的子文件作为数据源
package lucene; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.*; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import java.io.*; /** * 将文件夹下面的子文件作为数据源 * Created by xhga on 2018/5/31. */ public class LuceneFile { public static void main(String[] args) throws IOException, InterruptedException { getMultiSearcher("D:\\wenjian"); // 文件夹 下面就是你要查找的文件 } private static Document createDocument(String title, String content) { Document doc = new Document(); // 存储两个信息:fileName:文件名 content:文件类型 doc.add(new Field("fileName", title, TextField.TYPE_STORED)); doc.add(new Field("content", content, TextField.TYPE_STORED)); //如需添加在添加一条就是了 如: //doc.add(new Field("author", author, TextField.TYPE_STORED)); // 作者标识 return doc; } public static IndexSearcher getMultiSearcher(String parentPath) throws IOException, InterruptedException{ Analyzer analyzer = new StandardAnalyzer(); Directory idx; // 将索引存在内存中 idx = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(idx, iwc); File file = new File(parentPath); File[] files = file.listFiles(); // 存储信息 for (int i = 0 ; i < files.length ; i ++) { File file1 = files[i]; StringBuilder result = new StringBuilder(); BufferedReader in=new BufferedReader(new InputStreamReader(new FileInputStream(file1))); String str; while ((str = in.readLine()) != null) { result.append(System.lineSeparator()+str); } writer.addDocument(createDocument(file1.toString(), result.toString())); in.close(); } writer.commit(); writer.close(); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(idx)); Query content = null; try { // 需要查询的类型,内容(content:表示内容,fileName:表示文件名) content = new QueryParser("content", analyzer).parse("content:'硬实力'"); } catch (ParseException e) { e.printStackTrace(); } TopDocs topdoc = searcher.search(content, 10); System.out.println("匹配到的文件数量:"+topdoc.totalHits+"查询时间时间:"+System.currentTimeMillis()); ScoreDoc[] hits= topdoc.scoreDocs; for(ScoreDoc scoreDoc:hits){ Document hitDoc = searcher.doc(scoreDoc.doc); System.out.println("文件名:"+hitDoc.get("fileName")+","+hitDoc.get("content")); } return searcher; } }
二:设置指定内容(可以通过查询数据库,作为数据源)
package lucene; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import java.nio.file.Paths; import java.util.Random; /** * 设置指定内容(可以通过查询数据库,作为数据源) */ public class LuceneData { private static Document createDocument(String title, String content) { Document doc = new Document(); doc.add(new Field("content", content, TextField.TYPE_STORED)); doc.add(new Field("title", title, TextField.TYPE_STORED)); doc.add(new Field("author", "bobliu", TextField.TYPE_STORED)); return doc; } /** * lucene简单实例 索引 查询 经济,分词器:标准分词器 */ public static void testDemo() throws Exception{ Analyzer analyzer = new StandardAnalyzer(); Directory idx; // 将索引存入指定位置 //idx = FSDirectory.open(Paths.get("D:\\index")); // 将索引存在内存中 idx = new RAMDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(idx, iwc); String[] strings = new String[]{"爱","王","张","李","周","马","习","花"}; for (int i = 0; i < 100000; i++) { Random random = new Random(); int i1 = random.nextInt(strings.length); int i2 = random.nextInt(strings.length); int i3 = random.nextInt(strings.length); String s = strings[i1] + strings[i2] + strings[i3]; // 设置内容 writer.addDocument(createDocument(String.valueOf(i), s)); } writer.commit(); writer.close(); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(idx)); Query content = new QueryParser("content", analyzer).parse("content:'王王王'"); TopDocs topdoc = searcher.search(content, 10); System.out.println("命中个数:"+topdoc.totalHits+"时间:"+System.currentTimeMillis()); ScoreDoc[] hits= topdoc.scoreDocs; for(ScoreDoc scoreDoc:hits){ Document hitDoc = searcher.doc(scoreDoc.doc); System.out.println(hitDoc.get("content")+","+hitDoc.get("title")); } } public static void main(String[] args) { try { testDemo(); } catch (Exception e) { e.printStackTrace(); } } }