package com.search.crawler;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.1 demo: builds a file-system index from a directory tree and
 * runs a query against it, printing the path of every matching file.
 */
public class IndexProcess {

    /** Directory where the generated index is stored. */
    private static final String INDEX_PATH = "src/indexFiles";

    /**
     * Builds a fresh index (any existing index at the index path is overwritten)
     * from every readable file found under {@code docsPath}.
     *
     * <p>If {@code docsPath} does not exist or is not readable, a message is
     * printed and the JVM exits with status 1. I/O failures during indexing are
     * printed to standard error; the writer and directory are closed in every
     * case so the index write lock is not left behind.
     *
     * @param docsPath path of the file or directory to index
     */
    public static void createIndex(String docsPath) {
        File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        System.out.println("Indexing to directory '" + INDEX_PATH + "'...");

        Directory dir = null;
        IndexWriter writer = null;
        try {
            dir = FSDirectory.open(new File(INDEX_PATH));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
            // 'true' => create a new index, replacing whatever is already there.
            writer = new IndexWriter(dir, analyzer, true, new IndexWriter.MaxFieldLength(25000));
            indexDocs(writer, docDir);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path; previously an exception while indexing skipped
            // both close() calls and leaked the writer (and its lock).
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Recursively adds {@code file} to the index. Directories are walked
     * depth-first; each regular file becomes one document with a stored
     * {@code "path"} field and a {@code "content"} field fed from the file's text.
     * Unreadable entries, and files that cannot be opened, are skipped.
     *
     * @param writer the open index writer documents are added to
     * @param file   a file or directory to index
     * @throws IOException if reading the file or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // Do not try to index files that cannot be read.
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            File[] children = file.listFiles();
            // listFiles() returns null on an I/O error.
            if (children != null) {
                for (File child : children) {
                    indexDocs(writer, child);
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            // The file vanished or is not openable: report and skip it. The old
            // code fell through and hit a NullPointerException on fis.close().
            fnfe.printStackTrace();
            return;
        }

        try {
            // Make a new, empty document.
            Document doc = new Document();

            // Index (and store) the path of the file.
            doc.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.ANALYZED));

            // Index the content. The reader wraps the stream opened above, so a
            // single handle is used and the finally block below releases it.
            // Platform default charset is kept, matching the former FileReader.
            doc.add(new Field("content", new BufferedReader(new InputStreamReader(fis))));

            writer.addDocument(doc);
        } finally {
            fis.close();
        }
    }

    /**
     * Searches the index for {@code value} in field {@code key} and prints each
     * hit (up to 1000) followed by a tab and the stored path of the matching
     * document, then prints the elapsed time in milliseconds.
     *
     * <p>If {@code value} cannot be parsed as a query, the parse error is printed
     * and no search is performed.
     *
     * @param key   name of the default field to search
     * @param value query string in Lucene query-parser syntax
     */
    static void search(String key, String value) {
        long startMillis = System.currentTimeMillis();

        Directory dir = null;
        IndexSearcher searcher = null;
        try {
            dir = FSDirectory.open(new File(INDEX_PATH));
            searcher = new IndexSearcher(dir, true); // read-only
            QueryParser parser = new QueryParser(Version.LUCENE_31, key,
                    new StandardAnalyzer(Version.LUCENE_31));

            Query query = null;
            try {
                query = parser.parse(value);
            } catch (ParseException e) {
                e.printStackTrace();
            }

            // Skip the search when parsing failed instead of passing null on.
            if (query != null) {
                TopDocs topDocs = searcher.search(query, null, 1000);
                for (ScoreDoc hit : topDocs.scoreDocs) {
                    System.out.println(hit + "\t" + searcher.doc(hit.doc).get("path"));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The searcher and directory were never closed before.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }

        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("总共花了" + elapsedMillis + "毫秒时间");
    }

    /**
     * Demo entry point: indexes {@code src/index}, then searches the
     * {@code content} field for "revision".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexProcess.createIndex("src/index");
        System.out.println("search starting :");
        IndexProcess.search("content", "revision");
    }
}
基于 Lucene 3.1 开发的 demo。
猜你喜欢
转载自xiaoxia001.iteye.com/blog/1025884
今日推荐
周排行