1.全文搜索框架lucene和mysql like%对比
全文索引是预先对数据分词并建立倒排索引,搜索时先对搜索词分词,再根据索引快速定位匹配的数据。代价是系统需要额外维护索引。
mysql 的 like '%关键词%' 无法利用索引,只能全表扫描一遍,数据量大时效率相对比较慢。
2.github(项目)
https://github.com/dajitui/spring-boot-lucene-ik
3.详细的过程
我初衷也是想和数据库结合的,so
利用 jpa 查询得到数据;由于数据量通常很大,不适合用数据库的 like 做模糊查询。
得到数据后,需要关联 lucene 版本和分词器,再创建一个目录保存索引,然后写入。
// Open the index directory, attach the analyzer, write one document, commit.
// Fixes vs. original: removed the redundant commit() issued before addDocument(),
// and moved the close() calls into finally so an exception cannot leak the
// IndexWriter (and its write lock) or the Directory.
Directory directory = null;
IndexWriter iwriter = null;
try {
    // Filesystem directory that stores the index files.
    directory = FSDirectory.open(new File(dir));
    // Bind the Lucene version and the analyzer (tokenizer) to the writer config.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    iwriter = new IndexWriter(directory, config);
    // Write the document into the index, then make the change durable.
    iwriter.addDocument(doc);
    iwriter.commit();
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Close writer before directory; swallow secondary close failures
    // so they cannot mask the original exception.
    if (iwriter != null) {
        try { iwriter.close(); } catch (IOException ignored) { }
    }
    if (directory != null) {
        try { directory.close(); } catch (IOException ignored) { }
    }
}
把数据写进document,再写入刚刚创建的文件里面
//获取每行数据 Map<String, Object> lineData = queryFood.get(i); //创建Document对象 Document doc = new Document(); //获取每列数据 Field foodid=new Field("foodid",lineData.get("foodid").toString(),TextField.TYPE_STORED); Field foodname=new Field("foodname",lineData.get("foodname").toString(),TextField.TYPE_STORED); Field price=new Field("price",lineData.get("price").toString(),TextField.TYPE_STORED); Field imagepath=new Field("imagepath",lineData.get("imagepath").toString(),TextField.TYPE_STORED); //添加到Document中 doc.add(foodid); doc.add(foodname); doc.add(price); doc.add(imagepath); //调用,创建索引库 indexDemo.write(doc);
搜索的时候,则是打开索引目录,通过索引查找并高亮命中的词:
//索引库的存储目录 Directory directory = FSDirectory.open(new File(dir)); //读取索引库的存储目录 DirectoryReader ireader = DirectoryReader.open(directory); //搜索类 IndexSearcher isearcher = new IndexSearcher(ireader); //lucence查询解析器,用于指定查询的属性名和分词器 QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); //搜索 Query query = parser.parse(value); //最终被分词后添加的前缀和后缀处理器,默认是粗体<B></B> SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>","</font>"); //高亮搜索的词添加到高亮处理器中 Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); //获取搜索的结果,指定返回document返回的个数 ScoreDoc[] hits = isearcher.search(query, null, 5).scoreDocs; List<Map> list=new ArrayList<Map>(); //遍历,输出 for (int i = 0; i < hits.length; i++) { int id = hits[i].doc; Document hitDoc = isearcher.doc(hits[i].doc); Map map=new HashMap(); map.put("foodid", hitDoc.get("foodid")); //获取到foodname String foodname=hitDoc.get("foodname"); //将查询的词和搜索词匹配,匹配到添加前缀和后缀 TokenStream tokenStream = TokenSources.getAnyTokenStream(isearcher.getIndexReader(), id, "foodname", analyzer); //传入的第二个参数是查询的值 TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, foodname, false, 10); String foodValue=""; for (int j = 0; j < frag.length; j++) { if ((frag[j] != null) && (frag[j].getScore() > 0)) { //获取 foodname 的值 foodValue=((frag[j].toString())); } } map.put("foodname", foodValue); map.put("price", hitDoc.get("price")); map.put("imagepath", hitDoc.get("imagepath")); list.add(map); } ireader.close(); directory.close(); return list;
lucene 索引维护
上面是创建索引
增量添加索引
/**
 * Incrementally adds one document to the existing index and prints the
 * elapsed time.
 *
 * <p>Fix vs. original: the writer is now closed in {@code finally}, so a
 * failure inside addDocument/commit cannot leak the index write lock.
 *
 * @throws Exception if the index directory cannot be opened or written
 */
public static void insert() throws Exception {
    String text5 = "hello,goodbye,man,woman";
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        // Two stored, tokenized fields: a synthetic filename and the content.
        Document doc1 = new Document();
        doc1.add(new TextField("filename", "text5", Store.YES));
        doc1.add(new TextField("content", text5, Store.YES));
        indexWriter.addDocument(doc1);
        indexWriter.commit();
    } finally {
        // Always release the writer (and its lock), even on failure.
        indexWriter.close();
    }
    Date date2 = new Date();
    System.out.println("增加索引耗时:" + (date2.getTime() - date1.getTime()) + "ms\n");
}
删除索引
/**
 * Deletes every document whose "filename" field matches the given term and
 * prints the elapsed time.
 *
 * <p>Fix vs. original: the writer is now closed in {@code finally}, so a
 * failure inside deleteDocuments cannot leak the index write lock.
 *
 * @param str term matched against the "filename" field
 * @throws Exception if the index directory cannot be opened or written
 */
public static void delete(String str) throws Exception {
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        // Deletions become visible when the writer closes (implicit commit).
        indexWriter.deleteDocuments(new Term("filename", str));
    } finally {
        indexWriter.close();
    }
    Date date2 = new Date();
    System.out.println("删除索引耗时:" + (date2.getTime() - date1.getTime()) + "ms\n");
}
更新索引
/**
 * Replaces the document whose "filename" term is "text1" with a new document
 * (Lucene updates are delete-then-add) and prints the elapsed time.
 *
 * <p>Fix vs. original: the writer is now closed in {@code finally}, so a
 * failure inside updateDocument cannot leak the index write lock.
 *
 * @throws Exception if the index directory cannot be opened or written
 */
public static void update() throws Exception {
    String text1 = "update,hello,man!";
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        Document doc1 = new Document();
        doc1.add(new TextField("filename", "text1", Store.YES));
        doc1.add(new TextField("content", text1, Store.YES));
        // Atomically deletes docs matching the term, then adds doc1.
        indexWriter.updateDocument(new Term("filename", "text1"), doc1);
    } finally {
        indexWriter.close();
    }
    Date date2 = new Date();
    System.out.println("更新索引耗时:" + (date2.getTime() - date1.getTime()) + "ms\n");
}
根据索引查询
/**
 * Searches the "content" field for the given keyword and prints the
 * filename and content of every matching document.
 *
 * <p>Fix vs. original: reader and directory are now closed in
 * {@code finally}, so a parse/search failure cannot leak them.
 *
 * @param str query text parsed against the "content" field
 * @throws Exception if the index cannot be opened or the query cannot be parsed
 */
public static void search(String str) throws Exception {
    directory = FSDirectory.open(new File(INDEX_DIR));
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    DirectoryReader ireader = DirectoryReader.open(directory);
    try {
        IndexSearcher isearcher = new IndexSearcher(ireader);
        // Parser bound to the "content" field and the standard analyzer.
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT,
                "content", analyzer);
        Query query = parser.parse(str);
        // Up to 1000 hits, ranked by relevance score.
        ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document hitDoc = isearcher.doc(hits[i].doc);
            System.out.println(hitDoc.get("filename"));
            System.out.println(hitDoc.get("content"));
        }
    } finally {
        // Release index resources even when parse/search fails.
        ireader.close();
        directory.close();
    }
}