public class SynonymAnalyzer extends Analyzer{ @Override protected TokenStreamComponents createComponents(String fieldname, Reader reader) { SynonymMap.Builder builder = new SynonymMap.Builder(true); //Be sure the boolean last arg you pass there is the one you want. There are significant tradeoffs here. //Add as many terms as you like here... builder.add(new CharsRef("中华"), new CharsRef("华夏"), true); // builder.add(new CharsRef("中国"), new CharsRef("华夏"), true); // builder.add(new CharsRef("喀什米尔"), new CharsRef("克什米尔"), true); Set<String> keys = DataCache.SYNONYMS.keySet(); for (String key : keys) { String[] samewords = DataCache.SYNONYMS.get(key); for (String word : samewords) { builder.add(new CharsRef(key), new CharsRef(word), true); // System.out.println(key+"--"+word); } } // CharsRef multiWordCharsRef = new CharsRef(); // SynonymMap.Builder.join(new String[]{"中华", "华夏"}, multiWordCharsRef); // builder.add(new CharsRef("中国"), multiWordCharsRef, true); SynonymMap mySynonymMap = null; try { mySynonymMap = builder.build(); // mySynonymMap = null; } catch (IOException e) { e.printStackTrace(); } Tokenizer source = new AnsjTokenizer(new ToAnalysis(reader), reader,null,true); //Tokenizer source = new AnsjTokenizer(new IndexAnalysis(reader), reader,null,true); // Tokenizer source = new ClassicTokenizer(Version.LUCENE_46, reader); TokenStream filter = new StandardFilter(Version.LUCENE_40, source); filter = new LowerCaseFilter(Version.LUCENE_40,filter); filter = new SynonymFilter(filter, mySynonymMap, false); //Whatever other filter you want to add to the chain, being mindful of order. return new TokenStreamComponents(source, filter); } }
Lucene 同义词分词器
猜你喜欢
转载自itace.iteye.com/blog/2023123
今日推荐
周排行