Elasticsearch实战(三):高级搜索(中文+拼音+首字母+简繁转换+补全)

1、中文搜索、英文搜索、中英混搜  如:“紫光股份”,“紫光gufen”

2、全拼搜索、首字母搜索、中文+全拼、中文+首字母混搜   如:“ziguanggufen”,“紫光gufen”,“南京gf”,“zggf”,“ziguanggf”等等组合

3、简繁搜索

4、前缀搜索(以关键字开头匹配,用于输入补全)

首先安装pinyin插件、ik插件、stconvert插件。

Elasticsearch插件(一):ik分词

Elasticsearch插件(二):pinyin分词

Elasticsearch插件(三):stconvert

索引模版示例:

PUT _template/lxc
{
  "index_patterns": ["lxc*"],
  "settings": {
    "index.number_of_replicas": "1",
    "index.number_of_shards": "5",
    "index.translog.flush_threshold_size": "512mb",
    "index.translog.sync_interval": "60s",
    "index.codec": "best_compression",
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 50
        },
        "pinyin_simple_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": true,
          "keep_full_pinyin": false,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "pinyin_full_filter": {
          "type": "pinyin",
          "keep_first_letter": false,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "none_chinese_pinyin_tokenize": true,
          "keep_original": false,
          "limit_first_letter_length": 50,
          "lowercase": true
        },
        "t2s_convert": {
          "type": "stconvert",
          "delimiter": ",",
          "convert_type": "t2s"
        }
      },
      "char_filter": {
        "charconvert": {
          "type": "mapping",
          "mappings_path": "char_filter.txt"
        },
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      },
      "analyzer": {
        "ngramIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": ["edge_ngram_filter", "lowercase"],
          "char_filter": ["charconvert", "tsconvert"]
        },
        "ngramSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": ["lowercase"],
          "char_filter": ["charconvert", "tsconvert"]
        },
        "ikIndexAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "char_filter": ["charconvert", "tsconvert"]
        },
        "ikSearchAnalyzer": {
          "type": "custom",
          "tokenizer": "ik_smart",
          "char_filter": ["charconvert", "tsconvert"]
        },
        "pinyiSimpleIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_simple_filter", "edge_ngram_filter", "lowercase"]
        },
        "pinyiSimpleSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_simple_filter", "lowercase"]
        },
        "pinyiFullIndexAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_full_filter", "lowercase"]
        },
        "pinyiFullSearchAnalyzer": {
          "tokenizer": "keyword",
          "filter": ["pinyin_full_filter", "lowercase"]
        },
        "tsconvert": {
          "tokenizer": "tsconvert"
        }
      },
      "tokenizer": {
        "tsconvert": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "dynamic": "strict",
      "properties": {
        "@timestamp": {
          "type": "date"
        },
        "@version": {
          "type": "keyword"
        },
        "id": {
          "type": "keyword"
        },
        "word": {
          "type": "text",
          "fields": {
            "ngram": {
              "type": "text",
              "analyzer": "ngramIndexAnalyzer",
              "search_analyzer": "ngramSearchAnalyzer"
            },
            "spy": {
              "type": "text",
              "analyzer": "pinyiSimpleIndexAnalyzer",
              "search_analyzer": "pinyiSimpleSearchAnalyzer"
            },
            "fpy": {
              "type": "text",
              "analyzer": "pinyiFullIndexAnalyzer",
              "search_analyzer": "pinyiFullSearchAnalyzer"
            },
            "iks": {
              "type": "text",
              "analyzer": "ikIndexAnalyzer",
              "search_analyzer": "ikSearchAnalyzer"
            }
          }
        }
      }
    }
  }
}

更多:Elasticsearch深入理解专栏

——————————————————————————————————

作者:桃花惜春风

转载请标明出处,原文地址:  

https://blog.csdn.net/xiaoyu_BD/article/details/81778399

如果感觉本文对您有帮助,请留下您的赞,您的支持是我坚持写作最大的动力,谢谢!

猜你喜欢

转载自blog.csdn.net/xiaoyu_BD/article/details/81778399