版权声明:本文为博主原创文章,未经博主允许不得转载。如若转载,请注明出处! https://blog.csdn.net/Homewm/article/details/84785503
# Tokenizer function: splits each query into its overlapping 3-grams.
def get_ngrams(query):
    """Return every overlapping 3-character substring of ``query``.

    ``query`` is converted with ``str()`` first, so non-string values are
    tokenized by their string form. Input whose string form is shorter than
    three characters yields an empty list.
    """
    size = 3
    text = str(query)
    # The last window starts at index len(text) - size, so the range bound
    # must be len(text) - size + 1. Stopping one earlier dropped the final
    # 3-gram (and returned nothing at all for a 3-character query).
    return [text[i:i + size] for i in range(len(text) - size + 1)]


# by zgd
def get_ngrams_zgd(input):
    """Count the 3-grams of ``input``.

    Each window of three consecutive items is joined with a single space to
    form the key, so the items must be strings (``str.join`` requires it).
    When ``input`` is itself a string, the windows are 3-character slices and
    the key is those characters separated by spaces.

    Returns a dict mapping each joined 3-gram to the number of times it
    occurs. Input with fewer than three items yields an empty dict.

    The parameter name shadows the ``input`` builtin; it is kept unchanged so
    existing keyword callers continue to work.
    """
    n = 3
    output = {}
    for i in range(len(input) - n + 1):
        gram = " ".join(input[i:i + n])
        output[gram] = output.get(gram, 0) + 1
    return output