import pysparnn.cluster_index as ci
from sklearn.feature_extraction.text import TfidfVectorizer
# Demo: recall the most similar stored sentence for each query using
# TF-IDF features and a pysparnn cluster index.

# Corpus of sentences that will be indexed.
data = ["hello world", "oh hello there", "Play it", "Play it again Sam"]

# Fit the TF-IDF vocabulary on the corpus (fit() returns the vectorizer).
tv = TfidfVectorizer().fit(data)

# Sparse TF-IDF feature vectors for the corpus.
features_vec = tv.transform(data)

# Build the search index; each feature row is paired with its sentence so
# that searches hand back the original text.
cp = ci.MultiClusterIndex(features_vec, data)

# Queries must be vectorized with the same fitted vectorizer as the corpus.
search_data = ["oh there", "Play it again Frank"]
search_feature_vec = tv.transform(search_data)

# k is the number of matches returned per query; k_clusters is the number of
# clusters searched (per the pysparnn docs). return_distance=False yields only
# the matched records, without their distances.
print(
    cp.search(
        search_feature_vec,
        k=1,
        k_clusters=2,
        return_distance=False,
    )
)
# Output: [['oh hello there'], ['Play it again Sam']]
pysparnn 模块使用,相似句子召回
猜你喜欢
转载自www.cnblogs.com/LiuXinyu12378/p/12386172.html
今日推荐
周排行