机器学习实战上的Apriori算法最后结果看着不过瘾,按照算法思路另外实现了一遍。
import itertools


def load_data():
    """Return the toy transaction database from *Machine Learning in Action*.

    Each inner list is one transaction. Items inside a transaction are in
    ascending order — scan_data relies on this so that the tuples emitted by
    itertools.combinations line up with the candidate keys.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def creat_list(str_num, num_combine):
    """Build the candidate table for one level.

    Maps every ``num_combine``-sized combination of the items in
    ``str_num`` to an initial support count of 0.
    (Function name kept from the original post; 'creat' is a typo for
    'create', but renaming would break existing callers.)
    """
    return {combo: 0 for combo in itertools.combinations(str_num, num_combine)}


def scan_data(data_set, str_num, num_combine):
    """Count how many transactions contain each candidate itemset.

    The original collected every combination of every transaction into a
    list and compared each candidate against each element — an
    O(candidates x combinations) scan. A direct dict lookup per combination
    yields the same counts in a single pass.
    """
    cat = creat_list(str_num, num_combine)
    for transaction in data_set:
        if len(transaction) < num_combine:
            continue  # too short to contain any itemset of this size
        for combo in itertools.combinations(transaction, num_combine):
            if combo in cat:
                cat[combo] += 1
    return cat


def choose_min(cat, min_support_rate, trans):
    """Drop every candidate whose support (count / trans) is below
    ``min_support_rate``; mutates ``cat`` in place and returns it.

    ``trans`` is the total number of transactions.
    """
    for key in list(cat.keys()):  # snapshot keys: we delete while iterating
        if cat[key] / trans < min_support_rate:
            del cat[key]
    return cat


def apriori(data_set, str_num, num_combine, min_support_rate, trans):
    """Run Apriori and return a list of dicts, one per itemset size
    starting at ``num_combine``, each mapping a frequent itemset (tuple)
    to its support count.

    Fixes relative to the original post's version:
    * the candidate items for the next level are the union of ALL items in
      the surviving itemsets — the original used ``key[0]``, silently
      dropping every item after the first, so larger frequent itemsets
      could be missed entirely;
    * the first level is no longer re-scanned a second time inside the
      loop;
    * the trailing empty dict is no longer appended to the result.
    """
    cat = choose_min(scan_data(data_set, str_num, num_combine),
                     min_support_rate, trans)
    cat_list = []
    while cat:
        cat_list.append(cat)
        # Union of the items appearing in any frequent itemset at this
        # level, sorted so combinations() emits tuples in the same
        # ascending order as the (sorted) transactions.
        survivors = sorted({item for key in cat for item in key})
        num_combine += 1
        cat = choose_min(scan_data(data_set, survivors, num_combine),
                         min_support_rate, trans)
    return cat_list


if __name__ == "__main__":
    data_set = load_data()
    str_num = [1, 2, 3, 4, 5]
    min_rate = 0.7
    trans = len(data_set)
    print(apriori(data_set, str_num, 1, min_rate, trans))
比书本上的原算法提供了更多可选的参数,操作性更强。
图片是以上算法的结果