数据格式如下:
输入文件 nan1.txt 以制表符分隔,每行一条记录,至少包含品牌(brand)和关注该品牌的设备 imei 两列。
#encoding:utf-8
import os
import pandas as pd
def count_cross_uv(uv_table):
    """Count the distinct imei shared by every ordered pair of brands.

    Prints one progress line per brand pair.

    Args:
        uv_table: DataFrame with a 'brand' column and an 'imei' column,
            one row per (brand, imei) record. Repeated rows are allowed.

    Returns:
        A list of dicts with the keys 'brand1', 'brand2' and 'uv_cross',
        one per ordered pair of brands (self-pairs included), ordered by
        the first appearance of each brand in ``uv_table``.
    """
    # Deduplicate first: joining the raw rows would count an imei listed
    # k1 times for one brand and k2 times for the other k1 * k2 times.
    pairs = uv_table[['brand', 'imei']].drop_duplicates()
    brand_list = list(pairs['brand'].drop_duplicates())

    # Slice each brand's imei once instead of re-filtering the whole table
    # (and re-merging) for every pair of brands.
    imei_by_brand = [
        pairs.loc[pairs['brand'] == brand, 'imei'] for brand in brand_list
    ]

    uv_cross = []
    for brand1, imei1 in zip(brand_list, imei_by_brand):
        for brand2, imei2 in zip(brand_list, imei_by_brand):
            print(u'正在计算品牌 %s,品牌 %s 的交叉UV' % (brand1, brand2))
            uv_cross.append({
                'brand1': brand1,
                'brand2': brand2,
                # Each slice holds unique imei, so the number of matches is
                # the size of the intersection.
                'uv_cross': int(imei1.isin(imei2).sum()),
            })
    return uv_cross


if __name__ == '__main__':
    uv_table = pd.read_table('nan1.txt', sep='\t', encoding='utf-8')
    print(uv_table.columns)
    uv_cross = count_cross_uv(uv_table)
    # Explicit columns keep the CSV header even when the input has no rows.
    result = pd.DataFrame(uv_cross, columns=['brand1', 'brand2', 'uv_cross'])
    result.to_csv('result.csv', encoding='utf_8_sig', index=False)
结果: