reducer 按key聚合

#coding=utf8

import sys
import itertools
import json


# Python 2 only: make UTF-8 the codec used for implicit str/unicode
# conversions. reload() is needed because site.py removes
# sys.setdefaultencoding at interpreter start-up.
# Python 3 has no reload() builtin and already defaults to UTF-8, so the
# hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding("utf8")
 

def get_line():
    """Yield ``[key, value]`` pairs parsed from tab-separated stdin lines.

    Each line is stripped of surrounding whitespace and split on the first
    tab only, so any further tabs stay inside the value. Lines that are
    empty after stripping are skipped. A line without a tab (or whose value
    was empty) yields an empty string as the value, so every yielded item
    always has exactly two elements.
    """
    for raw in sys.stdin:
        record = raw.strip()
        if not record:
            # Nothing to group on; ignore blank lines.
            continue
        # partition() always returns three parts, unlike split('\t', 1),
        # which returns a single-element list when no tab is present.
        key, _, value = record.partition('\t')
        yield [key, value]


# Reducer driver: merge consecutive records that share a key and emit one
# "key<TAB>value value ..." line per run. itertools.groupby only groups
# adjacent items, so equal keys that are not adjacent produce separate
# output lines (presumably the input arrives sorted by key -- confirm).
for key, group in itertools.groupby(get_line(), key=lambda pair: pair[0]):
    # Stream the group straight into join; no intermediate list is needed.
    joined = ' '.join(pair[1] for pair in group)
    # A parenthesised single argument prints identically as a Python 2
    # print statement and as a Python 3 print() call.
    print('%s\t%s' % (key, joined))

猜你喜欢

转载自www.cnblogs.com/pengwang52/p/10066295.html