情景:
有 A(parsed_pvlog) B(pdetail) 两个文件,
当A.uuid==B.uuid时,将A文件中某一个字段(cpssrc)加到B文件中
方法一:
A、B 两个文件都逐行读取:每一行的全部字段解析为一个字典,并以该行 uu 的值为键存入外层字典,形成两层 dict 嵌套:{uu的值: {该行解析出的dict}}。
遍历 A 的 (k, v):如果 db.get(k) 有值,则令 db[k]['cpssrc'] = v.get('cpssrc', '')。
缺点:A、B 都要整体转为 dict,内存占用较大,最后还要再遍历一遍 A 的 (k, v)。
方法二:
将 A 文件的每一行解析为字典 kvs = parse_logstr_to_dict(line);如果 kvs.get('cpssrc', '') 非空,就以 uu 的值为键、cpssrc 的值为值,放入字典 userCps。
遍历 B 文件的每一行,同样解析为字典 kvs = parse_logstr_to_dict(line),取出 uu 的值 uuid,再把 userCps 中键为 uuid 的值(没有则为空串)写入该行字典 kvs 的 cpssrc 键。
这样内存中只需保存 A 文件的字典 userCps({uu的值: cpssrc的值});B 文件逐行处理,查到的值直接加入当前行解析出的 dict 并写出。
#coding=utf-8
import os
import sys
from datetime import datetime, timedelta
# Python 2 only: reload sys so that setdefaultencoding is available again, then
# make UTF-8 the process-wide default for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')
def parse_logstr_to_dict(logstr, rdsp='\t'):
    """Parse one ``k1=v1<sep>k2=v2<sep>...`` log line into a dict.

    Args:
        logstr: A single log line; leading/trailing CR and LF are ignored.
        rdsp: Separator placed between the ``key=value`` fields.

    Returns:
        A dict mapping each key to its value, both as strings. Fields that
        contain no ``=`` are skipped, only the first ``=`` of a field splits
        key from value, and a repeated key keeps its last value.
    """
    log_dict = {}
    # Strip '\r' as well as '\n' so that CRLF-terminated lines do not leave a
    # stray carriage return attached to the last value.
    for field in logstr.strip('\r\n').split(rdsp):
        key, sep, value = field.partition('=')
        if not sep:
            continue
        log_dict[key] = value
    return log_dict
def trans_dict_2_logstr(log_dict, rdsp='\t'):
    """Serialize a dict into a single ``key=value`` log string.

    Args:
        log_dict: Mapping whose entries are rendered as ``key=value``.
        rdsp: Separator placed between the rendered entries.

    Returns:
        The entries joined by ``rdsp`` in the dict's iteration order; an
        empty string for an empty dict.
    """
    return rdsp.join(
        '%s=%s' % (key, value) for key, value in log_dict.items()
    )
def add_cpssrc_to_pdetail(log1, log2, outFile):
    """Copy log2 to outFile, appending A's ``cpssrc`` to records with a matching ``uu``.

    Args:
        log1: Path of file A (parsed_pvlog), the source of ``uu`` -> ``cpssrc``.
        log2: Path of file B (pdetail), the records to annotate.
        outFile: Path of the file to write.

    Every line of log2 is written exactly once, in its original order. When
    log1 holds a record with the same ``uu`` that also carries a ``cpssrc``
    field, a tab followed by ``cpssrc=<value>`` is appended to the line;
    otherwise the line is written unchanged. If log1 repeats a ``uu``, its
    last such record wins.
    """
    # Index A once instead of re-reading it for every line of B.
    cps_by_uu = {}
    with open(log1) as src:
        for line1 in src:
            record = parse_logstr_to_dict(line1)
            # Records missing either field cannot contribute a match.
            if 'uu' in record and 'cpssrc' in record:
                cps_by_uu[record['uu']] = record['cpssrc']

    with open(log2) as src:
        with open(outFile, "w") as out:
            for line2 in src:
                # Match on the parsed uu field, not on a substring of the line.
                uu = parse_logstr_to_dict(line2).get('uu')
                if uu in cps_by_uu:
                    line2 = line2.rstrip("\r\n") + "\tcpssrc=" + cps_by_uu[uu] + "\n"
                out.write(line2)
# add_cpssrc_to_pdetail("c://parsed_pvlog.log","c://pdetail.log","c://hello.log")
# Method 1: load both files fully into memory, keyed by uu.
def load(log1, log2, outFile):
    """Merge A's ``cpssrc`` into B's records and write them to outFile.

    Args:
        log1: Path of file A (parsed_pvlog).
        log2: Path of file B (pdetail).
        outFile: Path of the file to write.

    Both files are parsed into ``{uu: {field: value}}`` dicts, so a later line
    with the same ``uu`` replaces an earlier one, and lines without a ``uu``
    field (for example blank lines) are skipped. Every B record whose ``uu``
    also appears in A gets ``cpssrc`` set to A's value, or ``''`` when A's
    record has none. The B records are then written one per line, in dict
    iteration order.
    """
    da = _load_by_uu(log1)
    db = _load_by_uu(log2)
    for uu, a_record in da.items():
        if uu in db:
            db[uu]['cpssrc'] = a_record.get('cpssrc', '')
    # Open the output only after both inputs were read, so that a failure
    # while loading does not leave a truncated file behind.
    with open(outFile, "w") as out:
        for b_record in db.values():
            # NOTE(review): "\r\n" written in text mode becomes "\r\r\n" on
            # Windows -- confirm this line ending is intended.
            out.write(trans_dict_2_logstr(b_record) + "\r\n")


def _load_by_uu(path):
    """Parse every line of ``path`` and return the records keyed by their ``uu`` field."""
    records = {}
    with open(path) as src:
        for line in src:
            record = parse_logstr_to_dict(line)
            if 'uu' in record:
                records[record['uu']] = record
    return records
# Run method 1 on the hard-coded paths; as a top-level statement this executes
# every time the module is run or imported.
load("c://parsed_pvlog.log", "c://pdetail.log", "c://hello4.log")
# Method 2: keep only a uu -> cpssrc map built from A in memory and stream B
# through it line by line.
pvlog = "c://parsed_pvlog.log"
oldPdetailLog = "c://pdetail.log"
newPdetailLog = "c://my.log"

# uu -> cpssrc for every A record whose cpssrc is non-empty; a later record
# with the same uu replaces an earlier one.
userCps = {}
with open(pvlog) as f:
    for line in f:
        kvs = parse_logstr_to_dict(line)
        if kvs.get('cpssrc', ''):
            userCps[kvs.get('uu')] = kvs.get('cpssrc')

# The output is opened only after A was loaded and B was opened, so a missing
# input does not leave a truncated output file; the with-blocks close both
# handles even if a line fails to process.
with open(oldPdetailLog) as f:
    with open(newPdetailLog, "w") as fw:
        for line in f:
            kvs = parse_logstr_to_dict(line)
            uuid = kvs.get('uu', '')
            # Every B record gets a cpssrc field; '' when A has no value for it.
            kvs['cpssrc'] = userCps.get(uuid, '')
            fw.write(trans_dict_2_logstr(kvs))
            fw.write('\r\n')