pandas 里 map、lambda、merge 的使用。注意:map() 是 Series 的方法,DataFrame 不能直接使用;DataFrame 逐元素处理请用 applymap()(pandas 2.1 起 applymap() 已弃用,改用 DataFrame.map())。
# -*- coding:utf-8 -*-
from datetime import datetime

import pandas as pd

# Default input/output locations, used when Main() is called with no arguments.
DEFAULT_SOURCE_PATH = '/data/u_lx_data/zhangqm/sh/yanjie/fudan/muying/muying_11yue_all.txt'
DEFAULT_URL_TS_PATH = '/data/u_lx_data/zhangqm/sh/yanjie/fudan/muying/babytree.txt'
DEFAULT_TARGET_PATH = '/data/u_lx_data/zhangqm/sh/yanjie/fudan/muying/python/url_ts_result.txt'

# Column names assigned to the header-less, tab-separated source file.
SOURCE_COLUMNS = ['ad', 'host', 'domain', 'url', 'ref', 'time', 'os', 'os_type']

# Columns the two inputs are joined on and the only columns written out.
JOIN_KEYS = ['ts', 'url']


def _time_to_ts(value):
    """Return the first 10 characters of ``str(value)`` after removing spaces."""
    return str(value).replace(' ', '')[0:10]


def Main(source_path=DEFAULT_SOURCE_PATH, url_ts_path=DEFAULT_URL_TS_PATH,
         target_path=DEFAULT_TARGET_PATH):
    """Write the distinct (ts, url) pairs present in both input files.

    The source file is read as tab-separated text without a header and given
    the column names in ``SOURCE_COLUMNS``.  A ``ts`` column is derived from
    its ``time`` column (text form, spaces removed, first 10 characters).
    The url/ts file is read as comma-separated text without a header, with
    columns ``ts`` (kept as text) and ``url``.  The two are inner-joined on
    ``ts`` and ``url`` and the de-duplicated pairs are written to
    ``target_path`` as tab-separated text without header or index.

    Args:
        source_path: Path of the tab-separated source file.
        url_ts_path: Path of the comma-separated ``ts,url`` file.
        target_path: Path of the output file to write.
    """
    print("开始。。。。。")
    print("加载规则数据")
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    df_list = pd.read_csv(source_path, header=None, names=SOURCE_COLUMNS, sep='\t')
    print("11yue源数据读完。。。。。")

    # 'ts' is forced to str so it compares equal to the text derived below.
    df_url_ts = pd.read_csv(url_ts_path, sep=',', header=None,
                            names=['ts', 'url'], dtype={'ts': str})
    print("url+ts 读完。。。。。")

    # Convert 'time' to text, drop spaces and keep the first 10 characters.
    df_list['ts'] = df_list['time'].map(_time_to_ts)
    print("ts 取完。。。。。")

    # Only the join keys reach the output, so project and de-duplicate both
    # sides before merging instead of joining every column and duplicate row.
    left_keys = df_list[JOIN_KEYS].drop_duplicates()
    right_keys = df_url_ts[JOIN_KEYS].drop_duplicates()
    df_result = pd.merge(left_keys, right_keys, how='inner', on=JOIN_KEYS)
    print("merge完。。。。。")

    df_result.to_csv(target_path, header=False, sep='\t', index=False)
    print("文件写入完。。。。。")


if __name__ == "__main__":
    Main()
版权声明:本文为博主原创文章,未经博主允许不得转载。