使用 Python 解析 hadoop-namenode.log 并写入 MySQL
# -*- coding:utf-8 -*-
"""Load the timestamps found in a Hadoop NameNode log file into MySQL.

The script parses the log, empties ``mysql_python.namenode_log`` and then
appends one row per parsed log line.
"""
import sys

import pandas as pd
from sqlalchemy import create_engine
import MySQLdb

# Python 2 only: force UTF-8 as the implicit str/unicode codec. ``reload`` and
# ``sys.setdefaultencoding`` do not exist on Python 3, so the hack is skipped
# there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf8")

# Default NameNode log to parse.
LOG_PATH = ("/Users/hadoop/software/hadoop-2.7.3/logs/"
            "hadoop-hadoop-namenode-HadoopdeMacBook-Pro.local.log")

# NOTE(review): credentials are hard-coded in the source; consider reading
# them from the environment or a config file instead.
DB_HOST = '127.0.0.1'
DB_PORT = 3306
DB_USER = 'root'
DB_PASSWD = '20180423'
DB_NAME = 'mysql_python'
TABLE_NAME = "namenode_log"

# Same connection parameters as ``delete_data`` uses, as a SQLAlchemy URL.
DB_URL = "mysql+mysqldb://{user}:{passwd}@{host}:{port}/{db}?charset=utf8".format(
    user=DB_USER, passwd=DB_PASSWD, host=DB_HOST, port=DB_PORT, db=DB_NAME)

# Only lines whose text before the first comma has exactly this length are
# kept -- presumably the length of a "YYYY-MM-DD HH:MM:SS" stamp.
TIMESTAMP_LENGTH = 19


def log(path=LOG_PATH):
    """Parse the log file at *path* into a DataFrame.

    For every line, the text before the first comma is taken as the stamp.
    Lines whose stamp is not exactly ``TIMESTAMP_LENGTH`` characters long, or
    contains no space, are skipped.

    Args:
        path: Log file to read. Defaults to ``LOG_PATH``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date``, ``time``,
        ``date_time`` (``date + ' ' + time``) and ``web_name`` (always
        ``"www.baidu.com"``), one row per accepted line.
    """
    # Dictionary that collects the parsed columns.
    log_dict = {
        'date': [],
        'time': [],
        'date_time': [],
        'web_name': [],
    }
    # ``with`` guarantees the file is closed even if parsing raises.
    with open(path) as fr:
        # Parse line by line.
        for line in fr:
            st = line.split(',')[0]
            if len(st) != TIMESTAMP_LENGTH:
                continue
            parts = st.split(' ')
            if len(parts) < 2:
                # 19 characters but no space: not a "date time" stamp.
                continue
            date_part, time_part = parts[0], parts[1]
            log_dict['date'].append(date_part)
            log_dict['time'].append(time_part)
            log_dict['date_time'].append(date_part + ' ' + time_part)
            log_dict['web_name'].append("www.baidu.com")
    return pd.DataFrame(data=log_dict)


def to_mysql(dataframe):
    """Append *dataframe* to the ``mysql_python.namenode_log`` table.

    Args:
        dataframe: The ``pandas.DataFrame`` to write. With the pandas
            defaults used here, the index is written as a column as well.
    """
    connect = create_engine(DB_URL)
    try:
        dataframe.to_sql(TABLE_NAME,
                         con=connect,
                         schema=DB_NAME,
                         if_exists="append")
    finally:
        # Release the pooled connections held by the engine.
        connect.dispose()


def delete_data():
    """Delete every row from ``mysql_python.namenode_log`` and commit."""
    db = MySQLdb.connect(host=DB_HOST,
                         user=DB_USER,
                         passwd=DB_PASSWD,
                         db=DB_NAME,
                         port=DB_PORT,
                         charset='utf8')
    try:
        cursor = db.cursor()
        try:
            cursor.execute("delete from mysql_python.namenode_log;")
        finally:
            cursor.close()
        db.commit()
    finally:
        # Always close the connection, even when the delete or commit fails.
        db.close()


if __name__ == '__main__':
    # Parse first so that an unreadable log file does not leave the table
    # emptied with nothing to reload into it.
    dataframe = log()
    delete_data()
    to_mysql(dataframe)
    print("finish!!!")
返回 MySQL 查看数据: