# Load *.log access-log files from files_dir into conn.db.log, then move each processed file to bak_dir.
import linecache
import re
import time,datetime
import glob
import os
import conn

# Sample access-log line; it appears to illustrate the entry format that
# handle_log's regular expression parses. Not referenced by the visible code.
w = '''112.224.65.85 - - [20/Aug/2013:00:01:02 +0800] "POST /api/topic/comments HTTP/1.1" 200 3804 "-" "Corax/0.7.0 CFNetwork/609.1.4 Darwin/13.0.0" "-" "1.173" "0.005"'''
# Directory scanned for *.log files. The trailing slash matters: the value is
# concatenated directly with the glob pattern '*.log'.
files_dir = "/home/corax/ops/data/"
# Destination given to `mv` for every log file once it has been processed.
bak_dir = "/home/corax/ops/data/backup"
def readfile(path):
    """Return the ``*.log`` files whose path starts with *path*.

    *path* is used as a raw prefix of the glob pattern (``path + '*.log'``),
    so a directory has to be passed with its trailing separator.

    Returns a list of matching paths, sorted so that callers process the
    files in a deterministic order; the list is empty when nothing matches.
    """
    return sorted(glob.glob(path + '*.log'))
def readtime(path):
    """Return the set of stamps embedded in the ``*.log`` file names.

    Files are located with the glob ``path + '*.log'`` and the list of
    matches is printed. For each file the stamp is the text between the
    first underscore of the file name and the dot that follows it, e.g.
    ``access_20130820.log`` yields ``'20130820'``.

    Returns a set of stamp strings (empty when no file matches).
    """
    files = glob.glob(path + '*.log')
    print(files)
    stamps = set()
    for log_path in files:
        # Split the file name only: an underscore in a directory component
        # must not shift which piece is taken as the stamp.
        pieces = os.path.basename(log_path).split('_')
        if len(pieces) < 2:
            # No underscore in the name, so there is no stamp to extract;
            # skip the file instead of failing with IndexError.
            continue
        stamps.add(pieces[1].split('.')[0])
    return stamps
def timestamp(time_file):
    """Convert a ``'%Y%b%d %H:%M:%S'`` string to seconds since the epoch.

    The text (for example ``'2013Aug20 00:01:02'``) is parsed with
    ``time.strptime`` and the result is handed to ``time.mktime``, so it is
    interpreted as local time. Returns a float.
    """
    layout = '%Y%b%d %H:%M:%S'
    broken_down = time.strptime(time_file, layout)
    return time.mktime(broken_down)
def datestamp(date_name):
    """Parse a ``'%Y%b%d'`` date string (e.g. ``'2013Aug20'``).

    Returns the ``time.struct_time`` produced by ``time.strptime``.
    """
    layout = '%Y%b%d'
    return time.strptime(date_name, layout)
def _build_log_pattern():
    """Compile the regular expression that matches one access-log entry."""
    ip = r"?P<ip>[\d.]*"
    date = r"?P<date>\d+"
    month = r"?P<month>\w+"
    year = r"?P<year>\d+"
    log_time = r"?P<time>\S+"
    method = r"?P<method>\S+"
    request = r"?P<request>\S+"
    status = r"?P<status>\d+"
    bodyBytesSent = r"?P<bodyBytesSent>\d+"
    refer = r"""?P<refer> [^\"]* """
    userAgent = r"""?P<userAgent> \S* """
    forwardr = r"""?P<forwardr> [^\"]* """
    request_time = r"""?P<request_time> [^\"]* """
    response_time = r"""?P<response_time> [^\"]* """
    # re.VERBOSE ignores the unescaped blanks inside the fragments above;
    # literal spaces in the entry are written as "\ ".
    return re.compile(r"(%s)\ -\ -\ \[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ \"(%s)?[\s]?(%s)?.*?\"\ (%s)\ (%s)\ \"(%s)\"\ \"(%s).*?\"\ \"(%s)\"\ \"(%s)\"\ \"(%s)\"" % (
        ip, date, month, year, log_time, method, request, status,
        bodyBytesSent, refer, userAgent, forwardr, request_time,
        response_time), re.VERBOSE)


def _to_float(text):
    """Return *text* as a float, or None for "-" and anything non-numeric."""
    if text == "-":
        return None
    try:
        return float(text)
    except ValueError:
        # Empty or multi-valued fields cannot be stored as a single number.
        return None


def _entry_to_record(fields):
    """Build the document stored for one entry from its named regex groups."""
    day_text = '%s%s%s' % (fields['year'], fields['month'], fields['date'])
    clock = fields['time']
    return {
        'ip': fields['ip'],
        'time': timestamp('%s %s' % (day_text, clock)),
        'method': fields['method'],
        'request': fields['request'],
        'status': fields['status'],
        'bodyBytesSent': fields['bodyBytesSent'],
        'refer': fields['refer'],
        'userAgent': fields['userAgent'],
        'forwardr': fields['forwardr'],
        'request_time': _to_float(fields['request_time']),
        'response_time': _to_float(fields['response_time']),
        'date': int(time.strftime("%Y%m%d", datestamp(day_text))),
        'hour': int(clock.split(":")[0]),
    }


def handle_log(log_file):
    """Parse access-log files, store every entry and move the files away.

    log_file is an iterable of paths. Each file is scanned line by line;
    every entry matched by the log pattern is converted to a dict and passed
    to ``conn.db.log.insert``. After a file has been read it is moved to
    ``bak_dir`` with ``mv``. The total elapsed time in seconds is printed at
    the end. Returns None.

    The timing fields are stored as floats, or as None when the field is
    "-" or is not a single number. Response times are no longer discarded
    merely because their text is longer than five characters.

    NOTE(review): the source this was rebuilt from had lost its indentation;
    the elapsed time is assumed to be printed once after all files rather
    than after each file -- confirm.
    """
    # Local import so the block does not depend on a new file-level import.
    import subprocess

    pattern = _build_log_pattern()
    started = time.time()
    for log_path in log_file:
        # "with" closes the file even if parsing or the insert raises, and
        # reading line by line keeps a match from spanning two entries.
        with open(log_path, 'r') as handle:
            for line in handle:
                for match in pattern.finditer(line):
                    # groupdict('') keeps '' for optional groups that did
                    # not take part in the match.
                    conn.db.log.insert(_entry_to_record(match.groupdict('')))
        print("mv %s %s " % (log_path, bak_dir))
        # An argument list avoids the shell, so paths containing spaces or
        # shell metacharacters are moved correctly.
        subprocess.call(["mv", log_path, bak_dir])
    print(time.time() - started)
if __name__ == '__main__':
    # Script entry point: list the pending log files, show the stamps found
    # in their names, then import the files.
    pending_logs = readfile(files_dir)
    print(pending_logs)
    stamps = readtime(files_dir)
    print(stamps)
    handle_log(pending_logs)