# Analyze access.log with Python: rough per-IP request counts.
#-*- coding: utf-8 -*-
import sys
import os
import pandas as pd
from collections import Counter
import xlwt
import re
def getlog(logdir="E:/mypython/mypython123/log/log", logfile_format=None):
    """Extract IP, timestamp and request path from the raw access logs.

    Every regular file directly inside *logdir* is read line by line and
    split on whitespace. For each usable line, fields 0, 3 and 6 (IP, time
    and requested interface) are appended to *logfile_format* as one
    tab-separated row.

    Skipped lines: blank lines, lines with fewer than seven fields, and
    lines whose first field is ``127.0.0.1``.

    Args:
        logdir: Directory containing the raw access-log files.
        logfile_format: Path of the condensed output file. Defaults to
            ``log.txt`` in the same directory as this script.
    """
    if logfile_format is None:
        logfile_format = os.path.join(os.path.dirname(__file__), "log.txt")

    # Open the output once instead of once per log line.
    # NOTE(review): append mode is kept from the original, so running the
    # script twice over the same logs duplicates rows - confirm intent.
    with open(logfile_format, 'a', errors="replace") as fw:
        # Sorted so the row order does not depend on the file system.
        for eachfile in sorted(os.listdir(logdir)):
            logfile = os.path.join(logdir, eachfile)
            if not os.path.isfile(logfile):
                # Sub-directories cannot be opened as log files.
                continue
            # errors="replace": one undecodable byte in a request line must
            # not abort the whole run.
            with open(logfile, 'r', errors="replace") as fo:
                for line in fo:
                    spline = line.split()
                    # Need at least indexes 0, 3 and 6; this also drops
                    # blank and truncated lines.
                    if len(spline) < 7:
                        continue
                    # Ignore requests coming from the local machine.
                    if spline[0] == "127.0.0.1":
                        continue
                    print(spline)
                    fw.write("\t".join((spline[0], spline[3], spline[6])))
                    fw.write('\n')
def countip(logfile_format=None):
    """Count how many rows of the condensed log mention each IPv4 address.

    The first IPv4-looking token on each line is taken as that line's IP.
    Lines without one are ignored.

    Args:
        logfile_format: Path of the condensed log to read. Defaults to
            ``log.txt`` in the same directory as this script.

    Returns:
        A list of ``(ip, count)`` tuples ordered from most to least frequent.
    """
    if logfile_format is None:
        logfile_format = os.path.join(os.path.dirname(__file__), "log.txt")

    # One to three digits per octet. The previous pattern demanded exactly
    # three digits in the first three octets, so addresses such as 10.0.0.1
    # or 192.168.1.5 were never counted.
    ip_pattern = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")

    ip_counter = Counter()
    with open(logfile_format, 'r', errors="replace") as f:
        for line in f:
            found = ip_pattern.search(line)
            if found is not None:
                ip_counter[found.group()] += 1
    return ip_counter.most_common()
def toexcel(ip_c, name, output_path="E:/mypython/mypython123/log/mylog.xls"):
    """Write the per-IP counts to an Excel workbook with a styled header row.

    Args:
        ip_c: Iterable of ``(ip, count)`` rows, written from row 1 down.
        name: Name of the worksheet to create.
        output_path: Where to save the workbook. xlwt produces the legacy
            binary ``.xls`` format, so the default uses that extension;
            saving the same bytes as ``.xlsx`` gives a file Excel refuses
            to open.
    """
    # Header style: bold white text on a solid blue background.
    font = xlwt.Font()
    font.name = "微软雅黑"
    font.bold = True
    font.colour_index = 1  # must be a numeric palette index (1 = white)

    bg = xlwt.Pattern()
    bg.pattern = xlwt.Pattern.SOLID_PATTERN
    bg.pattern_fore_colour = 4  # palette index 4 = blue

    style_head = xlwt.XFStyle()
    style_head.font = font
    style_head.pattern = bg

    # Create the workbook and the single worksheet.
    excel = xlwt.Workbook()
    sheet = excel.add_sheet(name)

    # Row 0 holds the column titles.
    head = ["IP", "访问次数"]
    for col, title in enumerate(head):
        sheet.write(0, col, title, style_head)

    # Data rows start at row 1, one cell per tuple element.
    for row, item in enumerate(ip_c, 1):
        for col, value in enumerate(item):
            sheet.write(row, col, value)

    excel.save(output_path)
def main():
    """Run the pipeline: condense the logs, count per IP, export to Excel."""
    # Step 1: pull the useful fields out of the raw logs into log.txt.
    getlog()
    # Steps 2 and 3: count hits per IP and write them to sheet 'test1'.
    toexcel(countip(), 'test1')
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()