原理
request_time是从接收到客户端的第一个字节开始,到把所有的响应数据都发送完为止。
upstream_response_time是从与后端建立TCP连接开始到接收完响应数据并关闭连接为止。
所以,request_time会大于等于upstream_response_time。
1. 配置nginx 访问日志
http {
#$remote_addr:客户端IP地址;
#$remote_user:客户端用户名称;
#$time_local:访问时间;
#"$request":HTTP请求方法、访问的URL、HTTP协议版本和请求头信息的组合;
#$status:响应状态码;
#$body_bytes_sent:响应的字节数(不包含响应头的字节数);
#$http_referer:referer信息;
#$http_user_agent:用户终端的User Agent;
#默认格式
log_format main '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent"';
# $http_x_forwarded_for 客户端真实IP地址
# $proxy_add_x_forwarded_for 代理服务器IP地址
# $upstream_response_time 向上游服务器发送请求并等待响应的时间
# $request_time nginx处理请求所消耗的时间
# $content_length:“Content-Length”请求头的值,等同于$http_content_length。即,nginx从客户端收到的请求头中Content-Length字段的值,不是nginx返回给客户端响应中的Content-Length字段($sent_http_content_length)的值。
# $request_length:请求的字节数(包括请求行、请求头和请求体)。注意,由于$request_length是请求解析过程中不断累加的,如果解析请求时出现异常或提前完成,则$request_length只是已经累加部分的长度,并不是nginx从客户端收到的完整请求的总字节数(包括请求行、请求头、请求体)。例如,向nginx的静态文件的URL POST数据,则POST的数据(即请求体)不会计算在内
log_format mylog '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent" '
'"$http_x_forwarded_for" "$proxy_add_x_forwarded_for" '
'$upstream_response_time $request_time $content_length $request_length ';
#添加一个map对象; 配置传参为uri,默认为通过返回1,如果匹配到以下的正则表达式.*\.(gif|jpg|jpeg|png|bmp|swf|js|css) 返回0
map $uri $mylogFilter {
default 1;
# ~^ 表示匹配以什么规则开头
~^(.*\.(html|js|css|svg|ico|gif|jpg|jpeg|png|bmp|swf|webp|woff|ttf)$) 0;
}
access_log /var/log/nginx/access.log mylog if=$mylogFilter;
}
# end http
生成日志样本
172.172.101.7 - - [01/Jun/2023:15:16:02 +0800] "POST /p4/core/c/checkSaveYjResult HTTP/1.1" 200 163 "http://10.70.100.100:18083/main.html" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.95 Safari/537.36" "-" "172.172.101.7" 0.004 0.004 113 785
172.172.101.7 - - [01/Jun/2023:15:16:03 +0800] "POST /p4/core/c/switchTestPaperForTeacher HTTP/1.1" 200 162 "http://10.70.100.100:18083/main.html" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.95 Safari/537.36" "-" "172.172.101.7" 0.063 0.076 19591 20273
172.172.101.7 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/core/c/glyGetPaperByCoreStuId HTTP/1.1" 200 8256 "http://10.70.100.100:18083/main.html" "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.95 Safari/537.36" "-" "172.172.101.7" 0.019 0.020 136 813
10.73.31.63 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/app/loadCode HTTP/1.1" 200 218 "https://10.70.100.100/main.html?v=1685602349279" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.73.31.63" 0.002 0.002 37 637
10.73.31.63 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/front/queryNumber HTTP/1.1" 200 129 "https://10.70.100.100/main.html?v=1685602349279" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.73.31.63" 0.007 0.008 0 820
10.73.31.63 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/front/a/queryExamSharedGroupCount HTTP/1.1" 200 115 "https://10.70.100.100/main.html?v=1685602349279" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.73.31.63" 0.012 0.012 0 850
10.73.31.63 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/front/a/queryExamSharedGroupList HTTP/1.1" 200 377 "https://10.70.100.100/main.html?v=1685602349279" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.73.31.63" 0.009 0.009 42 924
10.80.32.92 - - [01/Jun/2023:15:16:04 +0800] "GET /p4/front/a/checkMarkExamPaper?sharedDetailId=4193840bffaa49748d326f260275c9ae&sharedGroupId=fb0fbba7fa2a452585e492eb69ff8b26 HTTP/1.1" 200 118 "https://10.70.100.100/markpaper.html?id=fb0fbba7fa2a452585e492eb69ff8b26&num=10" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.80.32.92" 0.003 0.003 - 919
10.73.31.63 - - [01/Jun/2023:15:16:04 +0800] "POST /p4/front/a/queryExamSharedGroupCount HTTP/1.1" 200 115 "https://10.70.100.100/main.html?v=1685602349279" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36" "-" "10.73.31.63" 0.010 0.011 0 850
2.用Java解析日志并插入到postgresql 中
public class NginxAccessLog {
public static void main(String[] args) throws Exception {
initDb();
InputStream is;
is = NginxAccessLog.class.getResourceAsStream("access.log");
// is = new FileInputStream("E:\\exam\\access20230601_095702.log");
readLines(is);
}
static Connection conn;
static PreparedStatement ps;
//是否保存到数据库
static boolean SAVE = true;
//是否打印日志
static boolean DEBUG = true;
private static void initDb() throws Exception {
if (SAVE) {
Class.forName("org.postgresql.Driver");
String url = "jdbc:postgresql://localhost:5432/testdb?user=postgres&password=*****&ssl=false&ApplicationName=testjdbc";
conn = DriverManager.getConnection(url);
var st = conn.createStatement();
var sql = """
-- drop table IF EXISTS nginx_access_log;
CREATE UNLOGGED TABLE IF NOT EXISTS nginx_access_log (
id serial NOT NULL ,
file_name VARCHAR(100) ,
line_no bigint NOT NULL,
request_url_md5 varchar(32),
remote_addr varchar(32) ,
remote_user varchar(32) ,
time_local TIMESTAMP,
request_method varchar(32) ,
request_url VARCHAR(500) ,
request_version varchar(32) ,
status INT,
body_bytes_sent BIGINT,
http_referer VARCHAR(2000) ,
http_user_agent VARCHAR(1000) ,
http_x_forwarded_for varchar(32) ,
proxy_add_x_forwarded_for varchar(100) ,
upstream_response_time DECIMAL(8,4),
request_time DECIMAL(8,4),
content_length BIGINT,
request_length BIGINT,
PRIMARY KEY (id)
) ;
COMMENT ON TABLE nginx_access_log IS 'nginx 访问日志分析';
COMMENT ON COLUMN nginx_access_log.file_name IS '日志文件名称';
COMMENT ON COLUMN nginx_access_log.line_no IS '行号';
COMMENT ON COLUMN nginx_access_log.request_url_md5 IS 'url md5';
COMMENT ON COLUMN nginx_access_log.remote_addr IS '客户端IP地址';
COMMENT ON COLUMN nginx_access_log.remote_user IS '客户端用户名称';
COMMENT ON COLUMN nginx_access_log.time_local IS '访问时间';
COMMENT ON COLUMN nginx_access_log.request_method IS 'http 请求方法';
COMMENT ON COLUMN nginx_access_log.request_url IS 'http 请求url';
COMMENT ON COLUMN nginx_access_log.request_version IS 'http 请求版本';
COMMENT ON COLUMN nginx_access_log.status IS '响应状态码';
COMMENT ON COLUMN nginx_access_log.body_bytes_sent IS '返回给客户端的响应体的字节数(即不含响应头)';
COMMENT ON COLUMN nginx_access_log.http_referer IS '来源referer信息';
COMMENT ON COLUMN nginx_access_log.http_user_agent IS '用户终端的User Agent';
COMMENT ON COLUMN nginx_access_log.http_x_forwarded_for IS '客户端真实IP地址';
COMMENT ON COLUMN nginx_access_log.proxy_add_x_forwarded_for IS '代理服务器IP地址 ';
COMMENT ON COLUMN nginx_access_log.upstream_response_time IS '向上游服务器发送请求并等待响应的时间';
COMMENT ON COLUMN nginx_access_log.request_time IS '处理请求所消耗的时间';
COMMENT ON COLUMN nginx_access_log.content_length IS 'Content-Length请求头的值';
COMMENT ON COLUMN nginx_access_log.request_length IS '请求的字节数(包括请求行、请求头和请求体)。注意,由于$request_length是请求解析过程中不断累加的';
""";
st.execute(sql);
sql = """
insert into nginx_access_log (
file_name
,line_no
,request_url_md5
,remote_addr
,remote_user
,time_local
,request_method
,request_url
,request_version
,status
,body_bytes_sent
,http_referer
,http_user_agent
,http_x_forwarded_for
,proxy_add_x_forwarded_for
,upstream_response_time
,request_time
,content_length
,request_length
)
values(?,?,MD5(?),?,?,? ::TIMESTAMP,?,?,?,?,?,?,?,?,?,?,?,?,?)
""";
ps = conn.prepareStatement(sql);
}
}
// 处理数据
static void processData(Map<String, String> data, int lineNo, boolean hasMore) throws Exception {
log("data:", data);
if (SAVE) {
if (!hasMore) {
ps.executeBatch();
ps.close();
} else if (lineNo % 200 == 0) {
ps.executeBatch();
} else {
int index = 1;
ps.setObject(index++, data.getOrDefault("file_name", ""));
ps.setObject(index++, lineNo);
ps.setObject(index++, data.getOrDefault("request_url", ""));
ps.setObject(index++, data.getOrDefault("remote_addr", ""));
ps.setObject(index++, data.getOrDefault("remote_user", ""));
ps.setObject(index++, data.getOrDefault("time_local", "01/May/1970:00:00:00"));
ps.setObject(index++, data.getOrDefault("request_method", ""));
ps.setObject(index++, data.getOrDefault("request_url", ""));
ps.setObject(index++, data.getOrDefault("request_version", ""));
ps.setObject(index++, Integer.valueOf(data.getOrDefault("status", "0")));
ps.setObject(index++, Long.valueOf(data.getOrDefault("body_bytes_sent", "0")));
ps.setObject(index++, data.getOrDefault("http_referer", ""));
ps.setObject(index++, data.getOrDefault("http_user_agent", ""));
ps.setObject(index++, data.getOrDefault("http_x_forwarded_for", ""));
ps.setObject(index++, data.getOrDefault("proxy_add_x_forwarded_for", ""));
ps.setObject(index++, Double.valueOf(data.getOrDefault("upstream_response_time", "0")));
ps.setObject(index++, Double.valueOf(data.getOrDefault("request_time", "0")));
ps.setObject(index++, Long.valueOf(data.getOrDefault("content_length", "0")));
ps.setObject(index++, Long.valueOf(data.getOrDefault("request_length", "0")));
ps.addBatch();
}
}
}
// 读取多行
static List<String> readLines(InputStream is) throws Exception {
String line;
int i = 1;
try (Scanner sc = new Scanner(is)) {
boolean hasMore = true;
while (hasMore) {
line = sc.nextLine();
System.out.printf("%06d: %s\n", i, line);
var m = parseLine(line);
hasMore = sc.hasNextLine();
processData(m, i, hasMore);
i++;
}
}
return null;
}
// 读取单行
static Map<String, String> parseLine(String line) {
log("----------------------------------", "");
String v;
Map<String, String> m = new HashMap<>();
try (Scanner sc = new Scanner(line).useDelimiter("\s+")) {
// $remote_addr:客户端IP地址
v = sc.next();
m.put("remote_addr", v);
log("remote_addr:", v);
// $remote_user:客户端用户名称
v = sc.next();
log("remote_user:", v);
m.put("remote_user", v);
log(sc.next(), ""); // 一个 -
// $time_local:访问时间
v = sc.next();
sc.next();// 跳过时区
v = v.substring(1);// 去掉头[
m.put("time_local", v);
log("time_local:", v);
// "$request":HTTP请求方法、访问的URL、HTTP协议版本和请求头信息的组合
v = sc.next();
v = v.substring(1);// 去掉开始"
m.put("request_method", v);
log("request_method:", v);
v = sc.next();
int pIndex = v.indexOf("?");
v = v.substring(0, pIndex > 0 ? pIndex : v.length());// 去掉参数,只保留url
m.put("request_url", v);
log("request_url:", v);
v = sc.next();
v = v.substring(0, v.length() - 1);// 去掉结尾"
m.put("request_version", v);
log("request_version:", v);
// $status:响应状态码
v = sc.next();
m.put("status", v);
log("status:", v);
// $body_bytes_sent:响应的字节数(不包含响应头的字节数)
v = sc.next();
m.put("body_bytes_sent", v);
log("body_bytes_sent:", v);
// $http_referer:referer信息
v = sc.next();
m.put("http_referer", v);
log("http_referer:", v);
// $http_user_agent:用户终端的User Agent
v = sc.next();
String http_user_agent = v;
while (!v.endsWith("\"") && sc.hasNext()) {
v = sc.next();
http_user_agent += " " + v;
}
m.put("http_user_agent", http_user_agent);
log("http_user_agent:", http_user_agent);
// $http_x_forwarded_for 客户端真实IP地址
v = sc.next();
m.put("http_user_agent", v);
log("http_x_forwarded_for:", v);
// $proxy_add_x_forwarded_for 代理服务器IP地址,可能有多个
// "10.80.38.54, 127.0.0.1"
v = sc.next();
String proxy_add_x_forwarded_for = v;
while (!v.endsWith("\"") && sc.hasNext()) {
v = sc.next();
proxy_add_x_forwarded_for += " " + v;
}
m.put("proxy_add_x_forwarded_for", proxy_add_x_forwarded_for);
log("proxy_add_x_forwarded_for:", proxy_add_x_forwarded_for);
// $upstream_response_time 向上游服务器发送请求并等待响应的时间
// 不知道为什么格式是这样 "10.107.43.42" 1.006, 1.002, 1.006, 0.010 3.023 - 433
v = sc.next();
while (v.endsWith(",") && sc.hasNext()) {
//跳过中间的 1.006, 1.002, 1.006,
v = sc.next();
}
v = v.replaceAll("-", "0");
m.put("upstream_response_time", v);
log("upstream_response_time:", v);
// $request_time 处理请求所消耗的时间
v = sc.next();
m.put("request_time", v);
log("request_time:", v);
// $content_length:“Content-Length”请求头的值,
v = sc.next();
v = v.replaceAll("-", "0");
m.put("content_length", v);
log("content_length:", v);
// $request_length:请求的字节数
v = sc.next();
v = v.replaceAll("-", "0");
m.put("request_length", v);
log("request_length:", v);
log("============================================", "");
}
return m;
}
static void log(String lable, Object msg) {
if (DEBUG) {
System.out.println(lable + msg);
}
}
}
3. 用sql统计分析
SELECT
t.request_url_md5
, COUNT(t.request_url) AS "请求次数"
, ROUND( 100*SUM(CASE WHEN t.status>=500 THEN 1 ELSE 0 END )/COUNT(t.request_url),0) "status500(%)"
, ROUND( 100*SUM(CASE WHEN t.status>=400 AND t.status<500 THEN 1 ELSE 0 END )/COUNT(t.request_url),0) "status400(%)"
, ROUND( 100*SUM(CASE WHEN t.status>=200 AND t.status<300 THEN 1 ELSE 0 END )/COUNT(t.request_url),0) "status200(%)"
,min(t.request_url) AS "请求URL"
,MIN(t.body_bytes_sent) AS "最小响应体字节数"
,MAX(t.body_bytes_sent) AS "最大响应体字节数"
,ROUND( AVG(t.body_bytes_sent),0) AS "平均响应体的字节数"
,MIN(t.upstream_response_time) AS "最小tomcat响应时间(秒)"
,MAX(t.upstream_response_time) AS "最大tomcat响应时间(秒)"
,ROUND( AVG(t.upstream_response_time),3) AS "平均tomcat响应时间(秒)"
,MIN(t.request_time) AS "最小nginx响应时间(秒)"
,MAX(t.request_time) AS "最大nginx响应时间(秒)"
,ROUND( AVG(t.request_time),3) AS "平均nginx响应时间(秒)"
,MIN(t.content_length) AS "最小content_length"
,MAX(t.content_length) AS "最大content_length"
,ROUND( AVG(t.content_length),0) AS "平均content_length"
,MIN(t.request_length) AS "最小request_length"
,MAX(t.request_length) AS "最大request_length"
,ROUND( AVG(t.request_length),0) AS "平均request_length"
FROM nginx_access_log t
-- 不以 /a/b/c/开头
WHERE
POSITION('/a/b/c/' IN request_url)<=0
AND
-- 不以 /d开头
POSITION('/d/' IN request_url)>0
group by t.request_url_md5
-- ORDER BY request_url
ORDER BY ROUND( AVG(t.upstream_response_time),3) DESC --"平均tomcat响应时间(秒)"
--ORDER BY ROUND( AVG(t.request_time),3) DESC --"平均nginx响应时间(秒)"
-- SELECT POSITION ('11/p4student/api/ws/' IN '/p4student/api/ws/0/24b888c355c7472a9205993cbfffdf39')
4. 分析结果
注意优化 “平均tomcat响应时间(秒)” 时间长的,且"请求次数" 比较大的接口
当然也可以写别的sql来分析哪些ip 访问最频繁, 按天分组求访问量等…