Java
请求信息
GET / HTTP/1.1
Host: localhost:7003
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5
Cookie: test2=ab+cd; test1=ab%2Bcd
服务端
/**
 * Demo controller used to observe how the servlet container exposes cookie
 * values: every cookie on the incoming request is logged as {@code name=value}.
 */
@Controller
@Slf4j
public class MainController {

    @Autowired
    private HttpServletRequest request;

    /**
     * Handles {@code GET /}: logs each cookie of the current request and
     * returns the literal response body {@code "index"}.
     *
     * @return the string {@code "index"}
     */
    @GetMapping("/")
    @ResponseBody
    public String index() {
        Cookie[] received = request.getCookies();
        // getCookies() yielded null: nothing to log.
        if (received == null) {
            return "index";
        }
        for (int i = 0; i < received.length; i++) {
            Cookie current = received[i];
            log.info(current.getName() + "=" + current.getValue());
        }
        return "index";
    }
}
控制台输出
2019-05-16 18:03:32.770 INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController : test2=ab+cd
2019-05-16 18:03:32.770 INFO 10114 --- [nio-7003-exec-1] net.mengkang.demo.MainController : test1=ab%2Bcd
Php
GET / HTTP/1.1
Host: localhost:8084
Connection: keep-alive
Pragma: no-cache
Cache-Control: no-cache
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
Accept-Encoding: gzip, deflate, br
Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,pt;q=0.5
Cookie: test2=ab+cd; test1=ab%2Bcd
服务端
// Dump the cookies exactly as PHP parsed them into $_COOKIE.
var_export($_COOKIE);
array (
'test2' => 'ab cd',
'test1' => 'ab+cd',
)
结果对比
对比发现:Java 不会对 cookie 数据做任何处理,而 PHP 则会默认进行一次 urldecode 操作(`+` 被还原为空格,`%2B` 被还原为 `+`)。这就导致两边系统获取同一个 cookie 时结果不一致,从而产生 bug。
类似的问题 PHP 在解析外部变量时的一个 BUG
Php 源码分析
主要查看两处源码
main/php_variables.c
ext/standard/url.c
/*
 * Excerpt of PHP's default "treat data" handler (the `...` below marks code
 * elided by the article; the setup of `res`, `array`, `count` and
 * `free_buffer` is not visible here).
 *
 * The visible part tokenizes the raw input string `res` on a separator chosen
 * from `arg`, URL-decodes every name and value with php_url_decode(), passes
 * the value through sapi_module.input_filter() and, when the filter accepts
 * it, registers the variable into `array`.
 */
SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data)
{
...
/* Pick the token separator for the kind of input being parsed. */
switch (arg) {
case PARSE_GET:
case PARSE_STRING:
separator = PG(arg_separator).input;
break;
case PARSE_COOKIE:
separator = ";\0"; // ';' is the separator between cookies, as seen in the Cookie request header sent by the browser
break;
}
var = php_strtok_r(res, separator, &strtok_buf);
while (var) {
/* Locate the '=' that splits "name=value"; NULL when the token has no value. */
val = strchr(var, '=');
if (arg == PARSE_COOKIE) {
/* Remove leading spaces from cookie names, needed for multi-cookie header where ; can be followed by a space */
while (isspace(*var)) {
var++;
}
/* Skip cookies whose name is empty ("=value" or an all-whitespace token). */
if (var == val || *var == '\0') {
goto next_cookie;
}
}
/* Stop parsing (with a warning) once more than max_input_vars variables were seen. */
if (++count > PG(max_input_vars)) {
php_error_docref(NULL, E_WARNING, "Input variables exceeded " ZEND_LONG_FMT ". To increase the limit change max_input_vars in php.ini.", PG(max_input_vars));
break;
}
if (val) { /* have a value */
size_t val_len;
size_t new_val_len;
/* Terminate the name in place and advance val to the first byte of the value. */
*val++ = '\0';
/*
 * Both name and value are URL-decoded in place. This happens for every
 * `arg` kind, cookies included, which is why "ab+cd" arrives as "ab cd".
 */
php_url_decode(var, strlen(var));
val_len = php_url_decode(val, strlen(val));
/* Work on a private copy of the value; it is handed to the filter by address. */
val = estrndup(val, val_len);
if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) {
php_register_variable_safe(var, val, new_val_len, &array);
}
efree(val);
} else {
/* No '=' in the token: register the name with an empty-string value. */
size_t val_len;
size_t new_val_len;
php_url_decode(var, strlen(var));
val_len = 0;
val = estrndup("", val_len);
if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len)) {
php_register_variable_safe(var, val, new_val_len, &array);
}
efree(val);
}
next_cookie:
/* Advance to the next separator-delimited token. */
var = php_strtok_r(NULL, separator, &strtok_buf);
}
/* Release the input buffer when free_buffer is set (set in the elided part). */
if (free_buffer) {
efree(res);
}
}
我们看到cookie的值会被执行php_url_decode
操作,下面附带其源码,且加上一段测试代码
#include <ctype.h>
#include <memory.h>
#include <stdio.h>
#include <string.h>
/*
 * Map one hexadecimal digit character to its numeric value.
 * Upper-case letters are folded to lower case first; anything that is not
 * '0'..'9' is treated as a letter offset from 'a' (callers are expected to
 * have validated the input with isxdigit()).
 */
static int php_htoi_nibble(unsigned char raw) {
    int digit = isupper(raw) ? tolower(raw) : raw;

    if (digit >= '0' && digit <= '9') {
        return digit - '0';
    }
    return digit - 'a' + 10;
}

/*
 * Convert the two hexadecimal digits at s[0] and s[1] into an integer:
 * s[0] is the high nibble, s[1] the low nibble.
 *
 * s: pointer to at least two readable characters.
 * Returns the combined value (0..255 for valid hex digits).
 */
static int php_htoi(char *s) {
    const unsigned char *pair = (const unsigned char *) s;

    return php_htoi_nibble(pair[0]) * 16 + php_htoi_nibble(pair[1]);
}
/*
 * Decode a URL-encoded string in place.
 *
 * '+' becomes a space, "%XX" (two hex digits) becomes the byte with that
 * value, and every other byte is copied unchanged. A '%' that is not followed
 * by two hex digits within the remaining length is copied literally.
 *
 * str: buffer holding the encoded text; it is overwritten with the result.
 *      A terminating '\0' is written after the decoded bytes, so the buffer
 *      must have room for len + 1 bytes.
 * len: number of encoded bytes to process.
 *
 * Returns the length of the decoded string (excluding the terminator).
 */
size_t php_url_decode(char *str, size_t len) {
    char *dest = str;
    char *data = str;

    while (len--) {
        if (*data == '+') {
            *dest = ' ';
        } else if (*data == '%' && len >= 2
                   /*
                    * <ctype.h> functions require a value representable as
                    * unsigned char (or EOF); passing a plain char is undefined
                    * for bytes >= 0x80 where char is signed.
                    */
                   && isxdigit((unsigned char) data[1])
                   && isxdigit((unsigned char) data[2])) {
            *dest = (char) php_htoi(data + 1);
            /* Skip the two hex digits; the '%' itself is consumed below. */
            data += 2;
            len -= 2;
        } else {
            *dest = *data;
        }
        data++;
        dest++;
    }
    *dest = '\0';
    return (size_t) (dest - str);
}
/*
 * Demo driver: decodes the sample cookie value "ab+cd" in place with
 * php_url_decode() and prints the result followed by a newline.
 */
int main() {
    char sample[] = "ab+cd";

    php_url_decode(sample, strlen(sample));
    puts(sample);
    return 0;
}
上面的 php_url_decode 用到了 php_htoi。这是因为 urlencode 按照 RFC 1738 进行编码:字符串中除了 -_. 之外的所有非字母数字字符都会被替换成百分号(%)后跟两位十六进制数。php_htoi 的作用就是把这两位十六进制数字转换为整数(Converting Hexadecimal Digits Into Integers),然后把计算出来的整型转换为 char,存回处理完之后的字符数组里。
扩展讨论
rawurlencode
与urlencode
的区别是什么?
手册上的解释是:
urlencode
返回字符串,此字符串中除了 -_. 之外的所有非字母数字字符都将被替换成百分号(%)后跟两位十六进制数,空格则编码为加号(+)。此编码与 WWW 表单 POST 数据的编码方式是一样的,同时与 application/x-www-form-urlencoded 的媒体类型编码方式一样。由于历史原因,此编码在将空格编码为加号(+)方面与 » RFC3986 编码(参见 rawurlencode())不同。
/*
 * Decode a raw-URL-encoded string in place.
 *
 * "%XX" (two hex digits) becomes the byte with that value; every other byte,
 * including '+', is copied unchanged. A '%' that is not followed by two hex
 * digits within the remaining length is copied literally.
 *
 * str: buffer holding the encoded text; it is overwritten with the result.
 *      A terminating '\0' is written after the decoded bytes, so the buffer
 *      must have room for len + 1 bytes.
 * len: number of encoded bytes to process.
 *
 * Returns the length of the decoded string (excluding the terminator).
 */
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
{
    char *dest = str;
    char *data = str;

    while (len--) {
        /*
         * <ctype.h> functions require a value representable as unsigned char
         * (or EOF); passing a plain char is undefined for bytes >= 0x80 where
         * char is signed.
         */
        if (*data == '%' && len >= 2
            && isxdigit((unsigned char) data[1])
            && isxdigit((unsigned char) data[2])) {
#ifndef CHARSET_EBCDIC
            *dest = (char) php_htoi(data + 1);
#else
            /* NOTE(review): table index is cast to char; confirm it cannot go negative on the target platform. */
            *dest = os_toebcdic[(char) php_htoi(data + 1)];
#endif
            /* Skip the two hex digits; the '%' itself is consumed below. */
            data += 2;
            len -= 2;
        } else {
            *dest = *data;
        }
        data++;
        dest++;
    }
    *dest = '\0';
    return (size_t) (dest - str);
}
通过源码可以看到,php_raw_url_decode 与 php_url_decode 的区别就是少了对 + 的处理(不会把 + 还原成空格)。
总结
不管是 $_GET、$_POST 还是 $_COOKIE,拿到的都是经过 urldecode 处理之后的"二手数据",这就导致和 Java 那边获取到的 cookie 值不一样了。