经查,这些特殊表情属于 UTF-8 的 4 字节字符,需要 utf8mb4 编码才能存储。数据库虽然支持该编码,但改成该编码后仍然没有插入成功。由于项目中不需要展示这些表情,所以没有继续深究,干脆把它们替换成空格。代码如下,代码中用到的 Range 类来自 guava 包。
/**
 * Replaces every character whose UTF-8 encoding needs four bytes with spaces.
 *
 * <p>A Java {@code char} is a UTF-16 code unit, so a code point above U+FFFF
 * (which is exactly what needs four bytes in UTF-8, emoji such as
 * U+1F601..U+1F64F included) always shows up in a {@code String} as a
 * surrogate pair. Detecting surrogates is therefore sufficient; no separate
 * emoji range check is needed. A {@code char}-based range written with
 * {@code \}{@code uXXXX} escapes cannot express such code points anyway: the
 * escape only consumes four hex digits, so it would match unrelated BMP
 * characters instead.
 *
 * <p>Unpaired surrogates are replaced as well. Each replaced UTF-16 code unit
 * becomes one space, so for non-blank input the result has the same length as
 * the input.
 *
 * <p>See:
 * http://cenalulu.github.io/linux/character-encoding/ and
 * http://stackoverflow.com/questions/14981109/checking-utf-8-data-type-3-byte-or-4-byte-unicode
 *
 * @param str the text to sanitize; may be {@code null}
 * @return {@code StringUtils.EMPTY} when {@code StringUtils.isBlank(str)} is true,
 *         otherwise a copy of {@code str} with all surrogate code units replaced by spaces
 */
private static String replaceUtf84byte(String str) {
    if (StringUtils.isBlank(str)) {
        return StringUtils.EMPTY;
    }

    char[] chars = str.toCharArray();
    int i = 0;
    while (i < chars.length) {
        if (!Character.isSurrogate(chars[i])) {
            i++;
            continue;
        }

        // codePointAt combines a valid high/low pair into one code point and
        // returns the lone code unit itself when the surrogate is unpaired.
        int codePoint = Character.codePointAt(chars, i);
        int end = i + Character.charCount(codePoint);

        // Log once per code point in U+XXXX form; half of a surrogate pair is
        // not printable on its own.
        Logger.warn(String.format("U+%04X", codePoint) + "字符为 surrogate code unit, utf-8为4字节编码, 替换成空格");

        // Blank out every code unit of this code point (one or two chars).
        for (; i < end; i++) {
            chars[i] = ' ';
        }
    }
    return new String(chars);
}