版权声明:无意呢。 https://blog.csdn.net/qq_41900081/article/details/85399192
一、练习-数字对应的中文
找出 E5 B1 8C 这3个十六进制对应UTF-8编码的汉字
参考代码1
// Find the Chinese character whose UTF-8 encoding is the three bytes E5 B1 8C.
// The casts are required because 0xE5 etc. exceed the signed byte range.
byte[] utf8Bytes = {(byte) 0xE5, (byte) 0xB1, (byte) 0x8C};
String decoded = new String(utf8Bytes, "UTF-8");
System.out.println("E5B18C 对应的字符是:" + decoded);
参考代码2
String hex = "E5B18C";
// Every two hex digits form one byte.
byte[] decodedBytes = new byte[hex.length() / 2];
int idx = 0;
while (idx < decodedBytes.length) {
    // Parse the digit pair as a base-16 number, then narrow it to a byte.
    String pair = hex.substring(2 * idx, 2 * idx + 2);
    decodedBytes[idx] = Integer.valueOf(pair, 16).byteValue();
    idx++;
}
String text = new String(decodedBytes, "UTF-8");
System.out.println("E5B18C 对应的字符是:" + text);
运行结果
二、练习-移除BOM
如果用记事本按UTF-8编码保存汉字,就会在文件最前面生成一段标识符(即BOM),这个标识符用于表示该文件是使用UTF-8编码的。找出这段标识符对应的十六进制,并且开发一个方法,自动去除这段标识符
在移除BOM之前,我们首先要知道UTF-8的BOM对应的编码是多少,测试代码如下
package test;
import java.io.File;
import java.io.FileInputStream;
/**
 * Prints the content of a text file followed by the hexadecimal value of every
 * byte in it, so that a leading UTF-8 byte order mark (EF BB BF) becomes visible.
 */
public class Test1 {
    /**
     * Reads {@code D:/Test/test1.txt}, prints it decoded as UTF-8 and then prints
     * each byte as a two-digit upper-case hex number separated by spaces.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File f = new File("D:/Test/test1.txt");
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] bytes = new byte[(int) f.length()];
            // A single read() may return fewer bytes than requested,
            // so keep reading until the buffer is full or EOF is reached.
            int filled = 0;
            while (filled < bytes.length) {
                int n = fis.read(bytes, filled, bytes.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            System.out.println("文件" + f.getName() + "中的内容:" + new String(bytes, 0, filled, "UTF-8"));
            System.out.print("对应的16进制表示:");
            for (int i = 0; i < filled; i++) {
                // Mask to 0..255 and always print two digits (0x0A -> "0A", not "A").
                System.out.print(String.format("%02X", bytes[i] & 0xFF) + " ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
测试结果
从测试结果以及上一个练习题可以知道,UTF-8的BOM对应的编码为:EF BB BF,且位于文件的前三个字节,到这里就可以开始解决我们的问题了
参考代码1
package review4;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
/**
 * Reads a text file and prints its content with the leading UTF-8 byte order
 * mark (EF BB BF) removed. The file on disk is not modified.
 */
public class RemoveBOM {
    /** The three bytes a UTF-8 byte order mark consists of. */
    private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

    /**
     * Prints the raw content of {@code D:/Test/test1.txt} and then the content
     * without the BOM.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File f = new File("D:/Test/test1.txt");
        try {
            byte[] read = readFully(f);
            System.out.print("文件中读出来的数据是:");
            System.out.println(new String(read, "UTF-8"));
            removeBom(f);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the content of {@code f}, decoded as UTF-8, without its leading BOM.
     * If the file does not start with EF BB BF (including files shorter than
     * three bytes) the content is printed unchanged. I/O failures are reported
     * via a stack trace on standard error.
     *
     * @param f the file to read
     */
    public static void removeBom(File f) {
        try {
            byte[] read = readFully(f);
            // Only skip the first three bytes when they really are the BOM.
            int start = startsWithBom(read) ? UTF8_BOM.length : 0;
            byte[] result = Arrays.copyOfRange(read, start, read.length);
            System.out.print("除去BOM后的结果:");
            System.out.println(new String(result, "UTF-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Returns true when {@code data} begins with the UTF-8 byte order mark. */
    private static boolean startsWithBom(byte[] data) {
        if (data.length < UTF8_BOM.length) {
            return false;
        }
        for (int i = 0; i < UTF8_BOM.length; i++) {
            if (data[i] != UTF8_BOM[i]) {
                return false;
            }
        }
        return true;
    }

    /** Reads the whole file, looping because one read() call may return fewer bytes than requested. */
    private static byte[] readFully(File f) throws java.io.IOException {
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] data = new byte[(int) f.length()];
            int filled = 0;
            while (filled < data.length) {
                int n = fis.read(data, filled, data.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            return filled == data.length ? data : Arrays.copyOf(data, filled);
        }
    }
}
参考代码2
package review4;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
/**
 * Reads a text file, prints its raw content and then the content with the
 * leading UTF-8 byte order mark (EF BB BF) removed.
 */
public class RemoveBOM {
    /** The three bytes a UTF-8 byte order mark consists of. */
    private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

    /**
     * Prints the content of {@code D:/Test/test1.txt} before and after BOM removal.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File f = new File("D:/Test/test1.txt");
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] read = readFully(fis, (int) f.length());
            System.out.print("文件中读出来的数据是:");
            System.out.println(new String(read, "UTF-8"));
            byte[] result = removeBom(read);
            System.out.println("除去BOM后的结果:" + new String(result, "UTF-8"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a copy of {@code read} without a leading UTF-8 byte order mark.
     * When the array does not start with EF BB BF (including arrays shorter
     * than three bytes) an unmodified copy is returned.
     *
     * @param read the bytes to inspect; not modified
     * @return a new array holding the bytes after the BOM, or all bytes if there is no BOM
     */
    public static byte[] removeBom(byte[] read) {
        if (read.length < UTF8_BOM.length) {
            return read.clone();
        }
        for (int i = 0; i < UTF8_BOM.length; i++) {
            if (read[i] != UTF8_BOM[i]) {
                return read.clone();
            }
        }
        return Arrays.copyOfRange(read, UTF8_BOM.length, read.length);
    }

    /** Reads up to {@code length} bytes, looping because one read() call may return fewer bytes than requested. */
    private static byte[] readFully(FileInputStream fis, int length) throws java.io.IOException {
        byte[] data = new byte[length];
        int filled = 0;
        while (filled < data.length) {
            int n = fis.read(data, filled, data.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return filled == data.length ? data : Arrays.copyOf(data, filled);
    }
}
运行结果