import java.io.*; public class charsetTest { public static String charsetType(String fileName) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "utf-8")); String str=reader.readLine(); int len1=str.length(); reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "gbk")); str=reader.readLine(); int len2=str.length(); if(len1<=len2){ return "utf-8"; } else{ return "gbk"; } } public static void main(String[] args) throws IOException { System.out.println("gbk.txt:"+charsetType("gbk.txt")); System.out.println("utf.txt:"+charsetType("utf.txt")); } }
在日常处理中文文本的过程中发现的一种有趣现象,实际运行可准确判断中文字符编码类型,但有无例外情况尚不可知