某些错误的编码/解码组合生成的字符串可能会丢失字节信息,导致无法还原或只能部分还原。下面的测试例子先用 UTF-8 编码、再用 ISO-8859-1 解码,这种组合产生的乱码是可以完整还原的。
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;
/**
 * Scratch demo of mojibake recovery: a string is encoded as UTF-8, wrongly decoded as
 * ISO-8859-1, and then every ordered pair of candidate charsets is tried to find a
 * re-encode / re-decode combination that restores the original text.
 */
class Scratch {

    /** Charset names tried, in this order, on both the re-encoding and the re-decoding side. */
    private static final String[] CANDIDATE_CHARSETS =
            {"GBK", "GB2312", "ISO-8859-1", "windows-1252", "GB18030", "Big5", "UTF-8"};

    /**
     * Runs the demo.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException declared because {@link #transferCharSet()} declares it
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        transferCharSet();
    }

    /**
     * Produces a wrongly decoded string (UTF-8 bytes read as ISO-8859-1), then, for every
     * ordered pair of distinct candidate charsets, re-encodes it with the first and decodes
     * the bytes with the second, printing each attempt. Finally prints the pair that
     * reproduced the original text (the last such pair if several do), or a failure message.
     *
     * <p>Candidate charsets the running JVM does not support are reported and skipped.
     *
     * @throws UnsupportedEncodingException never thrown by this implementation; the clause is
     *         kept so that existing callers which catch or propagate it still compile
     */
    public static void transferCharSet() throws UnsupportedEncodingException {
        String test = "正常字符串";
        // Encode as UTF-8 ...
        byte[] testBytes = test.getBytes(StandardCharsets.UTF_8);
        // ... and (wrongly) decode as ISO-8859-1, which yields mojibake.
        String str = new String(testBytes, StandardCharsets.ISO_8859_1);
        // Undoing this requires encoding with ISO-8859-1 and then decoding with UTF-8.
        System.out.println("字符串原文: " + test + ",字符串错误解码: " + str);

        // Resolve each name once; a null entry marks a charset this JVM cannot provide.
        Charset[] charsets = new Charset[CANDIDATE_CHARSETS.length];
        for (int k = 0; k < CANDIDATE_CHARSETS.length; k++) {
            String name = CANDIDATE_CHARSETS[k];
            if (Charset.isSupported(name)) {
                charsets[k] = Charset.forName(name);
            } else {
                System.out.println("跳过当前JVM不支持的字符集: " + name);
            }
        }

        String decode = null;
        String code = null;
        for (int i = 0; i < charsets.length; i++) {
            if (charsets[i] == null) {
                continue;
            }
            // The re-encoded bytes depend only on the outer charset, so compute them once.
            byte[] encoded = str.getBytes(charsets[i]);
            String encodedText = Arrays.toString(encoded);
            for (int j = 0; j < charsets.length; j++) {
                if (i == j || charsets[j] == null) {
                    continue;
                }
                String s = new String(encoded, charsets[j]);
                if (Objects.equals(s, test)) {
                    code = CANDIDATE_CHARSETS[i];
                    decode = CANDIDATE_CHARSETS[j];
                }
                System.out.printf("先按照%s获取字符串的二进制:,然后按%s编码解读这个二进制,得到一个新的字符串:%s,字符编码: %s%n", CANDIDATE_CHARSETS[i], CANDIDATE_CHARSETS[j], s, encodedText);
            }
        }

        if (decode == null) {
            System.out.println("无法还原");
        } else {
            System.out.println("编码: " + code + ", 解码: " + decode);
        }
    }
}