zoukankan html css js c++ java

和荣笔记 - 从 Unicode 到 GB2312 转换表制作程式

在我发表了 GB2312 到 Unicode 的转换表以后，收到了读者信件，寻求 Unicode 到 GB2312 的转换表。

下面的程式便可以用来制作这样的转换表。程式的输出结果收入下一章之中。

  1/**
  2* UnicodeGB2312.java
  3* Copyright (c) 1997-2003 by Dr. Herong Yang
  4*/
  5import java.io.*;
  6import java.nio.*;
  7import java.nio.charset.*;
  8class UnicodeGB2312 {
  9static OutputStream out = null;
 10static char hexDigit[] = {'0', '1', '2', '3', '4', '5', '6', '7',
 11'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
 12static int b_out[] = {201,267,279,293,484,587,625,657,734,782,827,
 13874,901,980,1001,5590,8801};
 14static int e_out[] = {216,268,280,294,494,594,632,694,748,794,836,
 15894,903,994,1594,5594,9494};
 16public static void main(String[] a) {
 17try {
 18out = new FileOutputStream("unicode_gb2312.gb");
 19writeCode();
 20out.close();
 21} catch (IOException e) {
 22System.out.println(e.toString());
 23}
 24}
 25public static void writeCode() throws IOException {
 26CharsetEncoder gbec = Charset.forName("GBK").newEncoder();
 27char[] ca = new char[1];
 28CharBuffer cb = null;
 29ByteBuffer gbbb = null;
 30writeHeader();
 31int count = 0;
 32for (int i=0; i<0x010000; i++) {
 33ca[0] = (char) i;
 34cb = CharBuffer.wrap(ca);
 35try {
 36gbbb = gbec.encode(cb);
 37} catch (CharacterCodingException e) {
 38gbbb = null;
 39}
 40if (validGB(gbbb)) {
 41count++;
 42writeHex((byte) (ca[0] >>> 8));
 43writeHex((byte) (ca[0] & 0xff));
 44writeString(" ");
 45writeByteBuffer(gbbb,2);
 46writeString(" ");
 47writeByte(gbbb.get(0));
 48writeByte(gbbb.get(1));
 49if (count%5 == 0) writeln();
 50else writeString("   ");
 51}
 52}
 53if (count%5 != 0) writeln();
 54writeFooter();
 55System.out.println("Number of GB characters wrote: "+count);
 56}
 57public static boolean validGB(ByteBuffer gbbb) {
 58if (gbbb==null) return false;
 59else if (gbbb.limit()!=2) return false;
 60else {
 61byte hi = gbbb.get(0);
 62byte lo = gbbb.get(1);
 63if ((hi&0xFF)<0xA0) return false;
 64if ((lo&0xFF)<0xA0) return false;
 65int i = (hi&0xFF) - 0xA0;
 66int j = (lo&0xFF) - 0xA0;
 67if (i<1 || i>94) return false;
 68if (j<1 || j>94) return false;
 69for (int l=0; l<b_out.length; l++) {
 70if (i*100+j>=b_out[l] && i*100+j<=e_out[l]) return false;
 71}
 72}
 73return true;
 74}
 75public static void writeHeader() throws IOException {
 76writeString("<pre>");
 77writeln();
 78writeString("Uni. GB   ");
 79writeGBSpace();
 80writeString("   ");
 81writeString("Uni. GB   ");
 82writeGBSpace();
 83writeString("   ");
 84writeString("Uni. GB   ");
 85writeGBSpace();
 86writeString("   ");
 87writeString("Uni. GB   ");
 88writeGBSpace();
 89writeString("   ");
 90writeString("Uni. GB   ");
 91writeGBSpace();
 92writeln();
 93writeln();
 94}
 95public static void writeFooter() throws IOException {
 96writeString("</pre>");
 97writeln();
 98}
 99public static void writeln() throws IOException {
100out.write(0x0D);
101out.write(0x0A);
102}
103public static void writeGBSpace() throws IOException {
104out.write(0xA1);
105out.write(0xA1);
106}
107public static void writeByteBuffer(ByteBuffer b, int l)
108throws IOException {
109int i = 0;
110if (b==null) {
111writeString("null");
112i = 2;
113} else {
114for (i=0; i<b.limit(); i++) writeHex(b.get(i));
115}
116for (int j=i; j<l; j++) writeString("  ");
117}
118public static void writeString(String s) throws IOException {
119if (s!=null) {
120for (int i=0; i<s.length(); i++) {
121out.write((int) (s.charAt(i) & 0xFF));
122}
123}
124}
125public static void writeHex(byte b) throws IOException {
126out.write((int) hexDigit[(b >> 4) & 0x0F]);
127out.write((int) hexDigit[b & 0x0F]);
128}
129public static void writeByte(byte b) throws IOException {
130out.write(b & 0xFF);
131}
132}
133

上面的程式发表后，又有读者来信要求对程式加以说明，以便理解。其实这个程式的逻辑很简单，阅读时仅需注意以下几点：

一，Java 的 char 类型所能表示的 Unicode 编码都在 0x0000 和 0xFFFF 之间，所以子程式 writeCode() 使用了一个循环语句，以变量 i 走遍了这一范围内的全体可能编码。

二，把单个 Unicode 编码转换成 GB2312 编码的关键语句是：gbec.encode(cb)，它使用了 JDK 中 CharsetEncoder 的中文编码功能。注意，GBK 是由 GB2312 扩张而成。JDK 只提供 GBK 编码功能。

三，由于 Unicode 字符集比 GB2312 大，gbec.encode(cb) 输出的编码有许多是坏码，或者是 GBK 的扩张码，所以要用子程式 validGB() 进行验证。

四，程式的其它部分主要是用于输出的列表制作。

查看全文

相关阅读:
jmeter定时器
 创建视图的语句
 实时监控-网络、磁盘
 Nginx环境搭建准备
 nmon使用命令
 tomcat服务器安装方法
 Selenium_高级操作_鼠标操作选择下拉框
 实例：对象库如何工作：
自动化
 【Linux】环境搭建

原文地址：https://www.cnblogs.com/meil/p/635875.html

和荣笔记 从 Unicode 到 GB2312 转换表制作程式

和荣笔记 - 从 Unicode 到 GB2312 转换表制作程式