zoukankan      html  css  js  c++  java
  • 转:UTF8与GB2312之间的互换

    UTF-8与GB2312之间的互换

    作者:吴康彬
      相信一定有不少的程序开发人员时常会遇到字符编码的问题,而这个问题也是非常让人头痛的。因为这些都是潜在的错误,要找出这些错误也得要有这方面的开发经验才行。特别是在处理xml文档时,该问题的出现就更加频繁了。有一次用java写服务器端程序,用vc写客户端与之交互。交互的协议都是用xml写的。结果在通讯时老是发现数据接收不正确。纳闷!于是用抓取网络数据包工具抓取数据,后来才发现原来是java上xml的头是这样的<?xml version="1.0" encoding="UTF-8"?>,而vc上默认的是GB2312。所以一遇到汉字数据就不正确了。去网上找资料,这方面的文章好像特别少,针对像这样的问题,下面我介绍一下我自己写的一个转换程序。当然,程序很简单。如果有画蛇添足的地方,还望各位高手一笑了之。
      如果您对UTF-8、Unicode、GB2312等还是很陌生的话,请查看http://www.linuxforum.net/books/UTF-8-Unicode.html,我这里就不浪费口舌了。下面介绍一下WinAPI的两个函数:WideCharToMultiByte、MultiByteToWideChar。

    函数原型:

    // Win32 API prototype, quoted here for reference.
    int WideCharToMultiByte(
     UINT CodePage, // code page
     DWORD dwFlags, // performance and mapping flags
     LPCWSTR lpWideCharStr, // wide-character string
     int cchWideChar, // number of chars in string
     LPSTR lpMultiByteStr, // buffer for new string
     int cbMultiByte, // size of buffer
     LPCSTR lpDefaultChar, // default for unmappable chars
     LPBOOL lpUsedDefaultChar // set when default char used
    ); // Converts a wide-character string into a multibyte (narrow-character) string

    // Win32 API prototype, quoted here for reference.
    int MultiByteToWideChar(
     UINT CodePage, // code page
     DWORD dwFlags, // character-type options
     LPCSTR lpMultiByteStr, // string to map
     int cbMultiByte, // number of bytes in string
     LPWSTR lpWideCharStr, // wide-character buffer
     int cchWideChar // size of buffer
    );// Converts a multibyte (narrow-character) string into a wide-character string.      Helper functions needed below:
    // Converts one hexadecimal digit to its four-character binary text.
    //
    // string: exactly one of "0".."9" or lowercase "a".."f".
    // Returns the matching pattern ("0000".."1111"), or "" for any other input
    // (including uppercase digits and strings of any other length).
    CString CXmlProcess::HexToBin(CString string)
    {
        // Parallel tables: entry i pairs hex digit i with its bit pattern.
        static const char* const kHexDigits[16] = {
            "0", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "a", "b", "c", "d", "e", "f"
        };
        static const char* const kBitPatterns[16] = {
            "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
            "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
        };

        for (int idx = 0; idx < 16; ++idx)
        {
            if (string == kHexDigits[idx])
                return kBitPatterns[idx];
        }
        return "";
    }


    // Converts a four-character binary pattern to one hexadecimal digit.
    //
    // BinString: exactly one of "0000".."1111".
    // Returns the matching lowercase digit ("0".."9", "a".."f"), or "" for any
    // other input.
    CString CXmlProcess::BinToHex(CString BinString)
    {
        // Parallel tables: entry i pairs the bit pattern of value i with its hex digit.
        static const char* const kBitPatterns[16] = {
            "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
            "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
        };
        static const char* const kHexDigits[16] = {
            "0", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "a", "b", "c", "d", "e", "f"
        };

        for (int idx = 0; idx < 16; ++idx)
        {
            if (BinString == kBitPatterns[idx])
                return kHexDigits[idx];
        }
        return "";
    }

    int CXmlProcess::BinToInt(CString string)//2进制字符数据转换成10进制整型
    {
     int len =0;
     int tempInt = 0;
     int strInt = 0;
     for(int i =0 ;i < string.GetLength() ;i ++)
     {
             tempInt = 1;
             strInt = (int)string.GetAt(i)-48;
             for(int k =0 ;k < 7-i ; k++)
             {
       tempInt = 2*tempInt;
             }
             len += tempInt*strInt;
     }
     return len;
    }      UTF-8转换成GB2312先把UTF-8转换成Unicode.然后再把Unicode通过函数WideCharToMultiByte转换成GB2312
    // Decodes the UTF-8 sequence that starts at ustart into one UTF-16 code unit.
    //
    // ustart: points at the lead byte of a 1-, 2- or 3-byte UTF-8 sequence.
    // Returns a heap-allocated WCHAR[1] holding the decoded value; the caller
    // owns it and releases it with delete[]. Heap storage is used because the
    // result has to stay valid after this function returns.
    // Returns NULL when ustart is NULL. A lead byte of any other kind, or a
    // missing continuation byte, decodes to U+FFFD (replacement character).
    WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart)
    {
        if (ustart == NULL)
            return NULL;

        // Work on unsigned bytes so values >= 0x80 are not sign-extended.
        const unsigned char* bytes = (const unsigned char*)ustart;
        const unsigned int lead = bytes[0];
        unsigned int codeUnit = 0xFFFD;

        if (lead < 0x80)
        {
            // 0xxxxxxx: plain ASCII.
            codeUnit = lead;
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            // 110xxxxx 10yyyyyy
            if ((bytes[1] & 0xC0) == 0x80)
                codeUnit = ((lead & 0x1F) << 6) | (bytes[1] & 0x3F);
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            // 1110xxxx 10yyyyyy 10zzzzzz
            // The second byte is checked first so a truncated sequence never
            // causes a read past its terminator.
            if ((bytes[1] & 0xC0) == 0x80 && (bytes[2] & 0xC0) == 0x80)
            {
                codeUnit = ((lead & 0x0F) << 12)
                         | ((bytes[1] & 0x3F) << 6)
                         | (bytes[2] & 0x3F);
            }
        }

        WCHAR* unicode = new WCHAR[1];
        unicode[0] = (WCHAR)codeUnit;
        return unicode;
    }

    char * CXmlProcess::UnicodeToGB2312(unsigned short uData)  //把Unicode 转换成 GB2312
    {
     char *buffer ;
     buffer = new char[sizeof(WCHAR)];
     WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);
     return buffer;
    }        GB2312转换成UTF-8:先把GB2312通过函数MultiByteToWideChar转换成Unicode.然后再把Unicode通过拆开Unicode后拼装成UTF-8。

    // Converts the character at gbBuffer from the system ANSI code page (CP_ACP)
    // to one UTF-16 code unit.
    //
    // gbBuffer: points at a single-byte character (< 0x80) or at the lead byte
    //           of a double-byte character.
    // Returns a heap-allocated WCHAR[1]; the caller owns it and releases it with
    // delete[]. It holds U+FFFD (replacement character) when gbBuffer is NULL or
    // the conversion fails.
    WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)
    {
        WCHAR *uniChar = new WCHAR[1];
        uniChar[0] = 0xFFFD;
        if (gbBuffer == NULL)
            return uniChar;

        // A byte with the high bit set starts a double-byte character. Anything
        // else is one byte, and passing two bytes for it would need two output
        // slots and make the call fail.
        int byteCount = 1;
        if ((((unsigned char)gbBuffer[0]) & 0x80) != 0 && gbBuffer[1] != '\0')
            byteCount = 2;

        if (::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, byteCount, uniChar, 1) == 0)
            uniChar[0] = 0xFFFD;
        return uniChar;
    }
    char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar) // Unicode 转换成UTF-8
    {
     char *buffer;
     CString strOne;
     CString strTwo;
     CString strThree;
     CString strFour;
     CString strAnd;
     buffer = new char[3];
     int hInt,lInt;
     hInt = (int)((*UniChar)/256);
     lInt = (*UniChar)%256;
     CString string ;
     string.Format("%x",hInt);
     strTwo = HexToBin(string.Right(1));
     string = string.Left(string.GetLength() - 1);
     strOne = HexToBin(string.Right(1));
     string.Format("%x",lInt);
     strFour = HexToBin(string.Right(1));
     string = string.Left(string.GetLength() -1);
     strThree = HexToBin(string.Right(1));
     strAnd = strOne +strTwo + strThree + strFour;
     strAnd.Insert(0,"1110");
     strAnd.Insert(8,"10");
     strAnd.Insert(16,"10");
     strOne = strAnd.Left(8);
     strAnd = strAnd.Right(16);
     strTwo = strAnd.Left(8);
     strThree = strAnd.Right(8);
     *buffer = (char)BinToInt(strOne);
     buffer[1] = (char)BinToInt(strTwo);
     buffer[2] = (char)BinToInt(strThree);
     return buffer;
    }     例子:将GB2312转换成UTF-8的调用:
    char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
    {
     int newCharLen =0 ;
     int oldCharLen = 0;
     int revCharLen = len;
     char* newCharBuffer;
     char* finalCharBuffer;
     char *buffer ;
     CString string;
     buffer  = new char[sizeof(WCHAR)];
     newCharBuffer = new char[int(1.5*revCharLen)];//设置最大的一个缓冲区
     while(oldCharLen < revCharLen)
     {
      if( *(xmlStream + oldCharLen) >= 0)
      {
       *(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
       newCharLen ++;
       oldCharLen ++;
      }//如果是英文直接复制就可以
      else
      {
       WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
       buffer = this->UnicodeToUTF_8(pbuffer);
       *(newCharBuffer+newCharLen) = *buffer;
       *(newCharBuffer +newCharLen +1) = *(buffer + 1);
       *(newCharBuffer +newCharLen +2) = *(buffer + 2);
       newCharLen += 3;
       oldCharLen += 2;
      }
     }
     newCharBuffer[newCharLen] = ''\0'';
     CString string1 ;
     string1.Format("%s",newCharBuffer);
     finalCharBuffer = new char[newCharLen+1];
     memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
     return finalCharBuffer;
    }
    程序都非常的简单,由于实在太穷。已经吃了两天的方便面。所以现在头昏,程序的详细说明就不写了。程序员到了像我这样的地步也真是少见。工资低没有办法。哎!!!!

    本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/andylin02/archive/2010/01/22/5224962.aspx

  • 相关阅读:
    信息检索重点关键字
    信息检索重点关键字
    信息检索重点关键字
    信息检索关键词部分
    信息检索关键词部分
    信息检索关键词部分
    输入五个国家的名称按字母顺序排列输出
    把一个整数按大小顺序插入已排好序的数组中
    快放假了
    清炒苦瓜
  • 原文地址:https://www.cnblogs.com/cumtb3S/p/1758381.html
Copyright © 2011-2022 走看看