zoukankan      html  css  js  c++  java
  • [导入]UTF8与GB2312之间的互换(转过来再看看)

    UTF-8与GB2312之间的互换

    作者:吴康彬

      相信一定有不少的程序开发人员时常会遇到字符编码的问题,而这个问题也是非常让人头痛的。因为这些都是潜在的错误,要找出这些错误也得要有这方面的开发经验才行。特别是在处理xml文档时,该问题的出现就更加的频繁了,有一次用java写服务器端程序,用vc写客户端与之交互。交互的协议都是用xml写的。结果在通讯时老是发现数据接收不正确。纳闷!于是用抓取网络数据包工具抓取数据,后来才发现原来是java上xml的头是这样的<?xml version="1.0" encoding="UTF-8"?>,而vc上默认的是GB2312。所以一遇到汉字数据就不正确了。去网上找资料,这方面的文章好像特别少,针对像这样的问题,下面我介绍一下我自己写的一个转换程序。当然,程序很简单。如果有画蛇添足的地方,还望各位高手一笑了之。
      如果您对UTF-8、Unicode、GB2312等还是很陌生的话,请查看 http://www.linuxforum.net/books/UTF-8-Unicode.html 。下面先介绍WinAPI的两个函数:WideCharToMultiByte、MultiByteToWideChar。

    函数原型:
    int WideCharToMultiByte(
    UINT CodePage, // code page
    DWORD dwFlags, // performance and mapping flags
    LPCWSTR lpWideCharStr, // wide-character string
    int cchWideChar, // number of chars in string
    LPSTR lpMultiByteStr, // buffer for new string
    int cbMultiByte, // size of buffer
    LPCSTR lpDefaultChar, // default for unmappable chars
    LPBOOL lpUsedDefaultChar // set when default char used
    ); // converts a wide-character string into a multibyte (narrow) string

    int MultiByteToWideChar(
    UINT CodePage, // code page
    DWORD dwFlags, // character-type options
    LPCSTR lpMultiByteStr, // string to map
    int cbMultiByte, // number of bytes in string
    LPWSTR lpWideCharStr, // wide-character buffer
    int cchWideChar // size of buffer
    );// converts a multibyte (narrow) string into a wide-character string. Helper functions that are needed:
    // Converts one lowercase hexadecimal digit ("0".."9", "a".."f") into its
    // 4-character binary text form, e.g. "a" -> "1010".
    // Any other input (including uppercase digits or longer strings) yields "".
    CString CXmlProcess::HexToBin(CString string)
    {
        static const char* const kHexDigits[16] = {
            "0", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "a", "b", "c", "d", "e", "f"
        };
        static const char* const kNibbleBits[16] = {
            "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
            "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
        };

        // The table index is the numeric value of the digit.
        for (int idx = 0; idx < 16; ++idx)
        {
            if (string == kHexDigits[idx])
            {
                return kNibbleBits[idx];
            }
        }
        return "";
    }


    // Converts a 4-character binary text nibble ("0000".."1111") into the
    // matching lowercase hexadecimal digit, e.g. "1010" -> "a".
    // Any other input yields "".
    CString CXmlProcess::BinToHex(CString BinString)
    {
        static const char* const kNibbleBits[16] = {
            "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
            "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
        };
        static const char* const kHexDigits[16] = {
            "0", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "a", "b", "c", "d", "e", "f"
        };

        // The table index is the numeric value of the nibble.
        for (int idx = 0; idx < 16; ++idx)
        {
            if (BinString == kNibbleBits[idx])
            {
                return kHexDigits[idx];
            }
        }
        return "";
    }

    // Converts a string of '0'/'1' characters (most significant bit first)
    // into its integer value, e.g. "00001010" -> 10.
    //
    // The previous implementation hard-coded the weight of character i as
    // 2^(7-i), so it was only correct for strings of exactly 8 characters.
    // Horner evaluation gives the same result for 8-character input and the
    // correct value for every other length.
    //
    // @param string  binary digits; characters other than '0'/'1' are not
    //                validated and contribute (ch - '0') at their position.
    // @return the numeric value; strings longer than the bit width of int
    //         overflow.
    int CXmlProcess::BinToInt(CString string)
    {
        int value = 0;
        const int bitCount = string.GetLength();
        for (int i = 0; i < bitCount; ++i)
        {
            // Shift what has been accumulated so far and append the next bit.
            value = value * 2 + (static_cast<int>(string.GetAt(i)) - '0');
        }
        return value;
    }
    // UTF-8 -> GB2312: first convert the UTF-8 bytes to Unicode, then convert
    // the Unicode value to GB2312 with WideCharToMultiByte.
    // Decodes one three-byte UTF-8 sequence (1110xxxx 10xxxxxx 10xxxxxx) into
    // a single UTF-16 code unit.
    //
    // Fixes over the previous implementation:
    //  * it returned a pointer to a local stack array, which dangles as soon
    //    as the function returns; the result now lives in static storage;
    //  * it reinterpreted a char[2] as WCHAR*, which ignores alignment and
    //    byte order; the value is now assembled arithmetically;
    //  * it formatted each byte with "%x" and split the text with
    //    Left(1)/Right(1), which breaks for bytes below 0x10.
    //
    // @param ustart  pointer to at least three readable bytes that form a
    //                three-byte UTF-8 sequence; the bytes are not validated.
    // @return pointer to the decoded code unit. The storage is shared and is
    //         overwritten by the next call (not thread-safe); the caller must
    //         NOT delete it.
    WCHAR* CXmlProcess::UTF_8ToUnicode(char *ustart)
    {
        static WCHAR decoded;

        // Work on unsigned values so bytes >= 0x80 are not sign-extended.
        const unsigned int lead  = static_cast<unsigned char>(ustart[0]);
        const unsigned int mid   = static_cast<unsigned char>(ustart[1]);
        const unsigned int trail = static_cast<unsigned char>(ustart[2]);

        // Strip the 1110 / 10 / 10 marker bits and join the 4 + 6 + 6 payload bits.
        decoded = static_cast<WCHAR>(((lead & 0x0F) << 12) |
                                     ((mid & 0x3F) << 6) |
                                     (trail & 0x3F));
        return &decoded;
    }

    char * CXmlProcess::UnicodeToGB2312(unsigned short uData) //把Unicode 转换成 GB2312
    {
    char *buffer ;
    buffer = new char[sizeof(WCHAR)];
    WideCharToMultiByte(CP_ACP,NULL,&uData,1,buffer,sizeof(WCHAR),NULL,NULL);
    return buffer;
    }   GB2312转换成UTF-8:先把GB2312通过函数MultiByteToWideChar转换成Unicode.然后再把Unicode通过拆开Unicode后拼装成UTF-8。

    // Converts one double-byte character of the system ANSI code page (CP_ACP,
    // GB2312/GBK on a Simplified Chinese system) into a UTF-16 code unit.
    //
    // The previous implementation ignored the API result, so a failed
    // conversion (for example two single-byte characters, which need two
    // output units) returned an uninitialised value. Failure now yields 0.
    //
    // @param gbBuffer  pointer to at least two readable bytes holding one
    //                  double-byte character.
    // @return a new[]-allocated one-element array holding the code unit, or 0
    //         when the conversion failed. The caller owns it and must release
    //         it with delete[].
    WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)
    {
        WCHAR *uniChar = new WCHAR[1];
        uniChar[0] = 0;

        const int converted = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
                                                    gbBuffer, 2, uniChar, 1);
        if (converted != 1)
        {
            // The API may have written a partial result before failing.
            uniChar[0] = 0;
        }
        return uniChar;
    }
    // Encodes one UTF-16 code unit as a three-byte UTF-8 sequence
    // (1110xxxx 10xxxxxx 10xxxxxx).
    //
    // The previous implementation built the bit pattern from "%x" text. When
    // the high or low byte was below 0x10 the text had one hex digit, a nibble
    // was silently dropped and the output was corrupt (e.g. U+4E00, whose low
    // byte is 0x00). The bytes are now computed arithmetically.
    //
    // @param UniChar  pointer to the code unit to encode; must not be NULL.
    // @return a new[]-allocated buffer of exactly 3 bytes, NOT NUL-terminated.
    //         The caller owns it and must release it with delete[].
    // NOTE: the three-byte form is always used because callers copy exactly
    // three bytes; values below U+0800 therefore come out in a non-shortest
    // form, as they did before.
    char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar)
    {
        const unsigned int codeUnit = static_cast<unsigned int>(*UniChar) & 0xFFFF;

        char *buffer = new char[3];
        buffer[0] = static_cast<char>(0xE0 | (codeUnit >> 12));          // 1110 + top 4 bits
        buffer[1] = static_cast<char>(0x80 | ((codeUnit >> 6) & 0x3F));  // 10 + middle 6 bits
        buffer[2] = static_cast<char>(0x80 | (codeUnit & 0x3F));         // 10 + low 6 bits
        return buffer;
    }
    // Example: converting a GB2312 buffer to UTF-8:
    char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
    {
    int newCharLen =0 ;
    int oldCharLen = 0;
    int revCharLen = len;
    char* newCharBuffer;
    char* finalCharBuffer;
    char *buffer ;
    CString string;
    buffer = new char[sizeof(WCHAR)];
    newCharBuffer = new char[int(1.5*revCharLen)];//设置最大的一个缓冲区
    while(oldCharLen < revCharLen)
    {
    if( *(xmlStream + oldCharLen) >= 0)
    {
    *(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
    newCharLen ++;
    oldCharLen ++;
    }//如果是英文直接复制就可以
    else
    {
    WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
    buffer = this->UnicodeToUTF_8(pbuffer);
    *(newCharBuffer+newCharLen) = *buffer;
    *(newCharBuffer +newCharLen +1) = *(buffer + 1);
    *(newCharBuffer +newCharLen +2) = *(buffer + 2);
    newCharLen += 3;
    oldCharLen += 2;
    }
    }
    newCharBuffer[newCharLen] = ''\0'';
    CString string1 ;
    string1.Format("%s",newCharBuffer);
    finalCharBuffer = new char[newCharLen+1];
    memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
    return finalCharBuffer;
    }
    // Converts one double-byte character of the system ANSI code page (CP_ACP,
    // GB2312/GBK on a Simplified Chinese system) into a UTF-16 code unit.
    //
    // The previous implementation ignored the API result, so a failed
    // conversion (for example two single-byte characters, which need two
    // output units) returned an uninitialised value. Failure now yields 0.
    //
    // @param gbBuffer  pointer to at least two readable bytes holding one
    //                  double-byte character.
    // @return a new[]-allocated one-element array holding the code unit, or 0
    //         when the conversion failed. The caller owns it and must release
    //         it with delete[].
    WCHAR * CXmlProcess::Gb2312ToUnicode(char *gbBuffer)
    {
        WCHAR *uniChar = new WCHAR[1];
        uniChar[0] = 0;

        const int converted = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
                                                    gbBuffer, 2, uniChar, 1);
        if (converted != 1)
        {
            // The API may have written a partial result before failing.
            uniChar[0] = 0;
        }
        return uniChar;
    }
    // Encodes one UTF-16 code unit as a three-byte UTF-8 sequence
    // (1110xxxx 10xxxxxx 10xxxxxx).
    //
    // The previous implementation built the bit pattern from "%x" text. When
    // the high or low byte was below 0x10 the text had one hex digit, a nibble
    // was silently dropped and the output was corrupt (e.g. U+4E00, whose low
    // byte is 0x00). The bytes are now computed arithmetically.
    //
    // @param UniChar  pointer to the code unit to encode; must not be NULL.
    // @return a new[]-allocated buffer of exactly 3 bytes, NOT NUL-terminated.
    //         The caller owns it and must release it with delete[].
    // NOTE: the three-byte form is always used because callers copy exactly
    // three bytes; values below U+0800 therefore come out in a non-shortest
    // form, as they did before.
    char * CXmlProcess::UnicodeToUTF_8(WCHAR *UniChar)
    {
        const unsigned int codeUnit = static_cast<unsigned int>(*UniChar) & 0xFFFF;

        char *buffer = new char[3];
        buffer[0] = static_cast<char>(0xE0 | (codeUnit >> 12));          // 1110 + top 4 bits
        buffer[1] = static_cast<char>(0x80 | ((codeUnit >> 6) & 0x3F));  // 10 + middle 6 bits
        buffer[2] = static_cast<char>(0x80 | (codeUnit & 0x3F));         // 10 + low 6 bits
        return buffer;
    }
    // Example: converting a GB2312 buffer to UTF-8:
    char * CXmlProcess::translateCharToUTF_8(char *xmlStream, int len)
    {
    int newCharLen =0 ;
    int oldCharLen = 0;
    int revCharLen = len;
    char* newCharBuffer;
    char* finalCharBuffer;
    char *buffer ;
    CString string;
    buffer = new char[sizeof(WCHAR)];
    newCharBuffer = new char[int(1.5*revCharLen)];//设置最大的一个缓冲区
    while(oldCharLen < revCharLen)
    {
    if( *(xmlStream + oldCharLen) >= 0)
    {
    *(newCharBuffer+newCharLen) = *(xmlStream +oldCharLen);
    newCharLen ++;
    oldCharLen ++;
    }//如果是英文直接复制就可以
    else
    {
    WCHAR *pbuffer = this->Gb2312ToUnicode(xmlStream+oldCharLen);
    buffer = this->UnicodeToUTF_8(pbuffer);
    *(newCharBuffer+newCharLen) = *buffer;
    *(newCharBuffer +newCharLen +1) = *(buffer + 1);
    *(newCharBuffer +newCharLen +2) = *(buffer + 2);
    newCharLen += 3;
    oldCharLen += 2;
    }
    }
    newCharBuffer[newCharLen] = ''\0'';
    CString string1 ;
    string1.Format("%s",newCharBuffer);
    finalCharBuffer = new char[newCharLen+1];
    memcpy(finalCharBuffer,newCharBuffer,newCharLen+1);
    return finalCharBuffer;
    }

    这两个函数就可以解决问题了,多谢kongming兄

    int WideCharToMultiByte(
    UINT CodePage, // code page
    DWORD dwFlags, // performance and mapping flags
    LPCWSTR lpWideCharStr, // wide-character string
    int cchWideChar, // number of chars in string
    LPSTR lpMultiByteStr, // buffer for new string
    int cbMultiByte, // size of buffer
    LPCSTR lpDefaultChar, // default for unmappable chars
    LPBOOL lpUsedDefaultChar // set when default char used
    ); // converts a wide-character string into a multibyte (narrow) string

    int MultiByteToWideChar(
    UINT CodePage, // code page
    DWORD dwFlags, // character-type options
    LPCSTR lpMultiByteStr, // string to map
    int cbMultiByte, // number of bytes in string
    LPWSTR lpWideCharStr, // wide-character buffer
    int cchWideChar // size of buffer
    );// converts a multibyte (narrow) string into a wide-character string. Helper functions that are needed:

    ///////////////////////

    正好自己用得上,我看能不能在Flash或是ASP里也转换一下!


    文章来源:http://computer.mblogger.cn/wucountry/posts/31298.aspx
    ================================
      /\_/\                        
     (=^o^=)  Wu.Country@侠缘      
     (~)@(~)  一辈子,用心做一件事!
    --------------------------------
      学而不思则罔,思而不学则怠!  
    ================================
  • 相关阅读:
    mysql报错:java.sql.SQLException: The server time zone value 'Öйú±ê׼ʱ¼ä' is unrecognized or represents more than one time zone.
    MD5登陆密码的生成
    15. 3Sum、16. 3Sum Closest和18. 4Sum
    11. Container With Most Water
    8. String to Integer (atoi)
    6. ZigZag Conversion
    5. Longest Palindromic Substring
    几种非线性激活函数介绍
    AI初探1
    AI初探
  • 原文地址:https://www.cnblogs.com/WuCountry/p/305754.html
Copyright © 2011-2022 走看看