zoukankan      html  css  js  c++  java
  • c++ 字符集转换

    转码整理, 资料来源于网络

    charset.h

    #pragma once
    
    #include <iostream>
    #include <string>
    
    // Wide-character text <-> ANSI code page text.
    std::string  UnicodeToAnsi(const std::wstring& unicode);
    std::wstring AnsiToUnicode(const std::string& ansi);
    
    // ANSI code page text <-> UTF-8 (converted through wide characters).
    std::string  AnsiToUtf8(const std::string& strSrc);
    std::string  Utf8ToAnsi(const std::string& strSrc);
    
    // Wide-character text <-> UTF-8.
    std::string  UnicodeToUtf8(const std::wstring& wstrSrc);
    std::wstring Utf8ToUnicode(const std::string& strSrc);
    
    // GBK <-> UTF-8.
    std::string  GBKToUtf8(const std::string& gbk);
    std::string  Utf8ToGBK(const std::string& utf8);
    
    // GB2312 (Windows code page 936) <-> wide-character text.
    std::wstring GB2312ToUnicode(const std::string& gb2312);
    std::string  UnicodeToGB2312(const std::wstring& unicode);
    
    // BIG5 (Windows code page 950) <-> wide-character text.
    std::wstring BIG5ToUnicode(const std::string& big5);
    std::string  UnicodeToBIG5(const std::wstring& unicode);
    
    // Traditional Chinese (BIG5) <-> Simplified Chinese (GB2312).
    // Besides re-encoding, these map the characters themselves between the
    // traditional and simplified forms.
    std::string  FBIG5ToGB2312(const std::string& big5);
    std::string  GB2312ToFBIG5(const std::string gb2312);
    
    // Returns true when the `size` bytes starting at `pBuffer` look like
    // UTF-8 encoded text.
    bool IsUTF8(const void* pBuffer, long size);

    main.cpp

    #include "charset.h"
    
    /*
     * Dumps the first `len` bytes of `bytes` to stdout, each as two lowercase
     * hex digits followed by a space. Prints nothing when `len` is not positive.
     */
    void showHex(const char* bytes, int len) {
        const char* cursor = bytes;
        for (int remaining = len; remaining > 0; --remaining) {
            /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
            const unsigned char value = (unsigned char)*cursor;
            printf("%02x ", value);
            ++cursor;
        }
    }
    
    void showHex(std::string charset, std::string str) {
        printf("%10s: ", charset.data());
        showHex(str.data(), str.size());
        printf("
    ");
    }
    
    void showHex(std::string charset, std::wstring str) {
        printf("%10s: ", charset.data());
        showHex((char*)str.data(), 2 * str.size());
        printf("
    ");
    }
    
    int main(int argc, char* argv[])
    {
        std::wstring wstr(L"中abc国");
        std::string str("中abc国");
    
        std::string ansi;
        std::string utf8;
        std::string gbk;
        std::wstring unicode;
    
        showHex("unicode", wstr);
        showHex("ansi", str);
    
        ansi = UnicodeToAnsi(wstr); showHex("ansi", ansi);
    
        unicode = AnsiToUnicode(ansi); showHex("unicode", unicode);
    
        utf8 = AnsiToUtf8(str); showHex("utf8", utf8);
        ansi = Utf8ToAnsi(utf8); showHex("ansi", ansi);
    
        utf8 = UnicodeToUtf8(wstr); showHex("utf8", utf8);
        unicode = Utf8ToUnicode(utf8); showHex("unicode", unicode);
    
        gbk = Utf8ToGBK(utf8); showHex("gbk", gbk);
        utf8 = GBKToUtf8(gbk); showHex("utf8", utf8);
    
        getchar();
        return 0;
    }

    charset.cpp

    #inchude "charset.h"
    #include <Windows.h>
    
    
    /**
     * Converts wide-character text to the ANSI code page of the current thread
     * (CP_THREAD_ACP).
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first L'\0' it contains.
     *
     * NOTE(review): AnsiToUnicode uses CP_ACP rather than CP_THREAD_ACP; the two
     * can differ when the thread locale was changed - confirm which is intended.
     *
     * @param unicode  Wide-character source text.
     * @return The converted bytes without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::string UnicodeToAnsi(const std::wstring& unicode)
    {
        LPCWCH ptr = unicode.c_str();
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = WideCharToMultiByte(CP_THREAD_ACP, 0, ptr, -1, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = WideCharToMultiByte(CP_THREAD_ACP, 0, ptr, -1, &strRet[0], size, NULL, NULL);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts text in the system ANSI code page (CP_ACP) to wide characters.
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first '\0' it contains.
     *
     * @param ansi  Source text in the system ANSI code page.
     * @return The converted text without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::wstring AnsiToUnicode(const std::string& ansi)
    {
        LPCCH ptr = ansi.c_str();
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = MultiByteToWideChar(CP_ACP, 0, ptr, -1, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }
    
        std::wstring wstrRet(size, 0);
        /* &wstrRet[0] is writable storage; c_str() must not be written through. */
        int len = MultiByteToWideChar(CP_ACP, 0, ptr, -1, &wstrRet[0], size);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        wstrRet.resize(len > 0 ? len - 1 : 0);
        return wstrRet;
    }
    
    /**
     * Converts text in the system ANSI code page (CP_ACP) to UTF-8.
     *
     * Works in two steps: ANSI -> wide characters here, then wide characters ->
     * UTF-8 through UnicodeToUtf8, whose result is returned unchanged.
     * Conversion stops at the first '\0' in the input.
     *
     * @param ansi  Source text in the system ANSI code page.
     * @return The UTF-8 text produced by UnicodeToUtf8; an empty string when the
     *         ANSI -> wide step fails.
     */
    std::string AnsiToUtf8(const std::string& ansi)
    {
        LPCCH ptr = ansi.c_str();
        /* Ask the API for the exact buffer size (in wide characters, including
           the terminating NUL) rather than guessing a multiple of the input. */
        int size = MultiByteToWideChar(CP_ACP, 0, ptr, -1, NULL, 0);
        if (size <= 0) {
            return std::string();
        }
    
        std::wstring wstrTemp(size, 0);
        /* &wstrTemp[0] is writable storage; c_str() must not be written through. */
        int len = MultiByteToWideChar(CP_ACP, 0, ptr, -1, &wstrTemp[0], size);
    
        /* Keep only the text itself, not the terminator the API appended. */
        wstrTemp.resize(len > 0 ? len - 1 : 0);
        return UnicodeToUtf8(wstrTemp);
    }
    
    /**
     * Converts UTF-8 text to the system ANSI code page (CP_ACP).
     *
     * Works in two steps: UTF-8 -> wide characters through Utf8ToUnicode, then
     * wide characters -> ANSI here. The wide text is handed to the API as a
     * NUL-terminated string, so conversion stops at its first L'\0'.
     *
     * @param utf8  UTF-8 source text.
     * @return The converted bytes without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::string Utf8ToAnsi(const std::string& utf8)
    {
        std::wstring wstrTemp = Utf8ToUnicode(utf8);
    
        LPCWCH ptr = wstrTemp.c_str();
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = WideCharToMultiByte(CP_ACP, 0, ptr, -1, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = WideCharToMultiByte(CP_ACP, 0, ptr, -1, &strRet[0], size, NULL, NULL);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts wide-character text to UTF-8 (CP_UTF8).
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first L'\0' it contains.
     *
     * @param unicode  Wide-character source text.
     * @return The UTF-8 bytes without a trailing NUL; an empty string when the
     *         conversion fails.
     */
    std::string UnicodeToUtf8(const std::wstring& unicode)
    {
        LPCWCH ptr = unicode.c_str();
        /* Ask the API for the exact byte count instead of over-allocating. With
           a source length of -1 the count includes the terminating NUL. */
        int size = WideCharToMultiByte(CP_UTF8, 0, ptr, -1, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = WideCharToMultiByte(CP_UTF8, 0, ptr, -1, &strRet[0], size, NULL, NULL);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts UTF-8 text (CP_UTF8) to wide characters.
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first '\0' it contains.
     *
     * @param utf8  UTF-8 source text.
     * @return The converted text without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::wstring Utf8ToUnicode(const std::string& utf8)
    {
        LPCCH ptr = utf8.c_str();
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = MultiByteToWideChar(CP_UTF8, 0, ptr, -1, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }
    
        std::wstring wstrRet(size, 0);
        /* &wstrRet[0] is writable storage; c_str() must not be written through. */
        int len = MultiByteToWideChar(CP_UTF8, 0, ptr, -1, &wstrRet[0], size);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        wstrRet.resize(len > 0 ? len - 1 : 0);
        return wstrRet;
    }
    
    
    std::string GBKToUtf8(const std::string& gbk)
    {
        return AnsiToUtf8(gbk);
    }
    
    std::string Utf8ToGBK(const std::string& utf8)
    {
        return Utf8ToAnsi(utf8);
    }
    
    /**
     * Checks whether a byte buffer is structurally valid UTF-8.
     *
     * Every lead byte must be followed by the right number of continuation
     * bytes (10xxxxxx). Sequences of 1 to 4 bytes are accepted, so characters
     * outside the Basic Multilingual Plane (lead bytes 0xF0-0xF4) pass.
     * Lead bytes 0xF5-0xFF, which never occur in UTF-8, are rejected.
     *
     * A multi-byte sequence cut off by the end of the buffer is tolerated
     * (the function returns true), so a prefix of a longer stream can be
     * tested. Overlong encodings and surrogate code points are not rejected.
     *
     * @param pBuffer  Start of the bytes to inspect.
     * @param size     Number of bytes available at pBuffer.
     * @return true if the buffer is empty (size <= 0) or passes the checks
     *         above; false otherwise, including a null pBuffer with size > 0.
     */
    bool IsUTF8(const void* pBuffer, long size)
    {
        if (size <= 0) {
            return true;
        }
        if (!pBuffer) {
            return false;
        }
    
        const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
        const unsigned char* const end = cur + size;
        while (cur < end)
        {
            const unsigned char lead = *cur;
            int extra; /* number of continuation bytes this lead byte requires */
    
            if (lead < 0x80) {        /* 0xxxxxxx: ASCII */
                extra = 0;
            }
            else if (lead < 0xC0) {   /* 10xxxxxx: continuation byte with no lead */
                return false;
            }
            else if (lead < 0xE0) {   /* 110xxxxx: 2-byte sequence */
                extra = 1;
            }
            else if (lead < 0xF0) {   /* 1110xxxx: 3-byte sequence */
                extra = 2;
            }
            else if (lead < 0xF5) {   /* 11110xxx up to 0xF4: 4-byte sequence */
                extra = 3;
            }
            else {                    /* 0xF5-0xFF never appear in UTF-8 */
                return false;
            }
    
            /* Sequence runs past the end of the buffer: treat the data as a
               truncated prefix and stop without judging the tail. */
            if (end - cur <= extra) {
                break;
            }
    
            for (int i = 1; i <= extra; ++i) {
                if ((cur[i] & 0xC0) != 0x80) {
                    return false;
                }
            }
            cur += extra + 1;
        }
    
        return true;
    }
    
    
    
    /**
     * Converts GB2312 text (Windows code page 936) to wide characters.
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first '\0' it contains.
     *
     * @param gb2312  Source text encoded in code page 936.
     * @return The converted text without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::wstring GB2312ToUnicode(const std::string& gb2312)
    {
        UINT nCodePage = 936; /* GB2312 */
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }
    
        std::wstring wstrRet(size, 0);
        /* &wstrRet[0] is writable storage; c_str() must not be written through. */
        int len = MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, &wstrRet[0], size);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        wstrRet.resize(len > 0 ? len - 1 : 0);
        return wstrRet;
    }
    
    /**
     * Converts BIG5 text (Windows code page 950) to wide characters.
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first '\0' it contains.
     *
     * @param big5  Source text encoded in code page 950.
     * @return The converted text without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::wstring BIG5ToUnicode(const std::string& big5)
    {
        UINT nCodePage = 950; /* BIG5 */
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }
    
        std::wstring wstrRet(size, 0);
        /* &wstrRet[0] is writable storage; c_str() must not be written through. */
        int len = MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, &wstrRet[0], size);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        wstrRet.resize(len > 0 ? len - 1 : 0);
        return wstrRet;
    }
    
    /**
     * Converts wide-character text to GB2312 (Windows code page 936).
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first L'\0' it contains.
     *
     * @param unicode  Wide-character source text.
     * @return The converted bytes without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::string UnicodeToGB2312(const std::wstring& unicode)
    {
        UINT nCodePage = 936; /* GB2312 */
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, &strRet[0], size, NULL, NULL);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts wide-character text to BIG5 (Windows code page 950).
     *
     * The input is handed to the API as a NUL-terminated string, so conversion
     * stops at the first L'\0' it contains.
     *
     * @param unicode  Wide-character source text.
     * @return The converted bytes without a trailing NUL; an empty string when
     *         the conversion fails.
     */
    std::string UnicodeToBIG5(const std::wstring& unicode)
    {
        UINT nCodePage = 950; /* BIG5 */
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, &strRet[0], size, NULL, NULL);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts Traditional Chinese text in BIG5 to Simplified Chinese text in
     * GB2312.
     *
     * Steps: BIG5 -> wide characters (BIG5ToUnicode) -> code page 936
     * (UnicodeToGB2312), then LCMapStringA with LCMAP_SIMPLIFIED_CHINESE maps
     * the characters to their simplified forms.
     *
     * @param big5  Traditional Chinese source text encoded in BIG5.
     * @return The simplified text without a trailing NUL; an empty string when
     *         the mapping fails.
     */
    std::string FBIG5ToGB2312(const std::string& big5)
    {
        LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
        std::wstring unicode = BIG5ToUnicode(big5);
        std::string gb2312 = UnicodeToGB2312(unicode);
    
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = LCMapStringA(lcid, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, NULL, 0);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* Use the same lcid for both calls rather than a hard-coded 0x0804, and
           write into &strRet[0] because c_str() must not be written through. */
        int len = LCMapStringA(lcid, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, &strRet[0], size);
    
        /* Drop the terminator the API wrote so size() matches the text length. */
        strRet.resize(len > 0 ? len - 1 : 0);
        return strRet;
    }
    
    /**
     * Converts Simplified Chinese text in GB2312 to Traditional Chinese text in
     * BIG5.
     *
     * Steps: LCMapStringA with LCMAP_TRADITIONAL_CHINESE maps the characters to
     * their traditional forms (still in code page 936), then the result is
     * re-encoded through GB2312ToUnicode and UnicodeToBIG5. The returned string
     * is whatever UnicodeToBIG5 produces.
     *
     * @param gb2312  Simplified Chinese source text encoded in GB2312.
     * @return The traditional text encoded in BIG5; an empty string when the
     *         character mapping fails.
     */
    std::string GB2312ToFBIG5(const std::string gb2312)
    {
        LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
        /* First pass only measures. With a source length of -1 the reported
           size includes the terminating NUL. */
        int size = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, NULL, 0);
        if (size <= 0) {
            return std::string();
        }
    
        std::string strRet(size, 0);
        /* &strRet[0] is writable storage; c_str() must not be written through. */
        int len = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, &strRet[0], size);
        /* Keep only the mapped text, not the terminator the API appended. */
        strRet.resize(len > 0 ? len - 1 : 0);
    
        std::wstring unicode = GB2312ToUnicode(strRet);
        std::string big5 = UnicodeToBIG5(unicode);
    
        return big5;
    }
  • 相关阅读:
    在C#代码中应用Log4Net(二)典型的使用方式
    在C#代码中应用Log4Net(一)简单使用Log4Net
    Windows Azure Active Directory (2) Windows Azure AD基础
    Windows Azure Virtual Network (6) 设置Azure Virtual Machine固定公网IP (Virtual IP Address, VIP) (1)
    Windows Azure Active Directory (1) 前言
    Azure China (6) SAP 应用在华登陆 Windows Azure 公有云
    Microsoft Azure News(3) Azure新的基本实例上线 (Basic Virtual Machine)
    Microsoft Azure News(2) 在Microsoft Azure上运行SAP应用程序
    Microsoft Azure News(1) 新的数据中心Japan East, Japan West and Brazil South
    Windows Azure HandBook (2) Azure China提供的服务
  • 原文地址:https://www.cnblogs.com/baigoogledu/p/7098135.html
Copyright © 2011-2022 走看看