zoukankan      html  css  js  c++  java
  • c++ 字符集转换

    转码整理, 资料来源于网络

    charset.h

    #pragma once
    
    #include <iostream>
    #include <string>
    
    // Wide string <-> ANSI code page text.
    std::string  UnicodeToAnsi(const std::wstring& unicode);
    std::wstring AnsiToUnicode(const std::string& ansi);
    
    // ANSI code page text <-> UTF-8.
    std::string  AnsiToUtf8(const std::string& strSrc);
    std::string  Utf8ToAnsi(const std::string& strSrc);
    
    // Wide string <-> UTF-8.
    std::string  UnicodeToUtf8(const std::wstring& wstrSrc);
    std::wstring Utf8ToUnicode(const std::string& strSrc);
    
    // GBK text <-> UTF-8.
    std::string  GBKToUtf8(const std::string& gbk);
    std::string  Utf8ToGBK(const std::string& utf8);
    
    // GB2312 text <-> wide string.
    std::wstring GB2312ToUnicode(const std::string& gb2312);
    std::string  UnicodeToGB2312(const std::wstring& unicode);
    
    // BIG5 text <-> wide string.
    std::wstring BIG5ToUnicode(const std::string& big5);
    std::string  UnicodeToBIG5(const std::wstring& unicode);
    
    // Traditional Chinese (BIG5) <-> Simplified Chinese (GB2312), including
    // the traditional/simplified character mapping.
    std::string  FBIG5ToGB2312(const std::string& big5);
    std::string  GB2312ToFBIG5(const std::string gb2312);
    
    // Checks whether the `size` bytes starting at `pBuffer` look like UTF-8.
    bool IsUTF8(const void* pBuffer, long size);

    main.cpp

    #include "charset.h"
    
    // Writes `len` bytes starting at `bytes` to stdout, each as two lowercase
    // hex digits followed by a space. Prints nothing when `len` is <= 0.
    void showHex(const char* bytes, int len) {
        int idx = 0;
        while (idx < len) {
            const unsigned char value = (unsigned char)bytes[idx];
            printf("%02x ", value);
            ++idx;
        }
    }
    
    void showHex(std::string charset, std::string str) {
        printf("%10s: ", charset.data());
        showHex(str.data(), str.size());
        printf("
    ");
    }
    
    void showHex(std::string charset, std::wstring str) {
        printf("%10s: ", charset.data());
        showHex((char*)str.data(), 2 * str.size());
        printf("
    ");
    }
    
    // Demo driver: pushes the same sample text through each conversion pair
    // and hex-dumps every intermediate result so the encodings can be compared.
    int main(int argc, char* argv[])
    {
        const std::wstring wstr(L"中abc国");
        const std::string str("中abc国");

        // Dump the two source literals as they are stored in memory.
        showHex("unicode", wstr);
        showHex("ansi", str);

        // wide -> ANSI -> wide
        std::string ansi = UnicodeToAnsi(wstr);
        showHex("ansi", ansi);

        std::wstring unicode = AnsiToUnicode(ansi);
        showHex("unicode", unicode);

        // ANSI -> UTF-8 -> ANSI
        std::string utf8 = AnsiToUtf8(str);
        showHex("utf8", utf8);

        ansi = Utf8ToAnsi(utf8);
        showHex("ansi", ansi);

        // wide -> UTF-8 -> wide
        utf8 = UnicodeToUtf8(wstr);
        showHex("utf8", utf8);

        unicode = Utf8ToUnicode(utf8);
        showHex("unicode", unicode);

        // UTF-8 -> GBK -> UTF-8
        const std::string gbk = Utf8ToGBK(utf8);
        showHex("gbk", gbk);

        utf8 = GBKToUtf8(gbk);
        showHex("utf8", utf8);

        // Wait for a key press so the console window stays open.
        getchar();
        return 0;
    }

    charset.cpp

    #include "charset.h"
    #include <Windows.h>
    
    
    std::string UnicodeToAnsi(const std::wstring& unicode)
    {
        LPCWCH ptr = unicode.c_str();
        /** 分配目标空间, 一个16位Unicode字符最多可以转为4个字节int size = static_cast<int>( wstrSrc.size() * 4 + 10 );*/
        int size = WideCharToMultiByte(CP_THREAD_ACP, 0, ptr, -1, NULL, 0, NULL, NULL);
    
        std::string strRet(size, 0);
        int len = WideCharToMultiByte(CP_THREAD_ACP, 0, ptr, -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
    
        return strRet;
    }
    
    /**
     * Converts text in the system ANSI code page (CP_ACP) to a wide (UTF-16)
     * string.
     *
     * @param ansi Bytes to decode; the full length is converted, including
     *             any embedded '\0'.
     * @return The decoded wide string with no terminating NUL stored in it;
     *         empty if the input is empty or the conversion fails.
     */
    std::wstring AnsiToUnicode(const std::string& ansi)
    {
        // MultiByteToWideChar fails for a zero-length source, so short-circuit.
        if (ansi.empty()) {
            return std::wstring();
        }

        const int srcLen = static_cast<int>(ansi.size());

        // First pass: query the number of wide characters required.
        const int size = MultiByteToWideChar(CP_ACP, 0, ansi.data(), srcLen, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }

        // Second pass: decode directly into the result buffer.
        std::wstring wstrRet(static_cast<size_t>(size), L'\0');
        const int len = MultiByteToWideChar(CP_ACP, 0, ansi.data(), srcLen, &wstrRet[0], size);
        wstrRet.resize(len > 0 ? static_cast<size_t>(len) : 0);

        return wstrRet;
    }
    
    std::string AnsiToUtf8(const std::string& ansi)
    {
        LPCCH ptr = ansi.c_str();
        /* 分配目标空间, 长度为 Ansi 编码的两倍 */
        int size = MultiByteToWideChar(CP_ACP, 0, ptr, -1, NULL, NULL);
    
        std::wstring wstrTemp(size, 0);
        int len = MultiByteToWideChar(CP_ACP, 0, ptr, -1, (LPWSTR)wstrTemp.c_str(), size);
    
        return UnicodeToUtf8(wstrTemp);
    }
    
    /**
     * Converts UTF-8 text to the system ANSI code page (CP_ACP).
     *
     * The input is first decoded with Utf8ToUnicode, then encoded to CP_ACP.
     *
     * @param utf8 UTF-8 encoded bytes.
     * @return The text encoded in the ANSI code page; empty if the decoded
     *         text is empty or the conversion fails.
     */
    std::string Utf8ToAnsi(const std::string& utf8)
    {
        const std::wstring wstrTemp = Utf8ToUnicode(utf8);

        // WideCharToMultiByte fails for a zero-length source, so short-circuit.
        if (wstrTemp.empty()) {
            return std::string();
        }

        // Convert exactly wstrTemp.size() characters; passing -1 would make
        // the API append a terminator that ends up inside the std::string.
        const int srcLen = static_cast<int>(wstrTemp.size());
        const int size = WideCharToMultiByte(CP_ACP, 0, wstrTemp.data(), srcLen, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }

        std::string strRet(static_cast<size_t>(size), '\0');
        const int len = WideCharToMultiByte(CP_ACP, 0, wstrTemp.data(), srcLen, &strRet[0], size, NULL, NULL);
        strRet.resize(len > 0 ? static_cast<size_t>(len) : 0);

        return strRet;
    }
    
    /**
     * Encodes a wide (UTF-16) string as UTF-8.
     *
     * @param unicode Wide string to encode; its full length is converted,
     *                including any embedded L'\0'.
     * @return The UTF-8 bytes with no terminating NUL stored in the string;
     *         empty if the input is empty or the conversion fails.
     */
    std::string UnicodeToUtf8(const std::wstring& unicode)
    {
        // WideCharToMultiByte fails for a zero-length source, so short-circuit.
        if (unicode.empty()) {
            return std::string();
        }

        const int srcLen = static_cast<int>(unicode.size());

        // First pass: let the API report the exact byte count needed.
        const int size = WideCharToMultiByte(CP_UTF8, 0, unicode.data(), srcLen, NULL, 0, NULL, NULL);
        if (size <= 0) {
            return std::string();
        }

        // Second pass: encode directly into the result buffer.
        std::string strRet(static_cast<size_t>(size), '\0');
        const int len = WideCharToMultiByte(CP_UTF8, 0, unicode.data(), srcLen, &strRet[0], size, NULL, NULL);
        strRet.resize(len > 0 ? static_cast<size_t>(len) : 0);

        return strRet;
    }
    
    /**
     * Decodes UTF-8 bytes into a wide (UTF-16) string.
     *
     * @param utf8 UTF-8 encoded bytes; the full length is converted,
     *             including any embedded '\0'.
     * @return The decoded wide string with no terminating NUL stored in it;
     *         empty if the input is empty or the conversion fails.
     */
    std::wstring Utf8ToUnicode(const std::string& utf8)
    {
        // MultiByteToWideChar fails for a zero-length source, so short-circuit.
        if (utf8.empty()) {
            return std::wstring();
        }

        const int srcLen = static_cast<int>(utf8.size());

        // First pass: query the number of wide characters required.
        const int size = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), srcLen, NULL, 0);
        if (size <= 0) {
            return std::wstring();
        }

        // Second pass: decode directly into the result buffer.
        std::wstring wstrRet(static_cast<size_t>(size), L'\0');
        const int len = MultiByteToWideChar(CP_UTF8, 0, utf8.data(), srcLen, &wstrRet[0], size);
        wstrRet.resize(len > 0 ? static_cast<size_t>(len) : 0);

        return wstrRet;
    }
    
    
    std::string GBKToUtf8(const std::string& gbk)
    {
        return AnsiToUtf8(gbk);
    }
    
    std::string Utf8ToGBK(const std::string& utf8)
    {
        return Utf8ToAnsi(utf8);
    }
    
    /**
     * Checks whether a byte buffer is well-formed UTF-8.
     *
     * Accepts 1- to 4-byte sequences and rejects stray continuation bytes,
     * overlong forms (lead bytes 0xC0/0xC1, 0xE0 followed by < 0xA0, 0xF0
     * followed by < 0x90), UTF-16 surrogates (0xED followed by > 0x9F) and
     * code points above U+10FFFF (0xF4 followed by > 0x8F, lead bytes >= 0xF5).
     *
     * A multi-byte sequence that is cut off by the end of the buffer is
     * tolerated as long as the bytes that are present are valid, so the
     * function can be used on a prefix of a larger stream.
     *
     * @param pBuffer Start of the bytes to inspect.
     * @param size    Number of bytes available at pBuffer.
     * @return true if every complete sequence is valid UTF-8 (also for a null
     *         or empty buffer), false otherwise.
     */
    bool IsUTF8(const void* pBuffer, long size)
    {
        // Nothing to inspect: treat as an empty buffer.
        if (!pBuffer || size <= 0) {
            return true;
        }

        const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
        const unsigned char* const end = cur + size;

        while (cur < end)
        {
            const unsigned char lead = *cur++;

            if (lead < 0x80) { /* 0xxxxxxx: ASCII */
                continue;
            }

            int trail = 0;             /* continuation bytes expected */
            unsigned char lo = 0x80;   /* allowed range of the FIRST continuation byte */
            unsigned char hi = 0xBF;

            if (lead < 0xC2) {         /* 0x80..0xBF stray continuation, 0xC0/0xC1 overlong */
                return false;
            }
            else if (lead < 0xE0) {    /* 110xxxxx: 2-byte sequence */
                trail = 1;
            }
            else if (lead < 0xF0) {    /* 1110xxxx: 3-byte sequence */
                trail = 2;
                if (lead == 0xE0) {
                    lo = 0xA0;         /* exclude overlong encodings */
                }
                else if (lead == 0xED) {
                    hi = 0x9F;         /* exclude surrogates U+D800..U+DFFF */
                }
            }
            else if (lead < 0xF5) {    /* 11110xxx: 4-byte sequence */
                trail = 3;
                if (lead == 0xF0) {
                    lo = 0x90;         /* exclude overlong encodings */
                }
                else if (lead == 0xF4) {
                    hi = 0x8F;         /* exclude code points above U+10FFFF */
                }
            }
            else {                     /* 0xF5..0xFF never appear in UTF-8 */
                return false;
            }

            for (int i = 0; i < trail; ++i, ++cur) {
                if (cur == end) {
                    /* Sequence truncated by the end of the buffer: accept. */
                    return true;
                }
                if (*cur < lo || *cur > hi) {
                    return false;
                }
                /* Only the first continuation byte has a narrowed range. */
                lo = 0x80;
                hi = 0xBF;
            }
        }

        return true;
    }
    
    
    
    //GB2312 转换成 Unicode
    std::wstring GB2312ToUnicode(const std::string& gb2312)
    {
        UINT nCodePage = 936; //GB2312
        int size = MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, NULL, 0);
    
        std::wstring wstrRet(size, 0);
        MultiByteToWideChar(nCodePage, 0, gb2312.c_str(), -1, (LPWSTR)wstrRet.c_str(), size);
    
        return wstrRet;
    }
    
    //BIG5 转换成 Unicode
    std::wstring BIG5ToUnicode(const std::string& big5)
    {
        UINT nCodePage = 950; //BIG5
        int size = MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, NULL, 0);
    
        std::wstring wstrRet(size, 0);
        MultiByteToWideChar(nCodePage, 0, big5.c_str(), -1, (LPWSTR)wstrRet.c_str(), size);
    
        return wstrRet;
    }
    
    //Unicode 转换成 GB2312
    std::string UnicodeToGB2312(const std::wstring& unicode)
    {
        UINT nCodePage = 936; //GB2312
        int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
    
        std::string strRet(size, 0);
        WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
    
        return strRet;
    }
    
    //Unicode 转换成 BIG5
    std::string UnicodeToBIG5(const std::wstring& unicode)
    {
        UINT nCodePage = 950; //BIG5
        int size = WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, NULL, 0, NULL, NULL);
    
        std::string strRet(size, 0);
        WideCharToMultiByte(nCodePage, 0, unicode.c_str(), -1, (LPSTR)strRet.c_str(), size, NULL, NULL);
    
        return strRet;
    }
    
    //繁体中文BIG5 转换成 简体中文 GB2312
    std::string FBIG5ToGB2312(const std::string& big5)
    {
        LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
        std::wstring unicode = BIG5ToUnicode(big5);
    
        std::string gb2312 = UnicodeToGB2312(unicode);
        int size = LCMapStringA(lcid, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, NULL, 0);
    
        std::string strRet(size, 0);
        LCMapStringA(0x0804, LCMAP_SIMPLIFIED_CHINESE, gb2312.c_str(), -1, (LPSTR)strRet.c_str(), size);
    
        return strRet;
    }
    
    //简体中文 GB2312 转换成 繁体中文BIG5
    std::string GB2312ToFBIG5(const std::string gb2312)
    {
        LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);
        int size = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, NULL, 0);
    
        std::string strRet(size, 0);
        LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, gb2312.c_str(), -1, (LPSTR)strRet.c_str(), size);
    
        std::wstring unicode = GB2312ToUnicode(strRet);
        std::string big5 = UnicodeToBIG5(unicode);
    
        return big5;
    }
  • 相关阅读:
    Spring中常用的配置和注解详解
    SpringBoot中的常用配置
    Maven项目创建问题
    hibernate缓存:一级缓存和二级缓存
    Hibernate标准查询
    Hibernate中Hql的查询
    Hibernate中对象的三种状态
    Hibernate中使用load和get加载的区别
    Spring增强
    Spring代理模式
  • 原文地址:https://www.cnblogs.com/baigoogledu/p/7098135.html
Copyright © 2011-2022 走看看