zoukankan      html  css  js  c++  java
  • utf8_to_utf16

    17down voteaccepted

    Here's some code. It is only lightly tested, and there are probably a few improvements that could be made. Call this function to convert a UTF-8 string to a UTF-16 wstring. If it decides the input string is not valid UTF-8 it throws an exception; otherwise it returns the equivalent UTF-16 wstring.

    // Converts a UTF-8 encoded byte string to a UTF-16 encoded wide string.
    //
    // Every code point is decoded and validated, then appended to the result as
    // one UTF-16 code unit (U+0000..U+FFFF) or as a surrogate pair
    // (U+10000..U+10FFFF). Surrogate pairs are emitted regardless of the width
    // of wchar_t on the target platform.
    //
    // Parameters:
    //   utf8 - the bytes to decode; embedded '\0' bytes are decoded like any
    //          other ASCII character.
    //
    // Returns the UTF-16 representation of `utf8`.
    //
    // Throws std::logic_error("not a UTF-8 string") when the input contains:
    //   * a stray continuation byte or an invalid lead byte (0x80..0xBF, 0xF8..0xFF),
    //   * a sequence that is truncated or has a malformed continuation byte,
    //   * an overlong encoding (a code point stored in more bytes than needed),
    //   * an encoded surrogate (U+D800..U+DFFF) or a value above U+10FFFF.
    std::wstring utf8_to_utf16(const std::string& utf8)
    {
        std::wstring utf16;
        // A UTF-8 sequence never produces more UTF-16 units than it has bytes
        // (1-3 bytes -> 1 unit, 4 bytes -> 2 units), so one reservation suffices.
        utf16.reserve(utf8.size());

        size_t i = 0;
        while (i < utf8.size())
        {
            unsigned long uni;        // code point being assembled
            unsigned long min_value;  // smallest code point legal for this sequence length
            size_t todo;              // number of continuation bytes still expected

            const unsigned char lead = static_cast<unsigned char>(utf8[i++]);
            if (lead <= 0x7F)
            {
                uni = lead;
                todo = 0;
                min_value = 0;
            }
            else if (lead <= 0xBF)
            {
                // A continuation byte cannot start a sequence.
                throw std::logic_error("not a UTF-8 string");
            }
            else if (lead <= 0xDF)
            {
                uni = lead & 0x1F;
                todo = 1;
                min_value = 0x80;
            }
            else if (lead <= 0xEF)
            {
                uni = lead & 0x0F;
                todo = 2;
                min_value = 0x800;
            }
            else if (lead <= 0xF7)
            {
                uni = lead & 0x07;
                todo = 3;
                min_value = 0x10000;
            }
            else
            {
                throw std::logic_error("not a UTF-8 string");
            }

            for (size_t j = 0; j < todo; ++j)
            {
                if (i == utf8.size())
                    throw std::logic_error("not a UTF-8 string");
                const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
                if (cont < 0x80 || cont > 0xBF)
                    throw std::logic_error("not a UTF-8 string");
                uni <<= 6;
                uni += cont & 0x3F;
            }

            // Overlong forms (e.g. C0 80 for U+0000) are not valid UTF-8 and are a
            // classic way of smuggling characters past byte-level filters.
            if (uni < min_value)
                throw std::logic_error("not a UTF-8 string");
            if (uni >= 0xD800 && uni <= 0xDFFF)
                throw std::logic_error("not a UTF-8 string");
            if (uni > 0x10FFFF)
                throw std::logic_error("not a UTF-8 string");

            if (uni <= 0xFFFF)
            {
                utf16 += static_cast<wchar_t>(uni);
            }
            else
            {
                // Split the 20 bits above U+10000 into a high/low surrogate pair.
                uni -= 0x10000;
                utf16 += static_cast<wchar_t>((uni >> 10) + 0xD800);
                utf16 += static_cast<wchar_t>((uni & 0x3FF) + 0xDC00);
            }
        }
        return utf16;
    }
    shareimprove this answer

    http://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring

    #pragma once
    #include <string>
    
    // Select the "T" string type at preprocessing time: after this block the
    // token `tstring` expands to `wstring` when _UNICODE is defined and to
    // `string` otherwise, so `std::tstring` names std::wstring or std::string.
    // Because this is a plain macro it rewrites every later `tstring` token,
    // which is why compilation is aborted if a `tstring` macro already exists.
    #ifdef tstring
    #error ""tstring" Macro has been defined."
    #else
    #ifdef _UNICODE
    #define tstring wstring
    #else
    #define tstring string
    #endif
    #endif
    
    class EncodingConverter
    {
    public:
        static int AnsiStrToWideStr(std::string& strSrc, std::wstring& strDest)
        {
            int nLen = strSrc.length() + 1;
            int nRet = 0;
    
            nLen *=  sizeof(wchar_t);
    
            wchar_t* pszW = new wchar_t[nLen];
            memset(pszW, 0, nLen);
    
            nRet = MultiByteToWideChar(CP_ACP, 0, strSrc.c_str(), -1, pszW, nLen); 
    
            strDest = pszW;
            delete[] pszW;
    
            return nRet;
        };
    
        static int WideStrToAnsiStr(std::wstring& strSrc, std::string& strDest)
        {
            int nLen = strSrc.length() + 1;
            int nRet = 0;
    
            nLen *= sizeof(wchar_t);
    
            char* pszA = new char[nLen];
            memset(pszA, 0, nLen);
    
    
            nRet = WideCharToMultiByte(CP_ACP, 0, strSrc.c_str(), -1, pszA, nLen, NULL, NULL); 
    
            strDest = pszA;
            delete[] pszA;
    
            return nRet;
        };
    
        static int AnsiStrToTStr(std::string& strSrc, std::tstring& strDest)
        {
            int nRet = 0;
    
    #ifdef _UNICODE
            nRet = AnsiStrToWideStr(strSrc, strDest);
    #else
            strDest = strSrc;
            nRet = strDest.length();
    #endif
    
            return nRet;
        };
    
        static int TStrToAnsiStr(std::tstring& strSrc, std::string& strDest)
        {
            int nRet = 0;
    
    #ifdef _UNICODE
            nRet = WideStrToAnsiStr(strSrc, strDest);
    #else
            strDest = strSrc;
            nRet = strDest.length();
    #endif
    
            return nRet;
        };
    
        static int WideStrToTStr(std::wstring& strSrc, std::tstring& strDest)
        {
            int nRet = 0;
    
    #ifdef _UNICODE
            strDest = strSrc;
            nRet = strDest.length();
    #else
            nRet = WideStrToAnsiStr(strSrc, strDest);
    #endif
    
            return nRet;
        };
    
        static int TStrToWideStr(std::tstring& strSrc, std::wstring& strDest)
        {
            int nRet = 0;
    
    #ifdef _UNICODE
            strDest = strSrc;
            nRet = strDest.length();
    #else
            nRet = AnsiStrToWideStr(strSrc, strDest);
    #endif
    
            return nRet;
        };
    
        static std::string ToAnsiString(const wchar_t* lpStr)
        {
            std::wstring wide_string = lpStr;
            std::string ansi_string;
    
            WideStrToAnsiStr(wide_string, ansi_string);
            return ansi_string;
        };
    
        static std::string ToAnsiString(const char* lpStr)
        {
            return std::string(lpStr);
        };
    
        static std::wstring ToWideString(const wchar_t* lpStr)
        {
            return std::wstring(lpStr);
        };
    
        static std::wstring ToWideString(const char* lpStr)
        {
            std::string ansi_string = lpStr;
            std::wstring wide_string;
    
            AnsiStrToWideStr(ansi_string, wide_string);
            return wide_string;
        };
    
        static std::tstring ToTString(const char* lpStr)
        {
    #ifdef _UNICODE
            return ToWideString(lpStr);
    #else
            return ToAnsiString(lpStr);
    #endif
        };
    
        static std::tstring ToTString(const wchar_t* lpStr)
        {
    #ifdef _UNICODE
            return ToWideString(lpStr);
    #else
            return ToAnsiString(lpStr);
    #endif
        };
    
        static int WideStrToUtf8Str(std::wstring& strSrc, std::string& strDest)
        {
            int nRet = 0;
            int nLen = 0;
    
            nLen = WideCharToMultiByte(CP_UTF8, 0, strSrc.c_str(), -1, NULL, 0, NULL, NULL);
    
            char * lpUtf8Str = new char[nLen+1];
            memset(lpUtf8Str, 0, nLen);
            nRet = WideCharToMultiByte(CP_UTF8, 0, strSrc.c_str(), -1, lpUtf8Str, nLen, NULL, NULL);
            strDest = lpUtf8Str;
            delete[] lpUtf8Str;
    
            return nRet;
        };
    
        static int AnsiStrToUtf8Str(std::string& strSrc, std::string& strDest)
        {
            int nRet = 0;
            std::wstring wide_string;
    
            nRet = AnsiStrToWideStr(strSrc, wide_string);
            nRet = WideStrToUtf8Str(wide_string, strDest);
    
            return nRet;
        };
    
        static int Utf8StrToWideStr(const std::string& strSrc, std::wstring& strDest)
        {
            int nRet = 0;
            int nLen = 0;
    
            nLen = MultiByteToWideChar(CP_UTF8, 0, strSrc.c_str(), -1, NULL, 0);
    
            wchar_t* lpWideStr = new wchar_t[nLen];
            memset(lpWideStr, 0, nLen*sizeof(lpWideStr[0]));
            nRet = MultiByteToWideChar(CP_UTF8, 0, strSrc.c_str(), -1, lpWideStr, nLen);
            strDest = lpWideStr;
            delete[] lpWideStr;
    
            return nRet;
        };
    
        static int Utf8StrToAnsiStr(const std::string& strSrc, std::string& strDest)
        {
            int nRet = 0;
            std::wstring wide_string;
    
            nRet = Utf8StrToWideStr(strSrc, wide_string);
            nRet = WideStrToAnsiStr(wide_string, strDest);
    
            return nRet;
        };    
    
        static int Utf8StrToTStr(const std::string& strSrc, std::tstring& strDest)
        {
    #ifdef UNICODE
            return Utf8StrToWideStr(strSrc, strDest);
    #else
            return Utf8StrToAnsiStr(strSrc, strDest);
    #endif
        };    
    
        static std::string ToUtf8String(const std::string& str)
        {
            std::string ansi_string = str;
            std::string utf8_string;
    
            AnsiStrToUtf8Str(ansi_string, utf8_string);
            return utf8_string;
        };
    
        static std::string ToUtf8String(const std::wstring& str)
        {
            std::wstring wide_string = str;
            std::string utf8_string;
    
            WideStrToUtf8Str(wide_string, utf8_string);
            return utf8_string;
        };
    };

    https://github.com/yaocoder/utility/blob/master/src/common/EncodingConverter.h

  • 相关阅读:
    深入浅出MySQL灵魂十连问,你真的有把握吗?
    sharding-jdbc
    计算表数据大小,加查询表数据大小情况sql
    高并发下数据库分库分表面试题整理
    干货|一次MySQL两千万数据大表的优化过程,三种解决方案
    CompletableFuture 使用详解
    mysql innodbd 锁
    mysql : show processlist 详解
    微信支付V2.0-python
    python代码打包加密
  • 原文地址:https://www.cnblogs.com/findumars/p/5866920.html
Copyright © 2011-2022 走看看