zoukankan      html  css  js  c++  java
  • Windows下编码转换相关(UTF-8 & UTF-16)

    一、ANSI转换为UTF-16

    #include <windows.h>

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    // Converts a NUL-terminated string encoded in the system ANSI code page
    // (CP_ACP) into a newly allocated, NUL-terminated UTF-16 string.
    //
    // str: source string; NULL is treated as an empty string.
    // Returns a buffer allocated with new[]. It is always NUL-terminated: if the
    // conversion fails, an empty string is returned instead of uninitialized
    // memory. Release the buffer with AnsiToUnicodeEnd().
    wchar_t* AnsiToUnicodeBegin(const char* str)
    {
      if(str == NULL)
      {
        str = "";
      }

      // With cbMultiByte == -1 the reported size includes the terminating NUL;
      // a return value of 0 signals failure.
      int wLen = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
      if(wLen < 0)
      {
        wLen = 0;
      }

      wchar_t* wBuf = new wchar_t[wLen+1];
      int written = 0;
      if(wLen > 0)
      {
        // The API reads the source directly, so no temporary copy is needed.
        written = MultiByteToWideChar(CP_ACP, 0, str, -1, wBuf, wLen);
      }
      if(written <= 0)
      {
        // Conversion failed: hand back an empty string.
        wBuf[0] = L'\0';
      }
      // The spare element guarantees termination whatever the API wrote.
      wBuf[wLen] = L'\0';
      return wBuf;
    }

    // Releases a wchar_t array allocated with new[] (such as the one returned by
    // AnsiToUnicodeBegin). A NULL argument is accepted and ignored. The pointer
    // is passed by value, so the caller's own copy is left untouched.
    void AnsiToUnicodeEnd(wchar_t* wstr_chaged)
    {
      if(wstr_chaged == NULL)
      {
        return;
      }
      delete[] wstr_chaged;
    }

    二、UTF-8转换为UTF-16

    // Converts a NUL-terminated UTF-8 string into a newly allocated,
    // NUL-terminated UTF-16 string.
    //
    // str: source string; NULL is treated as an empty string.
    // Returns a buffer allocated with new[]. It is always NUL-terminated: if the
    // conversion fails, an empty string is returned instead of uninitialized
    // memory. Release the buffer with UTF8ToUnicodeEnd().
    wchar_t* UTF8ToUnicodeBegin(const char* str)
    {
      if(str == NULL)
      {
        str = "";
      }

      // With cbMultiByte == -1 the reported size includes the terminating NUL;
      // a return value of 0 signals failure.
      int wLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
      if(wLen < 0)
      {
        wLen = 0;
      }

      wchar_t* wBuf = new wchar_t[wLen+1];
      int written = 0;
      if(wLen > 0)
      {
        // The API reads the source directly, so no temporary copy is needed.
        written = MultiByteToWideChar(CP_UTF8, 0, str, -1, wBuf, wLen);
      }
      if(written <= 0)
      {
        // Conversion failed: hand back an empty string.
        wBuf[0] = L'\0';
      }
      // The spare element guarantees termination whatever the API wrote.
      wBuf[wLen] = L'\0';
      return wBuf;
    }

    // Releases a wchar_t array allocated with new[] (such as the one returned by
    // UTF8ToUnicodeBegin). A NULL argument is accepted and ignored. The pointer
    // is passed by value, so the caller's own copy is left untouched.
    void UTF8ToUnicodeEnd(wchar_t* wstr_chaged)
    {
      if(wstr_chaged == NULL)
      {
        return;
      }
      delete[] wstr_chaged;
    }

    三、UTF-16转换为UTF-8

    // Converts a NUL-terminated UTF-16 string into a newly allocated,
    // NUL-terminated UTF-8 string.
    //
    // wstr: source string; NULL is treated as an empty string.
    // Returns a buffer allocated with new[]. It is always NUL-terminated: if the
    // conversion fails, an empty string is returned instead of uninitialized
    // memory. Release the buffer with UnicodeToUTF8End().
    char* UnicodeToUTF8Begin(const wchar_t* wstr)
    {
      if(wstr == NULL)
      {
        wstr = L"";
      }

      // With cchWideChar == -1 the reported size includes the terminating NUL;
      // a return value of 0 signals failure.
      int len = WideCharToMultiByte(CP_UTF8,0,wstr,-1,NULL,0,NULL,NULL);
      if(len < 0)
      {
        len = 0;
      }

      char* buf = new char[len+1];
      int written = 0;
      if(len > 0)
      {
        // The API reads the source directly, so no temporary copy is needed.
        written = WideCharToMultiByte(CP_UTF8,0,wstr,-1,buf,len,NULL,NULL);
      }
      if(written <= 0)
      {
        // Conversion failed: hand back an empty string.
        buf[0] = '\0';
      }
      // The spare element guarantees termination whatever the API wrote.
      buf[len] = '\0';
      return buf;
    }

    // Releases a char array allocated with new[] (such as the one returned by
    // UnicodeToUTF8Begin). A NULL argument is accepted and ignored. The pointer
    // is passed by value, so the caller's own copy is left untouched.
    void UnicodeToUTF8End(const char* str_chaged)
    {
      if(str_chaged == NULL)
      {
        return;
      }
      delete[] str_chaged;
    }

    四、读写UTF-8格式文件,转换为UTF-16处理,最后转回UTF-8写入

    // Round-trip demo: reads the first three bytes and the first line of
    // c:\test.txt, converts the line UTF-8 -> UTF-16 -> UTF-8, and writes the
    // three leading bytes plus the converted line to c:\outputUTF-8.txt.
    // Returns 0 on success and 1 when either file cannot be opened.
    int main()
    {
      char lineBuf[256]={0};
      char headFlag[3]={0};

      // Backslashes in the path must be escaped; an unescaped "\t" is a TAB.
      FILE *fpr = fopen("c:\\test.txt","rb");
      if(fpr == NULL)
      {
        fprintf(stderr, "cannot open c:\\test.txt for reading\n");
        return 1;
      }

      // The first three bytes are passed through unchanged (the UTF-8 BOM when
      // the file has one). Only the bytes actually read are written back later.
      size_t headLen = fread(headFlag,1,3,fpr);
      if(fgets(lineBuf,256,fpr) == NULL)
      {
        // Nothing after the header (or a read error): treat the line as empty.
        lineBuf[0] = '\0';
      }
      fclose(fpr);

      wchar_t* wstr = UTF8ToUnicodeBegin(lineBuf);
      char* str = UnicodeToUTF8Begin(wstr);

      int status = 0;
      // Binary mode keeps the line ending read from the input byte-for-byte;
      // text mode would expand the "\n" of a CRLF pair a second time.
      FILE *fpw = fopen("c:\\outputUTF-8.txt","wb");
      if(fpw == NULL)
      {
        fprintf(stderr, "cannot open c:\\outputUTF-8.txt for writing\n");
        status = 1;
      }
      else
      {
        fwrite(headFlag,1,headLen,fpw);
        fwrite(str,1,strlen(str),fpw);
        fclose(fpw);
      }

      UnicodeToUTF8End(str);
      UTF8ToUnicodeEnd(wstr);

      system("pause");
      return status;
    }

    下面是我用C++写的一个转换类,提供UTF-8编码的std::string与UTF-16编码的std::wstring之间的相互转换。随后的示例程序逐行读取UTF-8文件,转换为wstring处理,然后再转换为UTF-8编码的字符串,最后写回文件中。

     #ifndef CHARACTERCONVERT_H_
     #define CHARACTERCONVERT_H_

     #include <string>

     namespace MyLIB
     {

         // Static-only helper for converting text between UTF-8 (std::string)
         // and UTF-16 (std::wstring).
         class CharacterConvert
         {
         public:
             // Converts the UTF-8 text in strUtf8 and stores the UTF-16 result
             // in strUtf16.
             static void ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16);
             // Converts the UTF-16 text in strUtf16 and stores the UTF-8 result
             // in strUtf8.
             static void ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8);
         private:
             // Private constructor and destructor: the class only groups static
             // functions and cannot be instantiated from outside.
             CharacterConvert(void);
             ~CharacterConvert(void);
         };

     }
     #endif
     1 #include "StdAfx.h"
     2 #include "CharacterConvert.h"
     3 #include <Windows.h>
     4 
     5 
     6 using namespace MyLIB;
     7 
     8 void CharacterConvert::ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16)
     9 {
    10     int wLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
    11     wchar_t* wBuf = new wchar_t[wLen+1];
    12     if(wBuf==NULL)
    13     {
    14         return;
    15     }
    16     MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wBuf, wLen);
    17     strUtf16.assign(wBuf);
    18     if(wBuf!=NULL)
    19     {
    20         delete[] wBuf;
    21         wBuf = NULL;
    22     }
    23 }
    24 
    25 void CharacterConvert::ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8)
    26 {
    27     int len = WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,NULL,0,0,0);
    28     char *buf = new char[len+1];
    29     if(buf==NULL)
    30     {
    31         return;
    32     }
    33     WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,buf,len,NULL,NULL);
    34     strUtf8.assign(buf);
    35     if(buf!=NULL)
    36     {
    37         delete[] buf;
    38         buf=NULL;
    39     }
    40 }
     1 // STLTest.cpp : 
    2 // 3 4 #include "stdafx.h" 5 #include <iostream> 6 #include <fstream> 7 #include <string> 8 #include <algorithm> 9 #include "CharacterConvert.h" 10 using namespace std; 11 12 13 14 int _tmain(int argc, _TCHAR* argv[]) 15 { 16 string input; 17 wstring output; 18 string utf8; 19 ifstream fin("testUTF8.txt",ios_base::in|ios_base::binary); 20 if(!fin.is_open()) 21 { 22 return -1; 23 } 24 ofstream fout("UTF8Output.txt",ios_base::out|ios_base::binary); 25 if(!fout.is_open()) 26 { 27 return -1; 28 } 29 30 while(getline(fin,input)) 31 { 32 MyLIB::CharacterConvert::ConvertUTF8ToUnicode(input,output); 33 MyLIB::CharacterConvert::ConvertUnicodeToUTF8(output,utf8); 34 fout << utf8 << endl; 35 } 36 37 return 0; 38 }
  • 相关阅读:
    luogu P1833 樱花 看成混合背包
    luogu P1077 摆花 基础记数dp
    luogu P1095 守望者的逃离 经典dp
    Even Subset Sum Problem CodeForces
    Maximum White Subtree CodeForces
    Sleeping Schedule CodeForces
    Bombs CodeForces
    病毒侵袭持续中 HDU
    病毒侵袭 HDU
    Educational Codeforces Round 35 (Rated for Div. 2)
  • 原文地址:https://www.cnblogs.com/elitiwin/p/3965308.html
Copyright © 2011-2022 走看看