zoukankan      html  css  js  c++  java
  • Windows下编码转换相关(UTF-8 & UTF-16)

    一、ANSI转换为UTF-16

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <windows.h>

    // Converts a NUL-terminated string encoded in the system ANSI code page
    // (CP_ACP) into a newly allocated, NUL-terminated UTF-16 string.
    //
    // str: source string; may be NULL.
    // Returns a buffer allocated with new[] (release it with
    // AnsiToUnicodeEnd()), or NULL when str is NULL or the conversion fails.
    wchar_t* AnsiToUnicodeBegin(const char* str)
    {
      if (str == NULL)
      {
        return NULL;
      }

      // Measuring call: because cbMultiByte is -1, the returned count already
      // includes the terminating NUL. A return value of 0 signals failure.
      const int wLen = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0);
      if (wLen <= 0)
      {
        return NULL;
      }

      wchar_t* wBuf = new wchar_t[wLen];
      if (MultiByteToWideChar(CP_ACP, 0, str, -1, wBuf, wLen) == 0)
      {
        // Never hand back a buffer whose contents were not written.
        delete[] wBuf;
        return NULL;
      }
      return wBuf;
    }

    // Frees a wide-character buffer that was allocated with new[]
    // (intended for the result of AnsiToUnicodeBegin()).
    //
    // wstr_chaged: buffer to free; NULL is accepted and ignored.
    void AnsiToUnicodeEnd(wchar_t* wstr_chaged)
    {
      // delete[] on a null pointer does nothing, so no guard is required.
      // The parameter is a by-value copy: the caller's pointer is left as is.
      delete[] wstr_chaged;
    }

    二、UTF-8转换为UTF-16

    // Converts a NUL-terminated UTF-8 string into a newly allocated,
    // NUL-terminated UTF-16 string.
    //
    // str: source string; may be NULL.
    // Returns a buffer allocated with new[] (release it with
    // UTF8ToUnicodeEnd()), or NULL when str is NULL or the conversion fails.
    wchar_t* UTF8ToUnicodeBegin(const char* str)
    {
      if (str == NULL)
      {
        return NULL;
      }

      // Measuring call: because cbMultiByte is -1, the returned count already
      // includes the terminating NUL. A return value of 0 signals failure.
      const int wLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
      if (wLen <= 0)
      {
        return NULL;
      }

      wchar_t* wBuf = new wchar_t[wLen];
      if (MultiByteToWideChar(CP_UTF8, 0, str, -1, wBuf, wLen) == 0)
      {
        // Never hand back a buffer whose contents were not written.
        delete[] wBuf;
        return NULL;
      }
      return wBuf;
    }

    // Frees a wide-character buffer that was allocated with new[]
    // (intended for the result of UTF8ToUnicodeBegin()).
    //
    // wstr_chaged: buffer to free; NULL is accepted and ignored.
    void UTF8ToUnicodeEnd(wchar_t* wstr_chaged)
    {
      // delete[] on a null pointer does nothing, so no guard is required.
      // The parameter is a by-value copy: the caller's pointer is left as is.
      delete[] wstr_chaged;
    }

    三、UTF-16转换为UTF-8

    // Converts a NUL-terminated UTF-16 string into a newly allocated,
    // NUL-terminated UTF-8 string.
    //
    // wstr: source string; may be NULL.
    // Returns a buffer allocated with new[] (release it with
    // UnicodeToUTF8End()), or NULL when wstr is NULL or the conversion fails.
    char* UnicodeToUTF8Begin(const wchar_t* wstr)
    {
      if (wstr == NULL)
      {
        return NULL;
      }

      // Measuring call: because cchWideChar is -1, the returned byte count
      // already includes the terminating NUL. A return value of 0 signals failure.
      const int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
      if (len <= 0)
      {
        return NULL;
      }

      char* buf = new char[len];
      if (WideCharToMultiByte(CP_UTF8, 0, wstr, -1, buf, len, NULL, NULL) == 0)
      {
        // Never hand back a buffer whose contents were not written.
        delete[] buf;
        return NULL;
      }
      return buf;
    }

    // Frees a character buffer that was allocated with new[]
    // (intended for the result of UnicodeToUTF8Begin()).
    //
    // str_chaged: buffer to free; NULL is accepted and ignored.
    void UnicodeToUTF8End(const char* str_chaged)
    {
      // delete[] on a null pointer does nothing, so no guard is required.
      // The parameter is a by-value copy: the caller's pointer is left as is.
      delete[] str_chaged;
    }

    四、读写UTF-8格式文件,转换为UTF-16处理,最后转回UTF-8写入

    // Demo: reads the 3-byte header and the first line of c:\test.txt,
    // round-trips the line UTF-8 -> UTF-16 -> UTF-8, and writes the header plus
    // the converted line to c:\outputUTF-8.txt.
    //
    // Returns 0 on success, 1 if a file cannot be opened or any read,
    // conversion or write step fails.
    int main()
    {
      char lineBuf[256] = {0};
      char headFlag[3] = {0};

      // Backslashes in Windows paths must be escaped; "c:\test.txt" would
      // contain a TAB character instead of "\t".
      FILE* fpr = fopen("c:\\test.txt", "rb");
      if (fpr == NULL)
      {
        perror("c:\\test.txt");
        return 1;
      }

      // Binary mode, matching the "rb" reader: in text mode the CR LF kept by
      // the binary read would be written out as CR CR LF.
      FILE* fpw = fopen("c:\\outputUTF-8.txt", "wb");
      if (fpw == NULL)
      {
        perror("c:\\outputUTF-8.txt");
        fclose(fpr);
        return 1;
      }

      int exitCode = 1;

      // NOTE(review): the first 3 bytes are assumed to be the UTF-8 BOM and are
      // copied through unverified. A line longer than 255 bytes is truncated by
      // fgets, possibly in the middle of a multi-byte sequence.
      if (fread(headFlag, 1, sizeof(headFlag), fpr) == sizeof(headFlag) &&
          fgets(lineBuf, sizeof(lineBuf), fpr) != NULL)
      {
        wchar_t* wstr = UTF8ToUnicodeBegin(lineBuf);
        char* str = (wstr != NULL) ? UnicodeToUTF8Begin(wstr) : NULL;

        if (str != NULL)
        {
          const size_t len = strlen(str);
          if (fwrite(headFlag, 1, sizeof(headFlag), fpw) == sizeof(headFlag) &&
              fwrite(str, 1, len, fpw) == len)
          {
            exitCode = 0;
          }
        }

        UnicodeToUTF8End(str);
        UTF8ToUnicodeEnd(wstr);
      }

      fclose(fpr);
      // fclose flushes buffered output, so a failure here is a failed write.
      if (fclose(fpw) != 0)
      {
        exitCode = 1;
      }

      system("pause");
      return exitCode;
    }

    下面是我用C++写的一个转换类,以及一个使用它的测试程序。转换类负责UTF-8编码的string与UTF-16编码的wstring之间的相互转换;测试程序逐行读取UTF-8文件,转换为wstring进行处理,然后再转换为UTF-8编码的字符串,最后写回文件中。

     1 #ifndef CHARACTERCONVERT_H_
     2 #define CHARACTERCONVERT_H_
     3 
     4 #include <string>
     5 namespace MyLIB
     6 {
     7 
     8     class CharacterConvert
     9     {
    10     public:
    11         static void ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16);
    12         static void ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8);
    13     private:
    14         CharacterConvert(void);
    15         ~CharacterConvert(void);
    16     };
    17 
    18 }
    19 #endif
     1 #include "StdAfx.h"
     2 #include "CharacterConvert.h"
     3 #include <Windows.h>
     4 
     5 
     6 using namespace MyLIB;
     7 
     8 void CharacterConvert::ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16)
     9 {
    10     int wLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
    11     wchar_t* wBuf = new wchar_t[wLen+1];
    12     if(wBuf==NULL)
    13     {
    14         return;
    15     }
    16     MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wBuf, wLen);
    17     strUtf16.assign(wBuf);
    18     if(wBuf!=NULL)
    19     {
    20         delete[] wBuf;
    21         wBuf = NULL;
    22     }
    23 }
    24 
    25 void CharacterConvert::ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8)
    26 {
    27     int len = WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,NULL,0,0,0);
    28     char *buf = new char[len+1];
    29     if(buf==NULL)
    30     {
    31         return;
    32     }
    33     WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,buf,len,NULL,NULL);
    34     strUtf8.assign(buf);
    35     if(buf!=NULL)
    36     {
    37         delete[] buf;
    38         buf=NULL;
    39     }
    40 }
     1 // STLTest.cpp : 
    2 // 3 4 #include "stdafx.h" 5 #include <iostream> 6 #include <fstream> 7 #include <string> 8 #include <algorithm> 9 #include "CharacterConvert.h" 10 using namespace std; 11 12 13 14 int _tmain(int argc, _TCHAR* argv[]) 15 { 16 string input; 17 wstring output; 18 string utf8; 19 ifstream fin("testUTF8.txt",ios_base::in|ios_base::binary); 20 if(!fin.is_open()) 21 { 22 return -1; 23 } 24 ofstream fout("UTF8Output.txt",ios_base::out|ios_base::binary); 25 if(!fout.is_open()) 26 { 27 return -1; 28 } 29 30 while(getline(fin,input)) 31 { 32 MyLIB::CharacterConvert::ConvertUTF8ToUnicode(input,output); 33 MyLIB::CharacterConvert::ConvertUnicodeToUTF8(output,utf8); 34 fout << utf8 << endl; 35 } 36 37 return 0; 38 }
  • 相关阅读:
    VMware虚拟机中调整Linux分区大小手记(转发)
    Linux下查看文件和文件夹大小的df和du命令
    Hadoop 安装 (4) SSH无密码验证配置
    Hadoop 安装(3) JDK 的安装
    Hadoop安装(2)安装hadoop 前的centos 设置
    Hadoop 安装大纲
    Hadoop 安装(1) CENTOS 安装与配置
    WLW 截屏插件
    查看Myeclipse中集成的Eclipse的版本号
    Quartz关闭Tomcat时异常:The web application [/****] appears to have started a thread named [startQuertz_Worker-1] but has
  • 原文地址:https://www.cnblogs.com/elitiwin/p/3965308.html
Copyright © 2011-2022 走看看