zoukankan html css js c++ java

Windows下编码转换相关(UTF-8 & UTF-16)

一、ANSI转换为UTF-16

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <windows.h>

wchar_t* AnsiToUnicodeBegin(const char* str)
{
　　char* sz = new char[strlen(str)+1];
　　strcpy(sz, str);
　　int wLen = MultiByteToWideChar(CP_ACP, 0, sz, -1, NULL, 0);
　　wchar_t* wBuf = new wchar_t[wLen+1];
　　MultiByteToWideChar(CP_ACP, 0, sz, -1, wBuf, wLen);
　　delete[] sz;
　　return wBuf;
}

/**
 * Releases a buffer previously returned by AnsiToUnicodeBegin().
 *
 * @param wstr_chaged  Buffer to free; may be null. The pointer is passed by
 *                     value, so the caller's own copy is NOT reset and must
 *                     not be used after this call.
 */
void AnsiToUnicodeEnd(wchar_t* wstr_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required.
    delete[] wstr_chaged;
}

二、UTF-8转换为UTF-16

wchar_t* UTF8ToUnicodeBegin(const char* str)
{
　　char* sz = new char[strlen(str)+1];
　　strcpy(sz, str);
　　int wLen = MultiByteToWideChar(CP_UTF8, 0, sz, -1, NULL, 0);
　　wchar_t* wBuf = new wchar_t[wLen+1];
　　MultiByteToWideChar(CP_UTF8, 0, sz, -1, wBuf, wLen);
　　delete[] sz;
　　return wBuf;
}

/**
 * Releases a buffer previously returned by UTF8ToUnicodeBegin().
 *
 * @param wstr_chaged  Buffer to free; may be null. The pointer is passed by
 *                     value, so the caller's own copy is NOT reset and must
 *                     not be used after this call.
 */
void UTF8ToUnicodeEnd(wchar_t* wstr_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required.
    delete[] wstr_chaged;
}

三、UTF-16转换为UTF-8

char* UnicodeToUTF8Begin(const wchar_t* wstr)
{
　　wchar_t *sz = new wchar_t[wcslen(wstr)+1];
　　wcscpy(sz,wstr);
　　int len = WideCharToMultiByte(CP_UTF8,0,sz,-1,NULL,0,0,0);
　　char* buf = new char[len+1];
　　WideCharToMultiByte(CP_UTF8,0,sz,-1,buf,len,NULL,NULL);
　　delete[] sz;
　　return buf;
}

/**
 * Releases a buffer previously returned by UnicodeToUTF8Begin().
 *
 * @param str_chaged  Buffer to free; may be null. The pointer is passed by
 *                    value, so the caller's own copy is NOT reset and must
 *                    not be used after this call.
 */
void UnicodeToUTF8End(const char* str_chaged)
{
    // delete[] on a null pointer is a no-op, so no guard is required.
    delete[] str_chaged;
}

四、读写UTF-8格式文件，转换为UTF-16处理，最后转回UTF-8写入

int main()
{
　　char lineBuf[256]={0};
　　char headFlag[3];

　　FILE *fpr;
　　fpr = fopen("c:\test.txt","rb");
　　fread(headFlag,1,3,fpr);
　　fgets(lineBuf,256,fpr);
　　wchar_t* wstr = UTF8ToUnicodeBegin(lineBuf);

　　char* str = UnicodeToUTF8Begin(wstr);

　　FILE *fpw;
　　fpw = fopen("c:\outputUTF-8.txt","w");
　　fwrite(headFlag,1,3,fpw);
　　fwrite(str,1,strlen(str),fpw);

　　UnicodeToUTF8End(str);
　　UTF8ToUnicodeEnd(wstr);

　　fclose(fpr);
　　fclose(fpw);

　　system("pause");
　　return 0;
}

下面是我用C++写的一个转换类。功能是读取UTF8文件到wstring中处理，然后再转换为UTF8编码的字符串，最后写回文件中。

 #ifndef CHARACTERCONVERT_H_
 #define CHARACTERCONVERT_H_

 #include <string>

 namespace MyLIB
 {

     /// Collection of static helpers for converting text between UTF-8
     /// (std::string) and UTF-16 (std::wstring).
     class CharacterConvert
     {
     public:
         /// Converts the UTF-8 text in @p strUtf8 and stores the UTF-16
         /// result in @p strUtf16.
         static void ConvertUTF8ToUnicode(const std::string& strUtf8, std::wstring& strUtf16);

         /// Converts the UTF-16 text in @p strUtf16 and stores the UTF-8
         /// result in @p strUtf8.
         static void ConvertUnicodeToUTF8(const std::wstring& strUtf16, std::string& strUtf8);

     private:
         // Declared private so that code outside the class cannot create or
         // destroy instances; only the static functions are meant to be used.
         CharacterConvert(void);
         ~CharacterConvert(void);
     };

 }
 #endif

 1 #include "StdAfx.h"
 2 #include "CharacterConvert.h"
 3 #include <Windows.h>
 4 
 5 
 6 using namespace MyLIB;
 7 
 8 void CharacterConvert::ConvertUTF8ToUnicode(const std::string& strUtf8,std::wstring& strUtf16)
 9 {
10     int wLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0);
11     wchar_t* wBuf = new wchar_t[wLen+1];
12     if(wBuf==NULL)
13     {
14         return;
15     }
16     MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wBuf, wLen);
17     strUtf16.assign(wBuf);
18     if(wBuf!=NULL)
19     {
20         delete[] wBuf;
21         wBuf = NULL;
22     }
23 }
24 
25 void CharacterConvert::ConvertUnicodeToUTF8(const std::wstring& strUtf16,std::string& strUtf8)
26 {
27     int len = WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,NULL,0,0,0);
28     char *buf = new char[len+1];
29     if(buf==NULL)
30     {
31         return;
32     }
33     WideCharToMultiByte(CP_UTF8,0,strUtf16.c_str(),-1,buf,len,NULL,NULL);
34     strUtf8.assign(buf);
35     if(buf!=NULL)
36     {
37         delete[] buf;
38         buf=NULL;
39     }
40 }

 1 // STLTest.cpp : 
 2 //
 3 
 4 #include "stdafx.h"
 5 #include <iostream>
 6 #include <fstream>
 7 #include <string>
 8 #include <algorithm>
 9 #include "CharacterConvert.h"
10 using namespace std;
11 
12 
13 
14 int _tmain(int argc, _TCHAR* argv[])
15 {
16     string input;
17     wstring output;
18     string utf8;
19     ifstream fin("testUTF8.txt",ios_base::in|ios_base::binary);
20     if(!fin.is_open())
21     {
22         return -1;
23     }
24     ofstream fout("UTF8Output.txt",ios_base::out|ios_base::binary);
25     if(!fout.is_open())
26     {
27         return -1;
28     }
29     
30     while(getline(fin,input))
31     {
32         MyLIB::CharacterConvert::ConvertUTF8ToUnicode(input,output);
33         MyLIB::CharacterConvert::ConvertUnicodeToUTF8(output,utf8);
34         fout << utf8 << endl;
35     }
36     
37     return 0;
38 }

查看全文

相关阅读:
VMware虚拟机中调整Linux分区大小手记（转发）
Linux下查看文件和文件夹大小的df和du命令
 Hadoop 安装（4） SSH无密码验证配置
 Hadoop 安装（3） JDK 的安装
 Hadoop安装（2）安装hadoop 前的centos 设置
 Hadoop 安装大纲
 Hadoop 安装（1） CENTOS 安装与配置
 WLW 截屏插件
 查看Myeclipse中集成的Eclipse的版本号
 Quartz关闭Tomcat时异常:The web application [/****] appears to have started a thread named [startQuertz_Worker-1] but has

原文地址：https://www.cnblogs.com/elitiwin/p/3965308.html