zoukankan      html  css  js  c++  java
  • unicode字符和多字节字符的相互转换接口

    作者:朱金灿

    来源:http://blog.csdn.net/clever101

     

               发现开源代码的可利用资源真多,从sqlite3的源码中抠出了几个字符转换接口,稍微改造了一下,发现还挺好用的。下面是实现代码:


    /*
    ** Translate a NUL-terminated UTF-8 string into a Windows wide-character
    ** (WCHAR) string.
    **
    ** The result is stored in a buffer obtained from malloc(); the caller
    ** releases it with free().  Returns 0 if the buffer cannot be allocated
    ** or if the conversion itself reports failure.
    */
    static WCHAR *utf8ToUnicode(const char *zFilename){

    	/* First pass with no output buffer: ask how many WCHARs are required. */
    	const int cchNeeded = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, NULL, 0);

    	WCHAR *zWide = static_cast<WCHAR *>(malloc(cchNeeded*sizeof(WCHAR)));
    	if( zWide!=0 ){
    		/* Second pass: convert into the freshly allocated buffer. */
    		const int cchWritten = MultiByteToWideChar(CP_UTF8, 0, zFilename, -1, zWide, cchNeeded);
    		if( cchWritten==0 ){
    			/* Conversion failed: do not hand back a half-filled buffer. */
    			free(zWide);
    			zWide = 0;
    		}
    	}
    	return zWide;

    }
    
    /*
    ** Translate a NUL-terminated Windows wide-character (WCHAR) string into
    ** UTF-8.
    **
    ** The result is stored in a buffer obtained from malloc(); the caller
    ** releases it with free().  Returns 0 if the buffer cannot be allocated
    ** or if the conversion itself reports failure.
    */
    static char *unicodeToUtf8(const WCHAR *zWideFilename){

    	/* First pass with no output buffer: ask how many bytes are required. */
    	const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1, 0, 0, 0, 0);

    	char *zUtf8 = static_cast<char*>(malloc( cbNeeded ));
    	if( zUtf8==0 ){
    		return 0;
    	}

    	/* Second pass: convert into the freshly allocated buffer. */
    	const int cbWritten = WideCharToMultiByte(CP_UTF8, 0, zWideFilename, -1,
    		zUtf8, cbNeeded, 0, 0);
    	if( cbWritten!=0 ){
    		return zUtf8;
    	}

    	/* Conversion failed: release the buffer and report the error. */
    	free(zUtf8);
    	return 0;

    }
    
    /*
    ** Convert a NUL-terminated multibyte string to a Windows wide-character
    ** (WCHAR) string.  The source code page follows the current file-API
    ** setting: CP_ACP when AreFileApisANSI() is true, CP_OEMCP otherwise.
    **
    ** The result is stored in a buffer obtained from malloc(); the caller
    ** releases it with free().  Returns 0 if the required size cannot be
    ** determined, the buffer cannot be allocated, or the conversion fails.
    */
    static WCHAR *mbcsToUnicode(const char *zFilename){

    	int nChar;
    	WCHAR *zMbcsFilename;
    	int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

    	/* Size query.  The result is a count of WCHARs, not bytes, so it must
    	** be scaled by the element size exactly once (in the malloc below). */
    	nChar = MultiByteToWideChar(codepage, 0, zFilename, -1, NULL, 0);
    	if( nChar<=0 ){

    		/* Nothing can be converted (e.g. NULL or invalid input). */
    		return 0;

    	}
    	zMbcsFilename = static_cast<WCHAR*>(malloc( nChar*sizeof(zMbcsFilename[0]) ));
    	if( zMbcsFilename==0 ){

    		return 0;

    	}
    	/* The buffer capacity is passed in WCHARs, matching the allocation. */
    	nChar = MultiByteToWideChar(codepage, 0, zFilename, -1, zMbcsFilename, nChar);
    	if( nChar==0 )
    	{
    		free(zMbcsFilename);
    		zMbcsFilename = 0;
    	}
    	return zMbcsFilename;

    }
    
    /*
    ** Translate a NUL-terminated Windows wide-character (WCHAR) string into a
    ** multibyte string.  The target code page follows the current file-API
    ** setting: CP_ACP when AreFileApisANSI() is true, CP_OEMCP otherwise.
    **
    ** The result is stored in a buffer obtained from malloc(); the caller
    ** releases it with free().  Returns 0 if the buffer cannot be allocated
    ** or if the conversion itself reports failure.
    */
    static char* unicodeToMbcs(const WCHAR* zWideFilename){

    	const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

    	/* First pass with no output buffer: ask how many bytes are required. */
    	const int cbNeeded = WideCharToMultiByte(codepage, 0, zWideFilename, -1, 0, 0, 0, 0);

    	char *zMbcs = static_cast<char*>(malloc(cbNeeded ));
    	if( zMbcs==0 ){
    		return 0;
    	}

    	/* Second pass: convert into the freshly allocated buffer. */
    	const int cbWritten = WideCharToMultiByte(codepage, 0, zWideFilename, -1,
    		zMbcs, cbNeeded, 0, 0);
    	if( cbWritten!=0 ){
    		return zMbcs;
    	}

    	/* Conversion failed: release the buffer and report the error. */
    	free(zMbcs);
    	return 0;

    }
    
    /*
    ** Convert multibyte character string to UTF-8.  Space to hold the
    ** returned string is obtained from malloc().
    */
    static char* mbcsToUtf8(const char *zFilename){
    
    	char *zFilenameUtf8;
    	WCHAR *zTmpWide;
    
    	zTmpWide = mbcsToUnicode(zFilename);
    	if( zTmpWide==0 ){
    
    		return 0;
    
    	}
    	zFilenameUtf8 = unicodeToUtf8(zTmpWide);
    	free(zTmpWide);
    	return zFilenameUtf8;
    }
    
    /*
    ** Convert UTF-8 to multibyte character string.  Space to hold the 
    ** returned string is obtained from malloc().
    */
    static char* utf8ToMbcs(const char *zFilename){
    
    	char *zFilenameMbcs;
    	WCHAR* zTmpWide;
    
    	zTmpWide = utf8ToUnicode(zFilename);
    	if( zTmpWide==0 ){
    
    		return 0;
    
    	}
    	zFilenameMbcs = unicodeToMbcs(zTmpWide);
    	free(zTmpWide);
    	return zFilenameMbcs;
    }
    
    /*
    ** Convert a NUL-terminated multibyte string to a UTF-8 std::string.
    ** The source code page follows the current file-API setting: CP_ACP when
    ** AreFileApisANSI() is true, CP_OEMCP otherwise.
    **
    ** Returns an empty string when pszMbcs is NULL or when either conversion
    ** step fails.  All temporary storage is owned by standard string objects,
    ** so nothing leaks on any return path.
    */
    std::string MbcsToUtf8( const char* pszMbcs )
    {
    	std::string str;
    	if( pszMbcs==0 )
    	{
    		return str;
    	}

    	const int codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;

    	/* Step 1: multibyte -> wide.  Query the size, then convert. */
    	const int cchWide = MultiByteToWideChar(codepage, 0, pszMbcs, -1, NULL, 0);
    	if( cchWide<=0 )
    	{
    		return str;
    	}
    	std::wstring wide(static_cast<std::wstring::size_type>(cchWide), L'\0');
    	if( MultiByteToWideChar(codepage, 0, pszMbcs, -1, &wide[0], cchWide)==0 )
    	{
    		return str;
    	}

    	/* Step 2: wide -> UTF-8.  Query the size, then convert. */
    	const int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, 0, 0, 0, 0);
    	if( cbUtf8<=0 )
    	{
    		return str;
    	}
    	std::string utf8(static_cast<std::string::size_type>(cbUtf8), '\0');
    	if( WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], cbUtf8, 0, 0)!=0 )
    	{
    		/* Copy up to the first NUL so the terminator written by the API
    		** does not become part of the returned string. */
    		str = utf8.c_str();
    	}
    	return str;
    }
        

             为了测试这些接口,我写了一个测试工程:读取一个xml文件并对其中的字符进行转换。测试工程的代码下载地址如下:

    unicode字符和多字节字符的相互转换接口及测试工程

  • 相关阅读:
    C# 字典类 Dictionary 基本用法 Mark
    SQL语句监测耗时
    jQuery Select Option 操作 删除新增
    C# DataTable 过滤重复数据
    IE8 overflow:hidden 无效问题解决方案
    动态拼接LINQ 查询条件
    解决.net中"未能创建 Mutex”异常
    创建Cookies 包含子健和无子健的创建及用法 做个笔记留着参考
    常用的一些加密算法,留着以备不时之需
    Centos7 nginx安装
  • 原文地址:https://www.cnblogs.com/lanzhi/p/6469870.html
Copyright © 2011-2022 走看看