zoukankan      html  css  js  c++  java
  • C/C++ GBK和UTF8之间的转换

    {

      

    关于GBK和UTF-8之间的转换,很多初学者会很迷茫。

    一般来说GBK和UTF-8是文字的编码方式,其对应的内码是不一样的,所以GBK和UTF-8的转换需要对内码进行一一映射,然后进行转换。

    对于一般系统上的工程,一般使用libiconv即可,但是对于嵌入式或手机操作系统,libiconv显得就有点庞大了。

    在这里提供GBK和UTF8转换以及全半角、大小写转换等函数,希望对手机开发的同学有所帮助,特别是在iOS上开发的同学。

    strnormalize.h

    strnormalize.c


    全半角、大小写以及简繁体转换的具体使用方法见下面的代码:

     #include "strnormalize.h"
     #include <stdio.h>
     #include <stdlib.h>
     #include <string.h>
     
     int main(int argc, char **argv)
     {
         str_normalize_init();
         unsigned options = SNO_TO_LOWER | SNO_TO_HALF;
         if (argc > 1) options = atoi(argv[1]);
     
         char *buffer = (char *)malloc(65536);
         memset(buffer, 0, 65536);
         while (fgets(buffer, 65536, stdin))
         {   
             str_normalize_utf8(buffer, options);
             printf("%s", buffer);
         }   
         free(buffer);
     
         return 0;
     }

    UTF-8和GBK转换使用方法如下:

    #include "strnormalize.h"
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>

    /*
     * Example driver: converts one UTF-8 literal to GBK and one GBK literal to
     * UTF-8, then prints each input next to the buffer that received its
     * conversion.
     *
     * Returns 0 on success, EXIT_FAILURE if an output buffer cannot be allocated.
     */
    int main(int argc, char **argv)
    {
        (void)argc;
        (void)argv;

        str_normalize_init();

        const char *utf8 = "我是utf-8字符!";
        /* GBK bytes for "我是GBK字符！", spelled as escapes so the data does not
         * depend on the encoding this source file is saved in. */
        const char *gbk = "\xCE\xD2\xCA\xC7" "GBK" "\xD7\xD6\xB7\xFB\xA3\xA1";

        /* unsigned int matches the length parameters of utf8_to_gbk() and
         * gbk_to_utf8(), so the addresses passed below have the exact type. */
        unsigned int utf8_len = (unsigned int)strlen(utf8);
        unsigned int gbk_len = (unsigned int)strlen(gbk);

        /* Each output buffer is sized from the input converted INTO it:
         * a GBK character (at most 2 bytes) needs at most 3 bytes of UTF-8, and
         * GBK output is never longer than its UTF-8 input, so both factors are
         * generous. The +1 leaves room for the terminating NUL. */
        unsigned int utf8buffer_len = gbk_len * 3 + 1;
        unsigned int gbkbuffer_len = utf8_len * 2 + 1;

        /* calloc returns zero-filled memory; the casts keep the example
         * compilable as C++ as well as C. */
        char *utf8buffer = (char *)calloc(utf8buffer_len, 1);
        char *gbkbuffer = (char *)calloc(gbkbuffer_len, 1);
        if (utf8buffer == NULL || gbkbuffer == NULL)
        {
            fprintf(stderr, "out of memory\n");
            free(utf8buffer);
            free(gbkbuffer);
            return EXIT_FAILURE;
        }

        /* NOTE(review): the int results are not checked because the success
         * convention is not visible here; presumably *to_len is updated to the
         * number of bytes written -- confirm against strnormalize.c. */
        utf8_to_gbk(utf8, utf8_len, &gbkbuffer, &gbkbuffer_len);
        gbk_to_utf8(gbk, gbk_len, &utf8buffer, &utf8buffer_len);

        /* %u: the lengths are unsigned int. */
        printf("utf8: %s<=>%u gbkbuffer: %s<=>%u ", utf8, utf8_len, gbkbuffer, gbkbuffer_len);
        printf("gbk: %s<=>%u utf8buffer: %s<=>%u ", gbk, gbk_len, utf8buffer, utf8buffer_len);

        free(utf8buffer);
        free(gbkbuffer);
        return 0;
    }

    strnormalize.h

    /**
     * Copyright(c) 2012-2013, All Rights Reserved.
     *
     * @file strnormalize.h
     * @details To check whether a code is a valid GBK character, test
     *     code >= 0x8000 && _pGbk2Utf16[code - 0x8000] != 0
     * @author cnangel
     * @version 1.0.0
     * @date 2012/10/09 11:44:58
     */
    
    #ifndef __STRNORMALIZE_H__
    #define __STRNORMALIZE_H__

    #ifdef __cplusplus
    extern "C" {
    #endif

    /*
     * Option bits for the `options` argument of str_normalize_gbk() and
     * str_normalize_utf8(). The values are distinct powers of two, so they can
     * be combined with bitwise OR.
     * NOTE(review): the meanings below are inferred from the names only --
     * confirm against strnormalize.c.
     */
    #define SNO_TO_LOWER        1   /* presumably: convert letters to lower case */
    #define SNO_TO_UPPER        2   /* presumably: convert letters to upper case */
    #define SNO_TO_HALF         4   /* presumably: full-width to half-width forms */
    #define SNO_TO_SIMPLIFIED   8   /* presumably: traditional to simplified Chinese */

    /*
     * Initialization entry point; takes no arguments and returns nothing.
     * NOTE(review): presumably must be called once before any other function
     * declared here -- confirm against strnormalize.c.
     */
    void str_normalize_init(void);

    /*
     * Normalize `text` according to `options` (a combination of SNO_* bits).
     * `text` is a non-const pointer and nothing is returned, so the result is
     * presumably written back into `text` in place -- TODO confirm.
     * The _gbk / _utf8 suffix presumably names the encoding `text` must be in.
     */
    void str_normalize_gbk(char *text, unsigned options);
    void str_normalize_utf8(char *text, unsigned options);

    /*
     * Convert `from_len` bytes at `from` between GBK and UTF-8.
     *
     * @param from     input bytes (not modified)
     * @param from_len number of input bytes
     * @param to       address of the output buffer pointer
     * @param to_len   address of the output buffer size
     * @return int; NOTE(review): success/failure convention is not visible in
     *         this header -- confirm against strnormalize.c.
     *
     * NOTE(review): `to` and `to_len` are passed by address, so the functions
     * may update them (e.g. store the produced length in *to_len); whether *to
     * can be reallocated is not visible here -- confirm before relying on it.
     */
    int gbk_to_utf8(const char *from, unsigned int from_len, char **to, unsigned int *to_len);
    int utf8_to_gbk(const char *from, unsigned int from_len, char **to, unsigned int *to_len);

    #ifdef __cplusplus
    }
    #endif

    #endif /* __STRNORMALIZE_H__ */

    strnormalize.c

    }

  • 相关阅读:
    BZOJ 1051: [HAOI2006]受欢迎的牛( tarjan )
    BZOJ 2208: [Jsoi2010]连通数( DFS )
    js效果-鼠标提示框
    CSS-背景
    CSS-文本
    tomcat 安装 for Mac
    CSS选择器基础
    POJ3349-Snowflake Snow Snowflakes-Hash
    数据库的三大设计范式
    HDU5120-Intersection-两个圆环相交面积
  • 原文地址:https://www.cnblogs.com/YZFHKMS-X/p/11989545.html
Copyright © 2011-2022 走看看