zoukankan      html  css  js  c++  java
  • iconv编码转换

    当前测试pc的编码为:UTF-8

    /* Create a conversion descriptor that converts text from `fromcode` to `tocode`. */
    iconv_t iconv_open(const char *tocode, const char *fromcode);
    /* Convert bytes from *inbuf into *outbuf using descriptor `cd`.
     * The call updates *inbuf, *outbuf and both byte counters, so pass copies
     * of any buffer pointers that are still needed afterwards. */
    size_t iconv(iconv_t cd,
                        char **inbuf, size_t *inbytesleft,
                        char **outbuf, size_t *outbytesleft);
    /* Release a conversion descriptor obtained from iconv_open(). */
    int iconv_close(iconv_t cd);

    utf-8转gb2312

    int i = 0;
    char *inbuf = "王浡";
    size_t inbytesleft = strlen(inbuf);
    size_t outbytesleft = 3 * inbytesleft;
    char *outbuf = (char*)malloc(outbytesleft);
    memset(outbuf, 0, outbytesleft);
    size_t bytes = outbytesleft;
    
    char *ib = inbuf;
    char *ob = outbuf;
    
    //iconv_t cd = iconv_open("gbk", "utf-8");
    iconv_t cd = iconv_open("gb2312", "utf-8");
    
    int ret = iconv(cd, &ib, &inbytesleft, &ob, &outbytesleft);
    if(ret < 0)
    {
        perror("iconv");
        return -1;
    }
    
    bytes -= outbytesleft;
    
    printf("inbuf: %s
    ", inbuf);
    printf("inbytesleft: %zu
    ", strlen(inbuf));
    printf("outbuf: ");
    for(i = 0; i < bytes; i++)
    {
        printf("0x%x ", *(unsigned char*)&outbuf[i]);
    }
    printf("
    ");
    printf("outbytesleft: %zu
    ", outbytesleft);
    
    iconv_close(cd);
    free(outbuf);
    # ./a.out 
    iconv: Invalid or incomplete multibyte or wide character

    GB2312编码适用于汉字处理、汉字通信等系统之间的信息交换,通行于中国大陆
    GBK编码支持国际标准和国家标准中的全部中日韩汉字
    “浡”不在GB2312字符集内(它的GBK编码为0x9BC2,首字节小于0xA1),所以上面的转换会失败;将gb2312换成gbk即可

    # ./a.out     
    inbuf: 王浡
    inbytesleft: 6
    outbuf: 0xcd, 0xf5, 0x9b, 0xc2, 
    outbytesleft: 14

    注意:iconv会修改传入的inbuf、outbuf指针(使其指向已处理数据之后的位置),同时更新inbytesleft、outbytesleft;因此应像上面的ib、ob那样传入指针的副本,保留原始指针用于打印和free

    unicode转utf-8

    int i = 0;
    char inbuf[] = {0x8b, 0x73, 0x61, 0x6d};
    size_t inbytesleft = sizeof(inbuf);
    size_t outbytesleft = 3 * inbytesleft;
    char *outbuf = (char*)malloc(outbytesleft);
    memset(outbuf, 0, outbytesleft);
    size_t bytes = outbytesleft;
    
    char *ib = inbuf;
    char *ob = outbuf;
    
    iconv_t cd = iconv_open("utf-8", "unicode");
    
    int ret = iconv(cd, &ib, &inbytesleft, &ob, &outbytesleft);
    if(ret < 0)
    {
        perror("iconv");
        return -1;
    }
    
    bytes -= outbytesleft;
    
    printf("inbytesleft: %zu
    ", strlen(inbuf));
    printf("outbuf: %s
    	", outbuf);
    for(i = 0; i < bytes; i++)
    {
        printf("0x%x ", *(unsigned char*)&outbuf[i]);
    }
    printf("
    ");
    printf("outbytesleft: %zu
    ", outbytesleft);
    
    iconv_close(cd);
    free(outbuf);
    # ./a.out
    inbytesleft: 4
    outbuf: 王浡
            0xe7, 0x8e, 0x8b, 0xe6, 0xb5, 0xa1, 
    outbytesleft: 6

    转换过程

    unicode(16进制 2个字节)      utf-8(2进制 3个字节)
    0800 - FFFF                 1110xxxx 10xxxxxx 10xxxxxx
    
    王:11100111 10001110 10001011    --> 01110011 10001011   --> 0x73 0x8b
    浡:11100110 10110101 10100001    --> 01101101 01100001   --> 0x6d 0x61

    (上例的inbuf按小端序存放,低字节在前:0x8b 0x73、0x61 0x6d)

    命令

    Usage: iconv [OPTION...] [FILE...]

    -f:输入编码
    -t:输出编码
    -l:列举所有已知的字符集
    -c:输出中忽略无效字符
    -o:输出文件
    -s:关闭警告

    # iconv -f GB2312 -t utf-8 text.gb2312 -o text.utf8

    官网http://www.gnu.org/savannah-checkouts/gnu/libiconv/

  • 相关阅读:
    简单排序(冒泡、选择、插入)
    配置Tomcat数据源
    使用spring的邮件发送功能
    安装Tomcat
    Spring-MongoDB简单操作
    cisco ASA ios升级或恢复
    ASA 用TFTP 备份配置方法
    DELL MD3200i存储控制器解锁方法
    IBM ServerGuide引导盘全系列下载网址
    ASA5520远程配置 telnet,ssh
  • 原文地址:https://www.cnblogs.com/zhangxuechao/p/11709688.html
Copyright © 2011-2022 走看看