zoukankan      html  css  js  c++  java
  • 利用 libiconv 实现汉字编码 UTF-8 格式和 GBK 格式的相互转换

    参考文章:http://jimmee.iteye.com/blog/2174693

    关于 Windows 上编译 libiconv 库,请参见:http://www.cnblogs.com/tangxin-blog/p/5608751.html

      1 #include <stdio.h>
      2 #include <string.h>
      3 #include <stdint.h>
      4 #include <stdlib.h>
      5 #include "iconv.h"
      6 
      7 #define MAX_BUF_SIZE 1024
      8 
      /**
       * Convert a byte buffer from one character set to another with iconv.
       *
       * @param from_charset  iconv name of the source encoding (e.g. "utf-8")
       * @param to_charset    iconv name of the target encoding (e.g. "gbk")
       * @param inbuf         bytes to convert
       * @param inlen         number of bytes in inbuf
       * @param outbuf        receives the converted bytes; it is zero-filled once the
       *                      converter has been opened and is NUL-terminated on return
       * @param outlen        capacity of outbuf in bytes, terminator included
       * @return 0 on success; -1 when an argument is unusable, the conversion is
       *         not available, the input is invalid, or the output does not fit
       */
      int code_convert(char *from_charset, char *to_charset, char *inbuf, size_t inlen,
          char *outbuf, size_t outlen) {
          iconv_t cd;
          char *in_ptr = inbuf;
          char *out_ptr = outbuf;
          size_t in_left = inlen;
          size_t out_left;
          int rc = 0;

          if (from_charset == NULL || to_charset == NULL || inbuf == NULL ||
              outbuf == NULL || outlen == 0) {
              return -1;
          }

          /* iconv_open reports failure as (iconv_t)-1, not as a null descriptor. */
          cd = iconv_open(to_charset, from_charset);
          if (cd == (iconv_t)-1) {
              return -1;
          }

          memset(outbuf, 0, outlen);
          /* Keep one byte back so the result can always be terminated. */
          out_left = outlen - 1;

          if (iconv(cd, &in_ptr, &in_left, &out_ptr, &out_left) == (size_t)-1) {
              rc = -1;
          } else if (iconv(cd, NULL, NULL, &out_ptr, &out_left) == (size_t)-1) {
              /* The NULL-input call emits any pending shift sequence of a
               * stateful target encoding; it failed here. */
              rc = -1;
          }

          /* Close on every path so a failed conversion does not leak cd. */
          iconv_close(cd);

          /* out_ptr is at most outbuf + outlen - 1, so this write is in bounds. */
          *out_ptr = '\0';

          return rc;
      }
     26 
     /*
      * Convert inlen bytes of inbuf from "utf-8" to "gbk" into outbuf (capacity
      * outlen) by delegating to code_convert; returns code_convert's result.
      */
     int utf8_to_gbk(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
         int status;

         status = code_convert("utf-8", "gbk", inbuf, inlen, outbuf, outlen);
         return status;
     }
     30 
     /*
      * Convert inlen bytes of inbuf from "gbk" to "utf-8" into outbuf (capacity
      * outlen) by delegating to code_convert; returns code_convert's result.
      */
     int gbk_to_utf8(char *inbuf, size_t inlen, char *outbuf, size_t outlen) {
         int status;

         status = code_convert("gbk", "utf-8", inbuf, inlen, outbuf, outlen);
         return status;
     }
     34 
     /**
      * Read the whole file file_name into buf as a NUL-terminated string.
      *
      * @param buf           destination buffer
      * @param max_buf_size  capacity of buf in bytes; the file must be strictly
      *                      smaller so that the terminator fits
      * @param file_name     path of the file, opened in binary mode
      *
      * Does not return on failure: prints a message to stderr and exits with
      * status 1 (cannot open / cannot size / too large) or 3 (short read).
      */
     void read_file(char buf[], const int32_t max_buf_size, const char *file_name)
     {
         FILE *fp = NULL;
         long file_size;
         size_t bytes_read;

     #ifdef _MSC_VER
         if (fopen_s(&fp, file_name, "rb") != 0) {
             fp = NULL;
         }
     #else
         fp = fopen(file_name, "rb");
     #endif
         if (fp == NULL) { fputs("File error\n", stderr); exit(1); }

         /* Obtain the file size; ftell returns -1 on failure, which must not
          * reach fread as a huge unsigned count. */
         file_size = -1;
         if (fseek(fp, 0, SEEK_END) == 0) {
             file_size = ftell(fp);
         }
         if (file_size < 0) { fputs("File error\n", stderr); fclose(fp); exit(1); }
         rewind(fp);

         if (file_size >= max_buf_size) { fputs("file too large\n", stderr); fclose(fp); exit(1); }

         bytes_read = fread(buf, 1, (size_t)file_size, fp);
         if (bytes_read != (size_t)file_size) { fputs("Reading error\n", stderr); fclose(fp); exit(3); }

         /* Terminate explicitly: the buffer may still hold bytes from an
          * earlier, longer read. */
         buf[file_size] = '\0';
         fclose(fp);
     }
     51 
     /*
      * Return 1 when (lead, trail) is a GBK double-byte Chinese character, else 0.
      * Checked per byte against the three hanzi areas of GBK:
      *   GBK/2: lead B0-F7, trail A1-FE
      *   GBK/3: lead 81-A0, trail 40-FE except 7F
      *   GBK/4: lead AA-FE, trail 40-A0 except 7F
      */
     static int gbk_is_hanzi(unsigned int lead, unsigned int trail)
     {
         if (lead >= 0xB0 && lead <= 0xF7 && trail >= 0xA1 && trail <= 0xFE)
         {
             return 1;
         }
         if (trail < 0x40 || trail == 0x7F)
         {
             return 0;
         }
         if (lead >= 0x81 && lead <= 0xA0 && trail <= 0xFE)
         {
             return 1;
         }
         if (lead >= 0xAA && lead <= 0xFE && trail <= 0xA0)
         {
             return 1;
         }
         return 0;
     }

     /**
      * Split the GBK-encoded string str into characters and print one line per
      * character to stdout.
      *
      * A byte below 0x80, or a lone high byte at the very end of the string, is
      * printed as "<char> >> no". Any other byte is taken as the lead byte of a
      * two-byte character and printed as "<char> >> 0x<code> yes|no", where
      * yes/no tells whether the code is a Chinese character (simplified or
      * traditional).
      *
      * @param str  NUL-terminated GBK text
      */
     void GetToken(const char *str)
     {
         size_t len = strlen(str);
         size_t i;
         char cstr[3];

         for (i = 0; i < len; ++i)
         {
             /* Read bytes as unsigned so the logic does not depend on whether
              * plain char is signed on this platform. */
             unsigned int lead = (unsigned char)str[i];
             unsigned int trail;
             unsigned int code;

             if (lead < 0x80 || i + 1 == len)
             {
                 printf("%c >> no\n", str[i]);   /* ASCII character */
                 continue;
             }

             /* Compute the code; the trail byte may itself be below 0x80. */
             trail = (unsigned char)str[i + 1];
             code = (lead << 8) | trail;

             /* Extract the two-byte character as a printable string. */
             cstr[0] = str[i];
             cstr[1] = str[i + 1];
             cstr[2] = 0;
             i++;

             printf("%s >> 0x%x", cstr, code);
             if (gbk_is_hanzi(lead, trail))
             {
                 printf(" yes\n");
             }
             else
             {
                 printf(" no\n");
             }
         }
     }
     91 
     92 int main(int argc, char *argv[])
     93 {
     94     char in_buf[MAX_BUF_SIZE] = { 0 }, out_buf[MAX_BUF_SIZE] = { 0 };
     95     read_file(in_buf, MAX_BUF_SIZE, "chinese_gbk.txt");
     96     printf("%s
    ", in_buf);
     97     GetToken(in_buf);
     98     read_file(in_buf, MAX_BUF_SIZE, "chinese_utf8.txt");
     99     printf("%s
    ", in_buf);
    100     GetToken(in_buf);
    101     utf8_to_gbk(in_buf, strlen(in_buf), out_buf, MAX_BUF_SIZE);
    102     printf("%s
    ", out_buf);
    103     GetToken(out_buf);
    104     getchar();
    105     return 0;
    106 }

     完整工程demo:http://download.csdn.net/detail/tangxin19930330/9557218

  • 相关阅读:
    架构阅读笔记4
    python读取docx内容
    python转换doc为docx
    使用Navicat连接oracle问题及解决
    扩充虚拟机磁盘
    虚拟机无法打开内核
    六个常见属性场景
    架构阅读笔记3
    架构学习
    PHP中的加密方式有如下几种
  • 原文地址:https://www.cnblogs.com/tangxin-blog/p/5610044.html
Copyright © 2011-2022 走看看