zoukankan      html  css  js  c++  java
  • 将各种编码格式的文本文件转换为ANSI格式

    日常工作中,我们可能会遇到处理不同格式编码的文本文件的需求,这个问题如果处理不好,就会有中文乱码等棘手的问题。

    以下这篇文章写得很不错:

    阮一峰:字符编码笔记:ASCII,Unicode和UTF-8 
    http://www.ruanyifeng.com/blog/2007/10/ascii_unicode_and_utf-8.html

    理解这些编码知识后,我们就可以编写C++代码来完成任务了。下面的函数会把带BOM的UTF-8、UTF-16 Big-endian、UTF-16 Little-endian(即代码中所称的UNICODE Big-endian / UNICODE Little-endian)这三种编码格式的文本文件转换成ANSI文本文件,转换结果会直接覆盖原文件;没有BOM的文件被视为ANSI,不做任何修改。参数为文本文件路径。只用于Windows平台。

      1 /*
      2  * 将路径fpath所指的文件从各种编码格式,转换为ANSI格式
      3  *
      4  * Copyright (c) 2013 赵子清, All rights reserved.
      5  *
      6  */
      7 
      8 #define CODE_FORMAT_ANSI        1   /* default: no BOM matched; ConvertFormat leaves such files untouched */
      9 #define CODE_FORMAT_UTF8        2   /* chosen when the file starts with the bytes EF BB BF */
     10 #define CODE_FORMAT_UNICODE_LE  3   /* chosen when the file starts with the bytes FF FE */
     11 #define CODE_FORMAT_UNICODE_BE  4   /* chosen when the file starts with the bytes FE FF; code units are byte-swapped before conversion */
     12 
     13 typedef int ErrorCode;   /* status value returned by ConvertFormat */
     14 #define    ERR_OK                  0   /* returned after a conversion and also when no conversion was needed */
     15 #define    ERR_FILE_OPEN_FAILED    1001   /* returned when fopen() on the given path yields NULL */
     16 
     17 #define SWAP16(x) \
     18     ((((x) & 0x00ff) << 8) | \
     19     (((x) & 0xff00) >> 8) )
     20 
     21 #define SWAP32(x) \
     22     ((((x) & 0x000000ff) << 24) | \
     23     (((x) & 0x0000ff00) <<  8) | \
     24     (((x) & 0x00ff0000) >>  8) | \
     25     (((x) & 0xff000000) >> 24) )
     26 
     27 #define SAFE_DELETE(x) if((x)!=0) {delete[] (x); (x) = 0; }
     28 
     29 ErrorCode  ConvertFormat(const char* fpath)
     30 {
     31 #ifdef _MSC_VER
     32     assert(fpath != 0);
     33 
     34     FILE* fp = ::fopen(fpath, "rb");
     35     if(fp == NULL)
     36         return ERR_FILE_OPEN_FAILED;
     37 
     38     int fmtFlag;
     39     int fmt = CODE_FORMAT_ANSI;
     40     ::fread(&fmtFlag, sizeof(int), 1, fp);
     41     ::fclose(fp);
     42 
     43     fmtFlag = SWAP32(fmtFlag);
     44     if((fmtFlag & 0xffffff00) == 0xefbbbf00)
     45         fmt = CODE_FORMAT_UTF8;
     46     else if((fmtFlag & 0xffff0000) == 0xfffe0000)
     47         fmt = CODE_FORMAT_UNICODE_LE;
     48     else if((fmtFlag & 0xffff0000) == 0xfeff0000)
     49         fmt = CODE_FORMAT_UNICODE_BE;
     50 
     51 
     52     if(fmt == CODE_FORMAT_ANSI)
     53         return ERR_OK;
     54 
     55     fp = ::fopen(fpath, "rb");
     56     char* txt = 0;
     57     wchar_t* wtxt = 0;
     58     long flen = 0L;
     59     ::fseek(fp, 0L, SEEK_END);
     60     flen = ftell(fp);
     61     ::rewind(fp);
     62     if(fmt == CODE_FORMAT_UTF8)
     63     {
     64         txt = new char[flen+1];
     65         ::fread(txt, 1, flen, fp);
     66         txt[flen] = '\0';
     67     }
     68     else if(fmt == CODE_FORMAT_UNICODE_LE || fmt == CODE_FORMAT_UNICODE_BE)
     69     {
     70         wtxt = new wchar_t[flen/2 +1];
     71         ::fread(wtxt, 2, flen/2, fp);
     72         if(fmt == CODE_FORMAT_UNICODE_BE)
     73         {
     74             for(int i=0; i < flen/2; i++)
     75                 wtxt[i] = SWAP16(wtxt[i]);
     76         }
     77         wtxt[flen/2] = L'\0';
     78     }
     79     
     80     ::fclose(fp);
     81 
     82     int nLen;
     83     wchar_t* pwstr = 0;
     84     char* pstr = 0;
     85     switch (fmt)
     86     {
     87     case CODE_FORMAT_UTF8:
     88         nLen = ::MultiByteToWideChar(CP_UTF8, 0, txt+3, -1, NULL, 0);
     89         pwstr = new wchar_t[nLen+1];
     90         nLen = ::MultiByteToWideChar(CP_UTF8, 0, txt+3, -1, pwstr, nLen);
     91         nLen = ::WideCharToMultiByte(CP_ACP, 0, pwstr, -1, NULL, 0, NULL, NULL);
     92         pstr = new char[nLen];
     93         ::memset(pstr, 0, nLen);
     94         nLen = ::WideCharToMultiByte(CP_ACP, 0, pwstr, -1, pstr, 
     95                                 nLen, NULL, NULL);
     96         break;
     97     case CODE_FORMAT_UNICODE_LE:
     98     case CODE_FORMAT_UNICODE_BE:
     99         nLen = ::WideCharToMultiByte(CP_ACP, 0, wtxt+1, -1, NULL, 0, NULL, NULL);
    100         pstr = new char[nLen];
    101         ::memset(pstr, 0, nLen);
    102         nLen = ::WideCharToMultiByte(CP_ACP, 0, wtxt+1, -1, pstr, 
    103                                         nLen, NULL, NULL);
    104         break;
    105     default:
    106         break;
    107     }
    108 
    109     fp = ::fopen(fpath, "wb");
    110     ::fwrite(pstr, 1, nLen-1, fp);
    111     ::fclose(fp);
    112 
    113     SAFE_DELETE(txt);
    114     SAFE_DELETE(wtxt);
    115     SAFE_DELETE(pstr);
    116     SAFE_DELETE(pwstr);
    117 
    118 #endif
    119 
    120     return ERR_OK;
    121 }
  • 相关阅读:
    C#调用WinAPI(转)
    C++升级到C#,内存数据读取问题
    锦里未成行
    创业用人九招成功法则
    彩霞满天
    特别提醒: 7种不良习惯直接影响你晋升!
    生意大展示:49种简易创业方法大比拼
    阴阳天
    受益无穷的28条职场语录
    哈佛:创业者需具备的素质及培养方法
  • 原文地址:https://www.cnblogs.com/zzqcn/p/3043730.html
Copyright © 2011-2022 走看看