zoukankan      html  css  js  c++  java
  • VC++ 读取UTF-8和ANSI编码文件

     判断是否是UTF-8文件:

    // Decides whether a byte buffer is well-formed UTF-8.
    //
    // pBuffer: start of the bytes to inspect (not modified).
    // size:    number of bytes available at pBuffer.
    //
    // Returns true when every complete sequence in the buffer is well-formed
    // UTF-8 (1 to 4 bytes, no overlong forms, no surrogates, nothing above
    // U+10FFFF). An empty buffer (size <= 0) is reported as valid; a NULL
    // buffer with a positive size is reported as invalid.
    //
    // A multi-byte sequence that is cut off by the end of the buffer is
    // tolerated: scanning stops there and the result is still true. This lets
    // the function be used on a prefix of a larger stream.
    bool IsUTF8Text(const void* pBuffer, long size)
    {
        if (size <= 0)
        {
            return true;
        }
        if (!pBuffer)
        {
            return false;
        }

        const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
        const unsigned char* const end = cur + size;

        while (cur < end)
        {
            const unsigned char lead = *cur;

            // 0x00..0x7F: plain ASCII, one byte.
            if (lead < 0x80)
            {
                ++cur;
                continue;
            }

            int trail = 0;            // number of continuation bytes expected
            unsigned char lo = 0x80;  // allowed range of the FIRST continuation
            unsigned char hi = 0xBF;  // byte; narrowed for some lead bytes below

            if (lead < 0xC2)
            {
                // 0x80..0xBF is a stray continuation byte; 0xC0/0xC1 could only
                // start an overlong encoding of an ASCII character.
                return false;
            }
            else if (lead < 0xE0)
            {
                trail = 1;            // 2-byte sequence
            }
            else if (lead < 0xF0)
            {
                trail = 2;            // 3-byte sequence
                if (lead == 0xE0)
                {
                    lo = 0xA0;        // below this the encoding is overlong
                }
                else if (lead == 0xED)
                {
                    hi = 0x9F;        // above this is the surrogate range
                }
            }
            else if (lead < 0xF5)
            {
                trail = 3;            // 4-byte sequence
                if (lead == 0xF0)
                {
                    lo = 0x90;        // below this the encoding is overlong
                }
                else if (lead == 0xF4)
                {
                    hi = 0x8F;        // above this exceeds U+10FFFF
                }
            }
            else
            {
                // 0xF5..0xFF never appear in UTF-8.
                return false;
            }

            // Sequence runs past the end of the buffer: stop without failing.
            if (end - cur <= trail)
            {
                break;
            }

            if (cur[1] < lo || cur[1] > hi)
            {
                return false;
            }
            for (int i = 2; i <= trail; ++i)
            {
                if ((cur[i] & 0xC0) != 0x80)
                {
                    return false;
                }
            }

            cur += trail + 1;
        }

        return true;
    }
    
    // Reports whether the file named by pFileName contains UTF-8 text.
    //
    // The whole file is read into memory in binary mode and handed to
    // IsUTF8Text. Returns false when the name is NULL, the file cannot be
    // opened, its size cannot be determined, or it cannot be read completely.
    // An empty file yields whatever IsUTF8Text returns for a zero-length buffer.
    bool IsUTF8File(const char* pFileName)
    {
        if (NULL == pFileName)
        {
            return false;
        }

        FILE *f = NULL;
        if (fopen_s(&f, pFileName, "rb") != 0 || NULL == f)
        {
            return false;
        }

        // Find the file size by seeking to the end. ftell returns -1 on
        // failure, which must not be used as an allocation or read size.
        long lSize = -1;
        if (fseek(f, 0, SEEK_END) == 0)
        {
            lSize = ftell(f);
        }
        if (lSize < 0 || fseek(f, 0, SEEK_SET) != 0)
        {
            fclose(f);
            return false;
        }

        // +1 keeps the allocation non-empty for a zero-length file.
        char *pBuff = new char[static_cast<size_t>(lSize) + 1];

        // Element size 1 so the return value is the number of bytes read.
        const size_t nRead = fread(pBuff, 1, static_cast<size_t>(lSize), f);
        fclose(f);

        // A short read means the buffer is only partly filled; do not classify it.
        bool bIsUTF8 = false;
        if (nRead == static_cast<size_t>(lSize))
        {
            bIsUTF8 = IsUTF8Text(pBuff, lSize);
        }

        delete[] pBuff;
        pBuff = NULL;

        return bIsUTF8;
    }

    读取文件:

    // Reads a text file and returns its contents as a CString.
    //
    // filename: path of the file to open.
    // CodePage: code page passed to MultiByteToWideChar to decode the bytes
    //           (for example CP_UTF8 or CP_ACP).
    //
    // A leading UTF-8 byte-order mark (EF BB BF) is skipped. An empty string is
    // returned when the file cannot be opened, is empty, is too large to be
    // converted in one call, memory cannot be allocated, or the conversion
    // fails. The result ends at the first NUL character in the decoded text,
    // because it is built from a NUL-terminated wide string.
    //
    // NOTE(review): CFile::Read/GetLength may throw on I/O errors; such an
    // exception is not handled here - confirm callers expect that.
    CString GetFile(CString filename, UINT CodePage)
    {
        CString strFile = L"";
        CFile fileR;
        if (!fileR.Open(filename, CFile::modeRead | CFile::typeBinary))
        {
            return strFile;
        }

        // Zero-initialised so that a file shorter than 3 bytes never causes a
        // comparison against indeterminate bytes.
        BYTE head[3] = { 0, 0, 0 };
        const UINT headRead = fileR.Read(head, 3);
        const bool hasBom = (headRead == 3 &&
                             head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF);
        if (!hasBom)
        {
            fileR.SeekToBegin();
        }

        // Bytes left after the optional BOM.
        const ULONGLONG remaining = fileR.GetLength() - (hasBom ? 3 : 0);

        // MultiByteToWideChar takes an int byte count, so anything at or above
        // 0x7FFFFFFF cannot be converted in a single call.
        if (remaining == 0 || remaining >= 0x7FFFFFFF)
        {
            fileR.Close();
            return strFile;
        }

        const UINT toRead = static_cast<UINT>(remaining);
        char* pContent = (char*)calloc(toRead + 1, sizeof(char));
        if (pContent == NULL)
        {
            fileR.Close();
            return strFile;
        }

        // Convert only the bytes that were actually read.
        const UINT got = fileR.Read(pContent, toRead);
        fileR.Close();

        // First call asks for the required length in wide characters.
        const int n = MultiByteToWideChar(CodePage, 0, pContent, (int)got, NULL, 0);
        if (n > 0)
        {
            // calloc with n + 1 leaves a terminating NUL after the converted text.
            wchar_t* pWideChar = (wchar_t*)calloc(n + 1, sizeof(wchar_t));
            if (pWideChar != NULL)
            {
                MultiByteToWideChar(CodePage, 0, pContent, (int)got, pWideChar, n);
                strFile = CString(pWideChar);
                free(pWideChar);
            }
        }
        free(pContent);

        return strFile;
    }
  • 相关阅读:
    go语言基础学习
    VBA汇总同目录下的所有工作簿数据到另一个工作簿,并进行统计
    彻底关闭win10后台同步数据(转自技术社区)
    在WIN10上安装ESXI-Comstomer (转自技术社区)
    squid代理允许FTP访问设置
    Powershell 脚本判断制定路径下文件是否存在(来源于网络-转载)
    Python集合(set)类型的操作 (转)
    python3.5.2中文字符乱码问题解决
    Debian 中文环境设置
    Python 列表推导实例
  • 原文地址:https://www.cnblogs.com/2018shawn/p/12167291.html
Copyright © 2011-2022 走看看