zoukankan      html  css  js  c++  java
  • C++ 读写 UTF-8 with BOM 文本文件

    FileHelper.h
    #pragma once
    #include <iostream>
    #include <fstream>
    #include <string>
    #include <string.h>
    using namespace std;
    
    #ifdef _WIN32
    #include <Windows.h>
    #endif
    
    // Encoding categories reported by FileHelper::GetTextFileType.
    //   TextFileType_ANSI    - no recognised byte-order mark (the fallback value)
    //   TextFileType_UNICODE - file starts with the bytes FF FE
    //   TextFileType_UTF8    - file starts with the bytes EF BB BF
    enum TextFileType
    {
        TextFileType_ANSI    = 0,
        TextFileType_UNICODE = 1,
        TextFileType_UTF8    = 2
    };

    // Upper-case alias used throughout the FileHelper interface.
    typedef TextFileType TEXTFILETYPE;
    
    // Static-only helper for reading and writing text files that carry a
    // UTF-8 byte-order mark. The class declares no data members.
    class FileHelper
    {
    public:
        // Classifies the file by its leading byte-order mark.
        static TEXTFILETYPE GetTextFileType(const std::string& strFileName);

        // Returns the contents of a UTF-8-with-BOM file, without the BOM.
        static std::string ReadTextFile(const std::string& strFileName);

        // Writes a UTF-8 BOM followed by strContent to the file.
        static void WriteTextFile(const std::string& strFileName, const std::string& strContent);

    #ifdef _WIN32
        // Windows only: converts a UTF-8 C string to the ANSI code page.
        static std::string UTF8ToGB(const char* str);
    #endif
    };

    FileHelper.cpp
    #include "FileHelper.h"
    
    // Classifies a file by the byte-order mark at its start.
    //
    // strFileName: path of the file to inspect.
    //
    // Returns:
    //   TextFileType_UTF8    if the file begins with EF BB BF,
    //   TextFileType_UNICODE if the file begins with FF FE,
    //   TextFileType_ANSI    otherwise, including when the file cannot be
    //                        opened or is too short to hold a mark.
    TEXTFILETYPE FileHelper::GetTextFileType(const std::string & strFileName)
    {
        // Binary mode so the leading bytes are read exactly as stored, with no
        // platform text-mode translation applied.
        std::ifstream file(strFileName.c_str(), std::ios_base::in | std::ios_base::binary);
        if (!file.is_open())
        {
            return TextFileType_ANSI;
        }

        unsigned char bom[3] = { 0, 0, 0 };
        file.read(reinterpret_cast<char*>(bom), sizeof(bom));

        // A short file sets failbit on read(); gcount() still reports how many
        // bytes were actually delivered, so only those bytes are trusted.
        const std::streamsize bytesRead = file.gcount();

        if (bytesRead >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
        {
            return TextFileType_UTF8;
        }
        if (bytesRead >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
        {
            return TextFileType_UNICODE;
        }
        return TextFileType_ANSI;
        // The stream is closed by its destructor.
    }
    
    string FileHelper::ReadTextFile(const std::string & strFileName)
    {
        TEXTFILETYPE fileType = FileHelper::GetTextFileType(strFileName);
        if (fileType != TextFileType_UTF8)
        {
            cout << "UTF-8 file needed...!" << endl;
            return "";
        }
    
        FILE * fp = NULL;
        fp = fopen(strFileName.c_str(), "rb");
        fseek(fp, 0, SEEK_END);
        size_t size = ftell(fp);
        fseek(fp, 0, SEEK_SET);
    
        std::string result;
    
        if (fp != NULL)
        {
            // UTF-8 file should offset 3 byte from start position.
            fseek(fp, sizeof(char) * 3, 0);
            int buferSize = (int)size - 3;
            char* szBuf = new char[buferSize + 1];
            memset(szBuf, 0, sizeof(char) * (buferSize + 1));
            fread(szBuf, sizeof(char), buferSize, fp);
            result.append(szBuf);
            delete szBuf;
        }
    
        fclose(fp);
    
    #ifdef _WIN32
        result = UTF8ToGB(result.c_str());
    #endif
    
        return result;
    }
    
    // Creates (or truncates) a file and writes the UTF-8 byte-order mark
    // followed by strContent.
    //
    // strFileName: path of the file to write.
    // strContent:  bytes written verbatim after the BOM. No encoding
    //              conversion is performed here, so the caller is responsible
    //              for supplying UTF-8 text.
    //
    // If the file cannot be opened, an error is reported on standard error and
    // nothing is written.
    void FileHelper::WriteTextFile(const std::string & strFileName, const std::string & strContent)
    {
        // unsigned char: the values 0xEF/0xBB/0xBF do not fit a signed char,
        // and brace-initialising a char array with them is a narrowing error.
        static const unsigned char utf8Bom[] = { 0xEF, 0xBB, 0xBF };

        std::ofstream file(strFileName.c_str(),
                           std::ios_base::out | std::ios_base::binary | std::ios_base::trunc);
        if (!file.is_open())
        {
            std::cerr << "Failed to open file for writing: " << strFileName << std::endl;
            return;
        }

        file.write(reinterpret_cast<const char*>(utf8Bom), sizeof(utf8Bom));
        file.write(strContent.data(), static_cast<std::streamsize>(strContent.size()));
        // The stream is flushed and closed by its destructor.
    }
    
    #ifdef _WIN32
    // Converts a NUL-terminated UTF-8 string to the code page selected by
    // CP_ACP, going through UTF-16 as the intermediate form.
    //
    // str: NUL-terminated UTF-8 text; may be NULL.
    //
    // Returns the converted text, or an empty string if str is NULL or either
    // conversion step fails.
    //
    // NOTE(review): the name suggests GB/GBK output, but the target is CP_ACP;
    // the result is GBK only where that is the system ANSI code page - confirm
    // this is the intent.
    string FileHelper::UTF8ToGB(const char* str)
    {
        if (str == NULL)
        {
            return std::string();
        }

        // First call: ask for the required size in wide characters. With a
        // source length of -1 the count includes the terminating NUL.
        const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
        if (wideLen <= 0)
        {
            return std::string();
        }

        std::wstring wide(static_cast<std::wstring::size_type>(wideLen), L'\0');
        if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
        {
            return std::string();
        }

        // Same two-step pattern for the wide -> multibyte direction.
        const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
        if (ansiLen <= 0)
        {
            return std::string();
        }

        std::string result(static_cast<std::string::size_type>(ansiLen), '\0');
        if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &result[0], ansiLen, NULL, NULL) <= 0)
        {
            return std::string();
        }

        // Drop the terminating NUL that the API counted and wrote.
        result.resize(static_cast<std::string::size_type>(ansiLen - 1));
        return result;
    }
    #endif
    桂棹兮兰桨,击空明兮溯流光。
  • 相关阅读:
    Leetcode 538. Convert BST to Greater Tree
    Leetcode 530. Minimum Absolute Difference in BST
    Leetcode 501. Find Mode in Binary Search Tree
    Leetcode 437. Path Sum III
    Leetcode 404. Sum of Left Leaves
    Leetcode 257. Binary Tree Paths
    Leetcode 235. Lowest Common Ancestor of a Binary Search Tree
    Leetcode 226. Invert Binary Tree
    Leetcode 112. Path Sum
    Leetcode 111. Minimum Depth of Binary Tree
  • 原文地址:https://www.cnblogs.com/nanfei/p/14701194.html
Copyright © 2011-2022 走看看