下面这段话转自:https://blog.csdn.net/lightlater/article/details/6326338
关于文本文件的文件头
第一 ANSI文件的文件头为空,不需要处理;
第二 UNICODE(此处指UTF-16 LE,小端序)文件的文件头为0xFF,0xFE共计两个字节,读取时需要偏移两个字节再行读取;
第三 带BOM的UTF-8文件的文件头为0xEF,0xBB,0xBF共计三个字节,读取时需要偏移三个字节后再行读取(不带BOM的UTF-8文件没有文件头,不需要偏移);
1.ansi格式txt文件
// Reads the file "ansi.txt" (no byte-order mark, so nothing is skipped) one
// byte at a time and writes its whole content to std::cout, followed by a
// single space. If the file cannot be opened, writes "open failed! " and
// returns without printing anything else.
void readAnsiTXT() {
    const std::string filename = "ansi.txt";
    std::ifstream fin(filename.c_str());
    if (!fin.is_open()) {
        std::cout << "open failed! ";
        return;  // nothing to read; do not fall through to the read loop
    }

    std::string msg;
    char ch;
    // get() fails at end of file, which ends the loop without appending a
    // stale character.
    while (fin.get(ch)) {
        msg += ch;
    }
    std::cout << msg << " ";
}
2.Unicode格式
转载:https://blog.csdn.net/hxfhq1314/article/details/80344669
memset函数:https://baike.baidu.com/item/memset/4747579?fr=aladdin
setlocale函数:https://www.runoob.com/cprogramming/c-function-setlocale.html
// Converts a wide string to a multibyte (narrow) string using the C library
// conversion wcstombs.
//
// The global C locale is switched to "chs" for the duration of the call and
// restored afterwards. "chs" is an MSVC-style locale name; where the C library
// does not recognise it, setlocale fails and the locale already in effect is
// used for the conversion.
//
// Returns the converted bytes. If a character cannot be represented in the
// active locale, wcstombs stops and the result holds only what precedes the
// first zero byte in the buffer (possibly an empty string). Conversion also
// stops at the first L'\0' inside ws.
std::string ws2s(const std::wstring& ws) {
    const char* active = setlocale(LC_ALL, NULL);
    const std::string savedLocale = active ? active : "C";
    setlocale(LC_ALL, "chs");

    // MB_CUR_MAX is the longest multibyte sequence of the locale now in
    // effect, so this capacity can never be too small. The extra final byte
    // is excluded from the limit passed to wcstombs and therefore always
    // stays zero, which keeps the buffer terminated even when it is filled.
    std::string buffer(ws.size() * MB_CUR_MAX + 1, '\0');
    wcstombs(&buffer[0], ws.c_str(), buffer.size() - 1);

    setlocale(LC_ALL, savedLocale.c_str());
    return std::string(buffer.c_str());
}

// Reads "unicode.txt" as UTF-16 little-endian text, skipping the two-byte
// byte-order mark at the start, converts it with ws2s and writes the result
// to std::cout followed by a newline. If the file cannot be opened, writes
// "open failed! " and returns.
//
// Each 16-bit code unit is stored as one wchar_t; surrogate pairs are not
// combined, so characters outside the Basic Multilingual Plane are only
// handled where wchar_t itself is UTF-16. A trailing odd byte is ignored.
void readUnicodeTXT() {
    const std::string filename = "unicode.txt";
    std::ifstream fin(filename.c_str(), std::ios::binary);
    if (!fin.is_open()) {
        // Without this check the read loop would never reach end-of-file.
        std::cout << "open failed! ";
        return;
    }
    fin.seekg(2, std::ios::beg);  // skip the 0xFF 0xFE byte-order mark

    std::wstring text;
    unsigned char bytes[2];
    // Testing the read itself (rather than eof() beforehand) stops exactly
    // after the last complete code unit, so no stale character is appended.
    while (fin.read(reinterpret_cast<char*>(bytes), 2)) {
        // Assemble the little-endian unit explicitly so the value does not
        // depend on sizeof(wchar_t) or on the host byte order.
        const unsigned int unit =
            static_cast<unsigned int>(bytes[0]) |
            (static_cast<unsigned int>(bytes[1]) << 8);
        text.push_back(static_cast<wchar_t>(unit));
    }

    std::cout << ws2s(text) << std::endl;
}
utf8格式:
1 void readUtf8TXT(){ 2 string str = "utf8.txt"; 3 wstring res=L""; 4 std::locale loc("chs"); 5 std::wcout.imbue(loc); 6 std::wifstream wif(str, ios::binary); 7 codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>* codecvToUnicode = new codecvt_utf8<wchar_t, 0x10ffff, std::consume_header>; 8 if (wif.is_open()){ 9 wif.imbue(std::locale(wif.getloc(), codecvToUnicode)); 10 wstring wline; 11 while (getline(wif, wline)){ 12 wstring convert; 13 for (auto c : wline){ 14 if (c != L'