zoukankan      html  css  js  c++  java
  • C++ Stream与编码转换

    1.自己动手改造codecvt来读写Unicode(LE)代码:

    #include <iostream>
    #include <fstream>
    #include <locale>
    #include "TextCodeChange.h"
    using namespace std;
    
    //#define endl L"\n"
    int main()
    {
        locale loc(locale::classic(), new class NullCodecvt);
        wstring wstr;
        wifstream wfin(L"test.txt", ios_base::binary);
        wofstream wfout(L"test1.txt", ios_base::binary);;
        wfout.imbue(loc);
        wchar_t header[1] = { 0xFEFF };
        wfout.write(header, 1);
    
        wfin.imbue(loc);
        wcout.imbue(locale(""));
        while (std::getline(wfin, wstr))
        //while (wfin >> wstr)
        {
            wcout << wstr << endl;
            wfout << wstr << endl;
        }
        wfin.close();
        wfout.close();
        return 0;
    }

    头文件TextCodeChange.h

    #ifndef TEXT_CODE_CHANGE_H
    #define TEXT_CODE_CHANGE_H

    #include <cstddef>
    #include <cwchar>
    #include <locale>

    using std::codecvt ;
    typedef codecvt < wchar_t , char , std::mbstate_t > NullCodecvtBase ;

    /// A codecvt facet that declares "no conversion needed" between wchar_t and
    /// char, intended for binary wide streams that read or write the wide
    /// characters as they are.
    ///
    /// Every conversion hook returns noconv and leaves both sequences untouched;
    /// do_always_noconv() returns true.
    /// NOTE(review): how a file stream treats an always_noconv() facet for
    /// wchar_t is implementation-specific - confirm on the target toolchain.
    class NullCodecvt
        : public NullCodecvtBase
    {

    public:
        // Legacy aliases kept so existing code that names them still compiles.
        typedef wchar_t _E ;
        typedef char _To ;
        typedef std::mbstate_t _St ;

        /// @param refs 0 lets the owning std::locale delete the facet; a non-zero
        ///             value leaves the lifetime to the caller.
        explicit NullCodecvt( std::size_t refs = 0 ) : NullCodecvtBase(refs) { }

    protected:
        /// External -> internal: nothing is converted. The *_next pointers are set
        /// to the range starts so callers never read them uninitialised.
        result do_in( state_type& ,
                      const extern_type* from , const extern_type* , const extern_type*& from_next ,
                      intern_type* to , intern_type* , intern_type*& to_next
                      ) const override
        {
            from_next = from ;
            to_next = to ;
            return noconv ;
        }

        /// Internal -> external: nothing is converted.
        /// The destination-limit parameter is extern_type*, matching the base
        /// class signature, so this really overrides codecvt::do_out.
        result do_out( state_type& ,
                       const intern_type* from , const intern_type* , const intern_type*& from_next ,
                       extern_type* to , extern_type* , extern_type*& to_next
                       ) const override
        {
            from_next = from ;
            to_next = to ;
            return noconv ;
        }

        /// No shift state exists, so no termination sequence is written.
        result do_unshift( state_type& ,
                           extern_type* to , extern_type* , extern_type*& to_next ) const override
        {
            to_next = to ;
            return noconv ;
        }

        /// @return the smaller of `max` and the number of elements in [from, end).
        int do_length( state_type& , const extern_type* from ,
                       const extern_type* end , std::size_t max ) const noexcept override
        {
            const std::size_t available = static_cast<std::size_t>(end - from) ;
            return static_cast<int>(max < available ? max : available) ;
        }

        /// Always true: the facet never converts.
        bool do_always_noconv() const noexcept override
        {
            return true ;
        }

        /// Reports at most 2 external chars per internal character.
        int do_max_length() const noexcept override
        {
            return 2 ;
        }

        /// Reports a fixed-width encoding of 2 external chars per character.
        int do_encoding() const noexcept override
        {
            return 2 ;
        }
    } ;

    #endif // TEXT_CODE_CHANGE_H

    2.利用C++11标准中提供的 codecvt_utf16来读写UTF-16LE文件,读出的UTF-16LE字符放入wchar_t中处理。

    #include <iostream>
    #include <fstream>
    #include <locale>
    #include <codecvt>
    using namespace std;
    
    /// Reads whitespace-separated words from the UTF-16LE file "test.txt",
    /// echoes each one to the console and writes it, followed by a line feed,
    /// to the UTF-16LE file "test1.txt".
    ///
    /// @return 0 on success, 1 if either file cannot be opened.
    int main()
    {
        // little_endian selects UTF-16LE; std::locale takes ownership of the facet.
        // (codecvt_utf16 is deprecated since C++17 but still available.)
        const std::locale loc(std::locale(), new std::codecvt_utf16<wchar_t, 0x10ffff, std::little_endian>);

        // Both streams carry raw UTF-16 bytes, so both must be binary: text mode
        // may translate 0x0A/0x0D bytes that are halves of code units.
        // Narrow file names are used because wchar_t* overloads are non-standard.
        std::wifstream wfin("test.txt", std::ios::binary);
        std::wofstream wfout("test1.txt", std::ios::binary);
        if (!wfin || !wfout)
        {
            wcerr << L"cannot open test.txt / test1.txt" << endl;
            return 1;
        }
        wfout.imbue(loc);
        wfin.imbue(loc);
        wcout.imbue(locale(""));

        // Byte-order mark; the little-endian facet encodes it as bytes FF FE.
        const wchar_t bom = 0xFEFF;
        wfout.write(&bom, 1);

        wstring wstr;
        bool first_word = true;
        while (wfin >> wstr)
        {
            // The facet does not consume the input's BOM, so it arrives as a
            // leading U+FEFF; drop it to avoid writing a second mark.
            if (first_word)
            {
                first_word = false;
                if (!wstr.empty() && wstr.front() == bom)
                    wstr.erase(0, 1);
            }
            wcout << wstr << endl;
            wfout << wstr << L"\n";
        }
        // The streams flush and close themselves when they go out of scope.
        return 0;
    }

     3.利用C++11标准中提供的 codecvt_utf16来读写UTF-16BE文件,读出的UTF-16BE字符放入wchar_t中处理。

    #include <iostream>
    #include <fstream>
    #include <locale>
    #include <codecvt>
    using namespace std;
    
    /// Reads whitespace-separated words from the UTF-16BE file "test.txt",
    /// echoes each one to the console and writes it, followed by a line feed,
    /// to the UTF-16BE file "test1.txt".
    ///
    /// @return 0 on success, 1 if either file cannot be opened.
    int main()
    {
        // Without the little_endian flag codecvt_utf16 reads and writes big-endian.
        // generate_header is deliberately not used: the byte-order mark is written
        // by hand below, and having both would put two marks into the file.
        // (codecvt_utf16 is deprecated since C++17 but still available.)
        const std::locale loc(std::locale(), new std::codecvt_utf16<wchar_t, 0x10ffff>);

        // Both streams carry raw UTF-16 bytes, so both must be binary: text mode
        // may translate 0x0A/0x0D bytes that are halves of code units.
        // Narrow file names are used because wchar_t* overloads are non-standard.
        std::wifstream wfin("test.txt", std::ios::binary);
        std::wofstream wfout("test1.txt", std::ios::binary);
        if (!wfin || !wfout)
        {
            wcerr << L"cannot open test.txt / test1.txt" << endl;
            return 1;
        }
        wfout.imbue(loc);
        wfin.imbue(loc);
        wcout.imbue(locale(""));

        // The byte-order mark is always the character U+FEFF; the big-endian facet
        // encodes it as bytes FE FF. Writing 0xFFFE here would instead produce the
        // bytes FF FE, i.e. the little-endian mark in front of big-endian data.
        const wchar_t bom = 0xFEFF;
        wfout.write(&bom, 1);

        wstring wstr;
        bool first_word = true;
        while (wfin >> wstr)
        {
            // The facet does not consume the input's BOM, so it arrives as a
            // leading U+FEFF; drop it to avoid writing a second mark.
            if (first_word)
            {
                first_word = false;
                if (!wstr.empty() && wstr.front() == bom)
                    wstr.erase(0, 1);
            }
            wcout << wstr << endl;
            wfout << wstr << L"\n";
        }
        // The streams flush and close themselves when they go out of scope.
        return 0;
    }

    4.利用C++11标准中提供的codecvt_utf8来读写UTF-8文件,读出的UTF-8字符放入wchar_t中处理。

    #include <iostream>
    #include <fstream>
    #include <locale>
    #include <codecvt>
    using namespace std;
    
    /// Reads whitespace-separated words from the UTF-8 file "test.txt", echoes
    /// each one to the console and writes it on its own line to the UTF-8 file
    /// "test1.txt".
    ///
    /// @return 0 on success, 1 if either file cannot be opened.
    int main()
    {
        // std::locale takes ownership of the facet.
        // (codecvt_utf8 is deprecated since C++17 but still available.)
        const std::locale loc(std::locale(), new std::codecvt_utf8<wchar_t>);

        // Narrow file names: the wchar_t* constructor overloads are a vendor
        // extension and do not exist in standard C++.
        wifstream wfin("test.txt");
        wofstream wfout("test1.txt");
        if (!wfin || !wfout)
        {
            wcerr << L"cannot open test.txt / test1.txt" << endl;
            return 1;
        }
        wfout.imbue(loc);
        wfin.imbue(loc);
        wcout.imbue(locale(""));

        wstring wstr;
        while (wfin >> wstr)
        {
            wcout << wstr << endl;
            // L'\n' instead of endl: same character, no flush after every word.
            wfout << wstr << L'\n';
        }
        // The streams flush and close themselves when they go out of scope.
        return 0;
    }

     5.UTF8编码转换UTF-16编码的应用

    #include <iostream>
    #include <fstream>
    #include <locale>
    #include <codecvt>
    #include <cvtwstring>
    using namespace std;
    
    /// Reads the first whitespace-delimited word of the UTF-8 file "test.txt"
    /// into a wstring as UTF-16, converts it to a UTF-8 byte string and back,
    /// and prints both forms.
    ///
    /// @return 0 on success, 1 if the file cannot be opened.
    int main()
    {
        // Read the UTF-8 file; the facet delivers UTF-16 code units into wstr.
        std::wifstream wfin("test.txt");
        if (!wfin)
        {
            cerr << "cannot open test.txt" << endl;
            return 1;
        }
        wfin.imbue(std::locale(wfin.getloc(), new std::codecvt_utf8_utf16<wchar_t>));

        wstring wstr;
        wfin >> wstr;

        // UTF-8 <-> UTF-16 converter (std::wstring_convert is declared in <locale>).
        // It must use codecvt_utf8_utf16, the same facet family as the reader:
        // codecvt_utf8<wchar_t> treats every wchar_t as a whole code point and
        // would not encode UTF-16 surrogate pairs as valid UTF-8.
        wstring_convert<codecvt_utf8_utf16<wchar_t> > myconv;

        // UTF-16 -> UTF-8
        const string mbstring = myconv.to_bytes(wstr);
        cout << mbstring;

        // UTF-8 -> UTF-16
        wstr = myconv.from_bytes(mbstring);
        wcout.imbue(locale(""));
        // NOTE(review): cout and wcout both write to stdout; mixing narrow and
        // wide output on one C stream is platform-dependent - confirm the wide
        // text actually appears on the target platform.
        wcout << wstr;

        // wfin closes itself when it goes out of scope.
        return 0;
    }
  • 相关阅读:
    图论分支-Tarjan初步-边双联通分量
    图论分支-Tarjan初步-割点和割边
    简谈图论重要性&&图论总结
    Angular本地数据存储LocalStorage
    (转)AngularJS中使用的表单验证
    (转)AngularJS判断checkbox/复选框是否选中并实时显示
    (转载)JavaScript世界万物诞生记
    (转载)关于My97 datepicker与Angular ng-model绑定问题解决。
    mysql
    mysql
  • 原文地址:https://www.cnblogs.com/elitiwin/p/4248167.html
Copyright © 2011-2022 走看看