zoukankan      html  css  js  c++  java
  • C/C++ 字符编码的转换(utf8、gb2312)

    //这是个类strCoding (strCoding.h文件)
    #pragma once
    #include <cctype>
    #include <cstdio>
    #include <cstring>
    #include <iostream>
    #include <string>

    #include <windows.h>
    using namespace std;

    class strCoding
    {
    public:
        strCoding(
    void);
        
    ~strCoding(void);
       
        
    void UTF_8ToGB2312(string &pOut, char *pText, int pLen);//utf_8转为gb2312
        void GB2312ToUTF_8(string& pOut,char *pText, int pLen); //gb2312 转utf_8
        string UrlGB2312(char * str);                           //urlgb2312编码
        string UrlUTF8(char * str);                             //urlutf8 编码
        string UrlUTF8Decode(string str);                  //urlutf8解码
        string UrlGB2312Decode(string str);                //urlgb2312解码
      
    private:
        
    void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer);
        
    void UTF_8ToUnicode(WCHAR* pOut,char *pText);
        
    void UnicodeToUTF_8(char* pOut,WCHAR* pText);
        
    void UnicodeToGB2312(char* pOut,WCHAR uData);
        
    char  CharToInt(char ch);
        
    char StrToBin(char *str);

    }
    ;
    //这是个类strCoding (strCoding.cpp文件)
    #include "StdAfx.h"

    #include 
    ".\strcoding.h"



    // Default constructor. The class declares no data members, so there is
    // nothing to initialize.
    strCoding::strCoding(void)
    {
    }


    // Destructor. The class declares no data members, so there is nothing to release.
    strCoding::~strCoding(void)
    {
    }

    void strCoding::Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer)
    {
        ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,
    2,pOut,1);
        
    return;
    }

    void strCoding::UTF_8ToUnicode(WCHAR* pOut,char *pText)
    {
        
    char* uchar = (char *)pOut;
         
        uchar[
    1= ((pText[0& 0x0F<< 4+ ((pText[1>> 2& 0x0F);
        uchar[
    0= ((pText[1& 0x03<< 6+ (pText[2& 0x3F);

        
    return;
    }


    void strCoding::UnicodeToUTF_8(char* pOut,WCHAR* pText)
    {
        
    // 注意 WCHAR高低字的顺序,低字节在前,高字节在后
        char* pchar = (char *)pText;

        pOut[
    0= (0xE0 | ((pchar[1& 0xF0>> 4));
        pOut[
    1= (0x80 | ((pchar[1& 0x0F<< 2)) + ((pchar[0& 0xC0>> 6);
        pOut[
    2= (0x80 | (pchar[0& 0x3F));

        
    return;
    }

    void strCoding::UnicodeToGB2312(char* pOut,WCHAR uData)
    {
        WideCharToMultiByte(CP_ACP,NULL,
    &uData,1,pOut,sizeof(WCHAR),NULL,NULL);
        
    return;
    }


    //做为解Url使用
    char strCoding:: CharToInt(char ch){
            
    if(ch>='0' && ch<='9')return (char)(ch-'0');
            
    if(ch>='a' && ch<='f')return (char)(ch-'a'+10);
            
    if(ch>='A' && ch<='F')return (char)(ch-'A'+10);
            
    return -1;
    }

    char strCoding::StrToBin(char *str){
            
    char tempWord[2];
            
    char chn;

            tempWord[
    0= CharToInt(str[0]);                         //make the B to 11 -- 00001011
            tempWord[1= CharToInt(str[1]);                         //make the 0 to 0  -- 00000000

            chn 
    = (tempWord[0<< 4| tempWord[1];                //to change the BO to 10110000

            
    return chn;
    }



    //UTF_8 转gb2312
    void strCoding::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
    {
         
    char buf[4];
         
    char* rst = new char[pLen + (pLen >> 2+ 2];
        memset(buf,
    0,4);
        memset(rst,
    0,pLen + (pLen >> 2+ 2);

        
    int i =0;
        
    int j = 0;
          
        
    while(i < pLen)
        
    {
            
    if(*(pText + i) >= 0)
            
    {
                
                rst[j
    ++= pText[i++];
            }

            
    else                 
            
    {
                WCHAR Wtemp;

                
                UTF_8ToUnicode(
    &Wtemp,pText + i);
                  
                UnicodeToGB2312(buf,Wtemp);
                
                unsigned 
    short int tmp = 0;
                tmp 
    = rst[j] = buf[0];
                tmp 
    = rst[j+1= buf[1];
                tmp 
    = rst[j+2= buf[2];

                
    //newBuf[j] = Ctemp[0];
                
    //newBuf[j + 1] = Ctemp[1];

                i 
    += 3;    
                j 
    += 2;   
            }

            
      }

        rst[j]
    ='\0';
       pOut 
    = rst; 
        delete []rst;
    }


    //GB2312 转为 UTF-8
    void strCoding::GB2312ToUTF_8(string& pOut,char *pText, int pLen)
    {
        
    char buf[4];
        memset(buf,
    0,4);

        pOut.clear();

        
    int i = 0;
        
    while(i < pLen)
        
    {
            
    //如果是英文直接复制就可以
            if( pText[i] >= 0)
            
    {
                
    char asciistr[2]={0};
                asciistr[
    0= (pText[i++]);
                pOut.append(asciistr);
            }

            
    else
            
    {
                WCHAR pbuffer;
                Gb2312ToUnicode(
    &pbuffer,pText+i);

                UnicodeToUTF_8(buf,
    &pbuffer);

                pOut.append(buf);

                i 
    += 2;
            }

        }


        
    return;
    }

    //把str编码为网页中的 GB2312 url encode ,英文不变,汉字双字节  如%3D%AE%88
    string strCoding::UrlGB2312(char * str)
    {
        
    string dd;
        size_t len 
    = strlen(str);
        
    for (size_t i=0;i<len;i++)
        
    {
            
    if(isalnum((BYTE)str[i]))
            
    {
                
    char tempbuff[2];
                sprintf(tempbuff,
    "%c",str[i]);
                dd.append(tempbuff);
            }

            
    else if (isspace((BYTE)str[i]))
            
    {
                dd.append(
    "+");
            }

            
    else
            
    {
                
    char tempbuff[4];
                sprintf(tempbuff,
    "%%%X%X",((BYTE*)str)[i] >>4,((BYTE*)str)[i] %16);
                dd.append(tempbuff);
            }


        }

        
    return dd;
    }


    //把str编码为网页中的 UTF-8 url encode ,英文不变,汉字三字节  如%3D%AE%88

    string strCoding::UrlUTF8(char * str)
    {
        
    string tt;
        
    string dd;
        GB2312ToUTF_8(tt,str,(
    int)strlen(str));

        size_t len
    =tt.length();
        
    for (size_t i=0;i<len;i++)
        
    {
            
    if(isalnum((BYTE)tt.at(i)))
            
    {
                
    char tempbuff[2]={0};
                sprintf(tempbuff,
    "%c",(BYTE)tt.at(i));
                dd.append(tempbuff);
            }

            
    else if (isspace((BYTE)tt.at(i)))
            
    {
                dd.append(
    "+");
            }

            
    else
            
    {
                
    char tempbuff[4];
                sprintf(tempbuff,
    "%%%X%X",((BYTE)tt.at(i)) >>4,((BYTE)tt.at(i)) %16);
                dd.append(tempbuff);
            }


        }

        
    return dd;
    }

    //把url GB2312解码
    string strCoding::UrlGB2312Decode(string str)
    {
       
    string output="";
            
    char tmp[2];
            
    int i=0,idx=0,ndx,len=str.length();
            
            
    while(i<len){
                    
    if(str[i]=='%'){
                            tmp[
    0]=str[i+1];
                            tmp[
    1]=str[i+2];
                            output 
    += StrToBin(tmp);
                            i
    =i+3;
                    }

                    
    else if(str[i]=='+'){
                            output
    +=' ';
                            i
    ++;
                    }

                    
    else{
                            output
    +=str[i];
                            i
    ++;
                    }

            }

            
            
    return output;
    }

    //把url utf8解码
    string strCoding::UrlUTF8Decode(string str)
    {
         
    string output="";

        
    string temp =UrlGB2312Decode(str);//

        UTF_8ToGB2312(output,(
    char *)temp.data(),strlen(temp.data()));

        
    return output;

    }

    //test
    #include "stdafx.h"
    #include "strCoding.h"

    using namespace std;


    int main()

        
         strCoding cfm;
         
    string keyword="大家好,欢迎你";
         
    string Temp="";
         
    string Output="";

         
    //把关键字做url的utf8编码
         Temp= cfm.UrlUTF8((char *)keyword.data());
         cout
    <<Temp<<endl;
        
         
    //把url的utf8编码的结果解码
         Temp =cfm.UrlUTF8Decode(Temp);
         cout
    <<Temp<<endl;

         
    //把关键字做url的gb2312编码
         Temp =cfm.UrlGB2312((char *)keyword.data());
         cout
    <<Temp<<endl;
         
         
    //把url的gb2312编码的结果解码
         Temp =cfm.UrlGB2312Decode(Temp);
         cout
    <<Temp<<endl;


         
    //把关键字GB2312转UTF_8
        
         cfm.GB2312ToUTF_8(Output,(
    char *)keyword.data(),strlen(keyword.data()));
         cout
    <<Output<<endl;

         
    //把GB2312转UTF_8转为中文
         cfm.UTF_8ToGB2312(Temp,(char *)Output.data(),strlen(Output.data()));
         cout
    <<Temp<<endl;


        
    //system("pasue");
         getchar();

        
    return 0;
        
    //
    }


    在VC7win32下调试通过
  • 相关阅读:
    转(Java中的递归思想)
    stuff thing object 区别
    论文阅读笔记ECCV 2018: Factorizable net: an efficient subgraphbased framework for scene graph generation
    论文阅读笔记Adversarial Learning with Mask Reconstruction for TextGuidedImage Inpainting
    论文阅读笔记Image Generation from Scene Graphs
    评价gan好坏的指标:IS和FID
    深度学习——正则化(L1\L2)(还没搞明白
    Adam
    L1 L2 SmoothL1损失函数
    ground truth
  • 原文地址:https://www.cnblogs.com/cfam/p/835301.html
Copyright © 2011-2022 走看看