zoukankan      html  css  js  c++  java
  • UTF-8 转换为 GB 编码 / GB2312 转换为 UTF-8

    这个方法使用 Windows 的字符集转换函数实现,与 Sybase 的 Unicode
    码表相比,个别符号的映射可能有差别;但对于大部分字符(尤其是汉字)
    应该不会有问题。如果要求比较高,可以购买 Sybase 的
    Unicode 开发包。:P
    [code]
    #include <stdio.h>

    #include <stdlib.h>

    #include <string.h>

    #include <locale.h>

    #include <ctype.h>

    #include <mbstring.h>



    /*
     * Encode a single Unicode code point as UTF-8.
     *
     * out  destination for the encoded bytes, or NULL to only measure.
     * cp   code point; values above U+10FFFF are replaced by U+FFFD.
     *
     * Returns the number of bytes the encoding occupies (1 to 4).
     */
    static size_t mbstoutf8_put(unsigned char *out, unsigned long cp)
    {
        if (cp > 0x10FFFFUL)
            cp = 0xFFFDUL;

        if (cp < 0x80UL) {
            if (out)
                out[0] = (unsigned char)cp;
            return 1;
        }
        if (cp < 0x800UL) {
            if (out) {
                out[0] = (unsigned char)(0xC0UL | (cp >> 6));
                out[1] = (unsigned char)(0x80UL | (cp & 0x3FUL));
            }
            return 2;
        }
        if (cp < 0x10000UL) {
            if (out) {
                out[0] = (unsigned char)(0xE0UL | (cp >> 12));
                out[1] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
                out[2] = (unsigned char)(0x80UL | (cp & 0x3FUL));
            }
            return 3;
        }
        if (out) {
            out[0] = (unsigned char)(0xF0UL | (cp >> 18));
            out[1] = (unsigned char)(0x80UL | ((cp >> 12) & 0x3FUL));
            out[2] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
            out[3] = (unsigned char)(0x80UL | (cp & 0x3FUL));
        }
        return 4;
    }

    /*
     * Convert a NUL-terminated multibyte string, interpreted in the current
     * LC_CTYPE locale, to UTF-8.
     *
     * utf  destination buffer, or NULL to only compute the required size.
     *      When non-NULL it must have room for the returned length plus one
     *      byte for the terminating NUL, which is always written.
     * mbs  source multibyte string.
     *
     * Returns the length of the UTF-8 text in bytes (terminator excluded), or
     * (size_t)-1 when mbs is NULL, contains a sequence that is invalid in the
     * current locale, or memory cannot be allocated.
     */
    size_t mbstoutf8(unsigned char* utf, unsigned char* mbs)
    {
        const char *src = (const char *)mbs;
        wchar_t *wc;
        size_t wcneed;
        size_t u8need = 0;
        size_t i;

        if (!mbs)
            return (size_t)-1;

        /* With a NULL destination mbstowcs only reports the wide length. */
        wcneed = mbstowcs(NULL, src, 0);
        if (wcneed == (size_t)-1)
            return (size_t)-1;

        wc = calloc(wcneed + 1, sizeof *wc);
        if (!wc)
            return (size_t)-1;

        if (mbstowcs(wc, src, wcneed + 1) != wcneed) {
            free(wc);
            return (size_t)-1;
        }
        wc[wcneed] = L'\0';

        for (i = 0; i < wcneed; i++) {
            unsigned long cp = (unsigned long)wc[i];

            /*
             * Where wchar_t holds UTF-16 code units, a high/low surrogate
             * pair is one code point and must become a single 4-byte
             * sequence rather than two 3-byte ones.
             */
            if (cp >= 0xD800UL && cp <= 0xDBFFUL && i + 1 < wcneed) {
                unsigned long lo = (unsigned long)wc[i + 1];

                if (lo >= 0xDC00UL && lo <= 0xDFFFUL) {
                    cp = 0x10000UL + ((cp - 0xD800UL) << 10) + (lo - 0xDC00UL);
                    i++;
                }
            }

            u8need += mbstoutf8_put(utf ? utf + u8need : NULL, cp);
        }

        if (utf)
            utf[u8need] = 0;

        free(wc);
        return u8need;
    }



    /*
     * Reads lines from stdin, converts each from the locale's multibyte
     * encoding to UTF-8 with mbstoutf8() and prints the result. Stops at end
     * of input, on a read error, or at the first empty line.
     *
     * The locale string ".936" is the Microsoft CRT spelling for code page
     * 936; NOTE(review): on other C libraries this setlocale call is expected
     * to fail and leave the "C" locale active -- confirm for your target.
     */
    int main(void)
    {
        char mbs[81];
        size_t sz;

        setlocale(LC_CTYPE, ".936");

        /* Testing fgets avoids spinning on a stale buffer once EOF is hit. */
        while (fgets(mbs, sizeof mbs, stdin)) {
            /* Cut at the line ending only if one is present. */
            mbs[strcspn(mbs, "\r\n")] = '\0';
            if (!*mbs)
                break;

            /* First pass measures, second pass writes. */
            sz = mbstoutf8(NULL, (unsigned char *)mbs);
            if (sz != (size_t)-1) {
                unsigned char *u = malloc(sz + 1);

                if (u) {
                    if (mbstoutf8(u, (unsigned char *)mbs) != (size_t)-1)
                        puts((char *)u);
                    free(u);
                }
            }
        }
        return 0;
    }
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <locale.h>
    #include <ctype.h>
    #include <mbstring.h>
    #include <wchar.h>
    /*
     * Convert a NUL-terminated wide-character string to the multibyte
     * encoding of the current LC_CTYPE locale.
     *
     * mbs  destination buffer, or NULL to only compute the required size.
     *      When non-NULL it must have room for the returned length plus one
     *      byte for the terminating NUL, which is always written.
     * utf  source text as wide characters. Despite the function's name this
     *      must be a genuine wchar_t string: raw UTF-8 bytes have to be
     *      decoded into wide characters before being passed here.
     *
     * Returns the length of the multibyte text in bytes (terminator
     * excluded), or (size_t)-1 when utf is NULL or contains a character that
     * the current locale cannot represent.
     */
    size_t utf8tombs(unsigned char* mbs, wchar_t* utf)
    {
        size_t gbneed;

        if (!utf)
            return (size_t)-1;

        /* With a NULL destination wcstombs only reports the byte length. */
        gbneed = wcstombs(NULL, utf, 0);
        if (gbneed == (size_t)-1)
            return (size_t)-1;

        if (mbs) {
            /* The limit is in bytes and includes room for the terminator. */
            if (wcstombs((char *)mbs, utf, gbneed + 1) != gbneed)
                return (size_t)-1;
            mbs[gbneed] = 0;
        }

        return gbneed;
    }
    /*
     * Decode a NUL-terminated UTF-8 byte string into wide characters.
     *
     * src  UTF-8 input.
     * dst  output array of cap elements; always NUL-terminated on success.
     * cap  capacity of dst in wchar_t elements, terminator included.
     *
     * Code points above U+FFFF are stored as a surrogate pair when wchar_t
     * is narrower than 32 bits. Overlong encodings are not rejected.
     *
     * Returns 0 on success, -1 on a malformed or truncated sequence, a code
     * point above U+10FFFF, or insufficient room in dst.
     */
    static int utf8_line_to_wide(const unsigned char *src, wchar_t *dst, size_t cap)
    {
        size_t n = 0;

        while (*src) {
            unsigned char lead = *src++;
            unsigned long cp;
            int extra;

            if (lead < 0x80) {
                cp = lead;
                extra = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                cp = lead & 0x1FUL;
                extra = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                cp = lead & 0x0FUL;
                extra = 2;
            } else if ((lead & 0xF8) == 0xF0) {
                cp = lead & 0x07UL;
                extra = 3;
            } else {
                return -1;
            }

            while (extra-- > 0) {
                /* A NUL here fails the test, so a cut-off tail never overruns. */
                if ((*src & 0xC0) != 0x80)
                    return -1;
                cp = (cp << 6) | (*src++ & 0x3FUL);
            }

            if (cp > 0x10FFFFUL)
                return -1;

            if (cp > 0xFFFFUL && sizeof(wchar_t) < 4) {
                if (n + 2 >= cap)
                    return -1;
                cp -= 0x10000UL;
                dst[n++] = (wchar_t)(0xD800UL + (cp >> 10));
                dst[n++] = (wchar_t)(0xDC00UL + (cp & 0x3FFUL));
            } else {
                if (n + 1 >= cap)
                    return -1;
                dst[n++] = (wchar_t)cp;
            }
        }

        dst[n] = L'\0';
        return 0;
    }

    /*
     * Reads UTF-8 lines from stdin, decodes each into wide characters,
     * converts it to the locale's multibyte encoding with utf8tombs() and
     * prints the result. Stops at end of input, on a read error, or at the
     * first empty line; lines that cannot be converted are skipped.
     *
     * The locale string ".936" is the Microsoft CRT spelling for code page
     * 936; NOTE(review): on other C libraries this setlocale call is expected
     * to fail and leave the "C" locale active -- confirm for your target.
     */
    int main(void)
    {
        char utf[81];
        wchar_t wide[81];
        size_t sz;

        setlocale(LC_CTYPE, ".936");

        /* Testing fgets avoids spinning on a stale buffer once EOF is hit. */
        while (fgets(utf, sizeof utf, stdin)) {
            /* Cut at the line ending only if one is present. */
            utf[strcspn(utf, "\r\n")] = '\0';
            if (!*utf)
                break;

            /* utf8tombs takes wide characters, so decode the bytes first. */
            if (utf8_line_to_wide((const unsigned char *)utf, wide,
                                  sizeof wide / sizeof wide[0]) != 0)
                continue;

            /* First pass measures, second pass writes. */
            sz = utf8tombs(NULL, wide);
            if (sz != (size_t)-1) {
                unsigned char *u = malloc(sz + 1);

                if (u) {
                    if (utf8tombs(u, wide) != (size_t)-1)
                        puts((char *)u);
                    free(u);
                }
            }
        }
        return 0;
    } [/code]

  • 相关阅读:
    2020前端学习路线 之完结篇
    axios 请求超时,设置重新请求的完美解决方法
    如何终止前端发起的请求?
    轮询与长轮询
    最全React技术栈技术资料汇总(收藏)
    React 服务端渲染完美的解决方案
    将数组格式的字符串转换成数组
    Cannot read property 'map' of undefined
    计算机编码方式简介
    python01之文件处理
  • 原文地址:https://www.cnblogs.com/pengkunfan/p/3794663.html
Copyright © 2011-2022 走看看