zoukankan      html  css  js  c++  java
  • Busybox支持中文的解决办法

    转载:http://blog.csdn.net/wavemcu/article/details/7202908

    ***************************************************************************************************************************
    作者:EasyWave                                                                                 时间:2012.01.15

    类别:linux驱动开发                                                                           声明:转载,请保留链接

    ***************************************************************************************************************************

    在嵌入式linux系统中,busybox是最常用的文件系统构建工具。但是从busybox 1.17.0开始,如果不修改源码,ls命令是无法显示中文的。即使内核已经设置了支持中文,在shell下用ls命令也无法显示中文,这是因为busybox 1.17.0及以后的版本对中文的支持进行了限制。下面讲讲如何修改,让busybox 1.17.0及以上版本支持中文。需要修改两个文件:printable_string.c以及unicode.c。先来分析为什么ls命令无法显示中文。请看printable_string.c未修改过的代码:

    const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
    {
        static char *saved[4];     /* the last 4 converted copies; the oldest is freed below */
        static unsigned cur_saved; /* = 0 */
        /* Returns str itself when no byte needs conversion, else a converted copy. */
        char *dst;
        const char *s;
        /* Fast path: look for the first byte outside printable ASCII. */
        s = str;
        while (1) {
            unsigned char c = *s;
            if (c == '\0') {
                /* 99+% of inputs do not need conversion */
                if (stats) {
                    stats->byte_count = (s - str);
                    stats->unicode_count = (s - str);
                    stats->unicode_width = (s - str);
                }
                return str;
            }
            if (c < ' ')
                break;
            if (c >= 0x7f) /* also true for every byte of a UTF-8 multibyte character */
                break;
            s++;
        }
        /* Slow path: at least one byte needs conversion. */
    #if ENABLE_UNICODE_SUPPORT
        dst = unicode_conv_to_printable(stats, str);
    #else
        {
            char *d = dst = xstrdup(str);
            while (1) {
                unsigned char c = *d;
                if (c == '\0')
                    break;
                if (c < ' ' || c >= 0x7f)
                    *d = '?'; /* this is where non-ASCII bytes are turned into '?' */
                d++;
            }
            if (stats) {
                stats->byte_count = (d - dst);
                stats->unicode_count = (d - dst);
                stats->unicode_width = (d - dst);
            }
        }
    #endif
        /* Remember the copy; the slot it takes over held the oldest one. */
        free(saved[cur_saved]);
        saved[cur_saved] = dst;
        cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
    
        return dst;
    }

    从上面代码的第23、24行以及第37、38行可以看出:大于等于0x7F的字节(中文字符的多字节编码都包含这样的字节)要么使循环直接break,要么直接被“?”代替。所以就算linux内核设置了支持中文,中文也无法显示出来,而是被“?”代替了。修改后的代码如下(原文中改动处以红色加粗标出):

    /*
     * Return a version of str that is safe to print on a terminal.
     *
     * If str contains no control characters it is returned as-is (no copy).
     * Otherwise a converted copy is returned. The function keeps the last 4
     * copies in a static array and frees the oldest one itself on a later
     * call, so the caller must not free the result.
     *
     * Unlike the stock version, bytes >= 0x80 are passed through untouched so
     * that multibyte encodings (e.g. UTF-8 Chinese file names) are shown
     * instead of '?'. DEL (0x7f) is still treated as a control character.
     *
     * stats, if not NULL, receives the size of the result. On the fast path
     * and in the non-Unicode build all three fields are set to the byte
     * length, so unicode_count/unicode_width overstate multibyte text.
     */
    const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
    {
        static char *saved[4];
        static unsigned cur_saved; /* = 0 */
    
        char *dst;
        const char *s;
    
        /* Fast path: look for the first byte that needs conversion. */
        s = str;
        while (1) {
            unsigned char c = *s;
            if (c == '\0') {
                /* 99+% of inputs do not need conversion */
                if (stats) {
                    stats->byte_count = (s - str);
                    stats->unicode_count = (s - str);
                    stats->unicode_width = (s - str);
                }
                return str;
            }
            if (c < ' ')
                break;
            /* Only DEL is rejected here; bytes >= 0x80 may belong to a
             * multibyte character and must be kept. */
            if (c == 0x7f)
                break;
            s++;
        }
    
    #if ENABLE_UNICODE_SUPPORT
        dst = unicode_conv_to_printable(stats, str);
    #else
        {
            char *d = dst = xstrdup(str);
            while (1) {
                unsigned char c = *d;
                if (c == '\0')
                    break;
                /* Replace ASCII control characters only (including DEL) */
                if (c < ' ' || c == 0x7f)
                    *d = '?';
                d++;
            }
            if (stats) {
                stats->byte_count = (d - dst);
                stats->unicode_count = (d - dst);
                stats->unicode_width = (d - dst);
            }
        }
    #endif
    
        /* Remember the copy; the slot it takes over held the oldest one. */
        free(saved[cur_saved]);
        saved[cur_saved] = dst;
        cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
    
        return dst;
    }

    经过以上修改之后,如果busybox 1.17.0配置时没有选中[ ] Support Unicode,那么用ls命令就可以看到中文了,这一点我已经亲自测试过。但还有一种情况:busybox 1.17.0在配置时选中了[*] Support Unicode,见下:

    在配置里,有Support Unicode选上的: 
    Busybox Settings->General Configuration->
       │ │[ ] Enable locale support (system needs locale for this to work)     │ │  
       │ │[*] Support Unicode                                                  │ │  
       │ │[*] Support for --long-options                                       │ │  
      

    那么这样还需要修改一个文件,这个文件就是:unicode.c。如果不修改这个文件,ls命令也是无法显示出中文的。见下未修改的代码:

    static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
    {
        char *dst;          /* result buffer (returned) */
        unsigned dst_len;   /* bytes written to dst so far */
        unsigned uni_count; /* characters emitted so far */
        unsigned uni_width; /* display columns emitted so far */
        /* Unicode handling is off: treat src as plain bytes, one byte per column. */
        if (unicode_status != UNICODE_ON) {
            char *d;
            if (flags & UNI_FLAG_PAD) {
                d = dst = xmalloc(width + 1);
                while ((int)--width >= 0) {
                    unsigned char c = *src;
                    if (c == '\0') {
                        do
                            *d++ = ' ';
                        while ((int)--width >= 0);
                        break;
                    }
                    *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
                    src++;
                }
                *d = '\0';
            } else {
                d = dst = xstrndup(src, width);
                while (*d) {
                    unsigned char c = *d;
                    if (c < ' ' || c >= 0x7f)
                        *d = '?';
                    d++;
                }
            }
            if (stats) {
                stats->byte_count = (d - dst);
                stats->unicode_count = (d - dst);
                stats->unicode_width = (d - dst);
            }
            return dst;
        }
    
        /* Unicode handling is on: decode one wide character at a time. */
        dst = NULL;
        uni_count = uni_width = 0;
        dst_len = 0;
        while (1) {
            int w;
            wchar_t wc;
    
    #if ENABLE_UNICODE_USING_LOCALE
            {
                mbstate_t mbst = { 0 };
                ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
                /* If invalid sequence is seen: -1 is returned,
                 * src points to the invalid sequence, errno = EILSEQ.
                 * Else number of wchars (excluding terminating L'\0')
                 * written to dest is returned.
                 * If len (here: 1) non-L'\0' wchars stored at dest,
                 * src points to the next char to be converted.
                 * If string is completely converted: src = NULL.
                 */
                if (rc == 0) /* end-of-string */
                    break;
                if (rc < 0) { /* error */
                    src++;
                    goto subst;
                }
                if (!iswprint(wc))
                    goto subst;
            }
    #else
            src = mbstowc_internal(&wc, src);
            /* src is advanced to next mb char
             * wc == ERROR_WCHAR: invalid sequence is seen
             * else: wc is set
             */
            if (wc == ERROR_WCHAR) /* error */
                goto subst;
            if (wc == 0) /* end-of-string */
                break;
    #endif
            if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
                goto subst;
            w = wcwidth(wc);
            if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
             || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
             || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
            ) {
     subst:
                /* Unusable character: emit the substitution char, 1 column wide */
                wc = CONFIG_SUBST_WCHAR;
                w = 1;
            }
            width -= w;
            /* Note: if width == 0, we still may add more chars,
             * they may be zero-width or combining ones */
            if ((int)width < 0) {
                /* can't add this wc, string would become longer than width */
                width += w;
                break;
            }
    
            uni_count++;
            uni_width += w;
            dst = xrealloc(dst, dst_len + MB_CUR_MAX);
    #if ENABLE_UNICODE_USING_LOCALE
            {
                mbstate_t mbst = { 0 };
                dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
            }
    #else
            dst_len += wcrtomb_internal(&dst[dst_len], wc);
    #endif
        }
    
        /* Pad to remaining width */
        if (flags & UNI_FLAG_PAD) {
            dst = xrealloc(dst, dst_len + width + 1);
            uni_count += width;
            uni_width += width;
            while ((int)--width >= 0) {
                dst[dst_len++] = ' ';
            }
        }
        /* NOTE(review): dst is still NULL here when no character was emitted
         * and UNI_FLAG_PAD is not set (e.g. empty src), so this store
         * dereferences NULL. */
        dst[dst_len] = '\0';
        if (stats) {
            stats->byte_count = dst_len;
            stats->unicode_count = uni_count;
            stats->unicode_width = uni_width;
        }
    
        return dst;
    }

    见上面20行和28行,需要修改一下,修改后的代码见下:

    /*
     * Convert src into a printable string of at most `width` columns.
     *
     * src   - NUL-terminated input string.
     * width - maximum size of the result: bytes when Unicode handling is
     *         off, display columns (as reported by wcwidth) when it is on.
     * flags - if UNI_FLAG_PAD is set, the result is padded with spaces up
     *         to `width`.
     * stats - if not NULL, receives byte count, character count and display
     *         width of the result.
     *
     * Returns a buffer obtained from xmalloc/xstrndup/xrealloc; it is never
     * NULL. NOTE(review): presumably the caller is responsible for freeing
     * it - confirm against the callers.
     *
     * When Unicode handling is off, bytes >= 0x80 are passed through
     * untouched so that multibyte text (e.g. Chinese file names) is not
     * replaced by '?'. ASCII control characters, including DEL (0x7f),
     * are still replaced.
     */
    static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
    {
        char *dst;
        unsigned dst_len;
        unsigned uni_count;
        unsigned uni_width;
    
        /* Unicode handling is off: work on plain bytes, one byte per column */
        if (unicode_status != UNICODE_ON) {
            char *d;
            if (flags & UNI_FLAG_PAD) {
                d = dst = xmalloc(width + 1);
                while ((int)--width >= 0) {
                    unsigned char c = *src;
                    if (c == '\0') {
                        /* Input exhausted: fill the rest with spaces */
                        do
                            *d++ = ' ';
                        while ((int)--width >= 0);
                        break;
                    }
                    /* Keep bytes >= 0x80; replace control chars and DEL */
                    *d++ = (c >= ' ' && c != 0x7f) ? c : '?';
                    src++;
                }
                *d = '\0';
            } else {
                d = dst = xstrndup(src, width);
                while (*d) {
                    unsigned char c = *d;
                    /* Keep bytes >= 0x80; replace control chars and DEL */
                    if (c < ' ' || c == 0x7f)
                        *d = '?';
                    d++;
                }
            }
            if (stats) {
                stats->byte_count = (d - dst);
                stats->unicode_count = (d - dst);
                stats->unicode_width = (d - dst);
            }
            return dst;
        }
    
        /* Unicode handling is on: decode one wide character at a time */
        dst = NULL;
        uni_count = uni_width = 0;
        dst_len = 0;
        while (1) {
            int w;
            wchar_t wc;
    
    #if ENABLE_UNICODE_USING_LOCALE
            {
                mbstate_t mbst = { 0 };
                ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
                /* If invalid sequence is seen: -1 is returned,
                 * src points to the invalid sequence, errno = EILSEQ.
                 * Else number of wchars (excluding terminating L'\0')
                 * written to dest is returned.
                 * If len (here: 1) non-L'\0' wchars stored at dest,
                 * src points to the next char to be converted.
                 * If string is completely converted: src = NULL.
                 */
                if (rc == 0) /* end-of-string */
                    break;
                if (rc < 0) { /* error */
                    src++;
                    goto subst;
                }
                if (!iswprint(wc))
                    goto subst;
            }
    #else
            src = mbstowc_internal(&wc, src);
            /* src is advanced to next mb char
             * wc == ERROR_WCHAR: invalid sequence is seen
             * else: wc is set
             */
            if (wc == ERROR_WCHAR) /* error */
                goto subst;
            if (wc == 0) /* end-of-string */
                break;
    #endif
            if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
                goto subst;
            w = wcwidth(wc);
            if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
             || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
             || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
            ) {
     subst:
                /* Unusable character: emit the substitution char, 1 column wide */
                wc = CONFIG_SUBST_WCHAR;
                w = 1;
            }
            width -= w;
            /* Note: if width == 0, we still may add more chars,
             * they may be zero-width or combining ones */
            if ((int)width < 0) {
                /* can't add this wc, string would become longer than width */
                width += w;
                break;
            }
    
            uni_count++;
            uni_width += w;
            dst = xrealloc(dst, dst_len + MB_CUR_MAX);
    #if ENABLE_UNICODE_USING_LOCALE
            {
                mbstate_t mbst = { 0 };
                dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
            }
    #else
            dst_len += wcrtomb_internal(&dst[dst_len], wc);
    #endif
        }
    
        /* Pad to remaining width */
        if (flags & UNI_FLAG_PAD) {
            dst = xrealloc(dst, dst_len + width + 1);
            uni_count += width;
            uni_width += width;
            while ((int)--width >= 0) {
                dst[dst_len++] = ' ';
            }
        }
        /* Nothing was emitted and no padding was requested (e.g. src is ""
         * or width is 0): dst is still NULL, so allocate room for the
         * terminator instead of writing through a NULL pointer. */
        if (!dst)
            dst = xrealloc(dst, 1);
        dst[dst_len] = '\0';
        if (stats) {
            stats->byte_count = dst_len;
            stats->unicode_count = uni_count;
            stats->unicode_width = uni_width;
        }
    
        return dst;
    }

    经过以上修改之后,就算配置了支持Unicode,ls命令也可以显示中文,同时也可以进入中文名称的目录(文件夹)。

  • 相关阅读:
    DS博客作业02--栈和队列
    指针
    C语言博客作业04--数组
    函数
    留言板
    第三周-自主学习任务-面向对象基础与类的识别
    DS博客作业05--查找
    DS博客作业04--图
    DS博客作业03--树
    DS博客作业02--栈和队列
  • 原文地址:https://www.cnblogs.com/pengdonglin137/p/3631346.html
Copyright © 2011-2022 走看看