zoukankan      html  css  js  c++  java
  • 【文文殿下】浅析scanf源码

    本文仅做理性上的愉悦,无实际用途。

    scanf实际的调用

    我们直接使用的scanf其实是这样写的

    /*
     * scanf - formatted-input entry point.
     *
     * format: format string, forwarded unchanged to vscanf_fn.
     * ...:    variadic arguments, collected into a va_list and forwarded.
     *
     * Returns whatever vscanf_fn returns.
     */
    int __cdecl scanf (
            const char *format,
            ...
            )
    {
            va_list arglist;
            va_start(arglist, format);
            /* Delegate to the shared worker, selecting _input_l as the input
             * routine and passing NULL for the locale argument. */
            /* NOTE(review): this excerpt has no va_end(arglist) paired with the
             * va_start above. */
            return vscanf_fn(_input_l, format, NULL, arglist);
    }
    
    

    我们可以看到,它其实用到了这三样东西:类型 va_list、宏 va_start 和函数 vscanf_fn

    我们跳转到vscanf_fn的实现

    /*
     * vscanf_fn - run a caller-supplied input routine against stdin while
     * stdin is locked.
     *
     * inputfn:  the routine that does the actual parsing; it is called with
     *           (stdin, format, plocinfo, arglist).
     * format:   format string; validated to be non-NULL before anything else.
     * plocinfo: locale argument, passed through to inputfn untouched.
     * arglist:  va_list of destination arguments, passed through to inputfn.
     *
     * Returns the value produced by inputfn.
     */
    int __cdecl vscanf_fn (
            INPUTFN inputfn,
            const char *format,
            _locale_t plocinfo,
            va_list arglist
            )
    /*
     * stdin 'SCAN', 'F'ormatted
     */
    {
        int retval = 0;
     
        /* NOTE(review): presumably returns EOF (reporting EINVAL) when format
         * is NULL -- confirm against the _VALIDATE_RETURN macro definition. */
        _VALIDATE_RETURN( (format != NULL), EINVAL, EOF);
     
        _lock_str2(0, stdin);
        __try {
            retval = (inputfn(stdin, format, plocinfo, arglist));
        }
        __finally {
            /* The unlock sits in the __finally block so that it is paired with
             * the _lock_str2 call above. */
            _unlock_str2(0, stdin);
        }
     
        return(retval);
    }
    
    

    我们发现,vscanf_fn实际上是使用了inputfn这个函数。我们进一步跟进,找到了input.c这个文件

    /***
    *int _input(stream, format, arglist), static int input(format, arglist)
    *
    *Purpose:
    *   get input items (data items or literal matches) from the input stream
    *   and assign them if appropriate to the items thru the arglist. this
    *   function is intended for internal library use only, not for the user
    *
    *   The _input entry point is for the normal scanf() functions
    *   The input entry point is used when compiling for _cscanf() [CPRFLAF
    *   defined] and is a static function called only by _cscanf() -- reads from
    *   console.
    *
    *   This code also defines _input_s, which works differently for %c, %s & %[.
    *   For these, _input_s first picks up the next argument from the variable
    *   argument list & uses it as the maximum size of the character array pointed
    *   to by the next argument in the list.
    *
    *Entry:
    *   FILE *stream - file to read from
    *   char *format - format string to determine the data to read
    *   arglist - list of pointer to data items
    *
    *Exit:
    *   returns number of items assigned and fills in data items
    *   returns EOF if error or EOF found on stream before 1st data item matched
    *
    *Exceptions:
    *
    *******************************************************************************/
    

    有几个关键函数:

    /*
     * _inc - fetch one character from fileptr.
     *
     * Thin wrapper: returns the result of _gettc_nolock(fileptr) as-is.
     */
    static _TINT __cdecl _inc(FILE* fileptr)
    {
        return (_gettc_nolock(fileptr));
    }
    
    

    _inc的功能是调出缓冲区第一个字符

    /*
     * _un_inc - hand a character back to fileptr.
     *
     * chr:     the character to give back.
     * fileptr: the stream it is given back to, via _ungettc_nolock.
     *
     * Does nothing when chr is _TEOF.
     */
    static void __cdecl _un_inc(_TINT chr, FILE* fileptr)
    {
        if (_TEOF != chr) {
            _ungettc_nolock(chr,fileptr);
        }
    }
    
    

    _un_inc函数,将刚才_inc读出的字符重新放回缓冲区(如果该字符是_TEOF则什么也不做)

    /*
     * _whiteout - skip a run of whitespace characters read from fileptr.
     *
     * counter: incremented once for every character fetched by this call,
     *          including the character that ends the loop.
     * fileptr: stream the characters are fetched from (through _inc).
     *
     * Returns the first fetched character for which _istspace is false, or
     * _TEOF if _inc returned _TEOF first.
     */
    static _TINT __cdecl _whiteout(int* counter, FILE* fileptr)
    {
        _TINT ch;
     
        do
        {
            /* Count before reading, so the terminating character (or the
             * _TEOF result) is counted as well. */
            ++*counter;
            ch = _inc(fileptr);
     
            if (ch == _TEOF)
            {
                break;
            }
        }
        while(_istspace((_TUCHAR)ch));
        return ch;
    }
    

    _whiteout函数,将从缓冲区开头开始的所有连续空白字符调出

    最后一步检查缓冲区,如果缓冲区可读字符为0,那么清空缓冲区

    format的解析

    while (*format) {
     
            if (_istspace((_TUCHAR)*format)) {
     
                UN_INC(EAT_WHITE()); /* put first non-space char back */
     
                do {
                    tch = *++format;
                } while (_istspace((_TUCHAR)tch));
     
                continue;
         ………………
    
    

    这里的UN_INC(EAT_WHITE()),是把当初EAT_WHITE读出的第一个非空白字符再放入缓冲区。

    上面代码完成对键盘缓冲区中空白符的清理,直到正常读取第一个字符。

    当读入%号,进行处理:

    if (_T('%') == *format && _T('%') != *(format + 1))
    

    解析完格式后,开始真正的实现,这里贴出的代码是%[ ]字符集的处理部分:

    if (_T('^') == *scanptr) {
                                ++scanptr;
                                --reject; /* set reject to 255 */
                            }
     
                            /* Allocate "table" on first %[] spec */
    #if ALLOC_TABLE
                            if (table == NULL) {
                                table = (char*)_malloc_crt(TABLESIZE);
                                if ( table == NULL)
                                    goto error_return;
                                malloc_flag = 1;
                            }
    #endif  /* ALLOC_TABLE */
                            memset(table, 0, TABLESIZE);
     
     
                            if (LEFT_BRACKET == comchr)
                                if (_T(']') == *scanptr) {
                                    prevchar = _T(']');
                                    ++scanptr;
     
                                    table[ _T(']') >> 3] = 1 << (_T(']') & 7);
     
                                }
     
                            while (_T(']') != *scanptr) {
     
                                rngch = *scanptr++;
     
                                if (_T('-') != rngch ||
                                     !prevchar ||           /* first char */
                                     _T(']') == *scanptr) /* last char */
     
                                    table[(prevchar = rngch) >> 3] |= 1 << (rngch & 7);
     
                                else {  /* handle a-z type set */
     
                                    rngch = *scanptr++; /* get end of range */
     
                                    if (prevchar < rngch)  /* %[a-z] */
                                        last = rngch;
                                    else {              /* %[z-a] */
                                        last = prevchar;
                                        prevchar = rngch;
                                    }
                                    /* last could be 0xFF, so we handle it at the end of the for loop */
                                    for (rngch = prevchar; rngch < last; ++rngch)
                                    {
                                        table[rngch >> 3] |= 1 << (rngch & 7);
                                    }
                                    table[last >> 3] |= 1 << (last & 7);
     
                                    prevchar = 0;
     
                                }
                            }
    

    reject是反转标记:如果字符集以^开头,则通过--reject把reject置为0xFF,之后查表时用它做异或,从而实现取反。

    对于[ ]字符集,有一个char table[32]作为位图来记录256种可能的字符取值。(此处每个char为8bits,所以32个char共256位,每种字符取值对应其中一位)

    微软对table中字符做了这样的处理:

    table[rngch >> 3] |= 1 << (rngch & 7);
    

    即:将所读的字符串分到32组中【rngch>>3相当于除以8】,每个table[n]有8bits,每个bit中,出现的字符位会被置为1,未出现则为0,这样就完美囊括了256个ASCII字符。

    判断字符是否存在,直接这样处理:

    (table[ch >> 3] ^ reject) & (1 << (ch & 7))
    

    在其中有很多判断_ISXDIGIT(ch)的,假若不是阿拉伯数字,则会执行跳出当前%d字符读取,执行1313行的 ++format; /* skip to next char */

    %d跳过了 的读取,继续读取下一个字符。

    if (_T('%') == *format && _T('%') != *(format + 1)) {
    		
    		……………………
    			
    		   ++format;  /* skip to next char */
            } else  /*  ('%' != *format) */
    		{
    		………………………
    		}
    
    

    在读代码时候读到一个函数 hextodec

    /*
     * _hextodec - map a hexadecimal digit character onto '0' + its value.
     *
     * chr: the character to convert.
     *
     * When _ISDIGIT(chr) holds, chr is returned unchanged. Otherwise the bit
     * equal to ('a' - 'A') is cleared, and the offset of the result from 'A'
     * is added to 10 + '0', so both 'a' and 'A' yield '0' + 10. No check that
     * chr is a valid hex digit is made here.
     */
    static _TINT __cdecl _hextodec ( _TCHAR chr)
    {
        return _ISDIGIT(chr) ? chr : (chr & ~(_T('a') - _T('A'))) - _T('A') + 10 + _T('0');
    }
    
    

    将读取的16进制字符(0-9、a-f、A-F)转换成"'0'+对应数值"的字符编码(例如'A'和'a'都得到'0'+10),调用者再减去'0'就得到0~15的数值,然后scanf就结束了

    我们有一个非常简单的scanf的实现(臭不要脸的调用vscanf)

    #include<cstdio>
    #include<cstdarg>
    /*
     * my_scanf - minimal scanf look-alike that forwards to vscanf.
     *
     * fmt: scanf-style format string (const, so string literals can be
     *      passed directly).
     * ...: pointers to the objects that receive the converted values.
     *
     * Returns the value returned by vscanf: the number of items assigned,
     * or EOF if input fails before the first conversion.
     */
    int my_scanf(const char* fmt,...)
    {
        va_list args;
        va_start(args,fmt);
        /* Keep vscanf's result so the caller can tell success from failure. */
        int ret=vscanf(fmt,args);
        va_end(args);
        return ret;
    }
    /*
     * Demo driver: reads one integer with my_scanf and prints it back.
     */
    int main()
    {
        /* Start from a defined value: if the read fails, a is never written,
         * and printing an uninitialized int would be undefined behavior. */
        int a=0;
        my_scanf("%d",&a);
        printf("%d",a);
        return 0;
    }
    
  • 相关阅读:
    威尔逊置信区间
    mysql函数
    python操作mysql之pymysql
    oracle 表分区
    Python连接sqlserver数据库之pymssql
    python-subprocess
    python-shutil
    python+logging
    公钥加密-DES-RSA
    python-Web-flask-蓝图和单元测试
  • 原文地址:https://www.cnblogs.com/Syameimaru/p/10109729.html
Copyright © 2011-2022 走看看