zoukankan      html  css  js  c++  java
  • fnmatch源码阅读

    源码下载地址如下:

    http://web.mit.edu/freebsd/csup/fnmatch.h
    http://web.mit.edu/freebsd/csup/fnmatch.c

    代码整体不错,但是中有一些地方稍有不足。我没有去改(添加了一些{},以便结构清晰)。

    /*
    * Copyright (c) 1989, 1993, 1994
    *	The Regents of the University of California.  All rights reserved.
    *
    * This code is derived from software contributed to Berkeley by
    * Guido van Rossum.
    *
    * Redistribution and use in source and binary forms, with or without
    * modification, are permitted provided that the following conditions
    * are met:
    * 1. Redistributions of source code must retain the above copyright
    *    notice, this list of conditions and the following disclaimer.
    * 2. Redistributions in binary form must reproduce the above copyright
    *    notice, this list of conditions and the following disclaimer in the
    *    documentation and/or other materials provided with the distribution.
    * 3. All advertising materials mentioning features or use of this software
    *    must display the following acknowledgement:
    *	This product includes software developed by the University of
    *	California, Berkeley and its contributors.
    * 4. Neither the name of the University nor the names of its contributors
    *    may be used to endorse or promote products derived from this software
    *    without specific prior written permission.
    *
    * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    * SUCH DAMAGE.
    *
    * From FreeBSD fnmatch.c 1.11
    * $Id: fnmatch.c,v 1.3 1997/08/19 02:34:30 jdp Exp $
    */
    
    #ifndef	_FNMATCH_H_
    #define	_FNMATCH_H_
    
    #define	FNM_NOMATCH	1	/* Match failed. */
    
    #define	FNM_NOESCAPE	0x01	/* 禁用反斜杠进行转义 */
    #define	FNM_PATHNAME	0x02	/* 斜杠只能被斜杠匹配(即不能被*或者?匹配) */
    #define	FNM_PERIOD	0x04	/* Period must be matched by period. */
    /*如果这个标志设置了,string 里的起始点号必须匹配 pattern 里的点号。
    一个点号被认为是起始点号,如果它是string 第一个字符,或者如果同时设
    置了 FNM_PATHNAME,紧跟在斜杠后面的点号。
    */
    #define	FNM_LEADING_DIR	0x08	/* Ignore /<tail> after Imatch. */
    /*如果这个标志(GNU 扩展)设置了,模式必须匹配跟随在斜杠之后的 string
    的初始片断。这个标志主要是给 glibc 内部使用并且只在一定条件下实现。
    即只匹配目录路径部分,不匹配到具体文件名
    */
    #define	FNM_CASEFOLD	0x10	/* 模式匹配忽略大小写. */
    #define FNM_PREFIX_DIRS	0x20	/* Directory prefixes of pattern match too. */
    
    /* Make this compile successfully with "gcc -traditional" */
    #ifndef __STDC__
    #define const	/* empty */
    #endif
    
    int	 fnmatch(const char *, const char *, int);
    
    #endif /* !_FNMATCH_H_ */
    
    #if defined(LIBC_SCCS) && !defined(lint)
    static char sccsid[] = "@(#)fnmatch.c	8.2 (Berkeley) 4/16/94";
    #endif /* LIBC_SCCS and not lint */
    
    /*
    * 函数fnmatch(),如POSIX 1003.2-1992 B.6节所述。
    * 将文件名或者目录名与pattern进行比较
    */
    
    #include <ctype.h>
    #include <string.h>
    #include <stdio.h>
    
    // 定义字符串结尾标志
    #define	EOS	''
    
    static const char *rangematch(const char *, char, int);
    
    int
    fnmatch(const char *pattern, const char *string, int flags)
    {
    	const char *stringstart;
    	char c, test;
    
    	for (stringstart = string;;){
            // 逐个取匹配字符串中的成分(分为*?和range三种)
    		switch (c = *pattern++) {
    		case EOS:   // 没有匹配串的情况
                // 如果忽略'/'后面的部分,则匹配成功
    			if ((flags & FNM_LEADING_DIR) && *string == '/'){
    				return (0);
    			}
    			// 如果string也是空串,则匹配成功
    			return (*string == EOS ? 0 : FNM_NOMATCH);
    		case '?':   // 匹配单个任意字符
    			// string为空则不能匹配
    			if (*string == EOS){
    				return (FNM_NOMATCH);
    			}
    			// 判断'/'是否只能由'/'进行匹配
    			if (*string == '/' && (flags & FNM_PATHNAME)){
    				return (FNM_NOMATCH);
    			}
    			// 判断是否string中的起始'.'必须匹配pattern中的'.'(即'?'不能匹配'.')
    			if (*string == '.' && (flags & FNM_PERIOD) &&
    				(string == stringstart ||
    				((flags & FNM_PATHNAME) && *(string - 1) == '/'))){
    				return (FNM_NOMATCH);
    			}
    			// 匹配成功则匹配string的下一个字符
    			++string;
    			break;
    		case '*':   // 匹配单个或多个任意字符
    			c = *pattern;
    			/* 多个'*'当做一个 */
    			while (c == '*'){
    				c = *++pattern;
    			}
    			// 判断是否需要对'.'进行处理
    			if (*string == '.' && (flags & FNM_PERIOD) &&
    				(string == stringstart ||
    				((flags & FNM_PATHNAME) && *(string - 1) == '/'))){
    				return (FNM_NOMATCH);
    			}
    
    			/* 优化 * 在匹配串结尾或者在 /. 之前的匹配*/
    			if (c == EOS){  // 在结尾
                    // 判断 * 是否不匹配斜杠
    				if (flags & FNM_PATHNAME){
                        // 不匹配斜杠,则判断是否忽略'/'之后部分
    					return ((flags & FNM_LEADING_DIR) ||
    						((strchr(string, '/') == NULL) ? 0 : FNM_NOMATCH));
    				}else{
    					return (0);
    				}
    			}
    			else if (c == '/' && flags & FNM_PATHNAME) { // 在 /. 之前
                    // 如果string后没有 '/'则匹配失败
    				if ((string = strchr(string, '/')) == NULL){
    					return (FNM_NOMATCH);
    				}
    				break;
    			}
    
    			/* 非特殊情况下,递归匹配 */
    			while ((test = *string) != EOS) {
                    // 不对'.'进行特殊处理,进行匹配(则只需判断'/'匹配情况)
    				if (!fnmatch(pattern, string, flags & ~FNM_PERIOD)){
    					return (0); // 匹配成功
    				}
    				// 对 '/'进行处理(斜杠只匹配斜杠,则匹配失败)
    				if (test == '/' && flags & FNM_PATHNAME){
    					break;
    				}
    				++string;
    			}
    			// 返回匹配失败(即*没有匹配成功,'.'和'/'上的匹配没有成功)
    			return (FNM_NOMATCH);
    		case '[':   // range 范围匹配
    			if (*string == EOS){
    				return (FNM_NOMATCH);   // 空串匹配失败
    			}
    			if (*string == '/' && flags & FNM_PATHNAME){
    				return (FNM_NOMATCH);   // '/'匹配失败
    			}
    			if ((pattern =
    				rangematch(pattern, *string, flags)) == NULL){
    				return (FNM_NOMATCH);   // 范围匹配失败
    			}
    			++string;
    			break;
    		case '\':  // 斜杠匹配(判断是否需要转义)
    			if (!(flags & FNM_NOESCAPE)) {
    				if ((c = *pattern++) == EOS) {
    					c = '\';
    					--pattern;
    				}
    			}
    			/* 非上述部分,则直接匹配单个字符 */
    		default:
    			if (c == *string){
                    ;   // 直接匹配上了
    			}else if ((flags & FNM_CASEFOLD) &&
    				(tolower((unsigned char)c) ==
    					tolower((unsigned char)*string))){
    				;   // 忽略大小写匹配成功
    			}
    			else if ((flags & FNM_PREFIX_DIRS) && *string == EOS &&
    				((c == '/' && string != stringstart) ||
    				(string == stringstart + 1 && *stringstart == '/'))){
    				return (0); // 匹配成功
    			}
    			else{
    				return (FNM_NOMATCH); // 匹配失败
    			}
    			string++;
    			break;
    		}
    	}
    	/* NOTREACHED */
    }
    
    // 字符范围匹配
    // pattern传入如 [a-x]*** 形式的字符串
    // 匹配失败或匹配到EOS结束(也是失败),返回NULL
    // 成功返回匹配串的下一个匹配成分首地址
    static const char *
    rangematch(const char *pattern, char test, int flags)
    {
        // 此处没有对c进行初始化,可能出问题(栈上变量默认值未定)
    	int negate, ok;
    	char c, c2;
    
    	/*
    	* A bracket expression starting with an unquoted circumflex
    	* character produces unspecified results
    	* 以无引号 ^ 字符开始的方括号表达式,将产生未指定的结果
    	* (IEEE 1003.2-1992,3.13.2).  此实现将其视为 '!',以与正则表达式语法保持一致.
    	* J.T. Conklin (conklin@ngai.kaleida.com)
    	*/
    	// 检测方括号表达式中第一个字符
    	// 如果为!或者^,则对后面匹配的结果取反
    	if ((negate = (*pattern == '!' || *pattern == '^'))){
    		++pattern;
    	}
    
    	// 忽略大小写,则转为小写处理
    	if (flags & FNM_CASEFOLD){
    		test = tolower((unsigned char)test);
    	}
    	// 循环到方括号表达式结束
    	for (ok = 0; (c = *pattern++) != ']';) {
            // 如果没有禁用转义,获取字符
    		if (c == '\' && !(flags & FNM_NOESCAPE)){
    			c = *pattern++;
    		}
    		// 匹配结束
    		if (c == EOS){
    			return (NULL);
    		}
            // 忽略大小写,则转为小写
    		if (flags & FNM_CASEFOLD){
    			c = tolower((unsigned char)c);
            }
            // 如果当前匹配项c 的下一个是'-',则获取'-'后面的一个字符
            // 例如,匹配串为 [a-x] 当前c为a,则c2为x,表示匹配a-x之间字符
    		if (*pattern == '-'
    			&& (c2 = *(pattern + 1)) != EOS && c2 != ']') {
    			pattern += 2;
    			// 判断是否需要转义
    			if (c2 == '\' && !(flags & FNM_NOESCAPE)){
    				c2 = *pattern++;
    			}
    			if (c2 == EOS){
    				return (NULL);
    			}
                // 判断是否区分大小写
    			if (flags & FNM_CASEFOLD){
    				c2 = tolower((unsigned char)c2);
    			}
                // 判断test是否位于 [c,c2]区间
    			if ((unsigned char)c <= (unsigned char)test &&
    				(unsigned char)test <= (unsigned char)c2){
    				ok = 1;
    			}
    		}
    		else if (c == test){
    			ok = 1;
    		}
    	}
    	// 返回匹配结果
    	return (ok == negate ? NULL : pattern);
    }
    
  • 相关阅读:
    HDU3336 Count the string —— KMP next数组
    CodeForces
    51Nod 1627 瞬间移动 —— 组合数学
    51Nod 1158 全是1的最大子矩阵 —— 预处理 + 暴力枚举 or 单调栈
    51Nod 1225 余数之和 —— 分区枚举
    51Nod 1084 矩阵取数问题 V2 —— 最小费用最大流 or 多线程DP
    51Nod 机器人走方格 V3 —— 卡特兰数、Lucas定理
    51Nod XOR key —— 区间最大异或值 可持久化字典树
    HDU4825 Xor Sum —— Trie树
    51Nod 1515 明辨是非 —— 并查集 + 启发式合并
  • 原文地址:https://www.cnblogs.com/oloroso/p/6861576.html
Copyright © 2011-2022 走看看