// // _MATCH.CPP // // Source file for ArchiveLib 1.0 // // Copyright (c) Greenleaf Software, Inc. 1994 // All Rights Reserved // // CONTENTS // // is_pattern() // is_valid_pattern() // matche() // matche_after_star() // match() // main() (For testing, w/conditional compile) // // DESCRIPTION // // Some nice code written by J. Kercheval, in the public domain. // This code provides us with the pattern matching functions used // by the ALName functions. It all seems to work without any trouble // at all. // // I tried to change this file as little as possible. I modified the // name of the file, and removed BOOLEAN, TRUE, and FALSE from the // header file. Other than that, it is just as I found it. // // REVISION HISTORY // // May 22, 1994 1.0A : First release // // #include "arclib.h" #pragma hdrstop /* EPSHeader File: match.c Author: J. Kercheval Created: Sat, 01/05/1991 22:21:49 */ /* EPSRevision History J. Kercheval Wed, 02/20/1991 22:29:01 Released to Public Domain J. Kercheval Fri, 02/22/1991 15:29:01 fix '\' bugs (two :( of them) J. Kercheval Sun, 03/10/1991 19:31:29 add error return to matche() J. Kercheval Sun, 03/10/1991 20:11:11 add is_valid_pattern code J. Kercheval Sun, 03/10/1991 20:37:11 beef up main() J. Kercheval Tue, 03/12/1991 22:25:10 Released as V1.1 to Public Domain J. Kercheval Thu, 03/14/1991 22:22:25 remove '\' for DOS file parsing J. Kercheval Mon, 05/13/1991 21:49:05 ifdef full match code J. Kercheval Mon, 01/06/1992 21:31:44 add match character defines */ /* * Wildcard Pattern Matching */ #include "arclib.h" #include "_match.h" // // The next five lines used to be in the header file // #ifndef BOOLEAN #define BOOLEAN int #define TRUE 1 #define FALSE 0 #endif /* character defines */ #define MATCH_CHAR_SINGLE '?' #define MATCH_CHAR_KLEENE_CLOSURE '*' #define MATCH_CHAR_RANGE_OPEN '[' #define MATCH_CHAR_RANGE '-' #define MATCH_CHAR_RANGE_CLOSE ']' #define MATCH_CHAR_LITERAL '\\' #define MATCH_CHAR_NULL '\0' #define MATCH_CHAR_CARAT_NEGATE '^' #define MATCH_CHAR_EXCLAMATION_NEGATE '!' /* forward function prototypes */ int matche_after_star( register const char *pattern, register char *text); int fast_match_after_star(register char *pattern, register char *text); /*---------------------------------------------------------------------------- * * Return TRUE if PATTERN has any special wildcard characters * ---------------------------------------------------------------------------*/ BOOLEAN is_pattern( const char *p ) { while (*p) { switch (*p++) { case MATCH_CHAR_SINGLE: case MATCH_CHAR_KLEENE_CLOSURE: case MATCH_CHAR_RANGE_OPEN: #ifndef FILE_MATCH case MATCH_CHAR_LITERAL: #endif return TRUE; } } return FALSE; } /*---------------------------------------------------------------------------- * * Return TRUE if PATTERN has is a well formed regular expression according * to the above syntax * * error_type is a return code based on the type of pattern error. Zero is * returned in error_type if the pattern is a valid one. error_type return * values are as follows: * * PATTERN_VALID - pattern is well formed #ifndef FILE_MATCH * PATTERN_ESC - pattern has invalid escape ('\' at end of pattern) #endif * PATTERN_RANGE - [..] construct has a no end range in a '-' pair (ie [a-]) * PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g ) * PATTERN_EMPTY - [..] construct is empty (ie []) * ---------------------------------------------------------------------------*/ BOOLEAN is_valid_pattern( const char *p, int *error_type ) { /* init error_type */ *error_type = PATTERN_VALID; /* loop through pattern to EOS */ while (*p) { /* determine pattern type */ switch (*p) { #ifndef FILE_MATCH /* check literal escape, it cannot be at end of pattern */ case MATCH_CHAR_LITERAL: if (!*++p) { *error_type = PATTERN_ESC; return FALSE; } p++; break; #endif /* the [..] construct must be well formed */ case MATCH_CHAR_RANGE_OPEN: p++; /* if the next character is ']' then bad pattern */ if (*p == MATCH_CHAR_RANGE_CLOSE) { *error_type = PATTERN_EMPTY; return FALSE; } /* if end of pattern here then bad pattern */ if (!*p) { *error_type = PATTERN_CLOSE; return FALSE; } /* loop to end of [..] construct */ while (*p != MATCH_CHAR_RANGE_CLOSE) { /* check for literal escape */ if (*p == MATCH_CHAR_LITERAL) { p++; /* if end of pattern here then bad pattern */ if (!*p++) { *error_type = PATTERN_ESC; return FALSE; } } else p++; /* if end of pattern here then bad pattern */ if (!*p) { *error_type = PATTERN_CLOSE; return FALSE; } /* if this a range */ if (*p == MATCH_CHAR_RANGE) { /* we must have an end of range */ if (!*++p || *p == MATCH_CHAR_RANGE_CLOSE) { *error_type = PATTERN_RANGE; return FALSE; } else { /* check for literal escape */ if (*p == MATCH_CHAR_LITERAL) p++; /* if end of pattern here then bad pattern */ if (!*p++) { *error_type = PATTERN_ESC; return FALSE; } } } } break; /* all other characters are valid pattern elements */ case MATCH_CHAR_KLEENE_CLOSURE: case MATCH_CHAR_SINGLE: default: /* "normal" character */ p++; break; } } return TRUE; } /*---------------------------------------------------------------------------- * * Match the pattern PATTERN against the string TEXT; * * returns MATCH_VALID if pattern matches, or an errorcode as follows * otherwise: * * MATCH_PATTERN - bad pattern #ifndef FILE_MATCH * MATCH_LITERAL - match failure on literal mismatch #endif * MATCH_RANGE - match failure on [..] construct * MATCH_ABORT - premature end of text string * MATCH_END - premature end of pattern string * MATCH_VALID - valid match * * * A match means the entire string TEXT is used up in matching. * * In the pattern string: * `*' matches any sequence of characters (zero or more) * `?' matches any character * [SET] matches any character in the specified set, * [!SET] or [^SET] matches any character not in the specified set. * \ is allowed within a set to escape a character like ']' or '-' * * A set is composed of characters or ranges; a range looks like character * hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of * characters allowed in the [..] pattern construct. Other characters are * allowed (ie. 8 bit characters) if your system will support them. * * To suppress the special syntactic significance of any of `[]*?!^-\', and * match the character exactly, precede it with a `\'. * ---------------------------------------------------------------------------*/ int matche( const char *p, char *t) { register char range_start, range_end; /* start and end in range */ BOOLEAN invert; /* is this [..] or [!..] */ BOOLEAN member_match; /* have I matched the [..] construct? */ BOOLEAN loop; /* should I terminate? */ for (; *p; p++, t++) { /* if this is the end of the text then this is the end of the match */ if (!*t) { return (*p == MATCH_CHAR_KLEENE_CLOSURE && *++p == MATCH_CHAR_NULL) ? MATCH_VALID : MATCH_ABORT; } /* determine and react to pattern type */ switch (*p) { /* single any character match */ case MATCH_CHAR_SINGLE: break; /* multiple any character match */ case MATCH_CHAR_KLEENE_CLOSURE: return matche_after_star(p, t); /* [..] construct, single member/exclusion character match */ case MATCH_CHAR_RANGE_OPEN:{ /* move to beginning of range */ p++; /* check if this is a member match or exclusion match */ invert = FALSE; if (*p == MATCH_CHAR_EXCLAMATION_NEGATE || *p == MATCH_CHAR_CARAT_NEGATE) { invert = TRUE; p++; } /* if closing bracket here or at range start then we have * a malformed pattern */ if (*p == MATCH_CHAR_RANGE_CLOSE) { return MATCH_PATTERN; } member_match = FALSE; loop = TRUE; while (loop) { /* if end of construct then loop is done */ if (*p == MATCH_CHAR_RANGE_CLOSE) { loop = FALSE; continue; } /* matching a '!', '^', '-', '\' or a ']' */ if (*p == MATCH_CHAR_LITERAL) { range_start = range_end = *++p; } else { range_start = range_end = *p; } /* if end of pattern then bad pattern (Missing ']') */ if (!*p) return MATCH_PATTERN; /* check for range bar */ if (*++p == MATCH_CHAR_RANGE) { /* get the range end */ range_end = *++p; /* if end of pattern or construct then bad * pattern */ if (range_end == MATCH_CHAR_NULL || range_end == MATCH_CHAR_RANGE_CLOSE) return MATCH_PATTERN; /* special character range end */ if (range_end == MATCH_CHAR_LITERAL) { range_end = *++p; /* if end of text then we have a bad pattern */ if (!range_end) return MATCH_PATTERN; } /* move just beyond this range */ p++; } /* if the text character is in range then match * found. make sure the range letters have the proper * relationship to one another before comparison */ if (range_start < range_end) { if (*t >= range_start && *t <= range_end) { member_match = TRUE; loop = FALSE; } } else { if (*t >= range_end && *t <= range_start) { member_match = TRUE; loop = FALSE; } } } /* if there was a match in an exclusion set then no match */ /* if there was no match in a member set then no match */ if ((invert && member_match) || !(invert || member_match)) return MATCH_RANGE; /* if this is not an exclusion then skip the rest of the * [...] construct that already matched. */ if (member_match) { while (*p != MATCH_CHAR_RANGE_CLOSE) { /* bad pattern (Missing MATCH_CHAR_RANGE_CLOSE) */ if (!*p) return MATCH_PATTERN; /* skip exact match */ if (*p == MATCH_CHAR_LITERAL) { p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; } /* move to next pattern char */ p++; } } break; } #ifndef FILE_MATCH /* next character is quoted and must match exactly */ case MATCH_CHAR_LITERAL: /* move pattern pointer to quoted char and fall through */ p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; #endif /* must match this character exactly */ default: if (*p != *t) return MATCH_LITERAL; } } /* if end of text not reached then the pattern fails */ if (*t) return MATCH_END; else return MATCH_VALID; } /*---------------------------------------------------------------------------- * * recursively call matche() with final segment of PATTERN and of TEXT. * ---------------------------------------------------------------------------*/ int matche_after_star( register const char *p, register char *t) { register int match = 0; register nextp; /* pass over existing ? and * in pattern */ while (*p == MATCH_CHAR_SINGLE || *p == MATCH_CHAR_KLEENE_CLOSURE) { /* take one char for each ? and + */ if (*p == MATCH_CHAR_SINGLE) { /* if end of text then no match */ if (!*t++) { return MATCH_ABORT; } } /* move to next char in pattern */ p++; } /* if end of pattern we have matched regardless of text left */ if (!*p) { return MATCH_VALID; } /* get the next character to match which must be a literal or '[' */ nextp = *p; #ifndef FILE_MATCH if (nextp == MATCH_CHAR_LITERAL) { nextp = p[1]; /* if end of text then we have a bad pattern */ if (!nextp) return MATCH_PATTERN; } #endif /* Continue until we run out of text or definite result seen */ do { /* a precondition for matching is that the next character in the * pattern match the next character in the text or that the next * pattern char is the beginning of a range. Increment text pointer * as we go here */ if (nextp == *t || nextp == MATCH_CHAR_RANGE_OPEN) { match = matche(p, t); } /* if the end of text is reached then no match */ if (!*t++) match = MATCH_ABORT; } while (match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN); /* return result */ return match; } /*---------------------------------------------------------------------------- * * match() is a shell to matche() to return only BOOLEAN values. * ---------------------------------------------------------------------------*/ BOOLEAN match( char *p, char *t) { int error_type; error_type = matche(p, t); return (error_type == MATCH_VALID) ? TRUE : FALSE; } #ifdef TEST /* * This test main expects as first arg the pattern and as second arg * the match string. Output is yaeh or nay on match. If nay on * match then the error code is parsed and written. */ #include int main(int argc, char *argv[]) { int error; int is_valid_error; if (argc != 3) { printf("Usage: MATCH Pattern Text\n"); } else { printf("Pattern: %s\n", argv[1]); printf("Text : %s\n", argv[2]); if ( !is_pattern(argv[1])) { printf(" First Argument Is Not A Pattern\n"); } else { #ifdef FILE_MATCH match(argv[1], argv[2]) ? printf("TRUE") : printf("FALSE"); #endif error = matche(argv[1], argv[2]); is_valid_pattern(argv[1], &is_valid_error); switch (error) { case MATCH_VALID: printf(" Match Successful"); if (is_valid_error != PATTERN_VALID) printf(" -- is_valid_pattern() is complaining\n"); else printf("\n"); break; #ifndef FILE_MATCH case MATCH_LITERAL: printf(" Match Failed on Literal\n"); break; #endif case MATCH_RANGE: printf(" Match Failed on [..]\n"); break; case MATCH_ABORT: printf(" Match Failed on Early Text Termination\n"); break; case MATCH_END: printf(" Match Failed on Early Pattern Termination\n"); break; case MATCH_PATTERN: switch (is_valid_error) { case PATTERN_VALID: printf(" Internal Disagreement On Pattern\n"); break; #ifndef FILE_MATCH case PATTERN_ESC: printf(" Literal Escape at End of Pattern\n"); break; #endif case PATTERN_RANGE: printf(" No End of Range in [..] Construct\n"); break; case PATTERN_CLOSE: printf(" [..] Construct is Open\n"); break; case PATTERN_EMPTY: printf(" [..] Construct is Empty\n"); break; default: printf(" Internal Error in is_valid_pattern()\n"); } break; default: printf(" Internal Error in matche()\n"); break; } } } return (0); } #endif