Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6725 | siemargl | 1 | /* |
2 | Copyright (c) 1990-2005 Info-ZIP. All rights reserved. |
||
3 | |||
4 | See the accompanying file LICENSE, version 2000-Apr-09 or later |
||
5 | (the contents of which are also included in unzip.h) for terms of use. |
||
6 | If, for some reason, all these files are missing, the Info-ZIP license |
||
7 | also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
||
8 | */ |
||
9 | /*--------------------------------------------------------------------------- |
||
10 | |||
11 | match.c |
||
12 | |||
13 | The match() routine recursively compares a string to a "pattern" (regular |
||
14 | expression), returning TRUE if a match is found or FALSE if not. This |
||
15 | version is specifically for use with unzip.c: as did the previous match() |
||
16 | routines from SEA and J. Kercheval, it leaves the case (upper, lower, or |
||
17 | mixed) of the string alone, but converts any uppercase characters in the |
||
18 | pattern to lowercase if indicated by the global var pInfo->lcflag (which |
||
19 | is to say, string is assumed to have been converted to lowercase already, |
||
20 | if such was necessary). |
||
21 | |||
22 | GRR: reversed order of text, pattern in matche() (now same as match()); |
||
23 | added ignore_case/ic flags, Case() macro. |
||
24 | |||
25 | PaulK: replaced matche() with recmatch() from Zip, modified to have an |
||
26 | ignore_case argument; replaced test frame with simpler one. |
||
27 | |||
28 | --------------------------------------------------------------------------- |
||
29 | |||
30 | Copyright on recmatch() from Zip's util.c (although recmatch() was almost |
||
31 | certainly written by Mark Adler...ask me how I can tell :-) ): |
||
32 | |||
33 | Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly, |
||
34 | Kai Uwe Rommel and Igor Mandrichenko. |
||
35 | |||
36 | Permission is granted to any individual or institution to use, copy, |
||
37 | or redistribute this software so long as all of the original files are |
||
38 | included unmodified, that it is not sold for profit, and that this copy- |
||
39 | right notice is retained. |
||
40 | |||
41 | --------------------------------------------------------------------------- |
||
42 | |||
43 | Match the pattern (wildcard) against the string (fixed): |
||
44 | |||
45 | match(string, pattern, ignore_case, sepc); |
||
46 | |||
47 | returns TRUE if string matches pattern, FALSE otherwise. In the pattern: |
||
48 | |||
49 | `*' matches any sequence of characters (zero or more) |
||
50 | `?' matches any single character |
||
51 | [SET] matches any character in the specified set, |
||
52 | [!SET] or [^SET] matches any character not in the specified set. |
||
53 | |||
54 | A set is composed of characters or ranges; a range looks like ``character |
||
55 | hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of |
||
56 | characters allowed in the [..] pattern construct. Other characters are |
||
57 | allowed (i.e., 8-bit characters) if your system will support them. |
||
58 | |||
59 | To suppress the special syntactic significance of any of ``[]*?!^-\'', in- |
||
60 | side or outside a [..] construct, and match the character exactly, precede |
||
61 | it with a ``\'' (backslash). |
||
62 | |||
63 | Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is |
||
64 | defined. See the DOSWILD section below for an explanation. Note also |
||
65 | that with VMSWILD defined, '%' is used instead of '?', and sets (ranges) |
||
66 | are delimited by () instead of []. |
||
67 | |||
68 | ---------------------------------------------------------------------------*/ |
||
69 | |||
70 | |||
71 | #define __MATCH_C /* identifies this source module */ |
||
72 | |||
73 | /* define ToLower() in here (for Unix, define ToLower to be macro (using |
||
74 | * isupper()); otherwise just use tolower() */ |
||
75 | #define UNZIP_INTERNAL |
||
76 | #include "unzip.h" |
||
77 | |||
78 | #ifndef THEOS /* the Theos port defines its own variant of match() */ |
||
79 | |||
#if 0  /* this is not useful until it matches Amiga names insensitively */
#ifdef AMIGA   /* some other platforms might also want to use this */
#  define ANSI_CHARSET       /* MOVE INTO UNZIP.H EVENTUALLY */
#endif
#endif /* 0 */

/*
 * Case folding helpers.
 *
 * When ANSI_CHARSET is defined, any ToLower() supplied earlier is replaced
 * by a table-free variant covering the 8-bit upper-case ranges listed
 * below.  (Within this file the only definition of ANSI_CHARSET sits inside
 * the "#if 0" section above.)
 *
 * Every macro parameter is parenthesized so that expression arguments expand
 * safely.  ToLower() converts its argument to uch for the result as well as
 * for the IsUpper() test, so a negative plain char is not sign-extended.
 */
#ifdef ANSI_CHARSET
#  ifdef ToLower
#    undef ToLower
#  endif
   /* uppercase letters are values 41 thru 5A, C0 thru D6, and D8 thru DE */
#  define IsUpper(c) \
     ((c)>=0xC0 ? (c)<=0xDE && (c)!=0xD7 : (c)>=0x41 && (c)<=0x5A)
   /* setting bit 0x20 turns an upper-case code from the ranges above into
    * the corresponding lower-case code */
#  define ToLower(c) \
     (IsUpper((uch)(c)) ? (unsigned)(uch)(c) | 0x20 : (unsigned)(uch)(c))
#endif

/* Case(x) folds x to lower case only when the local variable `ic' of the
 * invoking function is non-zero; the macro reads `ic' from the scope in
 * which it is expanded. */
#define Case(x)  (ic? ToLower(x) : (x))

/* Single-character wildcard and set delimiters; VMSWILD selects the
 * VMS-flavoured alternatives. */
#ifdef VMSWILD
#  define WILDCHAR   '%'
#  define BEG_RANGE  '('
#  define END_RANGE  ')'
#else
#  define WILDCHAR   '?'
#  define BEG_RANGE  '['
#  define END_RANGE  ']'
#endif
||
105 | |||
106 | #if 0 /* GRR: add this to unzip.h someday... */ |
||
107 | #if !(defined(MSDOS) && defined(DOSWILD)) |
||
108 | #ifdef WILD_STOP_AT_DIR |
||
109 | #define match(s,p,ic,sc) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic,sc) == 1) |
||
110 | #else |
||
111 | #define match(s,p,ic) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic) == 1) |
||
112 | #endif |
||
113 | int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, |
||
114 | int ignore_case __WDLPRO)); |
||
115 | #endif |
||
116 | #endif /* 0 */ |
||
117 | static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, |
||
118 | int ignore_case __WDLPRO)); |
||
119 | static char *isshexp OF((ZCONST char *p)); |
||
120 | static int namecmp OF((ZCONST char *s1, ZCONST char *s2)); |
||
121 | |||
122 | |||
123 | /* match() is a shell to recmatch() to return only Boolean values. */ |
||
124 | |||
125 | int match(string, pattern, ignore_case __WDL) |
||
126 | ZCONST char *string, *pattern; |
||
127 | int ignore_case; |
||
128 | __WDLDEF |
||
129 | { |
||
130 | #if (defined(MSDOS) && defined(DOSWILD)) |
||
131 | char *dospattern; |
||
132 | int j = strlen(pattern); |
||
133 | |||
134 | /*--------------------------------------------------------------------------- |
||
135 | Optional MS-DOS preprocessing section: compare last three chars of the |
||
136 | wildcard to "*.*" and translate to "*" if found; else compare the last |
||
137 | two characters to "*." and, if found, scan the non-wild string for dots. |
||
138 | If in the latter case a dot is found, return failure; else translate the |
||
139 | "*." to "*". In either case, continue with the normal (Unix-like) match |
||
140 | procedure after translation. (If not enough memory, default to normal |
||
141 | match.) This causes "a*.*" and "a*." to behave as MS-DOS users expect. |
||
142 | ---------------------------------------------------------------------------*/ |
||
143 | |||
144 | if ((dospattern = (char *)malloc(j+1)) != NULL) { |
||
145 | strcpy(dospattern, pattern); |
||
146 | if (!strcmp(dospattern+j-3, "*.*")) { |
||
147 | dospattern[j-2] = '\0'; /* nuke the ".*" */ |
||
148 | } else if (!strcmp(dospattern+j-2, "*.")) { |
||
149 | char *p = MBSCHR(string, '.'); |
||
150 | |||
151 | if (p) { /* found a dot: match fails */ |
||
152 | free(dospattern); |
||
153 | return 0; |
||
154 | } |
||
155 | dospattern[j-1] = '\0'; /* nuke the end "." */ |
||
156 | } |
||
157 | j = recmatch((uch *)dospattern, (uch *)string, ignore_case __WDL); |
||
158 | free(dospattern); |
||
159 | return j == 1; |
||
160 | } else |
||
161 | #endif /* MSDOS && DOSWILD */ |
||
162 | return recmatch((uch *)pattern, (uch *)string, ignore_case __WDL) == 1; |
||
163 | } |
||
164 | |||
165 | |||
166 | |||
167 | static int recmatch(p, s, ic __WDL) |
||
168 | ZCONST uch *p; /* sh pattern to match */ |
||
169 | ZCONST uch *s; /* string to which to match it */ |
||
170 | int ic; /* true for case insensitivity */ |
||
171 | __WDLDEF /* directory sepchar for WildStopAtDir mode, or 0 */ |
||
172 | /* Recursively compare the sh pattern p with the string s and return 1 if |
||
173 | * they match, and 0 or 2 if they don't or if there is a syntax error in the |
||
174 | * pattern. This routine recurses on itself no more deeply than the number |
||
175 | * of characters in the pattern. */ |
||
176 | { |
||
177 | unsigned int c; /* pattern char or start of range in [-] loop */ |
||
178 | |||
179 | /* Get first character, the pattern for new recmatch calls follows */ |
||
180 | c = *p; INCSTR(p); |
||
181 | |||
182 | /* If that was the end of the pattern, match if string empty too */ |
||
183 | if (c == 0) |
||
184 | return *s == 0; |
||
185 | |||
186 | /* '?' (or '%') matches any character (but not an empty string). */ |
||
187 | if (c == WILDCHAR) |
||
188 | #ifdef WILD_STOP_AT_DIR |
||
189 | /* If uO.W_flag is non-zero, it won't match '/' */ |
||
190 | return (*s && (!sepc || *s != (uch)sepc)) |
||
191 | ? recmatch(p, s + CLEN(s), ic, sepc) : 0; |
||
192 | #else |
||
193 | return *s ? recmatch(p, s + CLEN(s), ic) : 0; |
||
194 | #endif |
||
195 | |||
196 | /* '*' matches any number of characters, including zero */ |
||
197 | #ifdef AMIGA |
||
198 | if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */ |
||
199 | c = '*', p++; |
||
200 | #endif /* AMIGA */ |
||
201 | if (c == '*') { |
||
202 | #ifdef WILD_STOP_AT_DIR |
||
203 | if (sepc) { |
||
204 | /* check for single "*" or double "**" */ |
||
205 | # ifdef AMIGA |
||
206 | if ((c = p[0]) == '#' && p[1] == '?') /* "#?" is Amiga-ese for "*" */ |
||
207 | c = '*', p++; |
||
208 | if (c != '*') { |
||
209 | # else /* !AMIGA */ |
||
210 | if (*p != '*') { |
||
211 | # endif /* ?AMIGA */ |
||
212 | /* single "*": this doesn't match the dirsep character */ |
||
213 | for (; *s && *s != (uch)sepc; INCSTR(s)) |
||
214 | if ((c = recmatch(p, s, ic, sepc)) != 0) |
||
215 | return (int)c; |
||
216 | /* end of pattern: matched if at end of string, else continue */ |
||
217 | if (*p == '\0') |
||
218 | return (*s == 0); |
||
219 | /* continue to match if at sepc in pattern, else give up */ |
||
220 | return (*p == (uch)sepc || (*p == '\\' && p[1] == (uch)sepc)) |
||
221 | ? recmatch(p, s, ic, sepc) : 2; |
||
222 | } |
||
223 | /* "**": this matches slashes */ |
||
224 | ++p; /* move p behind the second '*' */ |
||
225 | /* and continue with the non-W_flag code variant */ |
||
226 | } |
||
227 | #endif /* WILD_STOP_AT_DIR */ |
||
228 | if (*p == 0) |
||
229 | return 1; |
||
230 | if (isshexp((ZCONST char *)p) == NULL) { |
||
231 | /* Optimization for rest of pattern being a literal string: |
||
232 | * If there are no other shell expression chars in the rest |
||
233 | * of the pattern behind the multi-char wildcard, then just |
||
234 | * compare the literal string tail. |
||
235 | */ |
||
236 | ZCONST uch *srest; |
||
237 | |||
238 | srest = s + (strlen((ZCONST char *)s) - strlen((ZCONST char *)p)); |
||
239 | if (srest - s < 0) |
||
240 | /* remaining literal string from pattern is longer than rest |
||
241 | * of test string, there can't be a match |
||
242 | */ |
||
243 | return 0; |
||
244 | else |
||
245 | /* compare the remaining literal pattern string with the last |
||
246 | * bytes of the test string to check for a match |
||
247 | */ |
||
248 | #ifdef _MBCS |
||
249 | { |
||
250 | ZCONST uch *q = s; |
||
251 | |||
252 | /* MBCS-aware code must not scan backwards into a string from |
||
253 | * the end. |
||
254 | * So, we have to move forward by character from our well-known |
||
255 | * character position s in the test string until we have |
||
256 | * advanced to the srest position. |
||
257 | */ |
||
258 | while (q < srest) |
||
259 | INCSTR(q); |
||
260 | /* In case the byte *srest is a trailing byte of a multibyte |
||
261 | * character in the test string s, we have actually advanced |
||
262 | * past the position (srest). |
||
263 | * For this case, the match has failed! |
||
264 | */ |
||
265 | if (q != srest) |
||
266 | return 0; |
||
267 | return ((ic |
||
268 | ? namecmp((ZCONST char *)p, (ZCONST char *)q) |
||
269 | : strcmp((ZCONST char *)p, (ZCONST char *)q) |
||
270 | ) == 0); |
||
271 | } |
||
272 | #else /* !_MBCS */ |
||
273 | return ((ic |
||
274 | ? namecmp((ZCONST char *)p, (ZCONST char *)srest) |
||
275 | : strcmp((ZCONST char *)p, (ZCONST char *)srest) |
||
276 | ) == 0); |
||
277 | #endif /* ?_MBCS */ |
||
278 | } else { |
||
279 | /* pattern contains more wildcards, continue with recursion... */ |
||
280 | for (; *s; INCSTR(s)) |
||
281 | if ((c = recmatch(p, s, ic __WDL)) != 0) |
||
282 | return (int)c; |
||
283 | return 2; /* 2 means give up--match will return false */ |
||
284 | } |
||
285 | } |
||
286 | |||
287 | /* Parse and process the list of characters and ranges in brackets */ |
||
288 | if (c == BEG_RANGE) { |
||
289 | int e; /* flag true if next char to be taken literally */ |
||
290 | ZCONST uch *q; /* pointer to end of [-] group */ |
||
291 | int r; /* flag true to match anything but the range */ |
||
292 | |||
293 | if (*s == 0) /* need a character to match */ |
||
294 | return 0; |
||
295 | p += (r = (*p == '!' || *p == '^')); /* see if reverse */ |
||
296 | for (q = p, e = 0; *q; INCSTR(q)) /* find closing bracket */ |
||
297 | if (e) |
||
298 | e = 0; |
||
299 | else |
||
300 | if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */ |
||
301 | e = 1; |
||
302 | else if (*q == END_RANGE) |
||
303 | break; |
||
304 | if (*q != END_RANGE) /* nothing matches if bad syntax */ |
||
305 | return 0; |
||
306 | for (c = 0, e = (*p == '-'); p < q; INCSTR(p)) { |
||
307 | /* go through the list */ |
||
308 | if (!e && *p == '\\') /* set escape flag if \ */ |
||
309 | e = 1; |
||
310 | else if (!e && *p == '-') /* set start of range if - */ |
||
311 | c = *(p-1); |
||
312 | else { |
||
313 | unsigned int cc = Case(*s); |
||
314 | |||
315 | if (*(p+1) != '-') |
||
316 | for (c = c ? c : *p; c <= *p; c++) /* compare range */ |
||
317 | if ((unsigned)Case(c) == cc) /* typecast for MSC bug */ |
||
318 | return r ? 0 : recmatch(q + 1, s + 1, ic __WDL); |
||
319 | c = e = 0; /* clear range, escape flags */ |
||
320 | } |
||
321 | } |
||
322 | return r ? recmatch(q + CLEN(q), s + CLEN(s), ic __WDL) : 0; |
||
323 | /* bracket match failed */ |
||
324 | } |
||
325 | |||
326 | /* if escape ('\\'), just compare next character */ |
||
327 | if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */ |
||
328 | return 0; |
||
329 | |||
330 | /* just a character--compare it */ |
||
331 | #ifdef QDOS |
||
332 | return QMatch(Case((uch)c), Case(*s)) ? |
||
333 | recmatch(p, s + CLEN(s), ic __WDL) : 0; |
||
334 | #else |
||
335 | return Case((uch)c) == Case(*s) ? |
||
336 | recmatch(p, s + CLEN(s), ic __WDL) : 0; |
||
337 | #endif |
||
338 | |||
339 | } /* end function recmatch() */ |
||
340 | |||
341 | |||
342 | |||
343 | static char *isshexp(p) |
||
344 | ZCONST char *p; |
||
345 | /* If p is a sh expression, a pointer to the first special character is |
||
346 | returned. Otherwise, NULL is returned. */ |
||
347 | { |
||
348 | for (; *p; INCSTR(p)) |
||
349 | if (*p == '\\' && *(p+1)) |
||
350 | p++; |
||
351 | else if (*p == WILDCHAR || *p == '*' || *p == BEG_RANGE) |
||
352 | return (char *)p; |
||
353 | return NULL; |
||
354 | } /* end function isshexp() */ |
||
355 | |||
356 | |||
357 | |||
358 | static int namecmp(s1, s2) |
||
359 | ZCONST char *s1, *s2; |
||
360 | { |
||
361 | int d; |
||
362 | |||
363 | for (;;) { |
||
364 | d = (int)ToLower((uch)*s1) |
||
365 | - (int)ToLower((uch)*s2); |
||
366 | |||
367 | if (d || *s1 == 0 || *s2 == 0) |
||
368 | return d; |
||
369 | |||
370 | s1++; |
||
371 | s2++; |
||
372 | } |
||
373 | } /* end function namecmp() */ |
||
374 | |||
375 | #endif /* !THEOS */ |
||
376 | |||
377 | |||
378 | |||
379 | |||
380 | int iswild(p) /* originally only used for stat()-bug workaround in */ |
||
381 | ZCONST char *p; /* VAX C, Turbo/Borland C, Watcom C, Atari MiNT libs; */ |
||
382 | { /* now used in process_zipfiles() as well */ |
||
383 | for (; *p; INCSTR(p)) |
||
384 | if (*p == '\\' && *(p+1)) |
||
385 | ++p; |
||
386 | #ifdef THEOS |
||
387 | else if (*p == '?' || *p == '*' || *p=='#'|| *p == '@') |
||
388 | #else /* !THEOS */ |
||
389 | #ifdef VMS |
||
390 | else if (*p == '%' || *p == '*') |
||
391 | #else /* !VMS */ |
||
392 | #ifdef AMIGA |
||
393 | else if (*p == '?' || *p == '*' || (*p=='#' && p[1]=='?') || *p == '[') |
||
394 | #else /* !AMIGA */ |
||
395 | else if (*p == '?' || *p == '*' || *p == '[') |
||
396 | #endif /* ?AMIGA */ |
||
397 | #endif /* ?VMS */ |
||
398 | #endif /* ?THEOS */ |
||
399 | #ifdef QDOS |
||
400 | return (int)p; |
||
401 | #else |
||
402 | return TRUE; |
||
403 | #endif |
||
404 | |||
405 | return FALSE; |
||
406 | |||
407 | } /* end function iswild() */ |
||
408 | |||
409 | |||
410 | |||
411 | |||
412 | |||
#ifdef TEST_MATCH

/* Interactive test frame: repeatedly reads a pattern and then strings from
 * stdin, and prints whether each string matches with and without case
 * sensitivity.  An empty line (or end of input) leaves the current loop. */

#define put(s) {fputs(s,stdout); fflush(stdout);}
#ifdef main
#  undef main
#endif

/* Read one line from stdin into buf (capacity size bytes) and strip the
 * trailing newline.  Returns 1 if a non-empty line was read, 0 on an empty
 * line, end of input or read error; buf is always left NUL-terminated.
 * Replaces gets(), which cannot bound its write and whose NULL return on
 * EOF was ignored. */
static int read_line(char *buf, int size)
{
    size_t len;

    if (fgets(buf, size, stdin) == NULL) {
        buf[0] = '\0';
        return 0;
    }
    len = strlen(buf);
    if (len > 0 && buf[len-1] == '\n')
        buf[len-1] = '\0';
    return buf[0] != '\0';
}

int main(int argc, char **argv)
{
    char pat[256], str[256];

    for (;;) {
        put("Pattern (return to exit): ");
        if (!read_line(pat, (int)sizeof(pat)))
            break;
        for (;;) {
            put("String (return for new pattern): ");
            if (!read_line(str, (int)sizeof(str)))
                break;
            /* NOTE(review): match() is called with three arguments here;
             * confirm this frame is only built without the extra __WDL
             * argument. */
            printf("Case sensitive: %s insensitive: %s\n",
              match(str, pat, 0) ? "YES" : "NO",
              match(str, pat, 1) ? "YES" : "NO");
        }
    }
    EXIT(0);
}

#endif /* TEST_MATCH */
442 | #endif /* TEST_MATCH */=>>>>=0x5A) |