/programs/develop/libraries/menuetlibc/src/libc/posix/regex/Makefile |
---|
0,0 → 1,4 |
THIS_SRCS = debug.c regcomp.c regerror.c regexec.c regfree.c \ |
split.c |
include $(MENUET_LIBC_TOPDIR)/Make.rules |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/cclass.h |
---|
0,0 → 1,32 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* character-class table */ |
static struct cclass { |
char *name; |
char *chars; |
char *multis; |
} cclasses[] = { |
"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
0123456789", "", |
"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", |
"", |
"blank", " \t", "", |
"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ |
\25\26\27\30\31\32\33\34\35\36\37\177", "", |
"digit", "0123456789", "", |
"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", |
"", |
"lower", "abcdefghijklmnopqrstuvwxyz", |
"", |
"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ |
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", |
"", |
"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", |
"", |
"space", "\t\n\v\f\r ", "", |
"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", |
"", |
"xdigit", "0123456789ABCDEFabcdef", |
"", |
NULL, 0, "" |
}; |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/cname.h |
---|
0,0 → 1,103 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* character-name table */ |
static struct cname { |
char *name; |
char code; |
} cnames[] = { |
"NUL", '\0', |
"SOH", '\001', |
"STX", '\002', |
"ETX", '\003', |
"EOT", '\004', |
"ENQ", '\005', |
"ACK", '\006', |
"BEL", '\007', |
"alert", '\007', |
"BS", '\010', |
"backspace", '\b', |
"HT", '\011', |
"tab", '\t', |
"LF", '\012', |
"newline", '\n', |
"VT", '\013', |
"vertical-tab", '\v', |
"FF", '\014', |
"form-feed", '\f', |
"CR", '\015', |
"carriage-return", '\r', |
"SO", '\016', |
"SI", '\017', |
"DLE", '\020', |
"DC1", '\021', |
"DC2", '\022', |
"DC3", '\023', |
"DC4", '\024', |
"NAK", '\025', |
"SYN", '\026', |
"ETB", '\027', |
"CAN", '\030', |
"EM", '\031', |
"SUB", '\032', |
"ESC", '\033', |
"IS4", '\034', |
"FS", '\034', |
"IS3", '\035', |
"GS", '\035', |
"IS2", '\036', |
"RS", '\036', |
"IS1", '\037', |
"US", '\037', |
"space", ' ', |
"exclamation-mark", '!', |
"quotation-mark", '"', |
"number-sign", '#', |
"dollar-sign", '$', |
"percent-sign", '%', |
"ampersand", '&', |
"apostrophe", '\'', |
"left-parenthesis", '(', |
"right-parenthesis", ')', |
"asterisk", '*', |
"plus-sign", '+', |
"comma", ',', |
"hyphen", '-', |
"hyphen-minus", '-', |
"period", '.', |
"full-stop", '.', |
"slash", '/', |
"solidus", '/', |
"zero", '0', |
"one", '1', |
"two", '2', |
"three", '3', |
"four", '4', |
"five", '5', |
"six", '6', |
"seven", '7', |
"eight", '8', |
"nine", '9', |
"colon", ':', |
"semicolon", ';', |
"less-than-sign", '<', |
"equals-sign", '=', |
"greater-than-sign", '>', |
"question-mark", '?', |
"commercial-at", '@', |
"left-square-bracket", '[', |
"backslash", '\\', |
"reverse-solidus", '\\', |
"right-square-bracket", ']', |
"circumflex", '^', |
"circumflex-accent", '^', |
"underscore", '_', |
"low-line", '_', |
"grave-accent", '`', |
"left-brace", '{', |
"left-curly-bracket", '{', |
"vertical-line", '|', |
"right-brace", '}', |
"right-curly-bracket", '}', |
"tilde", '~', |
"DEL", '\177', |
NULL, 0, |
}; |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/debug.c |
---|
0,0 → 1,243 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <stdio.h> |
#include <string.h> |
#include <ctype.h> |
#include <limits.h> |
#include <stdlib.h> |
#include <sys/types.h> |
#include <regex.h> |
#include "utils.h" |
#include "regex2.h" |
#include "debug.ih" |
/* |
- regprint - print a regexp for debugging |
== void regprint(regex_t *r, FILE *d); |
*/ |
void |
regprint(r, d) |
regex_t *r; |
FILE *d; |
{ |
register struct re_guts *g = r->re_g; |
register int i; |
register int c; |
register int last; |
int nincat[NC]; |
fprintf(d, "%ld states, %d categories", (long)g->nstates, |
g->ncategories); |
fprintf(d, ", first %ld last %ld", (long)g->firststate, |
(long)g->laststate); |
if (g->iflags&USEBOL) |
fprintf(d, ", USEBOL"); |
if (g->iflags&USEEOL) |
fprintf(d, ", USEEOL"); |
if (g->iflags&BAD) |
fprintf(d, ", BAD"); |
if (g->nsub > 0) |
fprintf(d, ", nsub=%ld", (long)g->nsub); |
if (g->must != NULL) |
fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, |
g->must); |
if (g->backrefs) |
fprintf(d, ", backrefs"); |
if (g->nplus > 0) |
fprintf(d, ", nplus %ld", (long)g->nplus); |
fprintf(d, "\n"); |
s_print(g, d); |
for (i = 0; i < g->ncategories; i++) { |
nincat[i] = 0; |
for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
if (g->categories[c] == i) |
nincat[i]++; |
} |
fprintf(d, "cc0#%d", nincat[0]); |
for (i = 1; i < g->ncategories; i++) |
if (nincat[i] == 1) { |
for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
if (g->categories[c] == i) |
break; |
fprintf(d, ", %d=%s", i, regchar(c)); |
} |
fprintf(d, "\n"); |
for (i = 1; i < g->ncategories; i++) |
if (nincat[i] != 1) { |
fprintf(d, "cc%d\t", i); |
last = -1; |
for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ |
if (c <= CHAR_MAX && g->categories[c] == i) { |
if (last < 0) { |
fprintf(d, "%s", regchar(c)); |
last = c; |
} |
} else { |
if (last >= 0) { |
if (last != c-1) |
fprintf(d, "-%s", |
regchar(c-1)); |
last = -1; |
} |
} |
fprintf(d, "\n"); |
} |
} |
/* |
- s_print - print the strip for debugging |
== static void s_print(register struct re_guts *g, FILE *d); |
*/ |
static void |
s_print(g, d) |
register struct re_guts *g; |
FILE *d; |
{ |
register sop *s; |
register cset *cs; |
register int i; |
register int done = 0; |
register sop opnd; |
register int col = 0; |
register int last; |
register sopno offset = 2; |
# define GAP() { if (offset % 5 == 0) { \ |
if (col > 40) { \ |
fprintf(d, "\n\t"); \ |
col = 0; \ |
} else { \ |
fprintf(d, " "); \ |
col++; \ |
} \ |
} else \ |
col++; \ |
offset++; \ |
} |
if (OP(g->strip[0]) != OEND) |
fprintf(d, "missing initial OEND!\n"); |
for (s = &g->strip[1]; !done; s++) { |
opnd = OPND(*s); |
switch (OP(*s)) { |
case OEND: |
fprintf(d, "\n"); |
done = 1; |
break; |
case OCHAR: |
if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) |
fprintf(d, "\\%c", (char)opnd); |
else |
fprintf(d, "%s", regchar((char)opnd)); |
break; |
case OBOL: |
fprintf(d, "^"); |
break; |
case OEOL: |
fprintf(d, "$"); |
break; |
case OBOW: |
fprintf(d, "\\{"); |
break; |
case OEOW: |
fprintf(d, "\\}"); |
break; |
case OANY: |
fprintf(d, "."); |
break; |
case OANYOF: |
fprintf(d, "[(%ld)", (long)opnd); |
cs = &g->sets[opnd]; |
last = -1; |
for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ |
if (CHIN(cs, i) && i < g->csetsize) { |
if (last < 0) { |
fprintf(d, "%s", regchar(i)); |
last = i; |
} |
} else { |
if (last >= 0) { |
if (last != i-1) |
fprintf(d, "-%s", |
regchar(i-1)); |
last = -1; |
} |
} |
fprintf(d, "]"); |
break; |
case OBACK_: |
fprintf(d, "(\\<%ld>", (long)opnd); |
break; |
case O_BACK: |
fprintf(d, "<%ld>\\)", (long)opnd); |
break; |
case OPLUS_: |
fprintf(d, "(+"); |
if (OP(*(s+opnd)) != O_PLUS) |
fprintf(d, "<%ld>", (long)opnd); |
break; |
case O_PLUS: |
if (OP(*(s-opnd)) != OPLUS_) |
fprintf(d, "<%ld>", (long)opnd); |
fprintf(d, "+)"); |
break; |
case OQUEST_: |
fprintf(d, "(?"); |
if (OP(*(s+opnd)) != O_QUEST) |
fprintf(d, "<%ld>", (long)opnd); |
break; |
case O_QUEST: |
if (OP(*(s-opnd)) != OQUEST_) |
fprintf(d, "<%ld>", (long)opnd); |
fprintf(d, "?)"); |
break; |
case OLPAREN: |
fprintf(d, "((<%ld>", (long)opnd); |
break; |
case ORPAREN: |
fprintf(d, "<%ld>))", (long)opnd); |
break; |
case OCH_: |
fprintf(d, "<"); |
if (OP(*(s+opnd)) != OOR2) |
fprintf(d, "<%ld>", (long)opnd); |
break; |
case OOR1: |
if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) |
fprintf(d, "<%ld>", (long)opnd); |
fprintf(d, "|"); |
break; |
case OOR2: |
fprintf(d, "|"); |
if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) |
fprintf(d, "<%ld>", (long)opnd); |
break; |
case O_CH: |
if (OP(*(s-opnd)) != OOR1) |
fprintf(d, "<%ld>", (long)opnd); |
fprintf(d, ">"); |
break; |
default: |
fprintf(d, "!%d(%d)!", OP(*s), opnd); |
break; |
} |
if (!done) |
GAP(); |
} |
} |
/* |
- regchar - make a character printable |
== static char *regchar(int ch); |
*/ |
static char * /* -> representation */ |
regchar(ch) |
int ch; |
{ |
static char buf[10]; |
if (isprint(ch) || ch == ' ') |
sprintf(buf, "%c", ch); |
else |
sprintf(buf, "\\%o", ch); |
return(buf); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/debug.ih |
---|
0,0 → 1,14 |
/* ========= begin header generated by ./mkh ========= */ |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* === debug.c === */ |
void regprint (regex_t *r, FILE *d); |
static void s_print (register struct re_guts *g, FILE *d); |
static char *regchar (int ch); |
#ifdef __cplusplus |
} |
#endif |
/* ========= end header generated by ./mkh ========= */ |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/engine.c |
---|
0,0 → 1,1019 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* |
* The matching engine and friends. This file is #included by regexec.c |
* after suitable #defines of a variety of macros used herein, so that |
* different state representations can be used without duplicating masses |
* of code. |
*/ |
#ifdef SNAMES |
#define matcher smatcher |
#define fast sfast |
#define slow sslow |
#define dissect sdissect |
#define backref sbackref |
#define step sstep |
#define print sprint |
#define at sat |
#define match smat |
#endif |
#ifdef LNAMES |
#define matcher lmatcher |
#define fast lfast |
#define slow lslow |
#define dissect ldissect |
#define backref lbackref |
#define step lstep |
#define print lprint |
#define at lat |
#define match lmat |
#endif |
/* another structure passed up and down to avoid zillions of parameters */ |
struct match { |
struct re_guts *g; |
int eflags; |
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
char *offp; /* offsets work from here */ |
char *beginp; /* start of string -- virtual NUL precedes */ |
char *endp; /* end of string -- virtual NUL here */ |
char *coldp; /* can be no match starting before here */ |
char **lastpos; /* [nplus+1] */ |
STATEVARS; |
states st; /* current states */ |
states fresh; /* states for a fresh start */ |
states tmp; /* temporary */ |
states empty; /* empty set of states */ |
}; |
#include "engine.ih" |
#ifdef REDEBUG |
#define SP(t, s, c) print(m, t, s, c, stdout) |
#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } |
#else |
#define SP(t, s, c) /* nothing */ |
#define AT(t, p1, p2, s1, s2) /* nothing */ |
#define NOTE(s) /* nothing */ |
#endif |
/* |
- matcher - the actual matching engine |
== static int matcher(register struct re_guts *g, char *string, \ |
== size_t nmatch, regmatch_t pmatch[], int eflags); |
*/ |
static int /* 0 success, REG_NOMATCH failure */ |
matcher(g, string, nmatch, pmatch, eflags) |
register struct re_guts *g; |
char *string; |
size_t nmatch; |
regmatch_t pmatch[]; |
int eflags; |
{ |
register char *endp; |
register int i; |
struct match mv; |
register struct match *m = &mv; |
register char *dp; |
const register sopno gf = g->firststate+1; /* +1 for OEND */ |
const register sopno gl = g->laststate; |
char *start; |
char *stop; |
/* simplify the situation where possible */ |
if (g->cflags®_NOSUB) |
nmatch = 0; |
if (eflags®_STARTEND) { |
start = string + pmatch[0].rm_so; |
stop = string + pmatch[0].rm_eo; |
} else { |
start = string; |
stop = start + strlen(start); |
} |
if (stop < start) |
return(REG_INVARG); |
/* prescreening; this does wonders for this rather slow code */ |
if (g->must != NULL) { |
for (dp = start; dp < stop; dp++) |
if (*dp == g->must[0] && stop - dp >= g->mlen && |
memcmp(dp, g->must, (size_t)g->mlen) == 0) |
break; |
if (dp == stop) /* we didn't find g->must */ |
return(REG_NOMATCH); |
} |
/* match struct setup */ |
m->g = g; |
m->eflags = eflags; |
m->pmatch = NULL; |
m->lastpos = NULL; |
m->offp = string; |
m->beginp = start; |
m->endp = stop; |
STATESETUP(m, 4); |
SETUP(m->st); |
SETUP(m->fresh); |
SETUP(m->tmp); |
SETUP(m->empty); |
CLEAR(m->empty); |
/* this loop does only one repetition except for backrefs */ |
for (;;) { |
endp = fast(m, start, stop, gf, gl); |
if (endp == NULL) { /* a miss */ |
STATETEARDOWN(m); |
return(REG_NOMATCH); |
} |
if (nmatch == 0 && !g->backrefs) |
break; /* no further info needed */ |
/* where? */ |
assert(m->coldp != NULL); |
for (;;) { |
NOTE("finding start"); |
endp = slow(m, m->coldp, stop, gf, gl); |
if (endp != NULL) |
break; |
assert(m->coldp < m->endp); |
m->coldp++; |
} |
if (nmatch == 1 && !g->backrefs) |
break; /* no further info needed */ |
/* oh my, he wants the subexpressions... */ |
if (m->pmatch == NULL) |
m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * |
sizeof(regmatch_t)); |
if (m->pmatch == NULL) { |
STATETEARDOWN(m); |
return(REG_ESPACE); |
} |
for (i = 1; i <= m->g->nsub; i++) |
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
if (!g->backrefs && !(m->eflags®_BACKR)) { |
NOTE("dissecting"); |
dp = dissect(m, m->coldp, endp, gf, gl); |
} else { |
if (g->nplus > 0 && m->lastpos == NULL) |
m->lastpos = (char **)malloc((g->nplus+1) * |
sizeof(char *)); |
if (g->nplus > 0 && m->lastpos == NULL) { |
free(m->pmatch); |
STATETEARDOWN(m); |
return(REG_ESPACE); |
} |
NOTE("backref dissect"); |
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); |
} |
if (dp != NULL) |
break; |
/* uh-oh... we couldn't find a subexpression-level match */ |
assert(g->backrefs); /* must be back references doing it */ |
assert(g->nplus == 0 || m->lastpos != NULL); |
for (;;) { |
if (dp != NULL || endp <= m->coldp) |
break; /* defeat */ |
NOTE("backoff"); |
endp = slow(m, m->coldp, endp-1, gf, gl); |
if (endp == NULL) |
break; /* defeat */ |
/* try it on a shorter possibility */ |
#ifndef NDEBUG |
for (i = 1; i <= m->g->nsub; i++) { |
assert(m->pmatch[i].rm_so == -1); |
assert(m->pmatch[i].rm_eo == -1); |
} |
#endif |
NOTE("backoff dissect"); |
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); |
} |
assert(dp == NULL || dp == endp); |
if (dp != NULL) /* found a shorter one */ |
break; |
/* despite initial appearances, there is no match here */ |
NOTE("false alarm"); |
start = m->coldp + 1; /* recycle starting later */ |
assert(start <= stop); |
} |
/* fill in the details if requested */ |
if (nmatch > 0) { |
pmatch[0].rm_so = m->coldp - m->offp; |
pmatch[0].rm_eo = endp - m->offp; |
} |
if (nmatch > 1) { |
assert(m->pmatch != NULL); |
for (i = 1; i < nmatch; i++) |
if (i <= m->g->nsub) |
pmatch[i] = m->pmatch[i]; |
else { |
pmatch[i].rm_so = -1; |
pmatch[i].rm_eo = -1; |
} |
} |
if (m->pmatch != NULL) |
free((char *)m->pmatch); |
if (m->lastpos != NULL) |
free((char *)m->lastpos); |
STATETEARDOWN(m); |
return(0); |
} |
/* |
- dissect - figure out what matched what, no back references |
== static char *dissect(register struct match *m, char *start, \ |
== char *stop, sopno startst, sopno stopst); |
*/ |
static char * /* == stop (success) always */ |
dissect(m, start, stop, startst, stopst) |
register struct match *m; |
char *start; |
char *stop; |
sopno startst; |
sopno stopst; |
{ |
register int i; |
register sopno ss; /* start sop of current subRE */ |
register sopno es; /* end sop of current subRE */ |
register char *sp; /* start of string matched by it */ |
register char *stp; /* string matched by it cannot pass here */ |
register char *rest; /* start of rest of string */ |
register char *tail; /* string unmatched by rest of RE */ |
register sopno ssub; /* start sop of subsubRE */ |
register sopno esub; /* end sop of subsubRE */ |
register char *ssp; /* start of string matched by subsubRE */ |
register char *sep; /* end of string matched by subsubRE */ |
register char *oldssp; /* previous ssp */ |
register char *dp; |
AT("diss", start, stop, startst, stopst); |
sp = start; |
for (ss = startst; ss < stopst; ss = es) { |
/* identify end of subRE */ |
es = ss; |
switch (OP(m->g->strip[es])) { |
case OPLUS_: |
case OQUEST_: |
es += OPND(m->g->strip[es]); |
break; |
case OCH_: |
while (OP(m->g->strip[es]) != O_CH) |
es += OPND(m->g->strip[es]); |
break; |
} |
es++; |
/* figure out what it matched */ |
switch (OP(m->g->strip[ss])) { |
case OEND: |
assert(nope); |
break; |
case OCHAR: |
sp++; |
break; |
case OBOL: |
case OEOL: |
case OBOW: |
case OEOW: |
break; |
case OANY: |
case OANYOF: |
sp++; |
break; |
case OBACK_: |
case O_BACK: |
assert(nope); |
break; |
/* cases where length of match is hard to find */ |
case OQUEST_: |
stp = stop; |
for (;;) { |
/* how long could this one be? */ |
rest = slow(m, sp, stp, ss, es); |
assert(rest != NULL); /* it did match */ |
/* could the rest match the rest? */ |
tail = slow(m, rest, stop, es, stopst); |
if (tail == stop) |
break; /* yes! */ |
/* no -- try a shorter match for this one */ |
stp = rest - 1; |
assert(stp >= sp); /* it did work */ |
} |
ssub = ss + 1; |
esub = es - 1; |
/* did innards match? */ |
if (slow(m, sp, rest, ssub, esub) != NULL) { |
dp = dissect(m, sp, rest, ssub, esub); |
assert(dp == rest); |
} else /* no */ |
assert(sp == rest); |
sp = rest; |
break; |
case OPLUS_: |
stp = stop; |
for (;;) { |
/* how long could this one be? */ |
rest = slow(m, sp, stp, ss, es); |
assert(rest != NULL); /* it did match */ |
/* could the rest match the rest? */ |
tail = slow(m, rest, stop, es, stopst); |
if (tail == stop) |
break; /* yes! */ |
/* no -- try a shorter match for this one */ |
stp = rest - 1; |
assert(stp >= sp); /* it did work */ |
} |
ssub = ss + 1; |
esub = es - 1; |
ssp = sp; |
oldssp = ssp; |
for (;;) { /* find last match of innards */ |
sep = slow(m, ssp, rest, ssub, esub); |
if (sep == NULL || sep == ssp) |
break; /* failed or matched null */ |
oldssp = ssp; /* on to next try */ |
ssp = sep; |
} |
if (sep == NULL) { |
/* last successful match */ |
sep = ssp; |
ssp = oldssp; |
} |
assert(sep == rest); /* must exhaust substring */ |
assert(slow(m, ssp, sep, ssub, esub) == rest); |
dp = dissect(m, ssp, sep, ssub, esub); |
assert(dp == sep); |
sp = rest; |
break; |
case OCH_: |
stp = stop; |
for (;;) { |
/* how long could this one be? */ |
rest = slow(m, sp, stp, ss, es); |
assert(rest != NULL); /* it did match */ |
/* could the rest match the rest? */ |
tail = slow(m, rest, stop, es, stopst); |
if (tail == stop) |
break; /* yes! */ |
/* no -- try a shorter match for this one */ |
stp = rest - 1; |
assert(stp >= sp); /* it did work */ |
} |
ssub = ss + 1; |
esub = ss + OPND(m->g->strip[ss]) - 1; |
assert(OP(m->g->strip[esub]) == OOR1); |
for (;;) { /* find first matching branch */ |
if (slow(m, sp, rest, ssub, esub) == rest) |
break; /* it matched all of it */ |
/* that one missed, try next one */ |
assert(OP(m->g->strip[esub]) == OOR1); |
esub++; |
assert(OP(m->g->strip[esub]) == OOR2); |
ssub = esub + 1; |
esub += OPND(m->g->strip[esub]); |
if (OP(m->g->strip[esub]) == OOR2) |
esub--; |
else |
assert(OP(m->g->strip[esub]) == O_CH); |
} |
dp = dissect(m, sp, rest, ssub, esub); |
assert(dp == rest); |
sp = rest; |
break; |
case O_PLUS: |
case O_QUEST: |
case OOR1: |
case OOR2: |
case O_CH: |
assert(nope); |
break; |
case OLPAREN: |
i = OPND(m->g->strip[ss]); |
assert(0 < i && i <= m->g->nsub); |
m->pmatch[i].rm_so = sp - m->offp; |
break; |
case ORPAREN: |
i = OPND(m->g->strip[ss]); |
assert(0 < i && i <= m->g->nsub); |
m->pmatch[i].rm_eo = sp - m->offp; |
break; |
default: /* uh oh */ |
assert(nope); |
break; |
} |
} |
assert(sp == stop); |
return(sp); |
} |
/* |
- backref - figure out what matched what, figuring in back references |
== static char *backref(register struct match *m, char *start, \ |
== char *stop, sopno startst, sopno stopst, sopno lev); |
*/ |
static char * /* == stop (success) or NULL (failure) */ |
backref(m, start, stop, startst, stopst, lev) |
register struct match *m; |
char *start; |
char *stop; |
sopno startst; |
sopno stopst; |
sopno lev; /* PLUS nesting level */ |
{ |
register int i; |
register sopno ss; /* start sop of current subRE */ |
register char *sp; /* start of string matched by it */ |
register sopno ssub; /* start sop of subsubRE */ |
register sopno esub; /* end sop of subsubRE */ |
register char *ssp; /* start of string matched by subsubRE */ |
register char *dp; |
register size_t len; |
register int hard; |
register sop s; |
register regoff_t offsave; |
register cset *cs; |
AT("back", start, stop, startst, stopst); |
sp = start; |
/* get as far as we can with easy stuff */ |
hard = 0; |
for (ss = startst; !hard && ss < stopst; ss++) |
switch (OP(s = m->g->strip[ss])) { |
case OCHAR: |
if (sp == stop || *sp++ != (char)OPND(s)) |
return(NULL); |
break; |
case OANY: |
if (sp == stop) |
return(NULL); |
sp++; |
break; |
case OANYOF: |
cs = &m->g->sets[OPND(s)]; |
if (sp == stop || !CHIN(cs, *sp++)) |
return(NULL); |
break; |
case OBOL: |
if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || |
(sp < m->endp && *(sp-1) == '\n' && |
(m->g->cflags®_NEWLINE)) ) |
{ /* yes */ } |
else |
return(NULL); |
break; |
case OEOL: |
if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
(sp < m->endp && *sp == '\n' && |
(m->g->cflags®_NEWLINE)) ) |
{ /* yes */ } |
else |
return(NULL); |
break; |
case OBOW: |
if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || |
(sp < m->endp && *(sp-1) == '\n' && |
(m->g->cflags®_NEWLINE)) || |
(sp > m->beginp && |
!ISWORD(*(sp-1))) ) && |
(sp < m->endp && ISWORD(*sp)) ) |
{ /* yes */ } |
else |
return(NULL); |
break; |
case OEOW: |
if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || |
(sp < m->endp && *sp == '\n' && |
(m->g->cflags®_NEWLINE)) || |
(sp < m->endp && !ISWORD(*sp)) ) && |
(sp > m->beginp && ISWORD(*(sp-1))) ) |
{ /* yes */ } |
else |
return(NULL); |
break; |
case O_QUEST: |
break; |
case OOR1: /* matches null but needs to skip */ |
ss++; |
s = m->g->strip[ss]; |
do { |
assert(OP(s) == OOR2); |
ss += OPND(s); |
} while (OP(s = m->g->strip[ss]) != O_CH); |
/* note that the ss++ gets us past the O_CH */ |
break; |
default: /* have to make a choice */ |
hard = 1; |
break; |
} |
if (!hard) { /* that was it! */ |
if (sp != stop) |
return(NULL); |
return(sp); |
} |
ss--; /* adjust for the for's final increment */ |
/* the hard stuff */ |
AT("hard", sp, stop, ss, stopst); |
s = m->g->strip[ss]; |
switch (OP(s)) { |
case OBACK_: /* the vilest depths */ |
i = OPND(s); |
assert(0 < i && i <= m->g->nsub); |
if (m->pmatch[i].rm_eo == -1) |
return(NULL); |
assert(m->pmatch[i].rm_so != -1); |
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
assert(stop - m->beginp >= len); |
if (sp > stop - len) |
return(NULL); /* not enough left to match */ |
ssp = m->offp + m->pmatch[i].rm_so; |
if (memcmp(sp, ssp, len) != 0) |
return(NULL); |
while (m->g->strip[ss] != SOP(O_BACK, i)) |
ss++; |
return(backref(m, sp+len, stop, ss+1, stopst, lev)); |
break; |
case OQUEST_: /* to null or not */ |
dp = backref(m, sp, stop, ss+1, stopst, lev); |
if (dp != NULL) |
return(dp); /* not */ |
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); |
break; |
case OPLUS_: |
assert(m->lastpos != NULL); |
assert(lev+1 <= m->g->nplus); |
m->lastpos[lev+1] = sp; |
return(backref(m, sp, stop, ss+1, stopst, lev+1)); |
break; |
case O_PLUS: |
if (sp == m->lastpos[lev]) /* last pass matched null */ |
return(backref(m, sp, stop, ss+1, stopst, lev-1)); |
/* try another pass */ |
m->lastpos[lev] = sp; |
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); |
if (dp == NULL) |
return(backref(m, sp, stop, ss+1, stopst, lev-1)); |
else |
return(dp); |
break; |
case OCH_: /* find the right one, if any */ |
ssub = ss + 1; |
esub = ss + OPND(s) - 1; |
assert(OP(m->g->strip[esub]) == OOR1); |
for (;;) { /* find first matching branch */ |
dp = backref(m, sp, stop, ssub, esub, lev); |
if (dp != NULL) |
return(dp); |
/* that one missed, try next one */ |
if (OP(m->g->strip[esub]) == O_CH) |
return(NULL); /* there is none */ |
esub++; |
assert(OP(m->g->strip[esub]) == OOR2); |
ssub = esub + 1; |
esub += OPND(m->g->strip[esub]); |
if (OP(m->g->strip[esub]) == OOR2) |
esub--; |
else |
assert(OP(m->g->strip[esub]) == O_CH); |
} |
break; |
case OLPAREN: /* must undo assignment if rest fails */ |
i = OPND(s); |
assert(0 < i && i <= m->g->nsub); |
offsave = m->pmatch[i].rm_so; |
m->pmatch[i].rm_so = sp - m->offp; |
dp = backref(m, sp, stop, ss+1, stopst, lev); |
if (dp != NULL) |
return(dp); |
m->pmatch[i].rm_so = offsave; |
return(NULL); |
break; |
case ORPAREN: /* must undo assignment if rest fails */ |
i = OPND(s); |
assert(0 < i && i <= m->g->nsub); |
offsave = m->pmatch[i].rm_eo; |
m->pmatch[i].rm_eo = sp - m->offp; |
dp = backref(m, sp, stop, ss+1, stopst, lev); |
if (dp != NULL) |
return(dp); |
m->pmatch[i].rm_eo = offsave; |
return(NULL); |
break; |
default: /* uh oh */ |
assert(nope); |
break; |
} |
/* "can't happen" */ |
assert(nope); |
/* NOTREACHED */ |
} |
/* |
- fast - step through the string at top speed |
== static char *fast(register struct match *m, char *start, \ |
== char *stop, sopno startst, sopno stopst); |
*/ |
static char * /* where tentative match ended, or NULL */ |
fast(m, start, stop, startst, stopst) |
register struct match *m; |
char *start; |
char *stop; |
sopno startst; |
sopno stopst; |
{ |
register states st = m->st; |
register states fresh = m->fresh; |
register states tmp = m->tmp; |
register char *p = start; |
register int c = (start == m->beginp) ? OUT : *(start-1); |
register int lastc; /* previous c */ |
register int flagch; |
register int i; |
register char *coldp; /* last p after which no match was underway */ |
CLEAR(st); |
SET1(st, startst); |
st = step(m->g, startst, stopst, st, NOTHING, st); |
ASSIGN(fresh, st); |
SP("start", st, *p); |
coldp = NULL; |
for (;;) { |
/* next character */ |
lastc = c; |
c = (p == m->endp) ? OUT : *p; |
if (EQ(st, fresh)) |
coldp = p; |
/* is there an EOL and/or BOL between lastc and c? */ |
flagch = '\0'; |
i = 0; |
if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || |
(lastc == OUT && !(m->eflags®_NOTBOL)) ) { |
flagch = BOL; |
i = m->g->nbol; |
} |
if ( (c == '\n' && m->g->cflags®_NEWLINE) || |
(c == OUT && !(m->eflags®_NOTEOL)) ) { |
flagch = (flagch == BOL) ? BOLEOL : EOL; |
i += m->g->neol; |
} |
if (i != 0) { |
for (; i > 0; i--) |
st = step(m->g, startst, stopst, st, flagch, st); |
SP("boleol", st, c); |
} |
/* how about a word boundary? */ |
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
(c != OUT && ISWORD(c)) ) { |
flagch = BOW; |
} |
if ( (lastc != OUT && ISWORD(lastc)) && |
(flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
flagch = EOW; |
} |
if (flagch == BOW || flagch == EOW) { |
st = step(m->g, startst, stopst, st, flagch, st); |
SP("boweow", st, c); |
} |
/* are we done? */ |
if (ISSET(st, stopst) || p == stop) |
break; /* NOTE BREAK OUT */ |
/* no, we must deal with this character */ |
ASSIGN(tmp, st); |
ASSIGN(st, fresh); |
assert(c != OUT); |
st = step(m->g, startst, stopst, tmp, c, st); |
SP("aft", st, c); |
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
p++; |
} |
assert(coldp != NULL); |
m->coldp = coldp; |
if (ISSET(st, stopst)) |
return(p+1); |
else |
return(NULL); |
} |
/* |
- slow - step through the string more deliberately |
== static char *slow(register struct match *m, char *start, \ |
== char *stop, sopno startst, sopno stopst); |
*/ |
static char * /* where it ended */ |
slow(m, start, stop, startst, stopst) |
register struct match *m; |
char *start; |
char *stop; |
sopno startst; |
sopno stopst; |
{ |
register states st = m->st; |
register states empty = m->empty; |
register states tmp = m->tmp; |
register char *p = start; |
register int c = (start == m->beginp) ? OUT : *(start-1); |
register int lastc; /* previous c */ |
register int flagch; |
register int i; |
register char *matchp; /* last p at which a match ended */ |
AT("slow", start, stop, startst, stopst); |
CLEAR(st); |
SET1(st, startst); |
SP("sstart", st, *p); |
st = step(m->g, startst, stopst, st, NOTHING, st); |
matchp = NULL; |
for (;;) { |
/* next character */ |
lastc = c; |
c = (p == m->endp) ? OUT : *p; |
/* is there an EOL and/or BOL between lastc and c? */ |
flagch = '\0'; |
i = 0; |
if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || |
(lastc == OUT && !(m->eflags®_NOTBOL)) ) { |
flagch = BOL; |
i = m->g->nbol; |
} |
if ( (c == '\n' && m->g->cflags®_NEWLINE) || |
(c == OUT && !(m->eflags®_NOTEOL)) ) { |
flagch = (flagch == BOL) ? BOLEOL : EOL; |
i += m->g->neol; |
} |
if (i != 0) { |
for (; i > 0; i--) |
st = step(m->g, startst, stopst, st, flagch, st); |
SP("sboleol", st, c); |
} |
/* how about a word boundary? */ |
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && |
(c != OUT && ISWORD(c)) ) { |
flagch = BOW; |
} |
if ( (lastc != OUT && ISWORD(lastc)) && |
(flagch == EOL || (c != OUT && !ISWORD(c))) ) { |
flagch = EOW; |
} |
if (flagch == BOW || flagch == EOW) { |
st = step(m->g, startst, stopst, st, flagch, st); |
SP("sboweow", st, c); |
} |
/* are we done? */ |
if (ISSET(st, stopst)) |
matchp = p; |
if (EQ(st, empty) || p == stop) |
break; /* NOTE BREAK OUT */ |
/* no, we must deal with this character */ |
ASSIGN(tmp, st); |
ASSIGN(st, empty); |
assert(c != OUT); |
st = step(m->g, startst, stopst, tmp, c, st); |
SP("saft", st, c); |
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
p++; |
} |
return(matchp); |
} |
/* |
- step - map set of states reachable before char to set reachable after |
== static states step(register struct re_guts *g, sopno start, sopno stop, \ |
== register states bef, int ch, register states aft); |
== #define BOL (OUT+1) |
== #define EOL (BOL+1) |
== #define BOLEOL (BOL+2) |
== #define NOTHING (BOL+3) |
== #define BOW (BOL+4) |
== #define EOW (BOL+5) |
== #define CODEMAX (BOL+5) // highest code used |
== #define NONCHAR(c) ((c) > CHAR_MAX) |
== #define NNONCHAR (CODEMAX-CHAR_MAX) |
*/ |
static states |
step(g, start, stop, bef, ch, aft) |
register struct re_guts *g; |
sopno start; /* start state within strip */ |
sopno stop; /* state after stop state within strip */ |
register states bef; /* states reachable before */ |
int ch; /* character or NONCHAR code */ |
register states aft; /* states already known reachable after */ |
{ |
register cset *cs; |
register sop s; |
register sopno pc; |
register onestate here; /* note, macros know this name */ |
register sopno look; |
register int i; |
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { |
s = g->strip[pc]; |
switch (OP(s)) { |
case OEND: |
assert(pc == stop-1); |
break; |
case OCHAR: |
/* only characters can match */ |
assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
if (ch == (char)OPND(s)) |
FWD(aft, bef, 1); |
break; |
case OBOL: |
if (ch == BOL || ch == BOLEOL) |
FWD(aft, bef, 1); |
break; |
case OEOL: |
if (ch == EOL || ch == BOLEOL) |
FWD(aft, bef, 1); |
break; |
case OBOW: |
if (ch == BOW) |
FWD(aft, bef, 1); |
break; |
case OEOW: |
if (ch == EOW) |
FWD(aft, bef, 1); |
break; |
case OANY: |
if (!NONCHAR(ch)) |
FWD(aft, bef, 1); |
break; |
case OANYOF: |
cs = &g->sets[OPND(s)]; |
if (!NONCHAR(ch) && CHIN(cs, ch)) |
FWD(aft, bef, 1); |
break; |
case OBACK_: /* ignored here */ |
case O_BACK: |
FWD(aft, aft, 1); |
break; |
case OPLUS_: /* forward, this is just an empty */ |
FWD(aft, aft, 1); |
break; |
case O_PLUS: /* both forward and back */ |
FWD(aft, aft, 1); |
i = ISSETBACK(aft, OPND(s)); |
BACK(aft, aft, OPND(s)); |
if (!i && ISSETBACK(aft, OPND(s))) { |
/* oho, must reconsider loop body */ |
pc -= OPND(s) + 1; |
INIT(here, pc); |
} |
break; |
case OQUEST_: /* two branches, both forward */ |
FWD(aft, aft, 1); |
FWD(aft, aft, OPND(s)); |
break; |
case O_QUEST: /* just an empty */ |
FWD(aft, aft, 1); |
break; |
case OLPAREN: /* not significant here */ |
case ORPAREN: |
FWD(aft, aft, 1); |
break; |
case OCH_: /* mark the first two branches */ |
FWD(aft, aft, 1); |
assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
FWD(aft, aft, OPND(s)); |
break; |
case OOR1: /* done a branch, find the O_CH */ |
if (ISSTATEIN(aft, here)) { |
for (look = 1; |
OP(s = g->strip[pc+look]) != O_CH; |
look += OPND(s)) |
assert(OP(s) == OOR2); |
FWD(aft, aft, look); |
} |
break; |
case OOR2: /* propagate OCH_'s marking */ |
FWD(aft, aft, 1); |
if (OP(g->strip[pc+OPND(s)]) != O_CH) { |
assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
FWD(aft, aft, OPND(s)); |
} |
break; |
case O_CH: /* just empty */ |
FWD(aft, aft, 1); |
break; |
default: /* ooooops... */ |
assert(nope); |
break; |
} |
} |
return(aft); |
} |
#ifdef REDEBUG |
/* |
- print - print a set of states |
== #ifdef REDEBUG |
== static void print(struct match *m, char *caption, states st, \ |
== int ch, FILE *d); |
== #endif |
*/ |
static void |
print(m, caption, st, ch, d) |
struct match *m; |
char *caption; |
states st; |
int ch; |
FILE *d; |
{ |
register struct re_guts *g = m->g; |
register int i; |
register int first = 1; |
if (!(m->eflags®_TRACE)) |
return; |
fprintf(d, "%s", caption); |
if (ch != '\0') |
fprintf(d, " %s", pchar(ch)); |
for (i = 0; i < g->nstates; i++) |
if (ISSET(st, i)) { |
fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
first = 0; |
} |
fprintf(d, "\n"); |
} |
/* |
- at - print current situation |
== #ifdef REDEBUG |
== static void at(struct match *m, char *title, char *start, char *stop, \ |
== sopno startst, sopno stopst); |
== #endif |
*/ |
static void |
at(m, title, start, stop, startst, stopst) |
struct match *m; |
char *title; |
char *start; |
char *stop; |
sopno startst; |
sopno stopst; |
{ |
if (!(m->eflags®_TRACE)) |
return; |
printf("%s %s-", title, pchar(*start)); |
printf("%s ", pchar(*stop)); |
printf("%ld-%ld\n", (long)startst, (long)stopst); |
} |
#ifndef PCHARDONE |
#define PCHARDONE /* never again */ |
/* |
- pchar - make a character printable |
== #ifdef REDEBUG |
== static char *pchar(int ch); |
== #endif |
* |
* Is this identical to regchar() over in debug.c? Well, yes. But a |
* duplicate here avoids having a debugging-capable regexec.o tied to |
* a matching debug.o, and this is convenient. It all disappears in |
* the non-debug compilation anyway, so it doesn't matter much. |
*/ |
static char * /* -> representation */ |
pchar(ch) |
int ch; |
{ |
static char pbuf[10]; |
if (isprint(ch) || ch == ' ') |
sprintf(pbuf, "%c", ch); |
else |
sprintf(pbuf, "\\%o", ch); |
return(pbuf); |
} |
#endif |
#endif |
#undef matcher |
#undef fast |
#undef slow |
#undef dissect |
#undef backref |
#undef step |
#undef print |
#undef at |
#undef match |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/engine.ih |
---|
0,0 → 1,35 |
/* ========= begin header generated by ./mkh ========= */ |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* === engine.c === */ |
static int matcher (register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); |
static char *dissect (register struct match *m, char *start, char *stop, sopno startst, sopno stopst); |
static char *backref (register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); |
static char *fast (register struct match *m, char *start, char *stop, sopno startst, sopno stopst); |
static char *slow (register struct match *m, char *start, char *stop, sopno startst, sopno stopst); |
static states step (register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); |
#define BOL (OUT+1) |
#define EOL (BOL+1) |
#define BOLEOL (BOL+2) |
#define NOTHING (BOL+3) |
#define BOW (BOL+4) |
#define EOW (BOL+5) |
#define CODEMAX (BOL+5) /* highest code used */ |
#define NONCHAR(c) ((c) > CHAR_MAX) |
#define NNONCHAR (CODEMAX-CHAR_MAX) |
#ifdef REDEBUG |
static void print (struct match *m, char *caption, states st, int ch, FILE *d); |
#endif |
#ifdef REDEBUG |
static void at (struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); |
#endif |
#ifdef REDEBUG |
static char *pchar (int ch); |
#endif |
#ifdef __cplusplus |
} |
#endif |
/* ========= end header generated by ./mkh ========= */ |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/main.c |
---|
0,0 → 1,511 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <stdio.h> |
#include <string.h> |
#include <sys/types.h> |
#include <regex.h> |
#include <assert.h> |
#include "main.ih" |
char *progname; |
int debug = 0; |
int line = 0; |
int status = 0; |
int copts = REG_EXTENDED; |
int eopts = 0; |
regoff_t startoff = 0; |
regoff_t endoff = 0; |
extern int split(); |
extern void regprint(); |
/* |
- main - do the simple case, hand off to regress() for regression |
*/ |
main(argc, argv) |
int argc; |
char *argv[]; |
{ |
regex_t re; |
# define NS 10 |
regmatch_t subs[NS]; |
char erbuf[100]; |
int err; |
size_t len; |
int c; |
int errflg = 0; |
register int i; |
extern int optind; |
extern char *optarg; |
progname = argv[0]; |
while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) |
switch (c) { |
case 'c': /* compile options */ |
copts = options('c', optarg); |
break; |
case 'e': /* execute options */ |
eopts = options('e', optarg); |
break; |
case 'S': /* start offset */ |
startoff = (regoff_t)atoi(optarg); |
break; |
case 'E': /* end offset */ |
endoff = (regoff_t)atoi(optarg); |
break; |
case 'x': /* Debugging. */ |
debug++; |
break; |
case '?': |
default: |
errflg++; |
break; |
} |
if (errflg) { |
fprintf(stderr, "usage: %s ", progname); |
fprintf(stderr, "[-c copt][-C][-d] [re]\n"); |
exit(2); |
} |
if (optind >= argc) { |
regress(stdin); |
exit(status); |
} |
err = regcomp(&re, argv[optind++], copts); |
if (err) { |
len = regerror(err, &re, erbuf, sizeof(erbuf)); |
fprintf(stderr, "error %s, %d/%d `%s'\n", |
eprint(err), len, sizeof(erbuf), erbuf); |
exit(status); |
} |
regprint(&re, stdout); |
if (optind >= argc) { |
regfree(&re); |
exit(status); |
} |
if (eopts®_STARTEND) { |
subs[0].rm_so = startoff; |
subs[0].rm_eo = strlen(argv[optind]) - endoff; |
} |
err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); |
if (err) { |
len = regerror(err, &re, erbuf, sizeof(erbuf)); |
fprintf(stderr, "error %s, %d/%d `%s'\n", |
eprint(err), len, sizeof(erbuf), erbuf); |
exit(status); |
} |
if (!(copts®_NOSUB)) { |
len = (int)(subs[0].rm_eo - subs[0].rm_so); |
if (subs[0].rm_so != -1) { |
if (len != 0) |
printf("match `%.*s'\n", len, |
argv[optind] + subs[0].rm_so); |
else |
printf("match `'@%.1s\n", |
argv[optind] + subs[0].rm_so); |
} |
for (i = 1; i < NS; i++) |
if (subs[i].rm_so != -1) |
printf("(%d) `%.*s'\n", i, |
(int)(subs[i].rm_eo - subs[i].rm_so), |
argv[optind] + subs[i].rm_so); |
} |
exit(status); |
} |
/* |
- regress - main loop of regression test |
== void regress(FILE *in); |
*/ |
void |
regress(in) |
FILE *in; |
{ |
char inbuf[1000]; |
# define MAXF 10 |
char *f[MAXF]; |
int nf; |
int i; |
char erbuf[100]; |
size_t ne; |
char *badpat = "invalid regular expression"; |
# define SHORT 10 |
char *bpname = "REG_BADPAT"; |
regex_t re; |
while (fgets(inbuf, sizeof(inbuf), in) != NULL) { |
line++; |
if (inbuf[0] == '#' || inbuf[0] == '\n') |
continue; /* NOTE CONTINUE */ |
inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ |
if (debug) |
fprintf(stdout, "%d:\n", line); |
nf = split(inbuf, f, MAXF, "\t\t"); |
if (nf < 3) { |
fprintf(stderr, "bad input, line %d\n", line); |
exit(1); |
} |
for (i = 0; i < nf; i++) |
if (strcmp(f[i], "\"\"") == 0) |
f[i] = ""; |
if (nf <= 3) |
f[3] = NULL; |
if (nf <= 4) |
f[4] = NULL; |
try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); |
if (opt('&', f[1])) /* try with either type of RE */ |
try(f[0], f[1], f[2], f[3], f[4], |
options('c', f[1]) &~ REG_EXTENDED); |
} |
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); |
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { |
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", |
erbuf, badpat); |
status = 1; |
} |
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); |
if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || |
ne != strlen(badpat)+1) { |
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", |
erbuf, SHORT-1, badpat); |
status = 1; |
} |
ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); |
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { |
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", |
erbuf, bpname); |
status = 1; |
} |
re.re_endp = bpname; |
ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); |
if (atoi(erbuf) != (int)REG_BADPAT) { |
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", |
erbuf, (long)REG_BADPAT); |
status = 1; |
} else if (ne != strlen(erbuf)+1) { |
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", |
erbuf, (long)REG_BADPAT); |
status = 1; |
} |
} |
/* |
- try - try it, and report on problems |
== void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); |
*/ |
void |
try(f0, f1, f2, f3, f4, opts) |
char *f0; |
char *f1; |
char *f2; |
char *f3; |
char *f4; |
int opts; /* may not match f1 */ |
{ |
regex_t re; |
# define NSUBS 10 |
regmatch_t subs[NSUBS]; |
# define NSHOULD 15 |
char *should[NSHOULD]; |
int nshould; |
char erbuf[100]; |
int err; |
int len; |
char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; |
register int i; |
char *grump; |
char f0copy[1000]; |
char f2copy[1000]; |
strcpy(f0copy, f0); |
re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; |
fixstr(f0copy); |
err = regcomp(&re, f0copy, opts); |
if (err != 0 && (!opt('C', f1) || err != efind(f2))) { |
/* unexpected error or wrong error */ |
len = regerror(err, &re, erbuf, sizeof(erbuf)); |
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", |
line, type, eprint(err), len, |
sizeof(erbuf), erbuf); |
status = 1; |
} else if (err == 0 && opt('C', f1)) { |
/* unexpected success */ |
fprintf(stderr, "%d: %s should have given REG_%s\n", |
line, type, f2); |
status = 1; |
err = 1; /* so we won't try regexec */ |
} |
if (err != 0) { |
regfree(&re); |
return; |
} |
strcpy(f2copy, f2); |
fixstr(f2copy); |
if (options('e', f1)®_STARTEND) { |
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) |
fprintf(stderr, "%d: bad STARTEND syntax\n", line); |
subs[0].rm_so = strchr(f2, '(') - f2 + 1; |
subs[0].rm_eo = strchr(f2, ')') - f2; |
} |
err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); |
if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { |
/* unexpected error or wrong error */ |
len = regerror(err, &re, erbuf, sizeof(erbuf)); |
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", |
line, type, eprint(err), len, |
sizeof(erbuf), erbuf); |
status = 1; |
} else if (err != 0) { |
/* nothing more to check */ |
} else if (f3 == NULL) { |
/* unexpected success */ |
fprintf(stderr, "%d: %s exec should have failed\n", |
line, type); |
status = 1; |
err = 1; /* just on principle */ |
} else if (opts®_NOSUB) { |
/* nothing more to check */ |
} else if ((grump = check(f2, subs[0], f3)) != NULL) { |
fprintf(stderr, "%d: %s %s\n", line, type, grump); |
status = 1; |
err = 1; |
} |
if (err != 0 || f4 == NULL) { |
regfree(&re); |
return; |
} |
for (i = 1; i < NSHOULD; i++) |
should[i] = NULL; |
nshould = split(f4, should+1, NSHOULD-1, ","); |
if (nshould == 0) { |
nshould = 1; |
should[1] = ""; |
} |
for (i = 1; i < NSUBS; i++) { |
grump = check(f2, subs[i], should[i]); |
if (grump != NULL) { |
fprintf(stderr, "%d: %s $%d %s\n", line, |
type, i, grump); |
status = 1; |
err = 1; |
} |
} |
regfree(&re); |
} |
/* |
- options - pick options out of a regression-test string |
== int options(int type, char *s); |
*/ |
int |
options(type, s) |
int type; /* 'c' compile, 'e' exec */ |
char *s; |
{ |
register char *p; |
register int o = (type == 'c') ? copts : eopts; |
register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; |
for (p = s; *p != '\0'; p++) |
if (strchr(legal, *p) != NULL) |
switch (*p) { |
case 'b': |
o &= ~REG_EXTENDED; |
break; |
case 'i': |
o |= REG_ICASE; |
break; |
case 's': |
o |= REG_NOSUB; |
break; |
case 'n': |
o |= REG_NEWLINE; |
break; |
case 'm': |
o &= ~REG_EXTENDED; |
o |= REG_NOSPEC; |
break; |
case 'p': |
o |= REG_PEND; |
break; |
case '^': |
o |= REG_NOTBOL; |
break; |
case '$': |
o |= REG_NOTEOL; |
break; |
case '#': |
o |= REG_STARTEND; |
break; |
case 't': /* trace */ |
o |= REG_TRACE; |
break; |
case 'l': /* force long representation */ |
o |= REG_LARGE; |
break; |
case 'r': /* force backref use */ |
o |= REG_BACKR; |
break; |
} |
return(o); |
} |
/* |
- opt - is a particular option in a regression string? |
== int opt(int c, char *s); |
*/ |
int /* predicate */ |
opt(c, s) |
int c; |
char *s; |
{ |
return(strchr(s, c) != NULL); |
} |
/* |
- fixstr - transform magic characters in strings |
== void fixstr(register char *p); |
*/ |
void |
fixstr(p) |
register char *p; |
{ |
if (p == NULL) |
return; |
for (; *p != '\0'; p++) |
if (*p == 'N') |
*p = '\n'; |
else if (*p == 'T') |
*p = '\t'; |
else if (*p == 'S') |
*p = ' '; |
else if (*p == 'Z') |
*p = '\0'; |
} |
/* |
- check - check a substring match |
== char *check(char *str, regmatch_t sub, char *should); |
*/ |
char * /* NULL or complaint */ |
check(str, sub, should) |
char *str; |
regmatch_t sub; |
char *should; |
{ |
register int len; |
register int shlen; |
register char *p; |
static char grump[500]; |
register char *at = NULL; |
if (should != NULL && strcmp(should, "-") == 0) |
should = NULL; |
if (should != NULL && should[0] == '@') { |
at = should + 1; |
should = ""; |
} |
/* check rm_so and rm_eo for consistency */ |
if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || |
(sub.rm_so != -1 && sub.rm_eo == -1) || |
(sub.rm_so != -1 && sub.rm_so < 0) || |
(sub.rm_eo != -1 && sub.rm_eo < 0) ) { |
sprintf(grump, "start %ld end %ld", (long)sub.rm_so, |
(long)sub.rm_eo); |
return(grump); |
} |
/* check for no match */ |
if (sub.rm_so == -1 && should == NULL) |
return(NULL); |
if (sub.rm_so == -1) |
return("did not match"); |
/* check for in range */ |
if (sub.rm_eo > strlen(str)) { |
sprintf(grump, "start %ld end %ld, past end of string", |
(long)sub.rm_so, (long)sub.rm_eo); |
return(grump); |
} |
len = (int)(sub.rm_eo - sub.rm_so); |
shlen = (int)strlen(should); |
p = str + sub.rm_so; |
/* check for not supposed to match */ |
if (should == NULL) { |
sprintf(grump, "matched `%.*s'", len, p); |
return(grump); |
} |
/* check for wrong match */ |
if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { |
sprintf(grump, "matched `%.*s' instead", len, p); |
return(grump); |
} |
if (shlen > 0) |
return(NULL); |
/* check null match in right place */ |
if (at == NULL) |
return(NULL); |
shlen = strlen(at); |
if (shlen == 0) |
shlen = 1; /* force check for end-of-string */ |
if (strncmp(p, at, shlen) != 0) { |
sprintf(grump, "matched null at `%.20s'", p); |
return(grump); |
} |
return(NULL); |
} |
/* |
- eprint - convert error number to name |
== static char *eprint(int err); |
*/ |
static char * |
eprint(err) |
int err; |
{ |
static char epbuf[100]; |
size_t len; |
len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); |
assert(len <= sizeof(epbuf)); |
return(epbuf); |
} |
/* |
- efind - convert error name to number |
== static int efind(char *name); |
*/ |
static int |
efind(name) |
char *name; |
{ |
static char efbuf[100]; |
size_t n; |
regex_t re; |
sprintf(efbuf, "REG_%s", name); |
assert(strlen(efbuf) < sizeof(efbuf)); |
re.re_endp = efbuf; |
(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); |
return(atoi(efbuf)); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/main.ih |
---|
0,0 → 1,19 |
/* ========= begin header generated by ./mkh ========= */ |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* === main.c === */ |
void regress (FILE *in); |
void try (char *f0, char *f1, char *f2, char *f3, char *f4, int opts); |
int options (int type, char *s); |
int opt (int c, char *s); |
void fixstr (register char *p); |
char *check (char *str, regmatch_t sub, char *should); |
static char *eprint (int err); |
static int efind (char *name); |
#ifdef __cplusplus |
} |
#endif |
/* ========= end header generated by ./mkh ========= */ |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regcomp.c |
---|
0,0 → 1,1606 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <sys/types.h> |
#include <stdio.h> |
#include <string.h> |
#include <ctype.h> |
#include <limits.h> |
#include <stdlib.h> |
#include <regex.h> |
#include "utils.h" |
#include "regex2.h" |
#include "cclass.h" |
#include "cname.h" |
/* |
* parse structure, passed up and down to avoid global variables and |
* other clumsinesses |
*/ |
struct parse { |
char *next; /* next character in RE */ |
char *end; /* end of string (-> NUL normally) */ |
int error; /* has an error been seen? */ |
sop *strip; /* malloced strip */ |
sopno ssize; /* malloced strip size (allocated) */ |
sopno slen; /* malloced strip length (used) */ |
int ncsalloc; /* number of csets allocated */ |
struct re_guts *g; |
# define NPAREN 10 /* we need to remember () 1-9 for back refs */ |
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ |
sopno pend[NPAREN]; /* -> ) ([0] unused) */ |
}; |
#include "regcomp.ih" |
static char nuls[10]; /* place to point scanner in event of error */ |
/* |
* macros for use with parse structure |
* BEWARE: these know that the parse structure is named `p' !!! |
*/ |
#define PEEK() (*p->next) |
#define PEEK2() (*(p->next+1)) |
#define MORE() (p->next < p->end) |
#define MORE2() (p->next+1 < p->end) |
#define SEE(c) (MORE() && PEEK() == (c)) |
#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) |
#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) |
#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) |
#define NEXT() (p->next++) |
#define NEXT2() (p->next += 2) |
#define NEXTn(n) (p->next += (n)) |
#define GETNEXT() (*p->next++) |
#define SETERROR(e) seterr(p, (e)) |
#define REQUIRE(co, e) ((co) || SETERROR(e)) |
#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) |
#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) |
#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) |
#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) |
#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) |
#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) |
#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) |
#define HERE() (p->slen) |
#define THERE() (p->slen - 1) |
#define THERETHERE() (p->slen - 2) |
#define DROP(n) (p->slen -= (n)) |
#ifndef NDEBUG |
static int never = 0; /* for use in asserts; shuts lint up */ |
#else |
#define never 0 /* some <assert.h>s have bugs too */ |
#endif |
/* |
- regcomp - interface for parser and compilation |
= extern int regcomp(regex_t *, const char *, int); |
= #define REG_BASIC 0000 |
= #define REG_EXTENDED 0001 |
= #define REG_ICASE 0002 |
= #define REG_NOSUB 0004 |
= #define REG_NEWLINE 0010 |
= #define REG_NOSPEC 0020 |
= #define REG_PEND 0040 |
= #define REG_DUMP 0200 |
*/ |
int /* 0 success, otherwise REG_something */ |
regcomp(preg, pattern, cflags) |
regex_t *preg; |
const char *pattern; |
int cflags; |
{ |
struct parse pa; |
register struct re_guts *g; |
register struct parse *p = &pa; |
register int i; |
register size_t len; |
#ifdef REDEBUG |
# define GOODFLAGS(f) (f) |
#else |
# define GOODFLAGS(f) ((f)&~REG_DUMP) |
#endif |
cflags = GOODFLAGS(cflags); |
if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) |
return(REG_INVARG); |
if (cflags®_PEND) { |
if (preg->re_endp < pattern) |
return(REG_INVARG); |
len = preg->re_endp - pattern; |
} else |
len = strlen((char *)pattern); |
/* do the mallocs early so failure handling is easy */ |
g = (struct re_guts *)malloc(sizeof(struct re_guts) + |
(NC-1)*sizeof(cat_t)); |
if (g == NULL) |
return(REG_ESPACE); |
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ |
p->strip = (sop *)malloc(p->ssize * sizeof(sop)); |
p->slen = 0; |
if (p->strip == NULL) { |
free((char *)g); |
return(REG_ESPACE); |
} |
/* set things up */ |
p->g = g; |
p->next = (char *)pattern; /* convenience; we do not modify it */ |
p->end = p->next + len; |
p->error = 0; |
p->ncsalloc = 0; |
for (i = 0; i < NPAREN; i++) { |
p->pbegin[i] = 0; |
p->pend[i] = 0; |
} |
g->csetsize = NC; |
g->sets = NULL; |
g->setbits = NULL; |
g->ncsets = 0; |
g->cflags = cflags; |
g->iflags = 0; |
g->nbol = 0; |
g->neol = 0; |
g->must = NULL; |
g->mlen = 0; |
g->nsub = 0; |
g->ncategories = 1; /* category 0 is "everything else" */ |
g->categories = &g->catspace[-(CHAR_MIN)]; |
(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); |
g->backrefs = 0; |
/* do it */ |
EMIT(OEND, 0); |
g->firststate = THERE(); |
if (cflags®_EXTENDED) |
p_ere(p, OUT); |
else if (cflags®_NOSPEC) |
p_str(p); |
else |
p_bre(p, OUT, OUT); |
EMIT(OEND, 0); |
g->laststate = THERE(); |
/* tidy up loose ends and fill things in */ |
categorize(p, g); |
stripsnug(p, g); |
findmust(p, g); |
g->nplus = pluscount(p, g); |
g->magic = MAGIC2; |
preg->re_nsub = g->nsub; |
preg->re_g = g; |
preg->re_magic = MAGIC1; |
#ifndef REDEBUG |
/* not debugging, so can't rely on the assert() in regexec() */ |
if (g->iflags&BAD) |
SETERROR(REG_ASSERT); |
#endif |
/* win or lose, we're done */ |
if (p->error != 0) /* lose */ |
regfree(preg); |
return(p->error); |
} |
/* |
- p_ere - ERE parser top level, concatenation and alternation |
== static void p_ere(register struct parse *p, int stop); |
*/ |
static void |
p_ere(p, stop) |
register struct parse *p; |
int stop; /* character this ERE should end at */ |
{ |
register char c; |
register sopno prevback; |
register sopno prevfwd; |
register sopno conc; |
register int first = 1; /* is this the first alternative? */ |
for (;;) { |
/* do a bunch of concatenated expressions */ |
conc = HERE(); |
while (MORE() && (c = PEEK()) != '|' && c != stop) |
p_ere_exp(p); |
REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ |
if (!EAT('|')) |
break; /* NOTE BREAK OUT */ |
if (first) { |
INSERT(OCH_, conc); /* offset is wrong */ |
prevfwd = conc; |
prevback = conc; |
first = 0; |
} |
ASTERN(OOR1, prevback); |
prevback = THERE(); |
AHEAD(prevfwd); /* fix previous offset */ |
prevfwd = HERE(); |
EMIT(OOR2, 0); /* offset is very wrong */ |
} |
if (!first) { /* tail-end fixups */ |
AHEAD(prevfwd); |
ASTERN(O_CH, prevback); |
} |
assert(!MORE() || SEE(stop)); |
} |
/* |
- p_ere_exp - parse one subERE, an atom possibly followed by a repetition op |
== static void p_ere_exp(register struct parse *p); |
*/ |
static void |
p_ere_exp(p) |
register struct parse *p; |
{ |
register char c; |
register sopno pos; |
register int count; |
register int count2; |
register sopno subno; |
int wascaret = 0; |
assert(MORE()); /* caller should have ensured this */ |
c = GETNEXT(); |
pos = HERE(); |
switch (c) { |
case '(': |
REQUIRE(MORE(), REG_EPAREN); |
p->g->nsub++; |
subno = p->g->nsub; |
if (subno < NPAREN) |
p->pbegin[subno] = HERE(); |
EMIT(OLPAREN, subno); |
if (!SEE(')')) |
p_ere(p, ')'); |
if (subno < NPAREN) { |
p->pend[subno] = HERE(); |
assert(p->pend[subno] != 0); |
} |
EMIT(ORPAREN, subno); |
MUSTEAT(')', REG_EPAREN); |
break; |
#ifndef POSIX_MISTAKE |
case ')': /* happens only if no current unmatched ( */ |
/* |
* You may ask, why the ifndef? Because I didn't notice |
* this until slightly too late for 1003.2, and none of the |
* other 1003.2 regular-expression reviewers noticed it at |
* all. So an unmatched ) is legal POSIX, at least until |
* we can get it fixed. |
*/ |
SETERROR(REG_EPAREN); |
break; |
#endif |
case '^': |
EMIT(OBOL, 0); |
p->g->iflags |= USEBOL; |
p->g->nbol++; |
wascaret = 1; |
break; |
case '$': |
EMIT(OEOL, 0); |
p->g->iflags |= USEEOL; |
p->g->neol++; |
break; |
case '|': |
SETERROR(REG_EMPTY); |
break; |
case '*': |
case '+': |
case '?': |
SETERROR(REG_BADRPT); |
break; |
case '.': |
if (p->g->cflags®_NEWLINE) |
nonnewline(p); |
else |
EMIT(OANY, 0); |
break; |
case '[': |
p_bracket(p); |
break; |
case '\\': |
REQUIRE(MORE(), REG_EESCAPE); |
c = GETNEXT(); |
ordinary(p, c); |
break; |
case '{': /* okay as ordinary except if digit follows */ |
REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); |
/* FALLTHROUGH */ |
default: |
ordinary(p, c); |
break; |
} |
if (!MORE()) |
return; |
c = PEEK(); |
/* we call { a repetition if followed by a digit */ |
if (!( c == '*' || c == '+' || c == '?' || |
(c == '{' && MORE2() && isdigit(PEEK2())) )) |
return; /* no repetition, we're done */ |
NEXT(); |
REQUIRE(!wascaret, REG_BADRPT); |
switch (c) { |
case '*': /* implemented as +? */ |
/* this case does not require the (y|) trick, noKLUDGE */ |
INSERT(OPLUS_, pos); |
ASTERN(O_PLUS, pos); |
INSERT(OQUEST_, pos); |
ASTERN(O_QUEST, pos); |
break; |
case '+': |
INSERT(OPLUS_, pos); |
ASTERN(O_PLUS, pos); |
break; |
case '?': |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
INSERT(OCH_, pos); /* offset slightly wrong */ |
ASTERN(OOR1, pos); /* this one's right */ |
AHEAD(pos); /* fix the OCH_ */ |
EMIT(OOR2, 0); /* offset very wrong... */ |
AHEAD(THERE()); /* ...so fix it */ |
ASTERN(O_CH, THERETHERE()); |
break; |
case '{': |
count = p_count(p); |
if (EAT(',')) { |
if (isdigit(PEEK())) { |
count2 = p_count(p); |
REQUIRE(count <= count2, REG_BADBR); |
} else /* single number with comma */ |
count2 = INFINITY; |
} else /* just a single number */ |
count2 = count; |
repeat(p, pos, count, count2); |
if (!EAT('}')) { /* error heuristics */ |
while (MORE() && PEEK() != '}') |
NEXT(); |
REQUIRE(MORE(), REG_EBRACE); |
SETERROR(REG_BADBR); |
} |
break; |
} |
if (!MORE()) |
return; |
c = PEEK(); |
if (!( c == '*' || c == '+' || c == '?' || |
(c == '{' && MORE2() && isdigit(PEEK2())) ) ) |
return; |
SETERROR(REG_BADRPT); |
} |
/* |
- p_str - string (no metacharacters) "parser" |
== static void p_str(register struct parse *p); |
*/ |
static void |
p_str(p) |
register struct parse *p; |
{ |
REQUIRE(MORE(), REG_EMPTY); |
while (MORE()) |
ordinary(p, GETNEXT()); |
} |
/* |
- p_bre - BRE parser top level, anchoring and concatenation |
== static void p_bre(register struct parse *p, register int end1, \ |
== register int end2); |
* Giving end1 as OUT essentially eliminates the end1/end2 check. |
* |
* This implementation is a bit of a kludge, in that a trailing $ is first |
* taken as an ordinary character and then revised to be an anchor. The |
* only undesirable side effect is that '$' gets included as a character |
* category in such cases. This is fairly harmless; not worth fixing. |
* The amount of lookahead needed to avoid this kludge is excessive. |
*/ |
static void |
p_bre(p, end1, end2) |
register struct parse *p; |
register int end1; /* first terminating character */ |
register int end2; /* second terminating character */ |
{ |
register sopno start = HERE(); |
register int first = 1; /* first subexpression? */ |
register int wasdollar = 0; |
if (EAT('^')) { |
EMIT(OBOL, 0); |
p->g->iflags |= USEBOL; |
p->g->nbol++; |
} |
while (MORE() && !SEETWO(end1, end2)) { |
wasdollar = p_simp_re(p, first); |
first = 0; |
} |
if (wasdollar) { /* oops, that was a trailing anchor */ |
DROP(1); |
EMIT(OEOL, 0); |
p->g->iflags |= USEEOL; |
p->g->neol++; |
} |
REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ |
} |
/* |
- p_simp_re - parse a simple RE, an atom possibly followed by a repetition |
== static int p_simp_re(register struct parse *p, int starordinary); |
*/ |
static int /* was the simple RE an unbackslashed $? */ |
p_simp_re(p, starordinary) |
register struct parse *p; |
int starordinary; /* is a leading * an ordinary character? */ |
{ |
register int c; |
register int count; |
register int count2; |
register sopno pos; |
register int i; |
register sopno subno; |
# define BACKSL (1<<CHAR_BIT) |
pos = HERE(); /* repetion op, if any, covers from here */ |
assert(MORE()); /* caller should have ensured this */ |
c = GETNEXT(); |
if (c == '\\') { |
REQUIRE(MORE(), REG_EESCAPE); |
c = BACKSL | (unsigned char)GETNEXT(); |
} |
switch (c) { |
case '.': |
if (p->g->cflags®_NEWLINE) |
nonnewline(p); |
else |
EMIT(OANY, 0); |
break; |
case '[': |
p_bracket(p); |
break; |
case BACKSL|'{': |
SETERROR(REG_BADRPT); |
break; |
case BACKSL|'(': |
p->g->nsub++; |
subno = p->g->nsub; |
if (subno < NPAREN) |
p->pbegin[subno] = HERE(); |
EMIT(OLPAREN, subno); |
/* the MORE here is an error heuristic */ |
if (MORE() && !SEETWO('\\', ')')) |
p_bre(p, '\\', ')'); |
if (subno < NPAREN) { |
p->pend[subno] = HERE(); |
assert(p->pend[subno] != 0); |
} |
EMIT(ORPAREN, subno); |
REQUIRE(EATTWO('\\', ')'), REG_EPAREN); |
break; |
case BACKSL|')': /* should not get here -- must be user */ |
case BACKSL|'}': |
SETERROR(REG_EPAREN); |
break; |
case BACKSL|'1': |
case BACKSL|'2': |
case BACKSL|'3': |
case BACKSL|'4': |
case BACKSL|'5': |
case BACKSL|'6': |
case BACKSL|'7': |
case BACKSL|'8': |
case BACKSL|'9': |
i = (c&~BACKSL) - '0'; |
assert(i < NPAREN); |
if (p->pend[i] != 0) { |
assert(i <= p->g->nsub); |
EMIT(OBACK_, i); |
assert(p->pbegin[i] != 0); |
assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); |
assert(OP(p->strip[p->pend[i]]) == ORPAREN); |
(void) dupl(p, p->pbegin[i]+1, p->pend[i]); |
EMIT(O_BACK, i); |
} else |
SETERROR(REG_ESUBREG); |
p->g->backrefs = 1; |
break; |
case '*': |
REQUIRE(starordinary, REG_BADRPT); |
/* FALLTHROUGH */ |
default: |
ordinary(p, c &~ BACKSL); |
break; |
} |
if (EAT('*')) { /* implemented as +? */ |
/* this case does not require the (y|) trick, noKLUDGE */ |
INSERT(OPLUS_, pos); |
ASTERN(O_PLUS, pos); |
INSERT(OQUEST_, pos); |
ASTERN(O_QUEST, pos); |
} else if (EATTWO('\\', '{')) { |
count = p_count(p); |
if (EAT(',')) { |
if (MORE() && isdigit(PEEK())) { |
count2 = p_count(p); |
REQUIRE(count <= count2, REG_BADBR); |
} else /* single number with comma */ |
count2 = INFINITY; |
} else /* just a single number */ |
count2 = count; |
repeat(p, pos, count, count2); |
if (!EATTWO('\\', '}')) { /* error heuristics */ |
while (MORE() && !SEETWO('\\', '}')) |
NEXT(); |
REQUIRE(MORE(), REG_EBRACE); |
SETERROR(REG_BADBR); |
} |
} else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ |
return(1); |
return(0); |
} |
/* |
- p_count - parse a repetition count |
== static int p_count(register struct parse *p); |
*/ |
static int /* the value */ |
p_count(p) |
register struct parse *p; |
{ |
register int count = 0; |
register int ndigits = 0; |
while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { |
count = count*10 + (GETNEXT() - '0'); |
ndigits++; |
} |
REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); |
return(count); |
} |
/* |
- p_bracket - parse a bracketed character list |
== static void p_bracket(register struct parse *p); |
* |
* Note a significant property of this code: if the allocset() did SETERROR, |
* no set operations are done. |
*/ |
static void |
p_bracket(p) |
register struct parse *p; |
{ |
register char c; |
register cset *cs = allocset(p); |
register int invert = 0; |
/* Dept of Truly Sickening Special-Case Kludges */ |
if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { |
EMIT(OBOW, 0); |
NEXTn(6); |
return; |
} |
if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { |
EMIT(OEOW, 0); |
NEXTn(6); |
return; |
} |
if (EAT('^')) |
invert++; /* make note to invert set at end */ |
if (EAT(']')) |
CHadd(cs, ']'); |
else if (EAT('-')) |
CHadd(cs, '-'); |
while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) |
p_b_term(p, cs); |
if (EAT('-')) |
CHadd(cs, '-'); |
MUSTEAT(']', REG_EBRACK); |
if (p->error != 0) /* don't mess things up further */ |
return; |
if (p->g->cflags®_ICASE) { |
register int i; |
register int ci; |
for (i = p->g->csetsize - 1; i >= 0; i--) |
if (CHIN(cs, i) && isalpha(i)) { |
ci = othercase(i); |
if (ci != i) |
CHadd(cs, ci); |
} |
if (cs->multis != NULL) |
mccase(p, cs); |
} |
if (invert) { |
register int i; |
for (i = p->g->csetsize - 1; i >= 0; i--) |
if (CHIN(cs, i)) |
CHsub(cs, i); |
else |
CHadd(cs, i); |
if (p->g->cflags®_NEWLINE) |
CHsub(cs, '\n'); |
if (cs->multis != NULL) |
mcinvert(p, cs); |
} |
assert(cs->multis == NULL); /* xxx */ |
if (nch(p, cs) == 1) { /* optimize singleton sets */ |
ordinary(p, firstch(p, cs)); |
freeset(p, cs); |
} else |
EMIT(OANYOF, freezeset(p, cs)); |
} |
/* |
- p_b_term - parse one term of a bracketed character list |
== static void p_b_term(register struct parse *p, register cset *cs); |
*/ |
static void |
p_b_term(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register char c; |
register char start, finish; |
register int i; |
/* classify what we've got */ |
switch ((MORE()) ? PEEK() : '\0') { |
case '[': |
c = (MORE2()) ? PEEK2() : '\0'; |
break; |
case '-': |
SETERROR(REG_ERANGE); |
return; /* NOTE RETURN */ |
break; |
default: |
c = '\0'; |
break; |
} |
switch (c) { |
case ':': /* character class */ |
NEXT2(); |
REQUIRE(MORE(), REG_EBRACK); |
c = PEEK(); |
REQUIRE(c != '-' && c != ']', REG_ECTYPE); |
p_b_cclass(p, cs); |
REQUIRE(MORE(), REG_EBRACK); |
REQUIRE(EATTWO(':', ']'), REG_ECTYPE); |
break; |
case '=': /* equivalence class */ |
NEXT2(); |
REQUIRE(MORE(), REG_EBRACK); |
c = PEEK(); |
REQUIRE(c != '-' && c != ']', REG_ECOLLATE); |
p_b_eclass(p, cs); |
REQUIRE(MORE(), REG_EBRACK); |
REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); |
break; |
default: /* symbol, ordinary character, or range */ |
/* xxx revision needed for multichar stuff */ |
start = p_b_symbol(p); |
if (SEE('-') && MORE2() && PEEK2() != ']') { |
/* range */ |
NEXT(); |
if (EAT('-')) |
finish = '-'; |
else |
finish = p_b_symbol(p); |
} else |
finish = start; |
/* xxx what about signed chars here... */ |
REQUIRE(start <= finish, REG_ERANGE); |
for (i = start; i <= finish; i++) |
CHadd(cs, i); |
break; |
} |
} |
/* |
- p_b_cclass - parse a character-class name and deal with it |
== static void p_b_cclass(register struct parse *p, register cset *cs); |
*/ |
static void |
p_b_cclass(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register char *sp = p->next; |
register struct cclass *cp; |
register size_t len; |
register char *u; |
register char c; |
while (MORE() && isalpha(PEEK())) |
NEXT(); |
len = p->next - sp; |
for (cp = cclasses; cp->name != NULL; cp++) |
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
break; |
if (cp->name == NULL) { |
/* oops, didn't find it */ |
SETERROR(REG_ECTYPE); |
return; |
} |
u = cp->chars; |
while ((c = *u++) != '\0') |
CHadd(cs, c); |
for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) |
MCadd(p, cs, u); |
} |
/* |
- p_b_eclass - parse an equivalence-class name and deal with it |
== static void p_b_eclass(register struct parse *p, register cset *cs); |
* |
* This implementation is incomplete. xxx |
*/ |
static void |
p_b_eclass(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register char c; |
c = p_b_coll_elem(p, '='); |
CHadd(cs, c); |
} |
/* |
- p_b_symbol - parse a character or [..]ed multicharacter collating symbol |
== static char p_b_symbol(register struct parse *p); |
*/ |
static char /* value of symbol */ |
p_b_symbol(p) |
register struct parse *p; |
{ |
register char value; |
REQUIRE(MORE(), REG_EBRACK); |
if (!EATTWO('[', '.')) |
return(GETNEXT()); |
/* collating symbol */ |
value = p_b_coll_elem(p, '.'); |
REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); |
return(value); |
} |
/* |
- p_b_coll_elem - parse a collating-element name and look it up |
== static char p_b_coll_elem(register struct parse *p, int endc); |
*/ |
static char /* value of collating element */ |
p_b_coll_elem(p, endc) |
register struct parse *p; |
int endc; /* name ended by endc,']' */ |
{ |
register char *sp = p->next; |
register struct cname *cp; |
register int len; |
register char c; |
while (MORE() && !SEETWO(endc, ']')) |
NEXT(); |
if (!MORE()) { |
SETERROR(REG_EBRACK); |
return(0); |
} |
len = p->next - sp; |
for (cp = cnames; cp->name != NULL; cp++) |
if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') |
return(cp->code); /* known name */ |
if (len == 1) |
return(*sp); /* single character */ |
SETERROR(REG_ECOLLATE); /* neither */ |
return(0); |
} |
/* |
- othercase - return the case counterpart of an alphabetic |
== static char othercase(int ch); |
*/ |
static char /* if no counterpart, return ch */ |
othercase(ch) |
int ch; |
{ |
assert(isalpha(ch)); |
if (isupper(ch)) |
return(tolower(ch)); |
else if (islower(ch)) |
return(toupper(ch)); |
else /* peculiar, but could happen */ |
return(ch); |
} |
/* |
- bothcases - emit a dualcase version of a two-case character |
== static void bothcases(register struct parse *p, int ch); |
* |
* Boy, is this implementation ever a kludge... |
*/ |
static void |
bothcases(p, ch) |
register struct parse *p; |
int ch; |
{ |
register char *oldnext = p->next; |
register char *oldend = p->end; |
char bracket[3]; |
assert(othercase(ch) != ch); /* p_bracket() would recurse */ |
p->next = bracket; |
p->end = bracket+2; |
bracket[0] = ch; |
bracket[1] = ']'; |
bracket[2] = '\0'; |
p_bracket(p); |
assert(p->next == bracket+2); |
p->next = oldnext; |
p->end = oldend; |
} |
/* |
- ordinary - emit an ordinary character |
== static void ordinary(register struct parse *p, register int ch); |
*/ |
static void |
ordinary(p, ch) |
register struct parse *p; |
register int ch; |
{ |
register cat_t *cap = p->g->categories; |
if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) |
bothcases(p, ch); |
else { |
EMIT(OCHAR, (unsigned char)ch); |
if (cap[ch] == 0) |
cap[ch] = p->g->ncategories++; |
} |
} |
/* |
- nonnewline - emit REG_NEWLINE version of OANY |
== static void nonnewline(register struct parse *p); |
* |
* Boy, is this implementation ever a kludge... |
*/ |
static void |
nonnewline(p) |
register struct parse *p; |
{ |
register char *oldnext = p->next; |
register char *oldend = p->end; |
char bracket[4]; |
p->next = bracket; |
p->end = bracket+3; |
bracket[0] = '^'; |
bracket[1] = '\n'; |
bracket[2] = ']'; |
bracket[3] = '\0'; |
p_bracket(p); |
assert(p->next == bracket+3); |
p->next = oldnext; |
p->end = oldend; |
} |
/* |
- repeat - generate code for a bounded repetition, recursively if needed |
== static void repeat(register struct parse *p, sopno start, int from, int to); |
*/ |
static void |
repeat(p, start, from, to) |
register struct parse *p; |
sopno start; /* operand from here to end of strip */ |
int from; /* repeated from this number */ |
int to; /* to this number of times (maybe INFINITY) */ |
{ |
register sopno finish = HERE(); |
# define N 2 |
# define INF 3 |
# define REP(f, t) ((f)*8 + (t)) |
# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) |
register sopno copy; |
if (p->error != 0) /* head off possible runaway recursion */ |
return; |
assert(from <= to); |
switch (REP(MAP(from), MAP(to))) { |
case REP(0, 0): /* must be user doing this */ |
DROP(finish-start); /* drop the operand */ |
break; |
case REP(0, 1): /* as x{1,1}? */ |
case REP(0, N): /* as x{1,n}? */ |
case REP(0, INF): /* as x{1,}? */ |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
INSERT(OCH_, start); /* offset is wrong... */ |
repeat(p, start+1, 1, to); |
ASTERN(OOR1, start); |
AHEAD(start); /* ... fix it */ |
EMIT(OOR2, 0); |
AHEAD(THERE()); |
ASTERN(O_CH, THERETHERE()); |
break; |
case REP(1, 1): /* trivial case */ |
/* done */ |
break; |
case REP(1, N): /* as x?x{1,n-1} */ |
/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ |
INSERT(OCH_, start); |
ASTERN(OOR1, start); |
AHEAD(start); |
EMIT(OOR2, 0); /* offset very wrong... */ |
AHEAD(THERE()); /* ...so fix it */ |
ASTERN(O_CH, THERETHERE()); |
copy = dupl(p, start+1, finish+1); |
assert(copy == finish+4); |
repeat(p, copy, 1, to-1); |
break; |
case REP(1, INF): /* as x+ */ |
INSERT(OPLUS_, start); |
ASTERN(O_PLUS, start); |
break; |
case REP(N, N): /* as xx{m-1,n-1} */ |
copy = dupl(p, start, finish); |
repeat(p, copy, from-1, to-1); |
break; |
case REP(N, INF): /* as xx{n-1,INF} */ |
copy = dupl(p, start, finish); |
repeat(p, copy, from-1, to); |
break; |
default: /* "can't happen" */ |
SETERROR(REG_ASSERT); /* just in case */ |
break; |
} |
} |
/* |
- seterr - set an error condition |
== static int seterr(register struct parse *p, int e); |
*/ |
static int /* useless but makes type checking happy */ |
seterr(p, e) |
register struct parse *p; |
int e; |
{ |
if (p->error == 0) /* keep earliest error condition */ |
p->error = e; |
p->next = nuls; /* try to bring things to a halt */ |
p->end = nuls; |
return(0); /* make the return value well-defined */ |
} |
/* |
- allocset - allocate a set of characters for [] |
== static cset *allocset(register struct parse *p); |
*/ |
static cset * |
allocset(p) |
register struct parse *p; |
{ |
register int no = p->g->ncsets++; |
register size_t nc; |
register size_t nbytes; |
register cset *cs; |
register size_t css = (size_t)p->g->csetsize; |
register int i; |
if (no >= p->ncsalloc) { /* need another column of space */ |
p->ncsalloc += CHAR_BIT; |
nc = p->ncsalloc; |
assert(nc % CHAR_BIT == 0); |
nbytes = nc / CHAR_BIT * css; |
if (p->g->sets == NULL) |
p->g->sets = (cset *)malloc(nc * sizeof(cset)); |
else |
p->g->sets = (cset *)realloc((char *)p->g->sets, |
nc * sizeof(cset)); |
if (p->g->setbits == NULL) |
p->g->setbits = (uch *)malloc(nbytes); |
else { |
p->g->setbits = (uch *)realloc((char *)p->g->setbits, |
nbytes); |
/* xxx this isn't right if setbits is now NULL */ |
for (i = 0; i < no; i++) |
p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); |
} |
if (p->g->sets != NULL && p->g->setbits != NULL) |
(void) memset((char *)p->g->setbits + (nbytes - css), |
0, css); |
else { |
no = 0; |
SETERROR(REG_ESPACE); |
/* caller's responsibility not to do set ops */ |
} |
} |
assert(p->g->sets != NULL); /* xxx */ |
cs = &p->g->sets[no]; |
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); |
cs->mask = 1 << ((no) % CHAR_BIT); |
cs->hash = 0; |
cs->smultis = 0; |
cs->multis = NULL; |
return(cs); |
} |
/* |
- freeset - free a now-unused set |
== static void freeset(register struct parse *p, register cset *cs); |
*/ |
static void |
freeset(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register int i; |
register cset *top = &p->g->sets[p->g->ncsets]; |
register size_t css = (size_t)p->g->csetsize; |
for (i = 0; i < css; i++) |
CHsub(cs, i); |
if (cs == top-1) /* recover only the easy case */ |
p->g->ncsets--; |
} |
/* |
- freezeset - final processing on a set of characters |
== static int freezeset(register struct parse *p, register cset *cs); |
* |
* The main task here is merging identical sets. This is usually a waste |
* of time (although the hash code minimizes the overhead), but can win |
* big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash |
* is done using addition rather than xor -- all ASCII [aA] sets xor to |
* the same value! |
*/ |
static int /* set number */ |
freezeset(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register uch h = cs->hash; |
register int i; |
register cset *top = &p->g->sets[p->g->ncsets]; |
register cset *cs2; |
register size_t css = (size_t)p->g->csetsize; |
/* look for an earlier one which is the same */ |
for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) |
if (cs2->hash == h && cs2 != cs) { |
/* maybe */ |
for (i = 0; i < css; i++) |
if (!!CHIN(cs2, i) != !!CHIN(cs, i)) |
break; /* no */ |
if (i == css) |
break; /* yes */ |
} |
if (cs2 < top) { /* found one */ |
freeset(p, cs); |
cs = cs2; |
} |
return((int)(cs - p->g->sets)); |
} |
/* |
- firstch - return first character in a set (which must have at least one) |
== static int firstch(register struct parse *p, register cset *cs); |
*/ |
static int /* character; there is no "none" value */ |
firstch(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register int i; |
register size_t css = (size_t)p->g->csetsize; |
for (i = 0; i < css; i++) |
if (CHIN(cs, i)) |
return((char)i); |
assert(never); |
return(0); /* arbitrary */ |
} |
/* |
- nch - number of characters in a set |
== static int nch(register struct parse *p, register cset *cs); |
*/ |
static int |
nch(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
register int i; |
register size_t css = (size_t)p->g->csetsize; |
register int n = 0; |
for (i = 0; i < css; i++) |
if (CHIN(cs, i)) |
n++; |
return(n); |
} |
/* |
- mcadd - add a collating element to a cset |
== static void mcadd(register struct parse *p, register cset *cs, \ |
== register char *cp); |
*/ |
static void |
mcadd(p, cs, cp) |
register struct parse *p; |
register cset *cs; |
register char *cp; |
{ |
register size_t oldend = cs->smultis; |
cs->smultis += strlen(cp) + 1; |
if (cs->multis == NULL) |
cs->multis = malloc(cs->smultis); |
else |
cs->multis = realloc(cs->multis, cs->smultis); |
if (cs->multis == NULL) { |
SETERROR(REG_ESPACE); |
return; |
} |
(void) strcpy(cs->multis + oldend - 1, cp); |
cs->multis[cs->smultis - 1] = '\0'; |
} |
/* |
- mcsub - subtract a collating element from a cset |
== static void mcsub(register cset *cs, register char *cp); |
*/ |
static void |
mcsub(cs, cp) |
register cset *cs; |
register char *cp; |
{ |
register char *fp = mcfind(cs, cp); |
register size_t len = strlen(fp); |
assert(fp != NULL); |
(void) memmove(fp, fp + len + 1, |
cs->smultis - (fp + len + 1 - cs->multis)); |
cs->smultis -= len; |
if (cs->smultis == 0) { |
free(cs->multis); |
cs->multis = NULL; |
return; |
} |
cs->multis = realloc(cs->multis, cs->smultis); |
assert(cs->multis != NULL); |
} |
/* |
- mcin - is a collating element in a cset? |
== static int mcin(register cset *cs, register char *cp); |
*/ |
static int |
mcin(cs, cp) |
register cset *cs; |
register char *cp; |
{ |
return(mcfind(cs, cp) != NULL); |
} |
/* |
- mcfind - find a collating element in a cset |
== static char *mcfind(register cset *cs, register char *cp); |
*/ |
static char * |
mcfind(cs, cp) |
register cset *cs; |
register char *cp; |
{ |
register char *p; |
if (cs->multis == NULL) |
return(NULL); |
for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) |
if (strcmp(cp, p) == 0) |
return(p); |
return(NULL); |
} |
/* |
- mcinvert - invert the list of collating elements in a cset |
== static void mcinvert(register struct parse *p, register cset *cs); |
* |
* This would have to know the set of possibilities. Implementation |
* is deferred. |
*/ |
static void |
mcinvert(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
assert(cs->multis == NULL); /* xxx */ |
} |
/* |
- mccase - add case counterparts of the list of collating elements in a cset |
== static void mccase(register struct parse *p, register cset *cs); |
* |
* This would have to know the set of possibilities. Implementation |
* is deferred. |
*/ |
static void |
mccase(p, cs) |
register struct parse *p; |
register cset *cs; |
{ |
assert(cs->multis == NULL); /* xxx */ |
} |
/* |
- isinsets - is this character in any sets? |
== static int isinsets(register struct re_guts *g, int c); |
*/ |
static int /* predicate */ |
isinsets(g, c) |
register struct re_guts *g; |
int c; |
{ |
register uch *col; |
register int i; |
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
register unsigned uc = (unsigned char)c; |
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
if (col[uc] != 0) |
return(1); |
return(0); |
} |
/* |
- samesets - are these two characters in exactly the same sets? |
== static int samesets(register struct re_guts *g, int c1, int c2); |
*/ |
static int /* predicate */ |
samesets(g, c1, c2) |
register struct re_guts *g; |
int c1; |
int c2; |
{ |
register uch *col; |
register int i; |
register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; |
register unsigned uc1 = (unsigned char)c1; |
register unsigned uc2 = (unsigned char)c2; |
for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) |
if (col[uc1] != col[uc2]) |
return(0); |
return(1); |
} |
/* |
- categorize - sort out character categories |
== static void categorize(struct parse *p, register struct re_guts *g); |
*/ |
static void |
categorize(p, g) |
struct parse *p; |
register struct re_guts *g; |
{ |
register cat_t *cats = g->categories; |
register int c; |
register int c2; |
register cat_t cat; |
/* avoid making error situations worse */ |
if (p->error != 0) |
return; |
for (c = CHAR_MIN; c <= CHAR_MAX; c++) |
if (cats[c] == 0 && isinsets(g, c)) { |
cat = g->ncategories++; |
cats[c] = cat; |
for (c2 = c+1; c2 <= CHAR_MAX; c2++) |
if (cats[c2] == 0 && samesets(g, c, c2)) |
cats[c2] = cat; |
} |
} |
/* |
- dupl - emit a duplicate of a bunch of sops |
== static sopno dupl(register struct parse *p, sopno start, sopno finish); |
*/ |
static sopno /* start of duplicate */ |
dupl(p, start, finish) |
register struct parse *p; |
sopno start; /* from here */ |
sopno finish; /* to this less one */ |
{ |
register sopno ret = HERE(); |
register sopno len = finish - start; |
assert(finish >= start); |
if (len == 0) |
return(ret); |
enlarge(p, p->ssize + len); /* this many unexpected additions */ |
assert(p->ssize >= p->slen + len); |
(void) memcpy((char *)(p->strip + p->slen), |
(char *)(p->strip + start), (size_t)len*sizeof(sop)); |
p->slen += len; |
return(ret); |
} |
/* |
- doemit - emit a strip operator |
== static void doemit(register struct parse *p, sop op, size_t opnd); |
* |
* It might seem better to implement this as a macro with a function as |
* hard-case backup, but it's just too big and messy unless there are |
* some changes to the data structures. Maybe later. |
*/ |
static void |
doemit(p, op, opnd) |
register struct parse *p; |
sop op; |
size_t opnd; |
{ |
/* avoid making error situations worse */ |
if (p->error != 0) |
return; |
/* deal with oversize operands ("can't happen", more or less) */ |
assert(opnd < 1<<OPSHIFT); |
/* deal with undersized strip */ |
if (p->slen >= p->ssize) |
enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ |
assert(p->slen < p->ssize); |
/* finally, it's all reduced to the easy case */ |
p->strip[p->slen++] = SOP(op, opnd); |
} |
/* |
- doinsert - insert a sop into the strip |
== static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); |
*/ |
static void |
doinsert(p, op, opnd, pos) |
register struct parse *p; |
sop op; |
size_t opnd; |
sopno pos; |
{ |
register sopno sn; |
register sop s; |
register int i; |
/* avoid making error situations worse */ |
if (p->error != 0) |
return; |
sn = HERE(); |
EMIT(op, opnd); /* do checks, ensure space */ |
assert(HERE() == sn+1); |
s = p->strip[sn]; |
/* adjust paren pointers */ |
assert(pos > 0); |
for (i = 1; i < NPAREN; i++) { |
if (p->pbegin[i] >= pos) { |
p->pbegin[i]++; |
} |
if (p->pend[i] >= pos) { |
p->pend[i]++; |
} |
} |
memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], |
(HERE()-pos-1)*sizeof(sop)); |
p->strip[pos] = s; |
} |
/* |
- dofwd - complete a forward reference |
== static void dofwd(register struct parse *p, sopno pos, sop value); |
*/ |
static void |
dofwd(p, pos, value) |
register struct parse *p; |
register sopno pos; |
sop value; |
{ |
/* avoid making error situations worse */ |
if (p->error != 0) |
return; |
assert(value < 1<<OPSHIFT); |
p->strip[pos] = OP(p->strip[pos]) | value; |
} |
/* |
- enlarge - enlarge the strip |
== static void enlarge(register struct parse *p, sopno size); |
*/ |
static void |
enlarge(p, size) |
register struct parse *p; |
register sopno size; |
{ |
register sop *sp; |
if (p->ssize >= size) |
return; |
sp = (sop *)realloc(p->strip, size*sizeof(sop)); |
if (sp == NULL) { |
SETERROR(REG_ESPACE); |
return; |
} |
p->strip = sp; |
p->ssize = size; |
} |
/* |
- stripsnug - compact the strip |
== static void stripsnug(register struct parse *p, register struct re_guts *g); |
*/ |
static void |
stripsnug(p, g) |
register struct parse *p; |
register struct re_guts *g; |
{ |
g->nstates = p->slen; |
g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); |
if (g->strip == NULL) { |
SETERROR(REG_ESPACE); |
g->strip = p->strip; |
} |
} |
/* |
- findmust - fill in must and mlen with longest mandatory literal string |
== static void findmust(register struct parse *p, register struct re_guts *g); |
* |
* This algorithm could do fancy things like analyzing the operands of | |
* for common subsequences. Someday. This code is simple and finds most |
* of the interesting cases. |
* |
* Note that must and mlen got initialized during setup. |
*/ |
static void |
findmust(p, g) |
struct parse *p; |
register struct re_guts *g; |
{ |
register sop *scan; |
sop *start; |
register sop *newstart; |
register sopno newlen; |
register sop s; |
register char *cp; |
register sopno i; |
/* avoid making error situations worse */ |
if (p->error != 0) |
return; |
/* find the longest OCHAR sequence in strip */ |
newlen = 0; |
scan = g->strip + 1; |
do { |
s = *scan++; |
switch (OP(s)) { |
case OCHAR: /* sequence member */ |
if (newlen == 0) /* new sequence */ |
newstart = scan - 1; |
newlen++; |
break; |
case OPLUS_: /* things that don't break one */ |
case OLPAREN: |
case ORPAREN: |
break; |
case OQUEST_: /* things that must be skipped */ |
case OCH_: |
scan--; |
do { |
scan += OPND(s); |
s = *scan; |
/* assert() interferes w debug printouts */ |
if (OP(s) != O_QUEST && OP(s) != O_CH && |
OP(s) != OOR2) { |
g->iflags |= BAD; |
return; |
} |
} while (OP(s) != O_QUEST && OP(s) != O_CH); |
/* fallthrough */ |
default: /* things that break a sequence */ |
if (newlen > g->mlen) { /* ends one */ |
start = newstart; |
g->mlen = newlen; |
} |
newlen = 0; |
break; |
} |
} while (OP(s) != OEND); |
if (g->mlen == 0) /* there isn't one */ |
return; |
/* turn it into a character string */ |
g->must = malloc((size_t)g->mlen + 1); |
if (g->must == NULL) { /* argh; just forget it */ |
g->mlen = 0; |
return; |
} |
cp = g->must; |
scan = start; |
for (i = g->mlen; i > 0; i--) { |
while (OP(s = *scan++) != OCHAR) |
continue; |
assert(cp < g->must + g->mlen); |
*cp++ = (char)OPND(s); |
} |
assert(cp == g->must + g->mlen); |
*cp++ = '\0'; /* just on general principles */ |
} |
/* |
- pluscount - count + nesting |
== static sopno pluscount(register struct parse *p, register struct re_guts *g); |
*/ |
static sopno /* nesting depth */ |
pluscount(p, g) |
struct parse *p; |
register struct re_guts *g; |
{ |
register sop *scan; |
register sop s; |
register sopno plusnest = 0; |
register sopno maxnest = 0; |
if (p->error != 0) |
return(0); /* there may not be an OEND */ |
scan = g->strip + 1; |
do { |
s = *scan++; |
switch (OP(s)) { |
case OPLUS_: |
plusnest++; |
break; |
case O_PLUS: |
if (plusnest > maxnest) |
maxnest = plusnest; |
plusnest--; |
break; |
} |
} while (OP(s) != OEND); |
if (plusnest != 0) |
g->iflags |= BAD; |
return(maxnest); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regcomp.ih |
---|
0,0 → 1,51 |
/* ========= begin header generated by ./mkh ========= */ |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* === regcomp.c === */ |
static void p_ere (register struct parse *p, int stop); |
static void p_ere_exp (register struct parse *p); |
static void p_str (register struct parse *p); |
static void p_bre (register struct parse *p, register int end1, register int end2); |
static int p_simp_re (register struct parse *p, int starordinary); |
static int p_count (register struct parse *p); |
static void p_bracket (register struct parse *p); |
static void p_b_term (register struct parse *p, register cset *cs); |
static void p_b_cclass (register struct parse *p, register cset *cs); |
static void p_b_eclass (register struct parse *p, register cset *cs); |
static char p_b_symbol (register struct parse *p); |
static char p_b_coll_elem (register struct parse *p, int endc); |
static char othercase (int ch); |
static void bothcases (register struct parse *p, int ch); |
static void ordinary (register struct parse *p, register int ch); |
static void nonnewline (register struct parse *p); |
static void repeat (register struct parse *p, sopno start, int from, int to); |
static int seterr (register struct parse *p, int e); |
static cset *allocset (register struct parse *p); |
static void freeset (register struct parse *p, register cset *cs); |
static int freezeset (register struct parse *p, register cset *cs); |
static int firstch (register struct parse *p, register cset *cs); |
static int nch (register struct parse *p, register cset *cs); |
static void mcadd (register struct parse *p, register cset *cs, register char *cp); |
static void mcsub (register cset *cs, register char *cp); |
static int mcin (register cset *cs, register char *cp); |
static char *mcfind (register cset *cs, register char *cp); |
static void mcinvert (register struct parse *p, register cset *cs); |
static void mccase (register struct parse *p, register cset *cs); |
static int isinsets (register struct re_guts *g, int c); |
static int samesets (register struct re_guts *g, int c1, int c2); |
static void categorize (struct parse *p, register struct re_guts *g); |
static sopno dupl (register struct parse *p, sopno start, sopno finish); |
static void doemit (register struct parse *p, sop op, size_t opnd); |
static void doinsert (register struct parse *p, sop op, size_t opnd, sopno pos); |
static void dofwd (register struct parse *p, sopno pos, sop value); |
static void enlarge (register struct parse *p, sopno size); |
static void stripsnug (register struct parse *p, register struct re_guts *g); |
static void findmust (register struct parse *p, register struct re_guts *g); |
static sopno pluscount (register struct parse *p, register struct re_guts *g); |
#ifdef __cplusplus |
} |
#endif |
/* ========= end header generated by ./mkh ========= */ |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regerror.c |
---|
0,0 → 1,127 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <sys/types.h> |
#include <stdio.h> |
#include <string.h> |
#include <ctype.h> |
#include <limits.h> |
#include <stdlib.h> |
#include <regex.h> |
#include "utils.h" |
#include "regerror.ih" |
/* |
= #define REG_NOMATCH 1 |
= #define REG_BADPAT 2 |
= #define REG_ECOLLATE 3 |
= #define REG_ECTYPE 4 |
= #define REG_EESCAPE 5 |
= #define REG_ESUBREG 6 |
= #define REG_EBRACK 7 |
= #define REG_EPAREN 8 |
= #define REG_EBRACE 9 |
= #define REG_BADBR 10 |
= #define REG_ERANGE 11 |
= #define REG_ESPACE 12 |
= #define REG_BADRPT 13 |
= #define REG_EMPTY 14 |
= #define REG_ASSERT 15 |
= #define REG_INVARG 16 |
= #define REG_ATOI 255 // convert name to number (!) |
= #define REG_ITOA 0400 // convert number to name (!) |
*/ |
static struct rerr { |
int code; |
char *name; |
char *explain; |
} rerrs[] = { |
REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", |
REG_BADPAT, "REG_BADPAT", "invalid regular expression", |
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", |
REG_ECTYPE, "REG_ECTYPE", "invalid character class", |
REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", |
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", |
REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", |
REG_EPAREN, "REG_EPAREN", "parentheses not balanced", |
REG_EBRACE, "REG_EBRACE", "braces not balanced", |
REG_BADBR, "REG_BADBR", "invalid repetition count(s)", |
REG_ERANGE, "REG_ERANGE", "invalid character range", |
REG_ESPACE, "REG_ESPACE", "out of memory", |
REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", |
REG_EMPTY, "REG_EMPTY", "empty (sub)expression", |
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", |
REG_INVARG, "REG_INVARG", "invalid argument to regex routine", |
0, "", "*** unknown regexp error code ***", |
}; |
/* |
- regerror - the interface to error numbers |
= extern size_t regerror(int, const regex_t *, char *, size_t); |
*/ |
/* ARGSUSED */ |
size_t |
regerror(errcode, preg, errbuf, errbuf_size) |
int errcode; |
const regex_t *preg; |
char *errbuf; |
size_t errbuf_size; |
{ |
register struct rerr *r; |
register size_t len; |
register int target = errcode &~ REG_ITOA; |
register char *s; |
char convbuf[50]; |
if (errcode == REG_ATOI) |
s = regatoi(preg, convbuf); |
else { |
for (r = rerrs; r->code != 0; r++) |
if (r->code == target) |
break; |
if (errcode®_ITOA) { |
if (r->code != 0) |
(void) strcpy(convbuf, r->name); |
else |
sprintf(convbuf, "REG_0x%x", target); |
assert(strlen(convbuf) < sizeof(convbuf)); |
s = convbuf; |
} else |
s = r->explain; |
} |
len = strlen(s) + 1; |
if (errbuf_size > 0) { |
if (errbuf_size > len) |
(void) strcpy(errbuf, s); |
else { |
(void) strncpy(errbuf, s, errbuf_size-1); |
errbuf[errbuf_size-1] = '\0'; |
} |
} |
return(len); |
} |
/* |
- regatoi - internal routine to implement REG_ATOI |
== static char *regatoi(const regex_t *preg, char *localbuf); |
*/ |
static char * |
regatoi(preg, localbuf) |
const regex_t *preg; |
char *localbuf; |
{ |
register struct rerr *r; |
register size_t siz; |
register char *p; |
for (r = rerrs; r->code != 0; r++) |
if (strcmp(r->name, preg->re_endp) == 0) |
break; |
if (r->code == 0) |
return("0"); |
sprintf(localbuf, "%d", r->code); |
return(localbuf); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regerror.ih |
---|
0,0 → 1,12 |
/* ========= begin header generated by ./mkh ========= */ |
#ifdef __cplusplus |
extern "C" { |
#endif |
/* === regerror.c === */ |
static char *regatoi (const regex_t *preg, char *localbuf); |
#ifdef __cplusplus |
} |
#endif |
/* ========= end header generated by ./mkh ========= */ |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regex2.h |
---|
0,0 → 1,135 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* |
* First, the stuff that ends up in the outside-world include file |
= typedef off_t regoff_t; |
= typedef struct { |
= int re_magic; |
= size_t re_nsub; // number of parenthesized subexpressions |
= const char *re_endp; // end pointer for REG_PEND |
= struct re_guts *re_g; // none of your business :-) |
= } regex_t; |
= typedef struct { |
= regoff_t rm_so; // start of match |
= regoff_t rm_eo; // end of match |
= } regmatch_t; |
*/ |
/* |
* internals of regex_t |
*/ |
#define MAGIC1 ((('r'^0200)<<8) | 'e') |
/* |
* The internal representation is a *strip*, a sequence of |
* operators ending with an endmarker. (Some terminology etc. is a |
* historical relic of earlier versions which used multiple strips.) |
* Certain oddities in the representation are there to permit running |
* the machinery backwards; in particular, any deviation from sequential |
* flow must be marked at both its source and its destination. Some |
* fine points: |
* |
* - OPLUS_ and O_PLUS are *inside* the loop they create. |
* - OQUEST_ and O_QUEST are *outside* the bypass they create. |
* - OCH_ and O_CH are *outside* the multi-way branch they create, while |
* OOR1 and OOR2 are respectively the end and the beginning of one of |
* the branches. Note that there is an implicit OOR2 following OCH_ |
* and an implicit OOR1 preceding O_CH. |
* |
* In state representations, an operator's bit is on to signify a state |
* immediately *preceding* "execution" of that operator. |
*/ |
typedef unsigned long sop; /* strip operator */ |
typedef long sopno; |
#define OPRMASK 0xf8000000 |
#define OPDMASK 0x07ffffff |
#define OPSHIFT ((unsigned)27) |
#define OP(n) ((n)&OPRMASK) |
#define OPND(n) ((n)&OPDMASK) |
#define SOP(op, opnd) ((op)|(opnd)) |
/* operators meaning operand */ |
/* (back, fwd are offsets) */ |
#define OEND (1<<OPSHIFT) /* endmarker - */ |
#define OCHAR (2<<OPSHIFT) /* character unsigned char */ |
#define OBOL (3<<OPSHIFT) /* left anchor - */ |
#define OEOL (4<<OPSHIFT) /* right anchor - */ |
#define OANY (5<<OPSHIFT) /* . - */ |
#define OANYOF (6<<OPSHIFT) /* [...] set number */ |
#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */ |
#define O_BACK (8<<OPSHIFT) /* end \d paren number */ |
#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */ |
#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */ |
#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */ |
#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */ |
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */ |
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */ |
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */ |
#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ |
#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ |
#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */ |
#define OBOW (19<<OPSHIFT) /* begin word - */ |
#define OEOW (20<<OPSHIFT) /* end word - */ |
/* |
* Structure for [] character-set representation. Character sets are |
* done as bit vectors, grouped 8 to a byte vector for compactness. |
* The individual set therefore has both a pointer to the byte vector |
* and a mask to pick out the relevant bit of each byte. A hash code |
* simplifies testing whether two sets could be identical. |
* |
* This will get trickier for multicharacter collating elements. As |
* preliminary hooks for dealing with such things, we also carry along |
* a string of multi-character elements, and decide the size of the |
* vectors at run time. |
*/ |
typedef struct { |
uch *ptr; /* -> uch [csetsize] */ |
uch mask; /* bit within array */ |
uch hash; /* hash code */ |
size_t smultis; |
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ |
} cset; |
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ |
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) |
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) |
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) |
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ |
#define MCsub(p, cs, cp) mcsub(p, cs, cp) |
#define MCin(p, cs, cp) mcin(p, cs, cp) |
/* stuff for character categories */ |
typedef unsigned char cat_t; |
/* |
* main compiled-expression structure |
*/ |
struct re_guts { |
int magic; |
# define MAGIC2 ((('R'^0200)<<8)|'E') |
sop *strip; /* malloced area for strip */ |
int csetsize; /* number of bits in a cset vector */ |
int ncsets; /* number of csets in use */ |
cset *sets; /* -> cset [ncsets] */ |
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ |
int cflags; /* copy of regcomp() cflags argument */ |
sopno nstates; /* = number of sops */ |
sopno firststate; /* the initial OEND (normally 0) */ |
sopno laststate; /* the final OEND */ |
int iflags; /* internal flags */ |
# define USEBOL 01 /* used ^ */ |
# define USEEOL 02 /* used $ */ |
# define BAD 04 /* something wrong */ |
int nbol; /* number of ^ used */ |
int neol; /* number of $ used */ |
int ncategories; /* how many character categories */ |
cat_t *categories; /* ->catspace[-CHAR_MIN] */ |
char *must; /* match must contain this string */ |
int mlen; /* length of must */ |
size_t nsub; /* copy of re_nsub */ |
int backrefs; /* does it use back references? */ |
sopno nplus; /* how deep does it nest +s? */ |
/* catspace must be last */ |
cat_t catspace[1]; /* actually [NC] */ |
}; |
/* misc utilities */ |
#define OUT (CHAR_MAX+1) /* a non-character value */ |
#define ISWORD(c) (isalnum(c) || (c) == '_') |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regexec.c |
---|
0,0 → 1,139 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* |
* the outer shell of regexec() |
* |
* This file includes engine.c *twice*, after muchos fiddling with the |
* macros that code uses. This lets the same code operate on two different |
* representations for state sets. |
*/ |
#include <sys/types.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
#include <limits.h> |
#include <ctype.h> |
#include <regex.h> |
#include "utils.h" |
#include "regex2.h" |
static int nope = 0; /* for use in asserts; shuts lint up */ |
/* macros for manipulating states, small version */ |
#define states long |
#define states1 states /* for later use in regexec() decision */ |
#define CLEAR(v) ((v) = 0) |
#define SET0(v, n) ((v) &= ~(1 << (n))) |
#define SET1(v, n) ((v) |= 1 << (n)) |
#define ISSET(v, n) ((v) & (1 << (n))) |
#define ASSIGN(d, s) ((d) = (s)) |
#define EQ(a, b) ((a) == (b)) |
#define STATEVARS int dummy /* dummy version */ |
#define STATESETUP(m, n) /* nothing */ |
#define STATETEARDOWN(m) /* nothing */ |
#define SETUP(v) ((v) = 0) |
#define onestate int |
#define INIT(o, n) ((o) = (unsigned)1 << (n)) |
#define INC(o) ((o) <<= 1) |
#define ISSTATEIN(v, o) ((v) & (o)) |
/* some abbreviations; note that some of these know variable names! */ |
/* do "if I'm here, I can also be there" etc without branches */ |
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) |
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) |
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) |
/* function names */ |
#define SNAMES /* engine.c looks after details */ |
#include "engine.c" |
/* now undo things */ |
#undef states |
#undef CLEAR |
#undef SET0 |
#undef SET1 |
#undef ISSET |
#undef ASSIGN |
#undef EQ |
#undef STATEVARS |
#undef STATESETUP |
#undef STATETEARDOWN |
#undef SETUP |
#undef onestate |
#undef INIT |
#undef INC |
#undef ISSTATEIN |
#undef FWD |
#undef BACK |
#undef ISSETBACK |
#undef SNAMES |
/* macros for manipulating states, large version */ |
#define states char * |
#define CLEAR(v) memset(v, 0, m->g->nstates) |
#define SET0(v, n) ((v)[n] = 0) |
#define SET1(v, n) ((v)[n] = 1) |
#define ISSET(v, n) ((v)[n]) |
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) |
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) |
#define STATEVARS int vn; char *space |
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ |
if ((m)->space == NULL) return(REG_ESPACE); \ |
(m)->vn = 0; } |
#define STATETEARDOWN(m) { free((m)->space); } |
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) |
#define onestate int |
#define INIT(o, n) ((o) = (n)) |
#define INC(o) ((o)++) |
#define ISSTATEIN(v, o) ((v)[o]) |
/* some abbreviations; note that some of these know variable names! */ |
/* do "if I'm here, I can also be there" etc without branches */ |
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) |
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) |
#define ISSETBACK(v, n) ((v)[here - (n)]) |
/* function names */ |
#define LNAMES /* flag */ |
#include "engine.c" |
/* |
- regexec - interface for matching |
= extern int regexec(const regex_t *, const char *, size_t, \ |
= regmatch_t [], int); |
= #define REG_NOTBOL 00001 |
= #define REG_NOTEOL 00002 |
= #define REG_STARTEND 00004 |
= #define REG_TRACE 00400 // tracing of execution |
= #define REG_LARGE 01000 // force large representation |
= #define REG_BACKR 02000 // force use of backref code |
* |
* We put this here so we can exploit knowledge of the state representation |
* when choosing which matcher to call. Also, by this point the matchers |
* have been prototyped. |
*/ |
int /* 0 success, REG_NOMATCH failure */ |
regexec(preg, string, nmatch, pmatch, eflags) |
const regex_t *preg; |
const char *string; |
size_t nmatch; |
regmatch_t pmatch[]; |
int eflags; |
{ |
register struct re_guts *g = preg->re_g; |
#ifdef REDEBUG |
# define GOODFLAGS(f) (f) |
#else |
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) |
#endif |
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) |
return(REG_BADPAT); |
assert(!(g->iflags&BAD)); |
if (g->iflags&BAD) /* backstop for no-debug case */ |
return(REG_BADPAT); |
eflags = GOODFLAGS(eflags); |
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) |
return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); |
else |
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/regfree.c |
---|
0,0 → 1,38 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <sys/types.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <regex.h> |
#include "utils.h" |
#include "regex2.h" |
/* |
- regfree - free everything |
= extern void regfree(regex_t *); |
*/ |
void |
regfree(preg) |
regex_t *preg; |
{ |
register struct re_guts *g; |
if (preg->re_magic != MAGIC1) /* oops */ |
return; /* nice to complain, but hard */ |
g = preg->re_g; |
if (g == NULL || g->magic != MAGIC2) /* oops again */ |
return; |
preg->re_magic = 0; /* mark it invalid */ |
g->magic = 0; /* mark it invalid */ |
if (g->strip != NULL) |
free((char *)g->strip); |
if (g->sets != NULL) |
free((char *)g->sets); |
if (g->setbits != NULL) |
free((char *)g->setbits); |
if (g->must != NULL) |
free(g->must); |
free((char *)g); |
} |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/split.c |
---|
0,0 → 1,317 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
#include <stdio.h> |
#include <string.h> |
/* |
- split - divide a string into fields, like awk split() |
= int split(char *string, char *fields[], int nfields, char *sep); |
*/ |
int /* number of fields, including overflow */ |
split(string, fields, nfields, sep) |
char *string; |
char *fields[]; /* list is not NULL-terminated */ |
int nfields; /* number of entries available in fields[] */ |
char *sep; /* "" white, "c" single char, "ab" [ab]+ */ |
{ |
register char *p = string; |
register char c; /* latest character */ |
register char sepc = sep[0]; |
register char sepc2; |
register int fn; |
register char **fp = fields; |
register char *sepp; |
register int trimtrail; |
/* white space */ |
if (sepc == '\0') { |
while ((c = *p++) == ' ' || c == '\t') |
continue; |
p--; |
trimtrail = 1; |
sep = " \t"; /* note, code below knows this is 2 long */ |
sepc = ' '; |
} else |
trimtrail = 0; |
sepc2 = sep[1]; /* now we can safely pick this up */ |
/* catch empties */ |
if (*p == '\0') |
return(0); |
/* single separator */ |
if (sepc2 == '\0') { |
fn = nfields; |
for (;;) { |
*fp++ = p; |
fn--; |
if (fn == 0) |
break; |
while ((c = *p++) != sepc) |
if (c == '\0') |
return(nfields - fn); |
*(p-1) = '\0'; |
} |
/* we have overflowed the fields vector -- just count them */ |
fn = nfields; |
for (;;) { |
while ((c = *p++) != sepc) |
if (c == '\0') |
return(fn); |
fn++; |
} |
/* not reached */ |
} |
/* two separators */ |
if (sep[2] == '\0') { |
fn = nfields; |
for (;;) { |
*fp++ = p; |
fn--; |
while ((c = *p++) != sepc && c != sepc2) |
if (c == '\0') { |
if (trimtrail && **(fp-1) == '\0') |
fn++; |
return(nfields - fn); |
} |
if (fn == 0) |
break; |
*(p-1) = '\0'; |
while ((c = *p++) == sepc || c == sepc2) |
continue; |
p--; |
} |
/* we have overflowed the fields vector -- just count them */ |
fn = nfields; |
while (c != '\0') { |
while ((c = *p++) == sepc || c == sepc2) |
continue; |
p--; |
fn++; |
while ((c = *p++) != '\0' && c != sepc && c != sepc2) |
continue; |
} |
/* might have to trim trailing white space */ |
if (trimtrail) { |
p--; |
while ((c = *--p) == sepc || c == sepc2) |
continue; |
p++; |
if (*p != '\0') { |
if (fn == nfields+1) |
*p = '\0'; |
fn--; |
} |
} |
return(fn); |
} |
/* n separators */ |
fn = 0; |
for (;;) { |
if (fn < nfields) |
*fp++ = p; |
fn++; |
for (;;) { |
c = *p++; |
if (c == '\0') |
return(fn); |
sepp = sep; |
while ((sepc = *sepp++) != '\0' && sepc != c) |
continue; |
if (sepc != '\0') /* it was a separator */ |
break; |
} |
if (fn < nfields) |
*(p-1) = '\0'; |
for (;;) { |
c = *p++; |
sepp = sep; |
while ((sepc = *sepp++) != '\0' && sepc != c) |
continue; |
if (sepc == '\0') /* it wasn't a separator */ |
break; |
} |
p--; |
} |
/* not reached */ |
} |
#ifdef TEST_SPLIT |
/* |
* test program |
* pgm runs regression |
* pgm sep splits stdin lines by sep |
* pgm str sep splits str by sep |
* pgm str sep n splits str by sep n times |
*/ |
int |
main(argc, argv) |
int argc; |
char *argv[]; |
{ |
char buf[512]; |
register int n; |
# define MNF 10 |
char *fields[MNF]; |
if (argc > 4) |
for (n = atoi(argv[3]); n > 0; n--) { |
(void) strcpy(buf, argv[1]); |
} |
else if (argc > 3) |
for (n = atoi(argv[3]); n > 0; n--) { |
(void) strcpy(buf, argv[1]); |
(void) split(buf, fields, MNF, argv[2]); |
} |
else if (argc > 2) |
dosplit(argv[1], argv[2]); |
else if (argc > 1) |
while (fgets(buf, sizeof(buf), stdin) != NULL) { |
buf[strlen(buf)-1] = '\0'; /* stomp newline */ |
dosplit(buf, argv[1]); |
} |
else |
regress(); |
exit(0); |
} |
dosplit(string, seps) |
char *string; |
char *seps; |
{ |
# define NF 5 |
char *fields[NF]; |
register int nf; |
nf = split(string, fields, NF, seps); |
print(nf, NF, fields); |
} |
print(nf, nfp, fields) |
int nf; |
int nfp; |
char *fields[]; |
{ |
register int fn; |
register int bound; |
bound = (nf > nfp) ? nfp : nf; |
printf("%d:\t", nf); |
for (fn = 0; fn < bound; fn++) |
printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); |
} |
#define RNF 5 /* some table entries know this */ |
struct { |
char *str; |
char *seps; |
int nf; |
char *fi[RNF]; |
} tests[] = { |
"", " ", 0, { "" }, |
" ", " ", 2, { "", "" }, |
"x", " ", 1, { "x" }, |
"xy", " ", 1, { "xy" }, |
"x y", " ", 2, { "x", "y" }, |
"abc def g ", " ", 5, { "abc", "def", "", "g", "" }, |
" a bcd", " ", 4, { "", "", "a", "bcd" }, |
"a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, |
" a b c d ", " ", 6, { "", "a", "b", "c", "d " }, |
"", " _", 0, { "" }, |
" ", " _", 2, { "", "" }, |
"x", " _", 1, { "x" }, |
"x y", " _", 2, { "x", "y" }, |
"ab _ cd", " _", 2, { "ab", "cd" }, |
" a_b c ", " _", 5, { "", "a", "b", "c", "" }, |
"a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, |
" a b c d ", " _", 6, { "", "a", "b", "c", "d " }, |
"", " _~", 0, { "" }, |
" ", " _~", 2, { "", "" }, |
"x", " _~", 1, { "x" }, |
"x y", " _~", 2, { "x", "y" }, |
"ab _~ cd", " _~", 2, { "ab", "cd" }, |
" a_b c~", " _~", 5, { "", "a", "b", "c", "" }, |
"a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, |
"~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, |
"", " _~-", 0, { "" }, |
" ", " _~-", 2, { "", "" }, |
"x", " _~-", 1, { "x" }, |
"x y", " _~-", 2, { "x", "y" }, |
"ab _~- cd", " _~-", 2, { "ab", "cd" }, |
" a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, |
"a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, |
"~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, |
"", " ", 0, { "" }, |
" ", " ", 2, { "", "" }, |
"x", " ", 1, { "x" }, |
"xy", " ", 1, { "xy" }, |
"x y", " ", 2, { "x", "y" }, |
"abc def g ", " ", 4, { "abc", "def", "g", "" }, |
" a bcd", " ", 3, { "", "a", "bcd" }, |
"a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, |
" a b c d ", " ", 6, { "", "a", "b", "c", "d " }, |
"", "", 0, { "" }, |
" ", "", 0, { "" }, |
"x", "", 1, { "x" }, |
"xy", "", 1, { "xy" }, |
"x y", "", 2, { "x", "y" }, |
"abc def g ", "", 3, { "abc", "def", "g" }, |
"\t a bcd", "", 2, { "a", "bcd" }, |
" a \tb\t c ", "", 3, { "a", "b", "c" }, |
"a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, |
"a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, |
" a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, |
NULL, NULL, 0, { NULL }, |
}; |
regress() |
{ |
char buf[512]; |
register int n; |
char *fields[RNF+1]; |
register int nf; |
register int i; |
register int printit; |
register char *f; |
for (n = 0; tests[n].str != NULL; n++) { |
(void) strcpy(buf, tests[n].str); |
fields[RNF] = NULL; |
nf = split(buf, fields, RNF, tests[n].seps); |
printit = 0; |
if (nf != tests[n].nf) { |
printf("split `%s' by `%s' gave %d fields, not %d\n", |
tests[n].str, tests[n].seps, nf, tests[n].nf); |
printit = 1; |
} else if (fields[RNF] != NULL) { |
printf("split() went beyond array end\n"); |
printit = 1; |
} else { |
for (i = 0; i < nf && i < RNF; i++) { |
f = fields[i]; |
if (f == NULL) |
f = "(NULL)"; |
if (strcmp(f, tests[n].fi[i]) != 0) { |
printf("split `%s' by `%s', field %d is `%s', not `%s'\n", |
tests[n].str, tests[n].seps, |
i, fields[i], tests[n].fi[i]); |
printit = 1; |
} |
} |
} |
if (printit) |
print(nf, RNF, fields); |
} |
} |
#endif |
/programs/develop/libraries/menuetlibc/src/libc/posix/regex/utils.h |
---|
0,0 → 1,19 |
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */ |
/* utility definitions */ |
#define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ |
#define INFINITY (DUPMAX + 1) |
#define NC (CHAR_MAX - CHAR_MIN + 1) |
typedef unsigned char uch; |
/* switch off assertions (if not already off) if no REDEBUG */ |
#ifndef REDEBUG |
#ifndef NDEBUG |
#define NDEBUG /* no assertions please */ |
#endif |
#endif |
#include <assert.h> |
/* for old systems with bcopy() but no memmove() */ |
#ifdef USEBCOPY |
#define memmove(d, s, c) bcopy(s, d, c) |
#endif |