Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4973 right-hear 1
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
2
/*
3
 * The matching engine and friends.  This file is #included by regexec.c
4
 * after suitable #defines of a variety of macros used herein, so that
5
 * different state representations can be used without duplicating masses
6
 * of code.
7
 */
8
 
9
#ifdef SNAMES
10
#define	matcher	smatcher
11
#define	fast	sfast
12
#define	slow	sslow
13
#define	dissect	sdissect
14
#define	backref	sbackref
15
#define	step	sstep
16
#define	print	sprint
17
#define	at	sat
18
#define	match	smat
19
#endif
20
#ifdef LNAMES
21
#define	matcher	lmatcher
22
#define	fast	lfast
23
#define	slow	lslow
24
#define	dissect	ldissect
25
#define	backref	lbackref
26
#define	step	lstep
27
#define	print	lprint
28
#define	at	lat
29
#define	match	lmat
30
#endif
31
 
32
/* another structure passed up and down to avoid zillions of parameters */
33
struct match {
34
	struct re_guts *g;
35
	int eflags;
36
	regmatch_t *pmatch;	/* [nsub+1] (0 element unused) */
37
	char *offp;		/* offsets work from here */
38
	char *beginp;		/* start of string -- virtual NUL precedes */
39
	char *endp;		/* end of string -- virtual NUL here */
40
	char *coldp;		/* can be no match starting before here */
41
	char **lastpos;		/* [nplus+1] */
42
	STATEVARS;
43
	states st;		/* current states */
44
	states fresh;		/* states for a fresh start */
45
	states tmp;		/* temporary */
46
	states empty;		/* empty set of states */
47
};
48
 
49
#include "engine.ih"
50
 
51
#ifdef REDEBUG
52
#define	SP(t, s, c)	print(m, t, s, c, stdout)
53
#define	AT(t, p1, p2, s1, s2)	at(m, t, p1, p2, s1, s2)
54
#define	NOTE(str)	{ if (m->eflags®_TRACE) printf("=%s\n", (str)); }
55
#else
56
#define	SP(t, s, c)	/* nothing */
57
#define	AT(t, p1, p2, s1, s2)	/* nothing */
58
#define	NOTE(s)	/* nothing */
59
#endif
60
 
61
/*
62
 - matcher - the actual matching engine
63
 == static int matcher(register struct re_guts *g, char *string, \
64
 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
65
 */
66
static int			/* 0 success, REG_NOMATCH failure */
67
matcher(g, string, nmatch, pmatch, eflags)
68
register struct re_guts *g;
69
char *string;
70
size_t nmatch;
71
regmatch_t pmatch[];
72
int eflags;
73
{
74
	register char *endp;
75
	register int i;
76
	struct match mv;
77
	register struct match *m = &mv;
78
	register char *dp;
79
	const register sopno gf = g->firststate+1;	/* +1 for OEND */
80
	const register sopno gl = g->laststate;
81
	char *start;
82
	char *stop;
83
 
84
	/* simplify the situation where possible */
85
	if (g->cflags®_NOSUB)
86
		nmatch = 0;
87
	if (eflags®_STARTEND) {
88
		start = string + pmatch[0].rm_so;
89
		stop = string + pmatch[0].rm_eo;
90
	} else {
91
		start = string;
92
		stop = start + strlen(start);
93
	}
94
	if (stop < start)
95
		return(REG_INVARG);
96
 
97
	/* prescreening; this does wonders for this rather slow code */
98
	if (g->must != NULL) {
99
		for (dp = start; dp < stop; dp++)
100
			if (*dp == g->must[0] && stop - dp >= g->mlen &&
101
				memcmp(dp, g->must, (size_t)g->mlen) == 0)
102
				break;
103
		if (dp == stop)		/* we didn't find g->must */
104
			return(REG_NOMATCH);
105
	}
106
 
107
	/* match struct setup */
108
	m->g = g;
109
	m->eflags = eflags;
110
	m->pmatch = NULL;
111
	m->lastpos = NULL;
112
	m->offp = string;
113
	m->beginp = start;
114
	m->endp = stop;
115
	STATESETUP(m, 4);
116
	SETUP(m->st);
117
	SETUP(m->fresh);
118
	SETUP(m->tmp);
119
	SETUP(m->empty);
120
	CLEAR(m->empty);
121
 
122
	/* this loop does only one repetition except for backrefs */
123
	for (;;) {
124
		endp = fast(m, start, stop, gf, gl);
125
		if (endp == NULL) {		/* a miss */
126
			STATETEARDOWN(m);
127
			return(REG_NOMATCH);
128
		}
129
		if (nmatch == 0 && !g->backrefs)
130
			break;		/* no further info needed */
131
 
132
		/* where? */
133
		assert(m->coldp != NULL);
134
		for (;;) {
135
			NOTE("finding start");
136
			endp = slow(m, m->coldp, stop, gf, gl);
137
			if (endp != NULL)
138
				break;
139
			assert(m->coldp < m->endp);
140
			m->coldp++;
141
		}
142
		if (nmatch == 1 && !g->backrefs)
143
			break;		/* no further info needed */
144
 
145
		/* oh my, he wants the subexpressions... */
146
		if (m->pmatch == NULL)
147
			m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
148
							sizeof(regmatch_t));
149
		if (m->pmatch == NULL) {
150
			STATETEARDOWN(m);
151
			return(REG_ESPACE);
152
		}
153
		for (i = 1; i <= m->g->nsub; i++)
154
			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
155
		if (!g->backrefs && !(m->eflags®_BACKR)) {
156
			NOTE("dissecting");
157
			dp = dissect(m, m->coldp, endp, gf, gl);
158
		} else {
159
			if (g->nplus > 0 && m->lastpos == NULL)
160
				m->lastpos = (char **)malloc((g->nplus+1) *
161
							sizeof(char *));
162
			if (g->nplus > 0 && m->lastpos == NULL) {
163
				free(m->pmatch);
164
				STATETEARDOWN(m);
165
				return(REG_ESPACE);
166
			}
167
			NOTE("backref dissect");
168
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
169
		}
170
		if (dp != NULL)
171
			break;
172
 
173
		/* uh-oh... we couldn't find a subexpression-level match */
174
		assert(g->backrefs);	/* must be back references doing it */
175
		assert(g->nplus == 0 || m->lastpos != NULL);
176
		for (;;) {
177
			if (dp != NULL || endp <= m->coldp)
178
				break;		/* defeat */
179
			NOTE("backoff");
180
			endp = slow(m, m->coldp, endp-1, gf, gl);
181
			if (endp == NULL)
182
				break;		/* defeat */
183
			/* try it on a shorter possibility */
184
#ifndef NDEBUG
185
			for (i = 1; i <= m->g->nsub; i++) {
186
				assert(m->pmatch[i].rm_so == -1);
187
				assert(m->pmatch[i].rm_eo == -1);
188
			}
189
#endif
190
			NOTE("backoff dissect");
191
			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
192
		}
193
		assert(dp == NULL || dp == endp);
194
		if (dp != NULL)		/* found a shorter one */
195
			break;
196
 
197
		/* despite initial appearances, there is no match here */
198
		NOTE("false alarm");
199
		start = m->coldp + 1;	/* recycle starting later */
200
		assert(start <= stop);
201
	}
202
 
203
	/* fill in the details if requested */
204
	if (nmatch > 0) {
205
		pmatch[0].rm_so = m->coldp - m->offp;
206
		pmatch[0].rm_eo = endp - m->offp;
207
	}
208
	if (nmatch > 1) {
209
		assert(m->pmatch != NULL);
210
		for (i = 1; i < nmatch; i++)
211
			if (i <= m->g->nsub)
212
				pmatch[i] = m->pmatch[i];
213
			else {
214
				pmatch[i].rm_so = -1;
215
				pmatch[i].rm_eo = -1;
216
			}
217
	}
218
 
219
	if (m->pmatch != NULL)
220
		free((char *)m->pmatch);
221
	if (m->lastpos != NULL)
222
		free((char *)m->lastpos);
223
	STATETEARDOWN(m);
224
	return(0);
225
}
226
 
227
/*
228
 - dissect - figure out what matched what, no back references
229
 == static char *dissect(register struct match *m, char *start, \
230
 ==	char *stop, sopno startst, sopno stopst);
231
 */
232
static char *			/* == stop (success) always */
233
dissect(m, start, stop, startst, stopst)
234
register struct match *m;
235
char *start;
236
char *stop;
237
sopno startst;
238
sopno stopst;
239
{
240
	register int i;
241
	register sopno ss;	/* start sop of current subRE */
242
	register sopno es;	/* end sop of current subRE */
243
	register char *sp;	/* start of string matched by it */
244
	register char *stp;	/* string matched by it cannot pass here */
245
	register char *rest;	/* start of rest of string */
246
	register char *tail;	/* string unmatched by rest of RE */
247
	register sopno ssub;	/* start sop of subsubRE */
248
	register sopno esub;	/* end sop of subsubRE */
249
	register char *ssp;	/* start of string matched by subsubRE */
250
	register char *sep;	/* end of string matched by subsubRE */
251
	register char *oldssp;	/* previous ssp */
252
	register char *dp;
253
 
254
	AT("diss", start, stop, startst, stopst);
255
	sp = start;
256
	for (ss = startst; ss < stopst; ss = es) {
257
		/* identify end of subRE */
258
		es = ss;
259
		switch (OP(m->g->strip[es])) {
260
		case OPLUS_:
261
		case OQUEST_:
262
			es += OPND(m->g->strip[es]);
263
			break;
264
		case OCH_:
265
			while (OP(m->g->strip[es]) != O_CH)
266
				es += OPND(m->g->strip[es]);
267
			break;
268
		}
269
		es++;
270
 
271
		/* figure out what it matched */
272
		switch (OP(m->g->strip[ss])) {
273
		case OEND:
274
			assert(nope);
275
			break;
276
		case OCHAR:
277
			sp++;
278
			break;
279
		case OBOL:
280
		case OEOL:
281
		case OBOW:
282
		case OEOW:
283
			break;
284
		case OANY:
285
		case OANYOF:
286
			sp++;
287
			break;
288
		case OBACK_:
289
		case O_BACK:
290
			assert(nope);
291
			break;
292
		/* cases where length of match is hard to find */
293
		case OQUEST_:
294
			stp = stop;
295
			for (;;) {
296
				/* how long could this one be? */
297
				rest = slow(m, sp, stp, ss, es);
298
				assert(rest != NULL);	/* it did match */
299
				/* could the rest match the rest? */
300
				tail = slow(m, rest, stop, es, stopst);
301
				if (tail == stop)
302
					break;		/* yes! */
303
				/* no -- try a shorter match for this one */
304
				stp = rest - 1;
305
				assert(stp >= sp);	/* it did work */
306
			}
307
			ssub = ss + 1;
308
			esub = es - 1;
309
			/* did innards match? */
310
			if (slow(m, sp, rest, ssub, esub) != NULL) {
311
				dp = dissect(m, sp, rest, ssub, esub);
312
				assert(dp == rest);
313
			} else		/* no */
314
				assert(sp == rest);
315
			sp = rest;
316
			break;
317
		case OPLUS_:
318
			stp = stop;
319
			for (;;) {
320
				/* how long could this one be? */
321
				rest = slow(m, sp, stp, ss, es);
322
				assert(rest != NULL);	/* it did match */
323
				/* could the rest match the rest? */
324
				tail = slow(m, rest, stop, es, stopst);
325
				if (tail == stop)
326
					break;		/* yes! */
327
				/* no -- try a shorter match for this one */
328
				stp = rest - 1;
329
				assert(stp >= sp);	/* it did work */
330
			}
331
			ssub = ss + 1;
332
			esub = es - 1;
333
			ssp = sp;
334
			oldssp = ssp;
335
			for (;;) {	/* find last match of innards */
336
				sep = slow(m, ssp, rest, ssub, esub);
337
				if (sep == NULL || sep == ssp)
338
					break;	/* failed or matched null */
339
				oldssp = ssp;	/* on to next try */
340
				ssp = sep;
341
			}
342
			if (sep == NULL) {
343
				/* last successful match */
344
				sep = ssp;
345
				ssp = oldssp;
346
			}
347
			assert(sep == rest);	/* must exhaust substring */
348
			assert(slow(m, ssp, sep, ssub, esub) == rest);
349
			dp = dissect(m, ssp, sep, ssub, esub);
350
			assert(dp == sep);
351
			sp = rest;
352
			break;
353
		case OCH_:
354
			stp = stop;
355
			for (;;) {
356
				/* how long could this one be? */
357
				rest = slow(m, sp, stp, ss, es);
358
				assert(rest != NULL);	/* it did match */
359
				/* could the rest match the rest? */
360
				tail = slow(m, rest, stop, es, stopst);
361
				if (tail == stop)
362
					break;		/* yes! */
363
				/* no -- try a shorter match for this one */
364
				stp = rest - 1;
365
				assert(stp >= sp);	/* it did work */
366
			}
367
			ssub = ss + 1;
368
			esub = ss + OPND(m->g->strip[ss]) - 1;
369
			assert(OP(m->g->strip[esub]) == OOR1);
370
			for (;;) {	/* find first matching branch */
371
				if (slow(m, sp, rest, ssub, esub) == rest)
372
					break;	/* it matched all of it */
373
				/* that one missed, try next one */
374
				assert(OP(m->g->strip[esub]) == OOR1);
375
				esub++;
376
				assert(OP(m->g->strip[esub]) == OOR2);
377
				ssub = esub + 1;
378
				esub += OPND(m->g->strip[esub]);
379
				if (OP(m->g->strip[esub]) == OOR2)
380
					esub--;
381
				else
382
					assert(OP(m->g->strip[esub]) == O_CH);
383
			}
384
			dp = dissect(m, sp, rest, ssub, esub);
385
			assert(dp == rest);
386
			sp = rest;
387
			break;
388
		case O_PLUS:
389
		case O_QUEST:
390
		case OOR1:
391
		case OOR2:
392
		case O_CH:
393
			assert(nope);
394
			break;
395
		case OLPAREN:
396
			i = OPND(m->g->strip[ss]);
397
			assert(0 < i && i <= m->g->nsub);
398
			m->pmatch[i].rm_so = sp - m->offp;
399
			break;
400
		case ORPAREN:
401
			i = OPND(m->g->strip[ss]);
402
			assert(0 < i && i <= m->g->nsub);
403
			m->pmatch[i].rm_eo = sp - m->offp;
404
			break;
405
		default:		/* uh oh */
406
			assert(nope);
407
			break;
408
		}
409
	}
410
 
411
	assert(sp == stop);
412
	return(sp);
413
}
414
 
415
/*
416
 - backref - figure out what matched what, figuring in back references
417
 == static char *backref(register struct match *m, char *start, \
418
 ==	char *stop, sopno startst, sopno stopst, sopno lev);
419
 */
420
static char *			/* == stop (success) or NULL (failure) */
421
backref(m, start, stop, startst, stopst, lev)
422
register struct match *m;
423
char *start;
424
char *stop;
425
sopno startst;
426
sopno stopst;
427
sopno lev;			/* PLUS nesting level */
428
{
429
	register int i;
430
	register sopno ss;	/* start sop of current subRE */
431
	register char *sp;	/* start of string matched by it */
432
	register sopno ssub;	/* start sop of subsubRE */
433
	register sopno esub;	/* end sop of subsubRE */
434
	register char *ssp;	/* start of string matched by subsubRE */
435
	register char *dp;
436
	register size_t len;
437
	register int hard;
438
	register sop s;
439
	register regoff_t offsave;
440
	register cset *cs;
441
 
442
	AT("back", start, stop, startst, stopst);
443
	sp = start;
444
 
445
	/* get as far as we can with easy stuff */
446
	hard = 0;
447
	for (ss = startst; !hard && ss < stopst; ss++)
448
		switch (OP(s = m->g->strip[ss])) {
449
		case OCHAR:
450
			if (sp == stop || *sp++ != (char)OPND(s))
451
				return(NULL);
452
			break;
453
		case OANY:
454
			if (sp == stop)
455
				return(NULL);
456
			sp++;
457
			break;
458
		case OANYOF:
459
			cs = &m->g->sets[OPND(s)];
460
			if (sp == stop || !CHIN(cs, *sp++))
461
				return(NULL);
462
			break;
463
		case OBOL:
464
			if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
465
					(sp < m->endp && *(sp-1) == '\n' &&
466
						(m->g->cflags®_NEWLINE)) )
467
				{ /* yes */ }
468
			else
469
				return(NULL);
470
			break;
471
		case OEOL:
472
			if ( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
473
					(sp < m->endp && *sp == '\n' &&
474
						(m->g->cflags®_NEWLINE)) )
475
				{ /* yes */ }
476
			else
477
				return(NULL);
478
			break;
479
		case OBOW:
480
			if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) ||
481
					(sp < m->endp && *(sp-1) == '\n' &&
482
						(m->g->cflags®_NEWLINE)) ||
483
					(sp > m->beginp &&
484
							!ISWORD(*(sp-1))) ) &&
485
					(sp < m->endp && ISWORD(*sp)) )
486
				{ /* yes */ }
487
			else
488
				return(NULL);
489
			break;
490
		case OEOW:
491
			if (( (sp == m->endp && !(m->eflags®_NOTEOL)) ||
492
					(sp < m->endp && *sp == '\n' &&
493
						(m->g->cflags®_NEWLINE)) ||
494
					(sp < m->endp && !ISWORD(*sp)) ) &&
495
					(sp > m->beginp && ISWORD(*(sp-1))) )
496
				{ /* yes */ }
497
			else
498
				return(NULL);
499
			break;
500
		case O_QUEST:
501
			break;
502
		case OOR1:	/* matches null but needs to skip */
503
			ss++;
504
			s = m->g->strip[ss];
505
			do {
506
				assert(OP(s) == OOR2);
507
				ss += OPND(s);
508
			} while (OP(s = m->g->strip[ss]) != O_CH);
509
			/* note that the ss++ gets us past the O_CH */
510
			break;
511
		default:	/* have to make a choice */
512
			hard = 1;
513
			break;
514
		}
515
	if (!hard) {		/* that was it! */
516
		if (sp != stop)
517
			return(NULL);
518
		return(sp);
519
	}
520
	ss--;			/* adjust for the for's final increment */
521
 
522
	/* the hard stuff */
523
	AT("hard", sp, stop, ss, stopst);
524
	s = m->g->strip[ss];
525
	switch (OP(s)) {
526
	case OBACK_:		/* the vilest depths */
527
		i = OPND(s);
528
		assert(0 < i && i <= m->g->nsub);
529
		if (m->pmatch[i].rm_eo == -1)
530
			return(NULL);
531
		assert(m->pmatch[i].rm_so != -1);
532
		len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
533
		assert(stop - m->beginp >= len);
534
		if (sp > stop - len)
535
			return(NULL);	/* not enough left to match */
536
		ssp = m->offp + m->pmatch[i].rm_so;
537
		if (memcmp(sp, ssp, len) != 0)
538
			return(NULL);
539
		while (m->g->strip[ss] != SOP(O_BACK, i))
540
			ss++;
541
		return(backref(m, sp+len, stop, ss+1, stopst, lev));
542
		break;
543
	case OQUEST_:		/* to null or not */
544
		dp = backref(m, sp, stop, ss+1, stopst, lev);
545
		if (dp != NULL)
546
			return(dp);	/* not */
547
		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
548
		break;
549
	case OPLUS_:
550
		assert(m->lastpos != NULL);
551
		assert(lev+1 <= m->g->nplus);
552
		m->lastpos[lev+1] = sp;
553
		return(backref(m, sp, stop, ss+1, stopst, lev+1));
554
		break;
555
	case O_PLUS:
556
		if (sp == m->lastpos[lev])	/* last pass matched null */
557
			return(backref(m, sp, stop, ss+1, stopst, lev-1));
558
		/* try another pass */
559
		m->lastpos[lev] = sp;
560
		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
561
		if (dp == NULL)
562
			return(backref(m, sp, stop, ss+1, stopst, lev-1));
563
		else
564
			return(dp);
565
		break;
566
	case OCH_:		/* find the right one, if any */
567
		ssub = ss + 1;
568
		esub = ss + OPND(s) - 1;
569
		assert(OP(m->g->strip[esub]) == OOR1);
570
		for (;;) {	/* find first matching branch */
571
			dp = backref(m, sp, stop, ssub, esub, lev);
572
			if (dp != NULL)
573
				return(dp);
574
			/* that one missed, try next one */
575
			if (OP(m->g->strip[esub]) == O_CH)
576
				return(NULL);	/* there is none */
577
			esub++;
578
			assert(OP(m->g->strip[esub]) == OOR2);
579
			ssub = esub + 1;
580
			esub += OPND(m->g->strip[esub]);
581
			if (OP(m->g->strip[esub]) == OOR2)
582
				esub--;
583
			else
584
				assert(OP(m->g->strip[esub]) == O_CH);
585
		}
586
		break;
587
	case OLPAREN:		/* must undo assignment if rest fails */
588
		i = OPND(s);
589
		assert(0 < i && i <= m->g->nsub);
590
		offsave = m->pmatch[i].rm_so;
591
		m->pmatch[i].rm_so = sp - m->offp;
592
		dp = backref(m, sp, stop, ss+1, stopst, lev);
593
		if (dp != NULL)
594
			return(dp);
595
		m->pmatch[i].rm_so = offsave;
596
		return(NULL);
597
		break;
598
	case ORPAREN:		/* must undo assignment if rest fails */
599
		i = OPND(s);
600
		assert(0 < i && i <= m->g->nsub);
601
		offsave = m->pmatch[i].rm_eo;
602
		m->pmatch[i].rm_eo = sp - m->offp;
603
		dp = backref(m, sp, stop, ss+1, stopst, lev);
604
		if (dp != NULL)
605
			return(dp);
606
		m->pmatch[i].rm_eo = offsave;
607
		return(NULL);
608
		break;
609
	default:		/* uh oh */
610
		assert(nope);
611
		break;
612
	}
613
 
614
	/* "can't happen" */
615
	assert(nope);
616
	/* NOTREACHED */
617
}
618
 
619
/*
620
 - fast - step through the string at top speed
621
 == static char *fast(register struct match *m, char *start, \
622
 ==	char *stop, sopno startst, sopno stopst);
623
 */
624
static char *			/* where tentative match ended, or NULL */
625
fast(m, start, stop, startst, stopst)
626
register struct match *m;
627
char *start;
628
char *stop;
629
sopno startst;
630
sopno stopst;
631
{
632
	register states st = m->st;
633
	register states fresh = m->fresh;
634
	register states tmp = m->tmp;
635
	register char *p = start;
636
	register int c = (start == m->beginp) ? OUT : *(start-1);
637
	register int lastc;	/* previous c */
638
	register int flagch;
639
	register int i;
640
	register char *coldp;	/* last p after which no match was underway */
641
 
642
	CLEAR(st);
643
	SET1(st, startst);
644
	st = step(m->g, startst, stopst, st, NOTHING, st);
645
	ASSIGN(fresh, st);
646
	SP("start", st, *p);
647
	coldp = NULL;
648
	for (;;) {
649
		/* next character */
650
		lastc = c;
651
		c = (p == m->endp) ? OUT : *p;
652
		if (EQ(st, fresh))
653
			coldp = p;
654
 
655
		/* is there an EOL and/or BOL between lastc and c? */
656
		flagch = '\0';
657
		i = 0;
658
		if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
659
				(lastc == OUT && !(m->eflags®_NOTBOL)) ) {
660
			flagch = BOL;
661
			i = m->g->nbol;
662
		}
663
		if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
664
				(c == OUT && !(m->eflags®_NOTEOL)) ) {
665
			flagch = (flagch == BOL) ? BOLEOL : EOL;
666
			i += m->g->neol;
667
		}
668
		if (i != 0) {
669
			for (; i > 0; i--)
670
				st = step(m->g, startst, stopst, st, flagch, st);
671
			SP("boleol", st, c);
672
		}
673
 
674
		/* how about a word boundary? */
675
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
676
					(c != OUT && ISWORD(c)) ) {
677
			flagch = BOW;
678
		}
679
		if ( (lastc != OUT && ISWORD(lastc)) &&
680
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
681
			flagch = EOW;
682
		}
683
		if (flagch == BOW || flagch == EOW) {
684
			st = step(m->g, startst, stopst, st, flagch, st);
685
			SP("boweow", st, c);
686
		}
687
 
688
		/* are we done? */
689
		if (ISSET(st, stopst) || p == stop)
690
			break;		/* NOTE BREAK OUT */
691
 
692
		/* no, we must deal with this character */
693
		ASSIGN(tmp, st);
694
		ASSIGN(st, fresh);
695
		assert(c != OUT);
696
		st = step(m->g, startst, stopst, tmp, c, st);
697
		SP("aft", st, c);
698
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
699
		p++;
700
	}
701
 
702
	assert(coldp != NULL);
703
	m->coldp = coldp;
704
	if (ISSET(st, stopst))
705
		return(p+1);
706
	else
707
		return(NULL);
708
}
709
 
710
/*
711
 - slow - step through the string more deliberately
712
 == static char *slow(register struct match *m, char *start, \
713
 ==	char *stop, sopno startst, sopno stopst);
714
 */
715
static char *			/* where it ended */
716
slow(m, start, stop, startst, stopst)
717
register struct match *m;
718
char *start;
719
char *stop;
720
sopno startst;
721
sopno stopst;
722
{
723
	register states st = m->st;
724
	register states empty = m->empty;
725
	register states tmp = m->tmp;
726
	register char *p = start;
727
	register int c = (start == m->beginp) ? OUT : *(start-1);
728
	register int lastc;	/* previous c */
729
	register int flagch;
730
	register int i;
731
	register char *matchp;	/* last p at which a match ended */
732
 
733
	AT("slow", start, stop, startst, stopst);
734
	CLEAR(st);
735
	SET1(st, startst);
736
	SP("sstart", st, *p);
737
	st = step(m->g, startst, stopst, st, NOTHING, st);
738
	matchp = NULL;
739
	for (;;) {
740
		/* next character */
741
		lastc = c;
742
		c = (p == m->endp) ? OUT : *p;
743
 
744
		/* is there an EOL and/or BOL between lastc and c? */
745
		flagch = '\0';
746
		i = 0;
747
		if ( (lastc == '\n' && m->g->cflags®_NEWLINE) ||
748
				(lastc == OUT && !(m->eflags®_NOTBOL)) ) {
749
			flagch = BOL;
750
			i = m->g->nbol;
751
		}
752
		if ( (c == '\n' && m->g->cflags®_NEWLINE) ||
753
				(c == OUT && !(m->eflags®_NOTEOL)) ) {
754
			flagch = (flagch == BOL) ? BOLEOL : EOL;
755
			i += m->g->neol;
756
		}
757
		if (i != 0) {
758
			for (; i > 0; i--)
759
				st = step(m->g, startst, stopst, st, flagch, st);
760
			SP("sboleol", st, c);
761
		}
762
 
763
		/* how about a word boundary? */
764
		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
765
					(c != OUT && ISWORD(c)) ) {
766
			flagch = BOW;
767
		}
768
		if ( (lastc != OUT && ISWORD(lastc)) &&
769
				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
770
			flagch = EOW;
771
		}
772
		if (flagch == BOW || flagch == EOW) {
773
			st = step(m->g, startst, stopst, st, flagch, st);
774
			SP("sboweow", st, c);
775
		}
776
 
777
		/* are we done? */
778
		if (ISSET(st, stopst))
779
			matchp = p;
780
		if (EQ(st, empty) || p == stop)
781
			break;		/* NOTE BREAK OUT */
782
 
783
		/* no, we must deal with this character */
784
		ASSIGN(tmp, st);
785
		ASSIGN(st, empty);
786
		assert(c != OUT);
787
		st = step(m->g, startst, stopst, tmp, c, st);
788
		SP("saft", st, c);
789
		assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
790
		p++;
791
	}
792
 
793
	return(matchp);
794
}
795
 
796
 
797
/*
798
 - step - map set of states reachable before char to set reachable after
799
 == static states step(register struct re_guts *g, sopno start, sopno stop, \
800
 ==	register states bef, int ch, register states aft);
801
 == #define	BOL	(OUT+1)
802
 == #define	EOL	(BOL+1)
803
 == #define	BOLEOL	(BOL+2)
804
 == #define	NOTHING	(BOL+3)
805
 == #define	BOW	(BOL+4)
806
 == #define	EOW	(BOL+5)
807
 == #define	CODEMAX	(BOL+5)		// highest code used
808
 == #define	NONCHAR(c)	((c) > CHAR_MAX)
809
 == #define	NNONCHAR	(CODEMAX-CHAR_MAX)
810
 */
811
static states
812
step(g, start, stop, bef, ch, aft)
813
register struct re_guts *g;
814
sopno start;			/* start state within strip */
815
sopno stop;			/* state after stop state within strip */
816
register states bef;		/* states reachable before */
817
int ch;				/* character or NONCHAR code */
818
register states aft;		/* states already known reachable after */
819
{
820
	register cset *cs;
821
	register sop s;
822
	register sopno pc;
823
	register onestate here;		/* note, macros know this name */
824
	register sopno look;
825
	register int i;
826
 
827
	for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
828
		s = g->strip[pc];
829
		switch (OP(s)) {
830
		case OEND:
831
			assert(pc == stop-1);
832
			break;
833
		case OCHAR:
834
			/* only characters can match */
835
			assert(!NONCHAR(ch) || ch != (char)OPND(s));
836
			if (ch == (char)OPND(s))
837
				FWD(aft, bef, 1);
838
			break;
839
		case OBOL:
840
			if (ch == BOL || ch == BOLEOL)
841
				FWD(aft, bef, 1);
842
			break;
843
		case OEOL:
844
			if (ch == EOL || ch == BOLEOL)
845
				FWD(aft, bef, 1);
846
			break;
847
		case OBOW:
848
			if (ch == BOW)
849
				FWD(aft, bef, 1);
850
			break;
851
		case OEOW:
852
			if (ch == EOW)
853
				FWD(aft, bef, 1);
854
			break;
855
		case OANY:
856
			if (!NONCHAR(ch))
857
				FWD(aft, bef, 1);
858
			break;
859
		case OANYOF:
860
			cs = &g->sets[OPND(s)];
861
			if (!NONCHAR(ch) && CHIN(cs, ch))
862
				FWD(aft, bef, 1);
863
			break;
864
		case OBACK_:		/* ignored here */
865
		case O_BACK:
866
			FWD(aft, aft, 1);
867
			break;
868
		case OPLUS_:		/* forward, this is just an empty */
869
			FWD(aft, aft, 1);
870
			break;
871
		case O_PLUS:		/* both forward and back */
872
			FWD(aft, aft, 1);
873
			i = ISSETBACK(aft, OPND(s));
874
			BACK(aft, aft, OPND(s));
875
			if (!i && ISSETBACK(aft, OPND(s))) {
876
				/* oho, must reconsider loop body */
877
				pc -= OPND(s) + 1;
878
				INIT(here, pc);
879
			}
880
			break;
881
		case OQUEST_:		/* two branches, both forward */
882
			FWD(aft, aft, 1);
883
			FWD(aft, aft, OPND(s));
884
			break;
885
		case O_QUEST:		/* just an empty */
886
			FWD(aft, aft, 1);
887
			break;
888
		case OLPAREN:		/* not significant here */
889
		case ORPAREN:
890
			FWD(aft, aft, 1);
891
			break;
892
		case OCH_:		/* mark the first two branches */
893
			FWD(aft, aft, 1);
894
			assert(OP(g->strip[pc+OPND(s)]) == OOR2);
895
			FWD(aft, aft, OPND(s));
896
			break;
897
		case OOR1:		/* done a branch, find the O_CH */
898
			if (ISSTATEIN(aft, here)) {
899
				for (look = 1;
900
						OP(s = g->strip[pc+look]) != O_CH;
901
						look += OPND(s))
902
					assert(OP(s) == OOR2);
903
				FWD(aft, aft, look);
904
			}
905
			break;
906
		case OOR2:		/* propagate OCH_'s marking */
907
			FWD(aft, aft, 1);
908
			if (OP(g->strip[pc+OPND(s)]) != O_CH) {
909
				assert(OP(g->strip[pc+OPND(s)]) == OOR2);
910
				FWD(aft, aft, OPND(s));
911
			}
912
			break;
913
		case O_CH:		/* just empty */
914
			FWD(aft, aft, 1);
915
			break;
916
		default:		/* ooooops... */
917
			assert(nope);
918
			break;
919
		}
920
	}
921
 
922
	return(aft);
923
}
924
 
925
#ifdef REDEBUG
926
/*
927
 - print - print a set of states
928
 == #ifdef REDEBUG
929
 == static void print(struct match *m, char *caption, states st, \
930
 ==	int ch, FILE *d);
931
 == #endif
932
 */
933
static void
934
print(m, caption, st, ch, d)
935
struct match *m;
936
char *caption;
937
states st;
938
int ch;
939
FILE *d;
940
{
941
	register struct re_guts *g = m->g;
942
	register int i;
943
	register int first = 1;
944
 
945
	if (!(m->eflags®_TRACE))
946
		return;
947
 
948
	fprintf(d, "%s", caption);
949
	if (ch != '\0')
950
		fprintf(d, " %s", pchar(ch));
951
	for (i = 0; i < g->nstates; i++)
952
		if (ISSET(st, i)) {
953
			fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
954
			first = 0;
955
		}
956
	fprintf(d, "\n");
957
}
958
 
959
/*
960
 - at - print current situation
961
 == #ifdef REDEBUG
962
 == static void at(struct match *m, char *title, char *start, char *stop, \
963
 ==						sopno startst, sopno stopst);
964
 == #endif
965
 */
966
static void
967
at(m, title, start, stop, startst, stopst)
968
struct match *m;
969
char *title;
970
char *start;
971
char *stop;
972
sopno startst;
973
sopno stopst;
974
{
975
	if (!(m->eflags®_TRACE))
976
		return;
977
 
978
	printf("%s %s-", title, pchar(*start));
979
	printf("%s ", pchar(*stop));
980
	printf("%ld-%ld\n", (long)startst, (long)stopst);
981
}
982
 
983
#ifndef PCHARDONE
984
#define	PCHARDONE	/* never again */
985
/*
986
 - pchar - make a character printable
987
 == #ifdef REDEBUG
988
 == static char *pchar(int ch);
989
 == #endif
990
 *
991
 * Is this identical to regchar() over in debug.c?  Well, yes.  But a
992
 * duplicate here avoids having a debugging-capable regexec.o tied to
993
 * a matching debug.o, and this is convenient.  It all disappears in
994
 * the non-debug compilation anyway, so it doesn't matter much.
995
 */
996
static char *			/* -> representation */
997
pchar(ch)
998
int ch;
999
{
1000
	static char pbuf[10];
1001
 
1002
	if (isprint(ch) || ch == ' ')
1003
		sprintf(pbuf, "%c", ch);
1004
	else
1005
		sprintf(pbuf, "\\%o", ch);
1006
	return(pbuf);
1007
}
1008
#endif
1009
#endif
1010
 
1011
#undef	matcher
1012
#undef	fast
1013
#undef	slow
1014
#undef	dissect
1015
#undef	backref
1016
#undef	step
1017
#undef	print
1018
#undef	at
1019
#undef	match