Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4973 right-hear 1
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
2
#include 
3
#include 
4
#include 
5
#include 
6
#include 
7
#include 
8
#include 
9
 
10
#include "utils.h"
11
#include "regex2.h"
12
 
13
#include "cclass.h"
14
#include "cname.h"
15
 
16
/*
17
 * parse structure, passed up and down to avoid global variables and
18
 * other clumsinesses
19
 */
20
struct parse {
21
	char *next;		/* next character in RE */
22
	char *end;		/* end of string (-> NUL normally) */
23
	int error;		/* has an error been seen? */
24
	sop *strip;		/* malloced strip */
25
	sopno ssize;		/* malloced strip size (allocated) */
26
	sopno slen;		/* malloced strip length (used) */
27
	int ncsalloc;		/* number of csets allocated */
28
	struct re_guts *g;
29
#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
30
	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
31
	sopno pend[NPAREN];	/* -> ) ([0] unused) */
32
};
33
 
34
#include "regcomp.ih"
35
 
36
static char nuls[10];		/* place to point scanner in event of error */
37
 
38
/*
39
 * macros for use with parse structure
40
 * BEWARE:  these know that the parse structure is named `p' !!!
41
 */
42
#define	PEEK()	(*p->next)
43
#define	PEEK2()	(*(p->next+1))
44
#define	MORE()	(p->next < p->end)
45
#define	MORE2()	(p->next+1 < p->end)
46
#define	SEE(c)	(MORE() && PEEK() == (c))
47
#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
48
#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
49
#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
50
#define	NEXT()	(p->next++)
51
#define	NEXT2()	(p->next += 2)
52
#define	NEXTn(n)	(p->next += (n))
53
#define	GETNEXT()	(*p->next++)
54
#define	SETERROR(e)	seterr(p, (e))
55
#define	REQUIRE(co, e)	((co) || SETERROR(e))
56
#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
57
#define	MUSTEAT(c, e)	(REQUIRE(MORE() && GETNEXT() == (c), e))
58
#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
59
#define	EMIT(op, sopnd)	doemit(p, (sop)(op), (size_t)(sopnd))
60
#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
61
#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
62
#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
63
#define	HERE()		(p->slen)
64
#define	THERE()		(p->slen - 1)
65
#define	THERETHERE()	(p->slen - 2)
66
#define	DROP(n)	(p->slen -= (n))
67
 
68
#ifndef NDEBUG
69
static int never = 0;		/* for use in asserts; shuts lint up */
70
#else
71
#define	never	0		/* some s have bugs too */
72
#endif
73
 
74
/*
75
 - regcomp - interface for parser and compilation
76
 = extern int regcomp(regex_t *, const char *, int);
77
 = #define	REG_BASIC	0000
78
 = #define	REG_EXTENDED	0001
79
 = #define	REG_ICASE	0002
80
 = #define	REG_NOSUB	0004
81
 = #define	REG_NEWLINE	0010
82
 = #define	REG_NOSPEC	0020
83
 = #define	REG_PEND	0040
84
 = #define	REG_DUMP	0200
85
 */
86
int				/* 0 success, otherwise REG_something */
87
regcomp(preg, pattern, cflags)
88
regex_t *preg;
89
const char *pattern;
90
int cflags;
91
{
92
	struct parse pa;
93
	register struct re_guts *g;
94
	register struct parse *p = &pa;
95
	register int i;
96
	register size_t len;
97
#ifdef REDEBUG
98
#	define	GOODFLAGS(f)	(f)
99
#else
100
#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
101
#endif
102
 
103
	cflags = GOODFLAGS(cflags);
104
	if ((cflags®_EXTENDED) && (cflags®_NOSPEC))
105
		return(REG_INVARG);
106
 
107
	if (cflags®_PEND) {
108
		if (preg->re_endp < pattern)
109
			return(REG_INVARG);
110
		len = preg->re_endp - pattern;
111
	} else
112
		len = strlen((char *)pattern);
113
 
114
	/* do the mallocs early so failure handling is easy */
115
	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
116
							(NC-1)*sizeof(cat_t));
117
	if (g == NULL)
118
		return(REG_ESPACE);
119
	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
120
	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
121
	p->slen = 0;
122
	if (p->strip == NULL) {
123
		free((char *)g);
124
		return(REG_ESPACE);
125
	}
126
 
127
	/* set things up */
128
	p->g = g;
129
	p->next = (char *)pattern;	/* convenience; we do not modify it */
130
	p->end = p->next + len;
131
	p->error = 0;
132
	p->ncsalloc = 0;
133
	for (i = 0; i < NPAREN; i++) {
134
		p->pbegin[i] = 0;
135
		p->pend[i] = 0;
136
	}
137
	g->csetsize = NC;
138
	g->sets = NULL;
139
	g->setbits = NULL;
140
	g->ncsets = 0;
141
	g->cflags = cflags;
142
	g->iflags = 0;
143
	g->nbol = 0;
144
	g->neol = 0;
145
	g->must = NULL;
146
	g->mlen = 0;
147
	g->nsub = 0;
148
	g->ncategories = 1;	/* category 0 is "everything else" */
149
	g->categories = &g->catspace[-(CHAR_MIN)];
150
	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
151
	g->backrefs = 0;
152
 
153
	/* do it */
154
	EMIT(OEND, 0);
155
	g->firststate = THERE();
156
	if (cflags®_EXTENDED)
157
		p_ere(p, OUT);
158
	else if (cflags®_NOSPEC)
159
		p_str(p);
160
	else
161
		p_bre(p, OUT, OUT);
162
	EMIT(OEND, 0);
163
	g->laststate = THERE();
164
 
165
	/* tidy up loose ends and fill things in */
166
	categorize(p, g);
167
	stripsnug(p, g);
168
	findmust(p, g);
169
	g->nplus = pluscount(p, g);
170
	g->magic = MAGIC2;
171
	preg->re_nsub = g->nsub;
172
	preg->re_g = g;
173
	preg->re_magic = MAGIC1;
174
#ifndef REDEBUG
175
	/* not debugging, so can't rely on the assert() in regexec() */
176
	if (g->iflags&BAD)
177
		SETERROR(REG_ASSERT);
178
#endif
179
 
180
	/* win or lose, we're done */
181
	if (p->error != 0)	/* lose */
182
		regfree(preg);
183
	return(p->error);
184
}
185
 
186
/*
187
 - p_ere - ERE parser top level, concatenation and alternation
188
 == static void p_ere(register struct parse *p, int stop);
189
 */
190
static void
191
p_ere(p, stop)
192
register struct parse *p;
193
int stop;			/* character this ERE should end at */
194
{
195
	register char c;
196
	register sopno prevback;
197
	register sopno prevfwd;
198
	register sopno conc;
199
	register int first = 1;		/* is this the first alternative? */
200
 
201
	for (;;) {
202
		/* do a bunch of concatenated expressions */
203
		conc = HERE();
204
		while (MORE() && (c = PEEK()) != '|' && c != stop)
205
			p_ere_exp(p);
206
		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
207
 
208
		if (!EAT('|'))
209
			break;		/* NOTE BREAK OUT */
210
 
211
		if (first) {
212
			INSERT(OCH_, conc);	/* offset is wrong */
213
			prevfwd = conc;
214
			prevback = conc;
215
			first = 0;
216
		}
217
		ASTERN(OOR1, prevback);
218
		prevback = THERE();
219
		AHEAD(prevfwd);			/* fix previous offset */
220
		prevfwd = HERE();
221
		EMIT(OOR2, 0);			/* offset is very wrong */
222
	}
223
 
224
	if (!first) {		/* tail-end fixups */
225
		AHEAD(prevfwd);
226
		ASTERN(O_CH, prevback);
227
	}
228
 
229
	assert(!MORE() || SEE(stop));
230
}
231
 
232
/*
233
 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
234
 == static void p_ere_exp(register struct parse *p);
235
 */
236
static void
237
p_ere_exp(p)
238
register struct parse *p;
239
{
240
	register char c;
241
	register sopno pos;
242
	register int count;
243
	register int count2;
244
	register sopno subno;
245
	int wascaret = 0;
246
 
247
	assert(MORE());		/* caller should have ensured this */
248
	c = GETNEXT();
249
 
250
	pos = HERE();
251
	switch (c) {
252
	case '(':
253
		REQUIRE(MORE(), REG_EPAREN);
254
		p->g->nsub++;
255
		subno = p->g->nsub;
256
		if (subno < NPAREN)
257
			p->pbegin[subno] = HERE();
258
		EMIT(OLPAREN, subno);
259
		if (!SEE(')'))
260
			p_ere(p, ')');
261
		if (subno < NPAREN) {
262
			p->pend[subno] = HERE();
263
			assert(p->pend[subno] != 0);
264
		}
265
		EMIT(ORPAREN, subno);
266
		MUSTEAT(')', REG_EPAREN);
267
		break;
268
#ifndef POSIX_MISTAKE
269
	case ')':		/* happens only if no current unmatched ( */
270
		/*
271
		 * You may ask, why the ifndef?  Because I didn't notice
272
		 * this until slightly too late for 1003.2, and none of the
273
		 * other 1003.2 regular-expression reviewers noticed it at
274
		 * all.  So an unmatched ) is legal POSIX, at least until
275
		 * we can get it fixed.
276
		 */
277
		SETERROR(REG_EPAREN);
278
		break;
279
#endif
280
	case '^':
281
		EMIT(OBOL, 0);
282
		p->g->iflags |= USEBOL;
283
		p->g->nbol++;
284
		wascaret = 1;
285
		break;
286
	case '$':
287
		EMIT(OEOL, 0);
288
		p->g->iflags |= USEEOL;
289
		p->g->neol++;
290
		break;
291
	case '|':
292
		SETERROR(REG_EMPTY);
293
		break;
294
	case '*':
295
	case '+':
296
	case '?':
297
		SETERROR(REG_BADRPT);
298
		break;
299
	case '.':
300
		if (p->g->cflags®_NEWLINE)
301
			nonnewline(p);
302
		else
303
			EMIT(OANY, 0);
304
		break;
305
	case '[':
306
		p_bracket(p);
307
		break;
308
	case '\\':
309
		REQUIRE(MORE(), REG_EESCAPE);
310
		c = GETNEXT();
311
		ordinary(p, c);
312
		break;
313
	case '{':		/* okay as ordinary except if digit follows */
314
		REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
315
		/* FALLTHROUGH */
316
	default:
317
		ordinary(p, c);
318
		break;
319
	}
320
 
321
	if (!MORE())
322
		return;
323
	c = PEEK();
324
	/* we call { a repetition if followed by a digit */
325
	if (!( c == '*' || c == '+' || c == '?' ||
326
				(c == '{' && MORE2() && isdigit(PEEK2())) ))
327
		return;		/* no repetition, we're done */
328
	NEXT();
329
 
330
	REQUIRE(!wascaret, REG_BADRPT);
331
	switch (c) {
332
	case '*':	/* implemented as +? */
333
		/* this case does not require the (y|) trick, noKLUDGE */
334
		INSERT(OPLUS_, pos);
335
		ASTERN(O_PLUS, pos);
336
		INSERT(OQUEST_, pos);
337
		ASTERN(O_QUEST, pos);
338
		break;
339
	case '+':
340
		INSERT(OPLUS_, pos);
341
		ASTERN(O_PLUS, pos);
342
		break;
343
	case '?':
344
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
345
		INSERT(OCH_, pos);		/* offset slightly wrong */
346
		ASTERN(OOR1, pos);		/* this one's right */
347
		AHEAD(pos);			/* fix the OCH_ */
348
		EMIT(OOR2, 0);			/* offset very wrong... */
349
		AHEAD(THERE());			/* ...so fix it */
350
		ASTERN(O_CH, THERETHERE());
351
		break;
352
	case '{':
353
		count = p_count(p);
354
		if (EAT(',')) {
355
			if (isdigit(PEEK())) {
356
				count2 = p_count(p);
357
				REQUIRE(count <= count2, REG_BADBR);
358
			} else		/* single number with comma */
359
				count2 = INFINITY;
360
		} else		/* just a single number */
361
			count2 = count;
362
		repeat(p, pos, count, count2);
363
		if (!EAT('}')) {	/* error heuristics */
364
			while (MORE() && PEEK() != '}')
365
				NEXT();
366
			REQUIRE(MORE(), REG_EBRACE);
367
			SETERROR(REG_BADBR);
368
		}
369
		break;
370
	}
371
 
372
	if (!MORE())
373
		return;
374
	c = PEEK();
375
	if (!( c == '*' || c == '+' || c == '?' ||
376
				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
377
		return;
378
	SETERROR(REG_BADRPT);
379
}
380
 
381
/*
382
 - p_str - string (no metacharacters) "parser"
383
 == static void p_str(register struct parse *p);
384
 */
385
static void
386
p_str(p)
387
register struct parse *p;
388
{
389
	REQUIRE(MORE(), REG_EMPTY);
390
	while (MORE())
391
		ordinary(p, GETNEXT());
392
}
393
 
394
/*
395
 - p_bre - BRE parser top level, anchoring and concatenation
396
 == static void p_bre(register struct parse *p, register int end1, \
397
 ==	register int end2);
398
 * Giving end1 as OUT essentially eliminates the end1/end2 check.
399
 *
400
 * This implementation is a bit of a kludge, in that a trailing $ is first
401
 * taken as an ordinary character and then revised to be an anchor.  The
402
 * only undesirable side effect is that '$' gets included as a character
403
 * category in such cases.  This is fairly harmless; not worth fixing.
404
 * The amount of lookahead needed to avoid this kludge is excessive.
405
 */
406
static void
407
p_bre(p, end1, end2)
408
register struct parse *p;
409
register int end1;		/* first terminating character */
410
register int end2;		/* second terminating character */
411
{
412
	register sopno start = HERE();
413
	register int first = 1;			/* first subexpression? */
414
	register int wasdollar = 0;
415
 
416
	if (EAT('^')) {
417
		EMIT(OBOL, 0);
418
		p->g->iflags |= USEBOL;
419
		p->g->nbol++;
420
	}
421
	while (MORE() && !SEETWO(end1, end2)) {
422
		wasdollar = p_simp_re(p, first);
423
		first = 0;
424
	}
425
	if (wasdollar) {	/* oops, that was a trailing anchor */
426
		DROP(1);
427
		EMIT(OEOL, 0);
428
		p->g->iflags |= USEEOL;
429
		p->g->neol++;
430
	}
431
 
432
	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
433
}
434
 
435
/*
436
 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
437
 == static int p_simp_re(register struct parse *p, int starordinary);
438
 */
439
static int			/* was the simple RE an unbackslashed $? */
440
p_simp_re(p, starordinary)
441
register struct parse *p;
442
int starordinary;		/* is a leading * an ordinary character? */
443
{
444
	register int c;
445
	register int count;
446
	register int count2;
447
	register sopno pos;
448
	register int i;
449
	register sopno subno;
450
#	define	BACKSL	(1<
451
 
452
	pos = HERE();		/* repetion op, if any, covers from here */
453
 
454
	assert(MORE());		/* caller should have ensured this */
455
	c = GETNEXT();
456
	if (c == '\\') {
457
		REQUIRE(MORE(), REG_EESCAPE);
458
		c = BACKSL | (unsigned char)GETNEXT();
459
	}
460
	switch (c) {
461
	case '.':
462
		if (p->g->cflags®_NEWLINE)
463
			nonnewline(p);
464
		else
465
			EMIT(OANY, 0);
466
		break;
467
	case '[':
468
		p_bracket(p);
469
		break;
470
	case BACKSL|'{':
471
		SETERROR(REG_BADRPT);
472
		break;
473
	case BACKSL|'(':
474
		p->g->nsub++;
475
		subno = p->g->nsub;
476
		if (subno < NPAREN)
477
			p->pbegin[subno] = HERE();
478
		EMIT(OLPAREN, subno);
479
		/* the MORE here is an error heuristic */
480
		if (MORE() && !SEETWO('\\', ')'))
481
			p_bre(p, '\\', ')');
482
		if (subno < NPAREN) {
483
			p->pend[subno] = HERE();
484
			assert(p->pend[subno] != 0);
485
		}
486
		EMIT(ORPAREN, subno);
487
		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
488
		break;
489
	case BACKSL|')':	/* should not get here -- must be user */
490
	case BACKSL|'}':
491
		SETERROR(REG_EPAREN);
492
		break;
493
	case BACKSL|'1':
494
	case BACKSL|'2':
495
	case BACKSL|'3':
496
	case BACKSL|'4':
497
	case BACKSL|'5':
498
	case BACKSL|'6':
499
	case BACKSL|'7':
500
	case BACKSL|'8':
501
	case BACKSL|'9':
502
		i = (c&~BACKSL) - '0';
503
		assert(i < NPAREN);
504
		if (p->pend[i] != 0) {
505
			assert(i <= p->g->nsub);
506
			EMIT(OBACK_, i);
507
			assert(p->pbegin[i] != 0);
508
			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
509
			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
510
			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
511
			EMIT(O_BACK, i);
512
		} else
513
			SETERROR(REG_ESUBREG);
514
		p->g->backrefs = 1;
515
		break;
516
	case '*':
517
		REQUIRE(starordinary, REG_BADRPT);
518
		/* FALLTHROUGH */
519
	default:
520
		ordinary(p, c &~ BACKSL);
521
		break;
522
	}
523
 
524
	if (EAT('*')) {		/* implemented as +? */
525
		/* this case does not require the (y|) trick, noKLUDGE */
526
		INSERT(OPLUS_, pos);
527
		ASTERN(O_PLUS, pos);
528
		INSERT(OQUEST_, pos);
529
		ASTERN(O_QUEST, pos);
530
	} else if (EATTWO('\\', '{')) {
531
		count = p_count(p);
532
		if (EAT(',')) {
533
			if (MORE() && isdigit(PEEK())) {
534
				count2 = p_count(p);
535
				REQUIRE(count <= count2, REG_BADBR);
536
			} else		/* single number with comma */
537
				count2 = INFINITY;
538
		} else		/* just a single number */
539
			count2 = count;
540
		repeat(p, pos, count, count2);
541
		if (!EATTWO('\\', '}')) {	/* error heuristics */
542
			while (MORE() && !SEETWO('\\', '}'))
543
				NEXT();
544
			REQUIRE(MORE(), REG_EBRACE);
545
			SETERROR(REG_BADBR);
546
		}
547
	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
548
		return(1);
549
 
550
	return(0);
551
}
552
 
553
/*
554
 - p_count - parse a repetition count
555
 == static int p_count(register struct parse *p);
556
 */
557
static int			/* the value */
558
p_count(p)
559
register struct parse *p;
560
{
561
	register int count = 0;
562
	register int ndigits = 0;
563
 
564
	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
565
		count = count*10 + (GETNEXT() - '0');
566
		ndigits++;
567
	}
568
 
569
	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
570
	return(count);
571
}
572
 
573
/*
574
 - p_bracket - parse a bracketed character list
575
 == static void p_bracket(register struct parse *p);
576
 *
577
 * Note a significant property of this code:  if the allocset() did SETERROR,
578
 * no set operations are done.
579
 */
580
static void
581
p_bracket(p)
582
register struct parse *p;
583
{
584
	register char c;
585
	register cset *cs = allocset(p);
586
	register int invert = 0;
587
 
588
	/* Dept of Truly Sickening Special-Case Kludges */
589
	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
590
		EMIT(OBOW, 0);
591
		NEXTn(6);
592
		return;
593
	}
594
	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
595
		EMIT(OEOW, 0);
596
		NEXTn(6);
597
		return;
598
	}
599
 
600
	if (EAT('^'))
601
		invert++;	/* make note to invert set at end */
602
	if (EAT(']'))
603
		CHadd(cs, ']');
604
	else if (EAT('-'))
605
		CHadd(cs, '-');
606
	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
607
		p_b_term(p, cs);
608
	if (EAT('-'))
609
		CHadd(cs, '-');
610
	MUSTEAT(']', REG_EBRACK);
611
 
612
	if (p->error != 0)	/* don't mess things up further */
613
		return;
614
 
615
	if (p->g->cflags®_ICASE) {
616
		register int i;
617
		register int ci;
618
 
619
		for (i = p->g->csetsize - 1; i >= 0; i--)
620
			if (CHIN(cs, i) && isalpha(i)) {
621
				ci = othercase(i);
622
				if (ci != i)
623
					CHadd(cs, ci);
624
			}
625
		if (cs->multis != NULL)
626
			mccase(p, cs);
627
	}
628
	if (invert) {
629
		register int i;
630
 
631
		for (i = p->g->csetsize - 1; i >= 0; i--)
632
			if (CHIN(cs, i))
633
				CHsub(cs, i);
634
			else
635
				CHadd(cs, i);
636
		if (p->g->cflags®_NEWLINE)
637
			CHsub(cs, '\n');
638
		if (cs->multis != NULL)
639
			mcinvert(p, cs);
640
	}
641
 
642
	assert(cs->multis == NULL);		/* xxx */
643
 
644
	if (nch(p, cs) == 1) {		/* optimize singleton sets */
645
		ordinary(p, firstch(p, cs));
646
		freeset(p, cs);
647
	} else
648
		EMIT(OANYOF, freezeset(p, cs));
649
}
650
 
651
/*
652
 - p_b_term - parse one term of a bracketed character list
653
 == static void p_b_term(register struct parse *p, register cset *cs);
654
 */
655
static void
656
p_b_term(p, cs)
657
register struct parse *p;
658
register cset *cs;
659
{
660
	register char c;
661
	register char start, finish;
662
	register int i;
663
 
664
	/* classify what we've got */
665
	switch ((MORE()) ? PEEK() : '\0') {
666
	case '[':
667
		c = (MORE2()) ? PEEK2() : '\0';
668
		break;
669
	case '-':
670
		SETERROR(REG_ERANGE);
671
		return;			/* NOTE RETURN */
672
		break;
673
	default:
674
		c = '\0';
675
		break;
676
	}
677
 
678
	switch (c) {
679
	case ':':		/* character class */
680
		NEXT2();
681
		REQUIRE(MORE(), REG_EBRACK);
682
		c = PEEK();
683
		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
684
		p_b_cclass(p, cs);
685
		REQUIRE(MORE(), REG_EBRACK);
686
		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
687
		break;
688
	case '=':		/* equivalence class */
689
		NEXT2();
690
		REQUIRE(MORE(), REG_EBRACK);
691
		c = PEEK();
692
		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
693
		p_b_eclass(p, cs);
694
		REQUIRE(MORE(), REG_EBRACK);
695
		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
696
		break;
697
	default:		/* symbol, ordinary character, or range */
698
/* xxx revision needed for multichar stuff */
699
		start = p_b_symbol(p);
700
		if (SEE('-') && MORE2() && PEEK2() != ']') {
701
			/* range */
702
			NEXT();
703
			if (EAT('-'))
704
				finish = '-';
705
			else
706
				finish = p_b_symbol(p);
707
		} else
708
			finish = start;
709
/* xxx what about signed chars here... */
710
		REQUIRE(start <= finish, REG_ERANGE);
711
		for (i = start; i <= finish; i++)
712
			CHadd(cs, i);
713
		break;
714
	}
715
}
716
 
717
/*
718
 - p_b_cclass - parse a character-class name and deal with it
719
 == static void p_b_cclass(register struct parse *p, register cset *cs);
720
 */
721
static void
722
p_b_cclass(p, cs)
723
register struct parse *p;
724
register cset *cs;
725
{
726
	register char *sp = p->next;
727
	register struct cclass *cp;
728
	register size_t len;
729
	register char *u;
730
	register char c;
731
 
732
	while (MORE() && isalpha(PEEK()))
733
		NEXT();
734
	len = p->next - sp;
735
	for (cp = cclasses; cp->name != NULL; cp++)
736
		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
737
			break;
738
	if (cp->name == NULL) {
739
		/* oops, didn't find it */
740
		SETERROR(REG_ECTYPE);
741
		return;
742
	}
743
 
744
	u = cp->chars;
745
	while ((c = *u++) != '\0')
746
		CHadd(cs, c);
747
	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
748
		MCadd(p, cs, u);
749
}
750
 
751
/*
752
 - p_b_eclass - parse an equivalence-class name and deal with it
753
 == static void p_b_eclass(register struct parse *p, register cset *cs);
754
 *
755
 * This implementation is incomplete. xxx
756
 */
757
static void
758
p_b_eclass(p, cs)
759
register struct parse *p;
760
register cset *cs;
761
{
762
	register char c;
763
 
764
	c = p_b_coll_elem(p, '=');
765
	CHadd(cs, c);
766
}
767
 
768
/*
769
 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
770
 == static char p_b_symbol(register struct parse *p);
771
 */
772
static char			/* value of symbol */
773
p_b_symbol(p)
774
register struct parse *p;
775
{
776
	register char value;
777
 
778
	REQUIRE(MORE(), REG_EBRACK);
779
	if (!EATTWO('[', '.'))
780
		return(GETNEXT());
781
 
782
	/* collating symbol */
783
	value = p_b_coll_elem(p, '.');
784
	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
785
	return(value);
786
}
787
 
788
/*
789
 - p_b_coll_elem - parse a collating-element name and look it up
790
 == static char p_b_coll_elem(register struct parse *p, int endc);
791
 */
792
static char			/* value of collating element */
793
p_b_coll_elem(p, endc)
794
register struct parse *p;
795
int endc;			/* name ended by endc,']' */
796
{
797
	register char *sp = p->next;
798
	register struct cname *cp;
799
	register int len;
800
	register char c;
801
 
802
	while (MORE() && !SEETWO(endc, ']'))
803
		NEXT();
804
	if (!MORE()) {
805
		SETERROR(REG_EBRACK);
806
		return(0);
807
	}
808
	len = p->next - sp;
809
	for (cp = cnames; cp->name != NULL; cp++)
810
		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
811
			return(cp->code);	/* known name */
812
	if (len == 1)
813
		return(*sp);	/* single character */
814
	SETERROR(REG_ECOLLATE);			/* neither */
815
	return(0);
816
}
817
 
818
/*
819
 - othercase - return the case counterpart of an alphabetic
820
 == static char othercase(int ch);
821
 */
822
static char			/* if no counterpart, return ch */
823
othercase(ch)
824
int ch;
825
{
826
	assert(isalpha(ch));
827
	if (isupper(ch))
828
		return(tolower(ch));
829
	else if (islower(ch))
830
		return(toupper(ch));
831
	else			/* peculiar, but could happen */
832
		return(ch);
833
}
834
 
835
/*
836
 - bothcases - emit a dualcase version of a two-case character
837
 == static void bothcases(register struct parse *p, int ch);
838
 *
839
 * Boy, is this implementation ever a kludge...
840
 */
841
static void
842
bothcases(p, ch)
843
register struct parse *p;
844
int ch;
845
{
846
	register char *oldnext = p->next;
847
	register char *oldend = p->end;
848
	char bracket[3];
849
 
850
	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
851
	p->next = bracket;
852
	p->end = bracket+2;
853
	bracket[0] = ch;
854
	bracket[1] = ']';
855
	bracket[2] = '\0';
856
	p_bracket(p);
857
	assert(p->next == bracket+2);
858
	p->next = oldnext;
859
	p->end = oldend;
860
}
861
 
862
/*
863
 - ordinary - emit an ordinary character
864
 == static void ordinary(register struct parse *p, register int ch);
865
 */
866
static void
867
ordinary(p, ch)
868
register struct parse *p;
869
register int ch;
870
{
871
	register cat_t *cap = p->g->categories;
872
 
873
	if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch)
874
		bothcases(p, ch);
875
	else {
876
		EMIT(OCHAR, (unsigned char)ch);
877
		if (cap[ch] == 0)
878
			cap[ch] = p->g->ncategories++;
879
	}
880
}
881
 
882
/*
883
 - nonnewline - emit REG_NEWLINE version of OANY
884
 == static void nonnewline(register struct parse *p);
885
 *
886
 * Boy, is this implementation ever a kludge...
887
 */
888
static void
889
nonnewline(p)
890
register struct parse *p;
891
{
892
	register char *oldnext = p->next;
893
	register char *oldend = p->end;
894
	char bracket[4];
895
 
896
	p->next = bracket;
897
	p->end = bracket+3;
898
	bracket[0] = '^';
899
	bracket[1] = '\n';
900
	bracket[2] = ']';
901
	bracket[3] = '\0';
902
	p_bracket(p);
903
	assert(p->next == bracket+3);
904
	p->next = oldnext;
905
	p->end = oldend;
906
}
907
 
908
/*
909
 - repeat - generate code for a bounded repetition, recursively if needed
910
 == static void repeat(register struct parse *p, sopno start, int from, int to);
911
 */
912
static void
913
repeat(p, start, from, to)
914
register struct parse *p;
915
sopno start;			/* operand from here to end of strip */
916
int from;			/* repeated from this number */
917
int to;				/* to this number of times (maybe INFINITY) */
918
{
919
	register sopno finish = HERE();
920
#	define	N	2
921
#	define	INF	3
922
#	define	REP(f, t)	((f)*8 + (t))
923
#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
924
	register sopno copy;
925
 
926
	if (p->error != 0)	/* head off possible runaway recursion */
927
		return;
928
 
929
	assert(from <= to);
930
 
931
	switch (REP(MAP(from), MAP(to))) {
932
	case REP(0, 0):			/* must be user doing this */
933
		DROP(finish-start);	/* drop the operand */
934
		break;
935
	case REP(0, 1):			/* as x{1,1}? */
936
	case REP(0, N):			/* as x{1,n}? */
937
	case REP(0, INF):		/* as x{1,}? */
938
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
939
		INSERT(OCH_, start);		/* offset is wrong... */
940
		repeat(p, start+1, 1, to);
941
		ASTERN(OOR1, start);
942
		AHEAD(start);			/* ... fix it */
943
		EMIT(OOR2, 0);
944
		AHEAD(THERE());
945
		ASTERN(O_CH, THERETHERE());
946
		break;
947
	case REP(1, 1):			/* trivial case */
948
		/* done */
949
		break;
950
	case REP(1, N):			/* as x?x{1,n-1} */
951
		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
952
		INSERT(OCH_, start);
953
		ASTERN(OOR1, start);
954
		AHEAD(start);
955
		EMIT(OOR2, 0);			/* offset very wrong... */
956
		AHEAD(THERE());			/* ...so fix it */
957
		ASTERN(O_CH, THERETHERE());
958
		copy = dupl(p, start+1, finish+1);
959
		assert(copy == finish+4);
960
		repeat(p, copy, 1, to-1);
961
		break;
962
	case REP(1, INF):		/* as x+ */
963
		INSERT(OPLUS_, start);
964
		ASTERN(O_PLUS, start);
965
		break;
966
	case REP(N, N):			/* as xx{m-1,n-1} */
967
		copy = dupl(p, start, finish);
968
		repeat(p, copy, from-1, to-1);
969
		break;
970
	case REP(N, INF):		/* as xx{n-1,INF} */
971
		copy = dupl(p, start, finish);
972
		repeat(p, copy, from-1, to);
973
		break;
974
	default:			/* "can't happen" */
975
		SETERROR(REG_ASSERT);	/* just in case */
976
		break;
977
	}
978
}
979
 
980
/*
981
 - seterr - set an error condition
982
 == static int seterr(register struct parse *p, int e);
983
 */
984
static int			/* useless but makes type checking happy */
985
seterr(p, e)
986
register struct parse *p;
987
int e;
988
{
989
	if (p->error == 0)	/* keep earliest error condition */
990
		p->error = e;
991
	p->next = nuls;		/* try to bring things to a halt */
992
	p->end = nuls;
993
	return(0);		/* make the return value well-defined */
994
}
995
 
996
/*
997
 - allocset - allocate a set of characters for []
998
 == static cset *allocset(register struct parse *p);
999
 */
1000
static cset *
1001
allocset(p)
1002
register struct parse *p;
1003
{
1004
	register int no = p->g->ncsets++;
1005
	register size_t nc;
1006
	register size_t nbytes;
1007
	register cset *cs;
1008
	register size_t css = (size_t)p->g->csetsize;
1009
	register int i;
1010
 
1011
	if (no >= p->ncsalloc) {	/* need another column of space */
1012
		p->ncsalloc += CHAR_BIT;
1013
		nc = p->ncsalloc;
1014
		assert(nc % CHAR_BIT == 0);
1015
		nbytes = nc / CHAR_BIT * css;
1016
		if (p->g->sets == NULL)
1017
			p->g->sets = (cset *)malloc(nc * sizeof(cset));
1018
		else
1019
			p->g->sets = (cset *)realloc((char *)p->g->sets,
1020
							nc * sizeof(cset));
1021
		if (p->g->setbits == NULL)
1022
			p->g->setbits = (uch *)malloc(nbytes);
1023
		else {
1024
			p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1025
								nbytes);
1026
			/* xxx this isn't right if setbits is now NULL */
1027
			for (i = 0; i < no; i++)
1028
				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1029
		}
1030
		if (p->g->sets != NULL && p->g->setbits != NULL)
1031
			(void) memset((char *)p->g->setbits + (nbytes - css),
1032
								0, css);
1033
		else {
1034
			no = 0;
1035
			SETERROR(REG_ESPACE);
1036
			/* caller's responsibility not to do set ops */
1037
		}
1038
	}
1039
 
1040
	assert(p->g->sets != NULL);	/* xxx */
1041
	cs = &p->g->sets[no];
1042
	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1043
	cs->mask = 1 << ((no) % CHAR_BIT);
1044
	cs->hash = 0;
1045
	cs->smultis = 0;
1046
	cs->multis = NULL;
1047
 
1048
	return(cs);
1049
}
1050
 
1051
/*
1052
 - freeset - free a now-unused set
1053
 == static void freeset(register struct parse *p, register cset *cs);
1054
 */
1055
static void
1056
freeset(p, cs)
1057
register struct parse *p;
1058
register cset *cs;
1059
{
1060
	register int i;
1061
	register cset *top = &p->g->sets[p->g->ncsets];
1062
	register size_t css = (size_t)p->g->csetsize;
1063
 
1064
	for (i = 0; i < css; i++)
1065
		CHsub(cs, i);
1066
	if (cs == top-1)	/* recover only the easy case */
1067
		p->g->ncsets--;
1068
}
1069
 
1070
/*
1071
 - freezeset - final processing on a set of characters
1072
 == static int freezeset(register struct parse *p, register cset *cs);
1073
 *
1074
 * The main task here is merging identical sets.  This is usually a waste
1075
 * of time (although the hash code minimizes the overhead), but can win
1076
 * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1077
 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1078
 * the same value!
1079
 */
1080
static int			/* set number */
1081
freezeset(p, cs)
1082
register struct parse *p;
1083
register cset *cs;
1084
{
1085
	register uch h = cs->hash;
1086
	register int i;
1087
	register cset *top = &p->g->sets[p->g->ncsets];
1088
	register cset *cs2;
1089
	register size_t css = (size_t)p->g->csetsize;
1090
 
1091
	/* look for an earlier one which is the same */
1092
	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1093
		if (cs2->hash == h && cs2 != cs) {
1094
			/* maybe */
1095
			for (i = 0; i < css; i++)
1096
				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1097
					break;		/* no */
1098
			if (i == css)
1099
				break;			/* yes */
1100
		}
1101
 
1102
	if (cs2 < top) {	/* found one */
1103
		freeset(p, cs);
1104
		cs = cs2;
1105
	}
1106
 
1107
	return((int)(cs - p->g->sets));
1108
}
1109
 
1110
/*
1111
 - firstch - return first character in a set (which must have at least one)
1112
 == static int firstch(register struct parse *p, register cset *cs);
1113
 */
1114
static int			/* character; there is no "none" value */
1115
firstch(p, cs)
1116
register struct parse *p;
1117
register cset *cs;
1118
{
1119
	register int i;
1120
	register size_t css = (size_t)p->g->csetsize;
1121
 
1122
	for (i = 0; i < css; i++)
1123
		if (CHIN(cs, i))
1124
			return((char)i);
1125
	assert(never);
1126
	return(0);		/* arbitrary */
1127
}
1128
 
1129
/*
1130
 - nch - number of characters in a set
1131
 == static int nch(register struct parse *p, register cset *cs);
1132
 */
1133
static int
1134
nch(p, cs)
1135
register struct parse *p;
1136
register cset *cs;
1137
{
1138
	register int i;
1139
	register size_t css = (size_t)p->g->csetsize;
1140
	register int n = 0;
1141
 
1142
	for (i = 0; i < css; i++)
1143
		if (CHIN(cs, i))
1144
			n++;
1145
	return(n);
1146
}
1147
 
1148
/*
1149
 - mcadd - add a collating element to a cset
1150
 == static void mcadd(register struct parse *p, register cset *cs, \
1151
 ==	register char *cp);
1152
 */
1153
static void
1154
mcadd(p, cs, cp)
1155
register struct parse *p;
1156
register cset *cs;
1157
register char *cp;
1158
{
1159
	register size_t oldend = cs->smultis;
1160
 
1161
	cs->smultis += strlen(cp) + 1;
1162
	if (cs->multis == NULL)
1163
		cs->multis = malloc(cs->smultis);
1164
	else
1165
		cs->multis = realloc(cs->multis, cs->smultis);
1166
	if (cs->multis == NULL) {
1167
		SETERROR(REG_ESPACE);
1168
		return;
1169
	}
1170
 
1171
	(void) strcpy(cs->multis + oldend - 1, cp);
1172
	cs->multis[cs->smultis - 1] = '\0';
1173
}
1174
 
1175
/*
1176
 - mcsub - subtract a collating element from a cset
1177
 == static void mcsub(register cset *cs, register char *cp);
1178
 */
1179
static void
1180
mcsub(cs, cp)
1181
register cset *cs;
1182
register char *cp;
1183
{
1184
	register char *fp = mcfind(cs, cp);
1185
	register size_t len = strlen(fp);
1186
 
1187
	assert(fp != NULL);
1188
	(void) memmove(fp, fp + len + 1,
1189
				cs->smultis - (fp + len + 1 - cs->multis));
1190
	cs->smultis -= len;
1191
 
1192
	if (cs->smultis == 0) {
1193
		free(cs->multis);
1194
		cs->multis = NULL;
1195
		return;
1196
	}
1197
 
1198
	cs->multis = realloc(cs->multis, cs->smultis);
1199
	assert(cs->multis != NULL);
1200
}
1201
 
1202
/*
1203
 - mcin - is a collating element in a cset?
1204
 == static int mcin(register cset *cs, register char *cp);
1205
 */
1206
static int
1207
mcin(cs, cp)
1208
register cset *cs;
1209
register char *cp;
1210
{
1211
	return(mcfind(cs, cp) != NULL);
1212
}
1213
 
1214
/*
1215
 - mcfind - find a collating element in a cset
1216
 == static char *mcfind(register cset *cs, register char *cp);
1217
 */
1218
static char *
1219
mcfind(cs, cp)
1220
register cset *cs;
1221
register char *cp;
1222
{
1223
	register char *p;
1224
 
1225
	if (cs->multis == NULL)
1226
		return(NULL);
1227
	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1228
		if (strcmp(cp, p) == 0)
1229
			return(p);
1230
	return(NULL);
1231
}
1232
 
1233
/*
1234
 - mcinvert - invert the list of collating elements in a cset
1235
 == static void mcinvert(register struct parse *p, register cset *cs);
1236
 *
1237
 * This would have to know the set of possibilities.  Implementation
1238
 * is deferred.
1239
 */
1240
static void
1241
mcinvert(p, cs)
1242
register struct parse *p;
1243
register cset *cs;
1244
{
1245
	assert(cs->multis == NULL);	/* xxx */
1246
}
1247
 
1248
/*
1249
 - mccase - add case counterparts of the list of collating elements in a cset
1250
 == static void mccase(register struct parse *p, register cset *cs);
1251
 *
1252
 * This would have to know the set of possibilities.  Implementation
1253
 * is deferred.
1254
 */
1255
static void
1256
mccase(p, cs)
1257
register struct parse *p;
1258
register cset *cs;
1259
{
1260
	assert(cs->multis == NULL);	/* xxx */
1261
}
1262
 
1263
/*
1264
 - isinsets - is this character in any sets?
1265
 == static int isinsets(register struct re_guts *g, int c);
1266
 */
1267
static int			/* predicate */
1268
isinsets(g, c)
1269
register struct re_guts *g;
1270
int c;
1271
{
1272
	register uch *col;
1273
	register int i;
1274
	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1275
	register unsigned uc = (unsigned char)c;
1276
 
1277
	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1278
		if (col[uc] != 0)
1279
			return(1);
1280
	return(0);
1281
}
1282
 
1283
/*
1284
 - samesets - are these two characters in exactly the same sets?
1285
 == static int samesets(register struct re_guts *g, int c1, int c2);
1286
 */
1287
static int			/* predicate */
1288
samesets(g, c1, c2)
1289
register struct re_guts *g;
1290
int c1;
1291
int c2;
1292
{
1293
	register uch *col;
1294
	register int i;
1295
	register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1296
	register unsigned uc1 = (unsigned char)c1;
1297
	register unsigned uc2 = (unsigned char)c2;
1298
 
1299
	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1300
		if (col[uc1] != col[uc2])
1301
			return(0);
1302
	return(1);
1303
}
1304
 
1305
/*
1306
 - categorize - sort out character categories
1307
 == static void categorize(struct parse *p, register struct re_guts *g);
1308
 */
1309
static void
1310
categorize(p, g)
1311
struct parse *p;
1312
register struct re_guts *g;
1313
{
1314
	register cat_t *cats = g->categories;
1315
	register int c;
1316
	register int c2;
1317
	register cat_t cat;
1318
 
1319
	/* avoid making error situations worse */
1320
	if (p->error != 0)
1321
		return;
1322
 
1323
	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1324
		if (cats[c] == 0 && isinsets(g, c)) {
1325
			cat = g->ncategories++;
1326
			cats[c] = cat;
1327
			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1328
				if (cats[c2] == 0 && samesets(g, c, c2))
1329
					cats[c2] = cat;
1330
		}
1331
}
1332
 
1333
/*
1334
 - dupl - emit a duplicate of a bunch of sops
1335
 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1336
 */
1337
static sopno			/* start of duplicate */
1338
dupl(p, start, finish)
1339
register struct parse *p;
1340
sopno start;			/* from here */
1341
sopno finish;			/* to this less one */
1342
{
1343
	register sopno ret = HERE();
1344
	register sopno len = finish - start;
1345
 
1346
	assert(finish >= start);
1347
	if (len == 0)
1348
		return(ret);
1349
	enlarge(p, p->ssize + len);	/* this many unexpected additions */
1350
	assert(p->ssize >= p->slen + len);
1351
	(void) memcpy((char *)(p->strip + p->slen),
1352
		(char *)(p->strip + start), (size_t)len*sizeof(sop));
1353
	p->slen += len;
1354
	return(ret);
1355
}
1356
 
1357
/*
1358
 - doemit - emit a strip operator
1359
 == static void doemit(register struct parse *p, sop op, size_t opnd);
1360
 *
1361
 * It might seem better to implement this as a macro with a function as
1362
 * hard-case backup, but it's just too big and messy unless there are
1363
 * some changes to the data structures.  Maybe later.
1364
 */
1365
static void
1366
doemit(p, op, opnd)
1367
register struct parse *p;
1368
sop op;
1369
size_t opnd;
1370
{
1371
	/* avoid making error situations worse */
1372
	if (p->error != 0)
1373
		return;
1374
 
1375
	/* deal with oversize operands ("can't happen", more or less) */
1376
	assert(opnd < 1<
1377
 
1378
	/* deal with undersized strip */
1379
	if (p->slen >= p->ssize)
1380
		enlarge(p, (p->ssize+1) / 2 * 3);	/* +50% */
1381
	assert(p->slen < p->ssize);
1382
 
1383
	/* finally, it's all reduced to the easy case */
1384
	p->strip[p->slen++] = SOP(op, opnd);
1385
}
1386
 
1387
/*
1388
 - doinsert - insert a sop into the strip
1389
 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1390
 */
1391
static void
1392
doinsert(p, op, opnd, pos)
1393
register struct parse *p;
1394
sop op;
1395
size_t opnd;
1396
sopno pos;
1397
{
1398
	register sopno sn;
1399
	register sop s;
1400
	register int i;
1401
 
1402
	/* avoid making error situations worse */
1403
	if (p->error != 0)
1404
		return;
1405
 
1406
	sn = HERE();
1407
	EMIT(op, opnd);		/* do checks, ensure space */
1408
	assert(HERE() == sn+1);
1409
	s = p->strip[sn];
1410
 
1411
	/* adjust paren pointers */
1412
	assert(pos > 0);
1413
	for (i = 1; i < NPAREN; i++) {
1414
		if (p->pbegin[i] >= pos) {
1415
			p->pbegin[i]++;
1416
		}
1417
		if (p->pend[i] >= pos) {
1418
			p->pend[i]++;
1419
		}
1420
	}
1421
 
1422
	memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1423
						(HERE()-pos-1)*sizeof(sop));
1424
	p->strip[pos] = s;
1425
}
1426
 
1427
/*
1428
 - dofwd - complete a forward reference
1429
 == static void dofwd(register struct parse *p, sopno pos, sop value);
1430
 */
1431
static void
1432
dofwd(p, pos, value)
1433
register struct parse *p;
1434
register sopno pos;
1435
sop value;
1436
{
1437
	/* avoid making error situations worse */
1438
	if (p->error != 0)
1439
		return;
1440
 
1441
	assert(value < 1<
1442
	p->strip[pos] = OP(p->strip[pos]) | value;
1443
}
1444
 
1445
/*
1446
 - enlarge - enlarge the strip
1447
 == static void enlarge(register struct parse *p, sopno size);
1448
 */
1449
static void
1450
enlarge(p, size)
1451
register struct parse *p;
1452
register sopno size;
1453
{
1454
	register sop *sp;
1455
 
1456
	if (p->ssize >= size)
1457
		return;
1458
 
1459
	sp = (sop *)realloc(p->strip, size*sizeof(sop));
1460
	if (sp == NULL) {
1461
		SETERROR(REG_ESPACE);
1462
		return;
1463
	}
1464
	p->strip = sp;
1465
	p->ssize = size;
1466
}
1467
 
1468
/*
1469
 - stripsnug - compact the strip
1470
 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1471
 */
1472
static void
1473
stripsnug(p, g)
1474
register struct parse *p;
1475
register struct re_guts *g;
1476
{
1477
	g->nstates = p->slen;
1478
	g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1479
	if (g->strip == NULL) {
1480
		SETERROR(REG_ESPACE);
1481
		g->strip = p->strip;
1482
	}
1483
}
1484
 
1485
/*
1486
 - findmust - fill in must and mlen with longest mandatory literal string
1487
 == static void findmust(register struct parse *p, register struct re_guts *g);
1488
 *
1489
 * This algorithm could do fancy things like analyzing the operands of |
1490
 * for common subsequences.  Someday.  This code is simple and finds most
1491
 * of the interesting cases.
1492
 *
1493
 * Note that must and mlen got initialized during setup.
1494
 */
1495
static void
1496
findmust(p, g)
1497
struct parse *p;
1498
register struct re_guts *g;
1499
{
1500
	register sop *scan;
1501
	sop *start;
1502
	register sop *newstart;
1503
	register sopno newlen;
1504
	register sop s;
1505
	register char *cp;
1506
	register sopno i;
1507
 
1508
	/* avoid making error situations worse */
1509
	if (p->error != 0)
1510
		return;
1511
 
1512
	/* find the longest OCHAR sequence in strip */
1513
	newlen = 0;
1514
	scan = g->strip + 1;
1515
	do {
1516
		s = *scan++;
1517
		switch (OP(s)) {
1518
		case OCHAR:		/* sequence member */
1519
			if (newlen == 0)		/* new sequence */
1520
				newstart = scan - 1;
1521
			newlen++;
1522
			break;
1523
		case OPLUS_:		/* things that don't break one */
1524
		case OLPAREN:
1525
		case ORPAREN:
1526
			break;
1527
		case OQUEST_:		/* things that must be skipped */
1528
		case OCH_:
1529
			scan--;
1530
			do {
1531
				scan += OPND(s);
1532
				s = *scan;
1533
				/* assert() interferes w debug printouts */
1534
				if (OP(s) != O_QUEST && OP(s) != O_CH &&
1535
							OP(s) != OOR2) {
1536
					g->iflags |= BAD;
1537
					return;
1538
				}
1539
			} while (OP(s) != O_QUEST && OP(s) != O_CH);
1540
			/* fallthrough */
1541
		default:		/* things that break a sequence */
1542
			if (newlen > g->mlen) {		/* ends one */
1543
				start = newstart;
1544
				g->mlen = newlen;
1545
			}
1546
			newlen = 0;
1547
			break;
1548
		}
1549
	} while (OP(s) != OEND);
1550
 
1551
	if (g->mlen == 0)		/* there isn't one */
1552
		return;
1553
 
1554
	/* turn it into a character string */
1555
	g->must = malloc((size_t)g->mlen + 1);
1556
	if (g->must == NULL) {		/* argh; just forget it */
1557
		g->mlen = 0;
1558
		return;
1559
	}
1560
	cp = g->must;
1561
	scan = start;
1562
	for (i = g->mlen; i > 0; i--) {
1563
		while (OP(s = *scan++) != OCHAR)
1564
			continue;
1565
		assert(cp < g->must + g->mlen);
1566
		*cp++ = (char)OPND(s);
1567
	}
1568
	assert(cp == g->must + g->mlen);
1569
	*cp++ = '\0';		/* just on general principles */
1570
}
1571
 
1572
/*
1573
 - pluscount - count + nesting
1574
 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1575
 */
1576
static sopno			/* nesting depth */
1577
pluscount(p, g)
1578
struct parse *p;
1579
register struct re_guts *g;
1580
{
1581
	register sop *scan;
1582
	register sop s;
1583
	register sopno plusnest = 0;
1584
	register sopno maxnest = 0;
1585
 
1586
	if (p->error != 0)
1587
		return(0);	/* there may not be an OEND */
1588
 
1589
	scan = g->strip + 1;
1590
	do {
1591
		s = *scan++;
1592
		switch (OP(s)) {
1593
		case OPLUS_:
1594
			plusnest++;
1595
			break;
1596
		case O_PLUS:
1597
			if (plusnest > maxnest)
1598
				maxnest = plusnest;
1599
			plusnest--;
1600
			break;
1601
		}
1602
	} while (OP(s) != OEND);
1603
	if (plusnest != 0)
1604
		g->iflags |= BAD;
1605
	return(maxnest);
1606
}