Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1882 clevermous 1
/* Copyright (C) 1995 DJ Delorie, see COPYING.DJ for details */
2
/*
3
 * First, the stuff that ends up in the outside-world include file
4
 = typedef off_t regoff_t;
5
 = typedef struct {
6
 = 	int re_magic;
7
 = 	size_t re_nsub;		// number of parenthesized subexpressions
8
 = 	const char *re_endp;	// end pointer for REG_PEND
9
 = 	struct re_guts *re_g;	// none of your business :-)
10
 = } regex_t;
11
 = typedef struct {
12
 = 	regoff_t rm_so;		// start of match
13
 = 	regoff_t rm_eo;		// end of match
14
 = } regmatch_t;
15
 */
16
/*
17
 * internals of regex_t
18
 */
19
/*
 * Magic number built from the characters 'r' (with bit 0200 toggled) and 'e'.
 * NOTE(review): presumably the value kept in regex_t.re_magic to mark a
 * compiled pattern -- confirm against regcomp().
 */
#define	MAGIC1	((('r'^0200)<<8) | 'e')
20
 
21
/*
22
 * The internal representation is a *strip*, a sequence of
23
 * operators ending with an endmarker.  (Some terminology etc. is a
24
 * historical relic of earlier versions which used multiple strips.)
25
 * Certain oddities in the representation are there to permit running
26
 * the machinery backwards; in particular, any deviation from sequential
27
 * flow must be marked at both its source and its destination.  Some
28
 * fine points:
29
 *
30
 * - OPLUS_ and O_PLUS are *inside* the loop they create.
31
 * - OQUEST_ and O_QUEST are *outside* the bypass they create.
32
 * - OCH_ and O_CH are *outside* the multi-way branch they create, while
33
 *   OOR1 and OOR2 are respectively the end and the beginning of one of
34
 *   the branches.  Note that there is an implicit OOR2 following OCH_
35
 *   and an implicit OOR1 preceding O_CH.
36
 *
37
 * In state representations, an operator's bit is on to signify a state
38
 * immediately *preceding* "execution" of that operator.
39
 */
40
/*
 * A strip element ("sop") carries an operator code in the bits selected by
 * OPRMASK (the five bits above bit 26) and an operand in the low 27 bits
 * selected by OPDMASK.
 */
typedef unsigned long sop;	/* strip operator */
typedef long sopno;		/* strip position or count of sops */
#define	OPRMASK	0xf8000000
#define	OPDMASK	0x07ffffff
#define	OPSHIFT	((unsigned)27)
#define	OP(n)	((n)&OPRMASK)		/* operator part of a sop */
#define	OPND(n)	((n)&OPDMASK)		/* operand part of a sop */
#define	SOP(op, opnd)	((op)|(opnd))	/* combine operator and operand */
/*
 * The operator codes are built from unsigned long constants so that they
 * have the same type as sop.  With a plain int on the left-hand side,
 * codes 16 and above would overflow a 32-bit int when shifted by OPSHIFT,
 * which is undefined behavior and yields a sign-extended (wrong) value
 * where long is wider than int.
 */
/* operators			   meaning	operand			*/
/*						(back, fwd are offsets)	*/
#define	OEND	(1UL<<OPSHIFT)	/* endmarker	-			*/
#define	OCHAR	(2UL<<OPSHIFT)	/* character	unsigned char		*/
#define	OBOL	(3UL<<OPSHIFT)	/* left anchor	-			*/
#define	OEOL	(4UL<<OPSHIFT)	/* right anchor	-			*/
#define	OANY	(5UL<<OPSHIFT)	/* .		-			*/
#define	OANYOF	(6UL<<OPSHIFT)	/* [...]	set number		*/
#define	OBACK_	(7UL<<OPSHIFT)	/* begin \d	paren number		*/
#define	O_BACK	(8UL<<OPSHIFT)	/* end \d	paren number		*/
#define	OPLUS_	(9UL<<OPSHIFT)	/* + prefix	fwd to suffix		*/
#define	O_PLUS	(10UL<<OPSHIFT)	/* + suffix	back to prefix		*/
#define	OQUEST_	(11UL<<OPSHIFT)	/* ? prefix	fwd to suffix		*/
#define	O_QUEST	(12UL<<OPSHIFT)	/* ? suffix	back to prefix		*/
#define	OLPAREN	(13UL<<OPSHIFT)	/* (		fwd to )		*/
#define	ORPAREN	(14UL<<OPSHIFT)	/* )		back to (		*/
#define	OCH_	(15UL<<OPSHIFT)	/* begin choice	fwd to OOR2		*/
#define	OOR1	(16UL<<OPSHIFT)	/* | pt. 1	back to OOR1 or OCH_	*/
#define	OOR2	(17UL<<OPSHIFT)	/* | pt. 2	fwd to OOR2 or O_CH	*/
#define	O_CH	(18UL<<OPSHIFT)	/* end choice	back to OOR1		*/
#define	OBOW	(19UL<<OPSHIFT)	/* begin word	-			*/
#define	OEOW	(20UL<<OPSHIFT)	/* end word	-			*/
70
 
71
/*
72
 * Structure for [] character-set representation.  Character sets are
73
 * done as bit vectors, grouped 8 to a byte vector for compactness.
74
 * The individual set therefore has both a pointer to the byte vector
75
 * and a mask to pick out the relevant bit of each byte.  A hash code
76
 * simplifies testing whether two sets could be identical.
77
 *
78
 * This will get trickier for multicharacter collating elements.  As
79
 * preliminary hooks for dealing with such things, we also carry along
80
 * a string of multi-character elements, and decide the size of the
81
 * vectors at run time.
82
 */
83
typedef struct {
84
	uch *ptr;		/* -> uch [csetsize] */
85
	uch mask;		/* bit within array */
86
	uch hash;		/* hash code */
87
	size_t smultis;
88
	char *multis;		/* -> char[smulti]  ab\0cd\0ef\0\0 */
89
} cset;
90
/*
 * Set-membership helpers for a cset.
 *
 * CHadd and CHsub are unsafe: they are comma expressions that evaluate
 * both `cs` and `c` more than once, so neither argument may have side
 * effects.  CHIN doesn't yield 0/1: it returns the masked byte, which is
 * zero when the character is absent and the set's mask bit otherwise.
 */
#define	CHadd(cs, c)	((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
#define	CHsub(cs, c)	((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define	CHIN(cs, c)	((cs)->ptr[(uch)(c)] & (cs)->mask)
/* multi-character element operations; these forward to regcomp() internal fns */
#define	MCadd(p, cs, cp)	mcadd(p, cs, cp)
#define	MCsub(p, cs, cp)	mcsub(p, cs, cp)
#define	MCin(p, cs, cp)	mcin(p, cs, cp)
97
 
98
/* stuff for character categories */
99
/* element type of the category table (categories / catspace in struct re_guts) */
typedef unsigned char cat_t;
100
 
101
/*
102
 * main compiled-expression structure
103
 */
104
struct re_guts {
105
	int magic;
106
#		define	MAGIC2	((('R'^0200)<<8)|'E')
107
	sop *strip;		/* malloced area for strip */
108
	int csetsize;		/* number of bits in a cset vector */
109
	int ncsets;		/* number of csets in use */
110
	cset *sets;		/* -> cset [ncsets] */
111
	uch *setbits;		/* -> uch[csetsize][ncsets/CHAR_BIT] */
112
	int cflags;		/* copy of regcomp() cflags argument */
113
	sopno nstates;		/* = number of sops */
114
	sopno firststate;	/* the initial OEND (normally 0) */
115
	sopno laststate;	/* the final OEND */
116
	int iflags;		/* internal flags */
117
#		define	USEBOL	01	/* used ^ */
118
#		define	USEEOL	02	/* used $ */
119
#		define	BAD	04	/* something wrong */
120
	int nbol;		/* number of ^ used */
121
	int neol;		/* number of $ used */
122
	int ncategories;	/* how many character categories */
123
	cat_t *categories;	/* ->catspace[-CHAR_MIN] */
124
	char *must;		/* match must contain this string */
125
	int mlen;		/* length of must */
126
	size_t nsub;		/* copy of re_nsub */
127
	int backrefs;		/* does it use back references? */
128
	sopno nplus;		/* how deep does it nest +s? */
129
	/* catspace must be last */
130
	cat_t catspace[1];	/* actually [NC] */
131
};
132
 
133
/* misc utilities */
134
#define	OUT	(CHAR_MAX+1)	/* a non-character value */
/*
 * True when c is a word character (alphanumeric or underscore).  The
 * argument is converted to unsigned char before it reaches isalnum(),
 * whose behavior is undefined for negative values other than EOF; a plain
 * char holding a byte above 0x7f would otherwise be passed as a negative
 * int.  c is evaluated twice, so it must not have side effects.
 */
#define	ISWORD(c)	(isalnum((unsigned char)(c)) || (c) == '_')