Rev 7693 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7693 | Rev 7696 | ||
---|---|---|---|
1 | (* |
1 | (* |
2 | BSD 2-Clause License |
2 | BSD 2-Clause License |
3 | 3 | ||
4 | Copyright (c) 2018, 2019, Anton Krotov |
4 | Copyright (c) 2018-2019, Anton Krotov |
5 | All rights reserved. |
5 | All rights reserved. |
6 | *) |
6 | *) |
7 | 7 | ||
8 | MODULE SCAN; |
8 | MODULE SCAN; |
9 | 9 | ||
10 | IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS; |
10 | IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS; |
11 | 11 | ||
12 | 12 | ||
13 | CONST |
13 | CONST |
14 | 14 | ||
15 | LEXLEN = 1024; |
15 | LEXLEN = 1024; |
16 | 16 | ||
17 | lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; |
17 | lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; |
18 | lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; |
18 | lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; |
19 | lxEOF* = 8; |
19 | lxEOF* = 8; |
20 | 20 | ||
21 | lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; |
21 | lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; |
22 | lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; |
22 | lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; |
23 | lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; |
23 | lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; |
24 | lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; |
24 | lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; |
25 | lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; |
25 | lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; |
26 | lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; |
26 | lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; |
27 | lxASSIGN* = 45; lxRANGE* = 46; |
27 | lxASSIGN* = 45; lxRANGE* = 46; |
28 | 28 | ||
29 | lxKW = 51; |
29 | lxKW = 51; |
30 | 30 | ||
31 | lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; |
31 | lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; |
32 | lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; |
32 | lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; |
33 | lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; |
33 | lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; |
34 | lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; |
34 | lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; |
35 | lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; |
35 | lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; |
36 | lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; |
36 | lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; |
37 | lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; |
37 | lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; |
38 | lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; |
38 | lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; |
39 | lxWHILE* = 83; |
39 | lxWHILE* = 83; |
40 | 40 | ||
41 | lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; |
41 | lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; |
42 | lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; |
42 | lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; |
43 | lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; |
43 | lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; |
44 | lxERROR13* = -13; |
44 | lxERROR13* = -13; |
45 | 45 | ||
46 | 46 | ||
47 | TYPE |
47 | TYPE |
48 | 48 | ||
49 | LEXSTR* = ARRAY LEXLEN OF CHAR; |
49 | LEXSTR* = ARRAY LEXLEN OF CHAR; |
50 | 50 | ||
51 | IDENT* = POINTER TO RECORD (AVL.DATA) |
51 | IDENT* = POINTER TO RECORD (AVL.DATA) |
52 | 52 | ||
53 | s*: LEXSTR; |
53 | s*: LEXSTR; |
54 | offset*, offsetW*: INTEGER |
54 | offset*, offsetW*: INTEGER; |
- | 55 | key: INTEGER |
|
55 | 56 | ||
56 | END; |
57 | END; |
57 | 58 | ||
58 | POSITION* = RECORD |
59 | POSITION* = RECORD |
59 | 60 | ||
60 | line*, col*: INTEGER |
61 | line*, col*: INTEGER |
61 | 62 | ||
62 | END; |
63 | END; |
63 | 64 | ||
64 | LEX* = RECORD |
65 | LEX* = RECORD |
65 | 66 | ||
66 | s*: LEXSTR; |
67 | s*: LEXSTR; |
67 | length*: INTEGER; |
68 | length*: INTEGER; |
68 | sym*: INTEGER; |
69 | sym*: INTEGER; |
69 | pos*: POSITION; |
70 | pos*: POSITION; |
70 | ident*: IDENT; |
71 | ident*: IDENT; |
71 | string*: IDENT; |
72 | string*: IDENT; |
72 | value*: ARITH.VALUE; |
73 | value*: ARITH.VALUE; |
73 | error*: INTEGER; |
74 | error*: INTEGER; |
74 | 75 | ||
75 | over: BOOLEAN |
76 | over: BOOLEAN |
76 | 77 | ||
77 | END; |
78 | END; |
78 | 79 | ||
79 | SCANNER* = TXT.TEXT; |
80 | SCANNER* = TXT.TEXT; |
80 | - | ||
81 | KEYWORD = ARRAY 10 OF CHAR; |
- | |
82 | 81 | ||
83 | 82 | ||
84 | VAR |
83 | VAR |
85 | 84 | ||
86 | vocabulary: RECORD |
- | |
87 | 85 | idents: AVL.NODE; |
|
88 | KW: ARRAY 33 OF KEYWORD; |
- | |
89 | delimiters: ARRAY 256 OF BOOLEAN; |
- | |
90 | idents: AVL.NODE; |
86 | |
91 | ident: IDENT |
87 | delimiters: ARRAY 256 OF BOOLEAN; |
92 | 88 | ||
93 | END; |
89 | NewIdent: IDENT; |
94 | 90 | ||
95 | upto: BOOLEAN; |
91 | upto: BOOLEAN; |
96 | 92 | ||
97 | 93 | ||
98 | PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; |
94 | PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; |
99 | RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) |
95 | RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) |
100 | END nodecmp; |
96 | END nodecmp; |
101 | 97 | ||
102 | - | ||
103 | PROCEDURE key (VAR lex: LEX); |
- | |
104 | VAR |
- | |
105 | L, R, M: INTEGER; |
- | |
106 | found: BOOLEAN; |
- | |
107 | - | ||
108 | BEGIN |
- | |
109 | L := 0; |
- | |
110 | R := LEN(vocabulary.KW) - 1; |
- | |
111 | found := FALSE; |
- | |
112 | - | ||
113 | REPEAT |
- | |
114 | M := (L + R) DIV 2; |
- | |
115 | - | ||
116 | IF lex.s # vocabulary.KW[M] THEN |
- | |
117 | IF lex.s > vocabulary.KW[M] THEN |
- | |
118 | L := M + 1 |
- | |
119 | ELSE |
- | |
120 | R := M - 1 |
- | |
121 | END |
- | |
122 | ELSE |
- | |
123 | found := TRUE; |
- | |
124 | lex.sym := lxKW + M |
- | |
125 | END |
- | |
126 | UNTIL found OR (L > R) |
- | |
127 | END key; |
- | |
128 | - | ||
129 | 98 | ||
130 | PROCEDURE enterid* (s: LEXSTR): IDENT; |
99 | PROCEDURE enterid* (s: LEXSTR): IDENT; |
131 | VAR |
100 | VAR |
132 | newnode: BOOLEAN; |
101 | newnode: BOOLEAN; |
133 | node: AVL.NODE; |
102 | node: AVL.NODE; |
134 | 103 | ||
135 | BEGIN |
104 | BEGIN |
136 | vocabulary.ident.s := s; |
105 | NewIdent.s := s; |
137 | vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node); |
106 | idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node); |
138 | 107 | ||
139 | IF newnode THEN |
108 | IF newnode THEN |
140 | NEW(vocabulary.ident); |
109 | NEW(NewIdent); |
141 | vocabulary.ident.offset := -1; |
110 | NewIdent.offset := -1; |
- | 111 | NewIdent.offsetW := -1; |
|
142 | vocabulary.ident.offsetW := -1 |
112 | NewIdent.key := 0 |
143 | END |
113 | END |
144 | 114 | ||
145 | RETURN node.data(IDENT) |
115 | RETURN node.data(IDENT) |
146 | END enterid; |
116 | END enterid; |
147 | 117 | ||
148 | 118 | ||
149 | PROCEDURE putchar (VAR lex: LEX; c: CHAR); |
119 | PROCEDURE putchar (VAR lex: LEX; c: CHAR); |
150 | BEGIN |
120 | BEGIN |
151 | IF lex.length < LEXLEN - 1 THEN |
121 | IF lex.length < LEXLEN - 1 THEN |
152 | lex.s[lex.length] := c; |
122 | lex.s[lex.length] := c; |
153 | INC(lex.length); |
123 | INC(lex.length); |
154 | lex.s[lex.length] := 0X |
124 | lex.s[lex.length] := 0X |
155 | ELSE |
125 | ELSE |
156 | lex.over := TRUE |
126 | lex.over := TRUE |
157 | END |
127 | END |
158 | END putchar; |
128 | END putchar; |
159 | 129 | ||
160 | 130 | ||
161 | PROCEDURE nextc (text: TXT.TEXT): CHAR; |
131 | PROCEDURE nextc (text: TXT.TEXT): CHAR; |
162 | BEGIN |
132 | BEGIN |
163 | TXT.next(text) |
133 | TXT.next(text) |
164 | RETURN text.peak |
134 | RETURN text.peak |
165 | END nextc; |
135 | END nextc; |
166 | 136 | ||
167 | 137 | ||
168 | PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); |
138 | PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); |
169 | VAR |
139 | VAR |
170 | c: CHAR; |
140 | c: CHAR; |
171 | 141 | ||
172 | BEGIN |
142 | BEGIN |
173 | c := text.peak; |
143 | c := text.peak; |
174 | ASSERT(S.letter(c)); |
144 | ASSERT(S.letter(c)); |
175 | 145 | ||
176 | WHILE S.letter(c) OR S.digit(c) DO |
146 | WHILE S.letter(c) OR S.digit(c) DO |
177 | putchar(lex, c); |
147 | putchar(lex, c); |
178 | c := nextc(text) |
148 | c := nextc(text) |
179 | END; |
149 | END; |
180 | 150 | ||
181 | IF lex.over THEN |
151 | IF lex.over THEN |
182 | lex.sym := lxERROR06 |
152 | lex.sym := lxERROR06 |
183 | ELSE |
153 | ELSE |
184 | lex.sym := lxIDENT; |
154 | lex.ident := enterid(lex.s); |
- | 155 | IF lex.ident.key # 0 THEN |
|
185 | key(lex) |
156 | lex.sym := lex.ident.key |
186 | END; |
157 | ELSE |
187 | - | ||
188 | IF lex.sym = lxIDENT THEN |
158 | lex.sym := lxIDENT |
189 | lex.ident := enterid(lex.s) |
159 | END |
190 | END |
160 | END |
191 | 161 | ||
192 | END ident; |
162 | END ident; |
193 | 163 | ||
194 | 164 | ||
195 | PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); |
165 | PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); |
196 | VAR |
166 | VAR |
197 | c: CHAR; |
167 | c: CHAR; |
198 | hex: BOOLEAN; |
168 | hex: BOOLEAN; |
199 | error: INTEGER; |
169 | error: INTEGER; |
200 | 170 | ||
201 | BEGIN |
171 | BEGIN |
202 | c := text.peak; |
172 | c := text.peak; |
203 | ASSERT(S.digit(c)); |
173 | ASSERT(S.digit(c)); |
204 | 174 | ||
205 | error := 0; |
175 | error := 0; |
206 | 176 | ||
207 | lex.sym := lxINTEGER; |
177 | lex.sym := lxINTEGER; |
208 | hex := FALSE; |
178 | hex := FALSE; |
209 | 179 | ||
210 | WHILE S.digit(c) DO |
180 | WHILE S.digit(c) DO |
211 | putchar(lex, c); |
181 | putchar(lex, c); |
212 | c := nextc(text) |
182 | c := nextc(text) |
213 | END; |
183 | END; |
214 | 184 | ||
215 | WHILE S.hexdigit(c) DO |
185 | WHILE S.hexdigit(c) DO |
216 | putchar(lex, c); |
186 | putchar(lex, c); |
217 | c := nextc(text); |
187 | c := nextc(text); |
218 | hex := TRUE |
188 | hex := TRUE |
219 | END; |
189 | END; |
220 | 190 | ||
221 | IF c = "H" THEN |
191 | IF c = "H" THEN |
222 | putchar(lex, c); |
192 | putchar(lex, c); |
223 | TXT.next(text); |
193 | TXT.next(text); |
224 | lex.sym := lxHEX |
194 | lex.sym := lxHEX |
225 | 195 | ||
226 | ELSIF c = "X" THEN |
196 | ELSIF c = "X" THEN |
227 | putchar(lex, c); |
197 | putchar(lex, c); |
228 | TXT.next(text); |
198 | TXT.next(text); |
229 | lex.sym := lxCHAR |
199 | lex.sym := lxCHAR |
230 | 200 | ||
231 | ELSIF c = "." THEN |
201 | ELSIF c = "." THEN |
232 | 202 | ||
233 | IF hex THEN |
203 | IF hex THEN |
234 | lex.sym := lxERROR01 |
204 | lex.sym := lxERROR01 |
235 | ELSE |
205 | ELSE |
236 | 206 | ||
237 | c := nextc(text); |
207 | c := nextc(text); |
238 | 208 | ||
239 | IF c # "." THEN |
209 | IF c # "." THEN |
240 | putchar(lex, "."); |
210 | putchar(lex, "."); |
241 | lex.sym := lxFLOAT |
211 | lex.sym := lxFLOAT |
242 | ELSE |
212 | ELSE |
243 | lex.sym := lxINTEGER; |
213 | lex.sym := lxINTEGER; |
244 | text.peak := 7FX; |
214 | text.peak := 7FX; |
245 | upto := TRUE |
215 | upto := TRUE |
246 | END; |
216 | END; |
247 | 217 | ||
248 | WHILE S.digit(c) DO |
218 | WHILE S.digit(c) DO |
249 | putchar(lex, c); |
219 | putchar(lex, c); |
250 | c := nextc(text) |
220 | c := nextc(text) |
251 | END; |
221 | END; |
252 | 222 | ||
253 | IF c = "E" THEN |
223 | IF c = "E" THEN |
254 | 224 | ||
255 | putchar(lex, c); |
225 | putchar(lex, c); |
256 | c := nextc(text); |
226 | c := nextc(text); |
257 | IF (c = "+") OR (c = "-") THEN |
227 | IF (c = "+") OR (c = "-") THEN |
258 | putchar(lex, c); |
228 | putchar(lex, c); |
259 | c := nextc(text) |
229 | c := nextc(text) |
260 | END; |
230 | END; |
261 | 231 | ||
262 | IF S.digit(c) THEN |
232 | IF S.digit(c) THEN |
263 | WHILE S.digit(c) DO |
233 | WHILE S.digit(c) DO |
264 | putchar(lex, c); |
234 | putchar(lex, c); |
265 | c := nextc(text) |
235 | c := nextc(text) |
266 | END |
236 | END |
267 | ELSE |
237 | ELSE |
268 | lex.sym := lxERROR02 |
238 | lex.sym := lxERROR02 |
269 | END |
239 | END |
270 | 240 | ||
271 | END |
241 | END |
272 | 242 | ||
273 | END |
243 | END |
274 | 244 | ||
275 | ELSIF hex THEN |
245 | ELSIF hex THEN |
276 | lex.sym := lxERROR01 |
246 | lex.sym := lxERROR01 |
277 | 247 | ||
278 | END; |
248 | END; |
279 | 249 | ||
280 | IF lex.over & (lex.sym >= 0) THEN |
250 | IF lex.over & (lex.sym >= 0) THEN |
281 | lex.sym := lxERROR07 |
251 | lex.sym := lxERROR07 |
282 | END; |
252 | END; |
283 | 253 | ||
284 | IF lex.sym = lxINTEGER THEN |
254 | IF lex.sym = lxINTEGER THEN |
285 | ARITH.iconv(lex.s, lex.value, error) |
255 | ARITH.iconv(lex.s, lex.value, error) |
286 | ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN |
256 | ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN |
287 | ARITH.hconv(lex.s, lex.value, error) |
257 | ARITH.hconv(lex.s, lex.value, error) |
288 | ELSIF lex.sym = lxFLOAT THEN |
258 | ELSIF lex.sym = lxFLOAT THEN |
289 | ARITH.fconv(lex.s, lex.value, error) |
259 | ARITH.fconv(lex.s, lex.value, error) |
290 | END; |
260 | END; |
291 | 261 | ||
292 | CASE error OF |
262 | CASE error OF |
293 | |0: |
263 | |0: |
294 | |1: lex.sym := lxERROR08 |
264 | |1: lex.sym := lxERROR08 |
295 | |2: lex.sym := lxERROR09 |
265 | |2: lex.sym := lxERROR09 |
296 | |3: lex.sym := lxERROR10 |
266 | |3: lex.sym := lxERROR10 |
297 | |4: lex.sym := lxERROR11 |
267 | |4: lex.sym := lxERROR11 |
298 | |5: lex.sym := lxERROR12 |
268 | |5: lex.sym := lxERROR12 |
299 | END |
269 | END |
300 | 270 | ||
301 | END number; |
271 | END number; |
302 | 272 | ||
303 | 273 | ||
304 | PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); |
274 | PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); |
305 | VAR |
275 | VAR |
306 | c: CHAR; |
276 | c: CHAR; |
307 | n: INTEGER; |
277 | n: INTEGER; |
308 | 278 | ||
309 | BEGIN |
279 | BEGIN |
310 | c := nextc(text); |
280 | c := nextc(text); |
311 | n := 0; |
281 | n := 0; |
312 | 282 | ||
313 | WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO |
283 | WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO |
314 | putchar(lex, c); |
284 | putchar(lex, c); |
315 | c := nextc(text); |
285 | c := nextc(text); |
316 | INC(n) |
286 | INC(n) |
317 | END; |
287 | END; |
318 | 288 | ||
319 | IF c = quot THEN |
289 | IF c = quot THEN |
320 | TXT.next(text); |
290 | TXT.next(text); |
321 | IF lex.over THEN |
291 | IF lex.over THEN |
322 | lex.sym := lxERROR05 |
292 | lex.sym := lxERROR05 |
323 | ELSE |
293 | ELSE |
324 | IF n # 1 THEN |
294 | IF n # 1 THEN |
325 | lex.sym := lxSTRING |
295 | lex.sym := lxSTRING |
326 | ELSE |
296 | ELSE |
327 | lex.sym := lxCHAR; |
297 | lex.sym := lxCHAR; |
328 | ARITH.setChar(lex.value, ORD(lex.s[0])) |
298 | ARITH.setChar(lex.value, ORD(lex.s[0])) |
329 | END |
299 | END |
330 | END |
300 | END |
331 | ELSE |
301 | ELSE |
332 | lex.sym := lxERROR03 |
302 | lex.sym := lxERROR03 |
333 | END; |
303 | END; |
334 | 304 | ||
335 | IF lex.sym = lxSTRING THEN |
305 | IF lex.sym = lxSTRING THEN |
336 | lex.string := enterid(lex.s); |
306 | lex.string := enterid(lex.s); |
337 | lex.value.typ := ARITH.tSTRING; |
307 | lex.value.typ := ARITH.tSTRING; |
338 | lex.value.string := lex.string |
308 | lex.value.string := lex.string |
339 | END |
309 | END |
340 | 310 | ||
341 | END string; |
311 | END string; |
342 | 312 | ||
343 | 313 | ||
344 | PROCEDURE comment (text: TXT.TEXT); |
314 | PROCEDURE comment (text: TXT.TEXT); |
345 | VAR |
315 | VAR |
346 | c: CHAR; |
316 | c: CHAR; |
347 | cond, depth: INTEGER; |
317 | cond, depth: INTEGER; |
348 | 318 | ||
349 | BEGIN |
319 | BEGIN |
350 | cond := 0; |
320 | cond := 0; |
351 | depth := 1; |
321 | depth := 1; |
352 | 322 | ||
353 | REPEAT |
323 | REPEAT |
354 | 324 | ||
355 | c := text.peak; |
325 | c := text.peak; |
356 | TXT.next(text); |
326 | TXT.next(text); |
357 | 327 | ||
358 | IF c = "*" THEN |
328 | IF c = "*" THEN |
359 | IF cond = 1 THEN |
329 | IF cond = 1 THEN |
360 | cond := 0; |
330 | cond := 0; |
361 | INC(depth) |
331 | INC(depth) |
362 | ELSE |
332 | ELSE |
363 | cond := 2 |
333 | cond := 2 |
364 | END |
334 | END |
365 | ELSIF c = ")" THEN |
335 | ELSIF c = ")" THEN |
366 | IF cond = 2 THEN |
336 | IF cond = 2 THEN |
367 | DEC(depth) |
337 | DEC(depth) |
368 | END; |
338 | END; |
369 | cond := 0 |
339 | cond := 0 |
370 | ELSIF c = "(" THEN |
340 | ELSIF c = "(" THEN |
371 | cond := 1 |
341 | cond := 1 |
372 | ELSE |
342 | ELSE |
373 | cond := 0 |
343 | cond := 0 |
374 | END |
344 | END |
375 | 345 | ||
376 | UNTIL (depth = 0) OR text.eof |
346 | UNTIL (depth = 0) OR text.eof |
377 | 347 | ||
378 | END comment; |
348 | END comment; |
379 | 349 | ||
380 | 350 | ||
381 | PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); |
351 | PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); |
382 | BEGIN |
352 | BEGIN |
383 | putchar(lex, c); |
353 | putchar(lex, c); |
384 | c := nextc(text); |
354 | c := nextc(text); |
385 | 355 | ||
386 | CASE lex.s[0] OF |
356 | CASE lex.s[0] OF |
387 | |"+": |
357 | |"+": |
388 | lex.sym := lxPLUS |
358 | lex.sym := lxPLUS |
389 | 359 | ||
390 | |"-": |
360 | |"-": |
391 | lex.sym := lxMINUS |
361 | lex.sym := lxMINUS |
392 | 362 | ||
393 | |"*": |
363 | |"*": |
394 | lex.sym := lxMUL |
364 | lex.sym := lxMUL |
395 | 365 | ||
396 | |"/": |
366 | |"/": |
397 | lex.sym := lxSLASH; |
367 | lex.sym := lxSLASH; |
398 | 368 | ||
399 | IF c = "/" THEN |
369 | IF c = "/" THEN |
400 | lex.sym := lxCOMMENT; |
370 | lex.sym := lxCOMMENT; |
401 | REPEAT |
371 | REPEAT |
402 | TXT.next(text) |
372 | TXT.next(text) |
403 | UNTIL text.eol OR text.eof |
373 | UNTIL text.eol OR text.eof |
404 | END |
374 | END |
405 | 375 | ||
406 | |"~": |
376 | |"~": |
407 | lex.sym := lxNOT |
377 | lex.sym := lxNOT |
408 | 378 | ||
409 | |"&": |
379 | |"&": |
410 | lex.sym := lxAND |
380 | lex.sym := lxAND |
411 | 381 | ||
412 | |".": |
382 | |".": |
413 | lex.sym := lxPOINT; |
383 | lex.sym := lxPOINT; |
414 | 384 | ||
415 | IF c = "." THEN |
385 | IF c = "." THEN |
416 | lex.sym := lxRANGE; |
386 | lex.sym := lxRANGE; |
417 | putchar(lex, c); |
387 | putchar(lex, c); |
418 | TXT.next(text) |
388 | TXT.next(text) |
419 | END |
389 | END |
420 | 390 | ||
421 | |",": |
391 | |",": |
422 | lex.sym := lxCOMMA |
392 | lex.sym := lxCOMMA |
423 | 393 | ||
424 | |";": |
394 | |";": |
425 | lex.sym := lxSEMI |
395 | lex.sym := lxSEMI |
426 | 396 | ||
427 | |"|": |
397 | |"|": |
428 | lex.sym := lxBAR |
398 | lex.sym := lxBAR |
429 | 399 | ||
430 | |"(": |
400 | |"(": |
431 | lex.sym := lxLROUND; |
401 | lex.sym := lxLROUND; |
432 | 402 | ||
433 | IF c = "*" THEN |
403 | IF c = "*" THEN |
434 | lex.sym := lxCOMMENT; |
404 | lex.sym := lxCOMMENT; |
435 | TXT.next(text); |
405 | TXT.next(text); |
436 | comment(text) |
406 | comment(text) |
437 | END |
407 | END |
438 | 408 | ||
439 | |"[": |
409 | |"[": |
440 | lex.sym := lxLSQUARE |
410 | lex.sym := lxLSQUARE |
441 | 411 | ||
442 | |"{": |
412 | |"{": |
443 | lex.sym := lxLCURLY |
413 | lex.sym := lxLCURLY |
444 | 414 | ||
445 | |"^": |
415 | |"^": |
446 | lex.sym := lxCARET |
416 | lex.sym := lxCARET |
447 | 417 | ||
448 | |"=": |
418 | |"=": |
449 | lex.sym := lxEQ |
419 | lex.sym := lxEQ |
450 | 420 | ||
451 | |"#": |
421 | |"#": |
452 | lex.sym := lxNE |
422 | lex.sym := lxNE |
453 | 423 | ||
454 | |"<": |
424 | |"<": |
455 | lex.sym := lxLT; |
425 | lex.sym := lxLT; |
456 | 426 | ||
457 | IF c = "=" THEN |
427 | IF c = "=" THEN |
458 | lex.sym := lxLE; |
428 | lex.sym := lxLE; |
459 | putchar(lex, c); |
429 | putchar(lex, c); |
460 | TXT.next(text) |
430 | TXT.next(text) |
461 | END |
431 | END |
462 | 432 | ||
463 | |">": |
433 | |">": |
464 | lex.sym := lxGT; |
434 | lex.sym := lxGT; |
465 | 435 | ||
466 | IF c = "=" THEN |
436 | IF c = "=" THEN |
467 | lex.sym := lxGE; |
437 | lex.sym := lxGE; |
468 | putchar(lex, c); |
438 | putchar(lex, c); |
469 | TXT.next(text) |
439 | TXT.next(text) |
470 | END |
440 | END |
471 | 441 | ||
472 | |":": |
442 | |":": |
473 | lex.sym := lxCOLON; |
443 | lex.sym := lxCOLON; |
474 | 444 | ||
475 | IF c = "=" THEN |
445 | IF c = "=" THEN |
476 | lex.sym := lxASSIGN; |
446 | lex.sym := lxASSIGN; |
477 | putchar(lex, c); |
447 | putchar(lex, c); |
478 | TXT.next(text) |
448 | TXT.next(text) |
479 | END |
449 | END |
480 | 450 | ||
481 | |")": |
451 | |")": |
482 | lex.sym := lxRROUND |
452 | lex.sym := lxRROUND |
483 | 453 | ||
484 | |"]": |
454 | |"]": |
485 | lex.sym := lxRSQUARE |
455 | lex.sym := lxRSQUARE |
486 | 456 | ||
487 | |"}": |
457 | |"}": |
488 | lex.sym := lxRCURLY |
458 | lex.sym := lxRCURLY |
489 | 459 | ||
490 | END |
460 | END |
491 | 461 | ||
492 | END delimiter; |
462 | END delimiter; |
493 | 463 | ||
494 | 464 | ||
495 | PROCEDURE Next* (text: SCANNER; VAR lex: LEX); |
465 | PROCEDURE Next* (text: SCANNER; VAR lex: LEX); |
496 | VAR |
466 | VAR |
497 | c: CHAR; |
467 | c: CHAR; |
498 | 468 | ||
499 | BEGIN |
469 | BEGIN |
500 | 470 | ||
501 | REPEAT |
471 | REPEAT |
502 | c := text.peak; |
472 | c := text.peak; |
503 | 473 | ||
504 | WHILE S.space(c) DO |
474 | WHILE S.space(c) DO |
505 | c := nextc(text) |
475 | c := nextc(text) |
506 | END; |
476 | END; |
507 | 477 | ||
508 | lex.s[0] := 0X; |
478 | lex.s[0] := 0X; |
509 | lex.length := 0; |
479 | lex.length := 0; |
510 | lex.pos.line := text.line; |
480 | lex.pos.line := text.line; |
511 | lex.pos.col := text.col; |
481 | lex.pos.col := text.col; |
512 | lex.ident := NIL; |
482 | lex.ident := NIL; |
513 | lex.over := FALSE; |
483 | lex.over := FALSE; |
514 | 484 | ||
515 | IF S.letter(c) THEN |
485 | IF S.letter(c) THEN |
516 | ident(text, lex) |
486 | ident(text, lex) |
517 | ELSIF S.digit(c) THEN |
487 | ELSIF S.digit(c) THEN |
518 | number(text, lex) |
488 | number(text, lex) |
519 | ELSIF (c = '"') OR (c = "'") THEN |
489 | ELSIF (c = '"') OR (c = "'") THEN |
520 | string(text, lex, c) |
490 | string(text, lex, c) |
521 | ELSIF vocabulary.delimiters[ORD(c)] THEN |
491 | ELSIF delimiters[ORD(c)] THEN |
522 | delimiter(text, lex, c) |
492 | delimiter(text, lex, c) |
523 | ELSIF c = 0X THEN |
493 | ELSIF c = 0X THEN |
524 | lex.sym := lxEOF; |
494 | lex.sym := lxEOF; |
525 | IF text.eof THEN |
495 | IF text.eof THEN |
526 | INC(lex.pos.col) |
496 | INC(lex.pos.col) |
527 | END |
497 | END |
528 | ELSIF (c = 7FX) & upto THEN |
498 | ELSIF (c = 7FX) & upto THEN |
529 | upto := FALSE; |
499 | upto := FALSE; |
530 | lex.sym := lxRANGE; |
500 | lex.sym := lxRANGE; |
531 | putchar(lex, "."); |
501 | putchar(lex, "."); |
532 | putchar(lex, "."); |
502 | putchar(lex, "."); |
533 | DEC(lex.pos.col); |
503 | DEC(lex.pos.col); |
534 | TXT.next(text) |
504 | TXT.next(text) |
535 | ELSE |
505 | ELSE |
536 | putchar(lex, c); |
506 | putchar(lex, c); |
537 | TXT.next(text); |
507 | TXT.next(text); |
538 | lex.sym := lxERROR04 |
508 | lex.sym := lxERROR04 |
539 | END; |
509 | END; |
540 | 510 | ||
541 | IF lex.sym < 0 THEN |
511 | IF lex.sym < 0 THEN |
542 | lex.error := -lex.sym |
512 | lex.error := -lex.sym |
543 | ELSE |
513 | ELSE |
544 | lex.error := 0 |
514 | lex.error := 0 |
545 | END |
515 | END |
546 | 516 | ||
547 | UNTIL lex.sym # lxCOMMENT |
517 | UNTIL lex.sym # lxCOMMENT |
548 | 518 | ||
549 | END Next; |
519 | END Next; |
550 | 520 | ||
551 | 521 | ||
552 | PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; |
522 | PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; |
553 | RETURN TXT.open(name) |
523 | RETURN TXT.open(name) |
554 | END open; |
524 | END open; |
555 | 525 | ||
556 | 526 | ||
557 | PROCEDURE close* (VAR scanner: SCANNER); |
527 | PROCEDURE close* (VAR scanner: SCANNER); |
558 | BEGIN |
528 | BEGIN |
559 | TXT.close(scanner) |
529 | TXT.close(scanner) |
560 | END close; |
530 | END close; |
561 | 531 | ||
562 | 532 | ||
563 | PROCEDURE init; |
533 | PROCEDURE init; |
564 | VAR |
534 | VAR |
565 | i: INTEGER; |
535 | i: INTEGER; |
566 | delim: ARRAY 23 OF CHAR; |
536 | delim: ARRAY 23 OF CHAR; |
567 | 537 | ||
568 | 538 | ||
- | 539 | PROCEDURE enterkw (key: INTEGER; kw: LEXSTR); |
|
- | 540 | VAR |
|
- | 541 | id: IDENT; |
|
569 | PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD); |
542 | |
570 | BEGIN |
543 | BEGIN |
571 | vocabulary.KW[i] := kw; |
544 | id := enterid(kw); |
572 | INC(i) |
545 | id.key := key |
573 | END enterkw; |
546 | END enterkw; |
574 | 547 | ||
575 | 548 | ||
576 | BEGIN |
549 | BEGIN |
577 | upto := FALSE; |
550 | upto := FALSE; |
578 | 551 | ||
579 | FOR i := 0 TO 255 DO |
552 | FOR i := 0 TO 255 DO |
580 | vocabulary.delimiters[i] := FALSE |
553 | delimiters[i] := FALSE |
581 | END; |
554 | END; |
582 | 555 | ||
583 | delim := "+-*/~&.,;|([{^=#<>:)]}"; |
556 | delim := "+-*/~&.,;|([{^=#<>:)]}"; |
584 | 557 | ||
585 | FOR i := 0 TO LEN(delim) - 2 DO |
558 | FOR i := 0 TO LEN(delim) - 2 DO |
586 | vocabulary.delimiters[ORD(delim[i])] := TRUE |
559 | delimiters[ORD(delim[i])] := TRUE |
587 | END; |
560 | END; |
- | 561 | ||
- | 562 | NEW(NewIdent); |
|
- | 563 | NewIdent.s := ""; |
|
- | 564 | NewIdent.offset := -1; |
|
- | 565 | NewIdent.offsetW := -1; |
|
- | 566 | NewIdent.key := 0; |
|
588 | 567 | ||
- | 568 | idents := NIL; |
|
589 | i := 0; |
569 | |
590 | enterkw(i, "ARRAY"); |
570 | enterkw(lxARRAY, "ARRAY"); |
591 | enterkw(i, "BEGIN"); |
571 | enterkw(lxBEGIN, "BEGIN"); |
592 | enterkw(i, "BY"); |
572 | enterkw(lxBY, "BY"); |
593 | enterkw(i, "CASE"); |
573 | enterkw(lxCASE, "CASE"); |
594 | enterkw(i, "CONST"); |
574 | enterkw(lxCONST, "CONST"); |
595 | enterkw(i, "DIV"); |
575 | enterkw(lxDIV, "DIV"); |
596 | enterkw(i, "DO"); |
576 | enterkw(lxDO, "DO"); |
597 | enterkw(i, "ELSE"); |
577 | enterkw(lxELSE, "ELSE"); |
598 | enterkw(i, "ELSIF"); |
578 | enterkw(lxELSIF, "ELSIF"); |
599 | enterkw(i, "END"); |
579 | enterkw(lxEND, "END"); |
600 | enterkw(i, "FALSE"); |
580 | enterkw(lxFALSE, "FALSE"); |
601 | enterkw(i, "FOR"); |
581 | enterkw(lxFOR, "FOR"); |
602 | enterkw(i, "IF"); |
582 | enterkw(lxIF, "IF"); |
603 | enterkw(i, "IMPORT"); |
583 | enterkw(lxIMPORT, "IMPORT"); |
604 | enterkw(i, "IN"); |
584 | enterkw(lxIN, "IN"); |
605 | enterkw(i, "IS"); |
585 | enterkw(lxIS, "IS"); |
606 | enterkw(i, "MOD"); |
586 | enterkw(lxMOD, "MOD"); |
607 | enterkw(i, "MODULE"); |
587 | enterkw(lxMODULE, "MODULE"); |
608 | enterkw(i, "NIL"); |
588 | enterkw(lxNIL, "NIL"); |
609 | enterkw(i, "OF"); |
589 | enterkw(lxOF, "OF"); |
610 | enterkw(i, "OR"); |
590 | enterkw(lxOR, "OR"); |
611 | enterkw(i, "POINTER"); |
591 | enterkw(lxPOINTER, "POINTER"); |
612 | enterkw(i, "PROCEDURE"); |
592 | enterkw(lxPROCEDURE, "PROCEDURE"); |
613 | enterkw(i, "RECORD"); |
593 | enterkw(lxRECORD, "RECORD"); |
614 | enterkw(i, "REPEAT"); |
594 | enterkw(lxREPEAT, "REPEAT"); |
615 | enterkw(i, "RETURN"); |
595 | enterkw(lxRETURN, "RETURN"); |
616 | enterkw(i, "THEN"); |
596 | enterkw(lxTHEN, "THEN"); |
617 | enterkw(i, "TO"); |
597 | enterkw(lxTO, "TO"); |
618 | enterkw(i, "TRUE"); |
598 | enterkw(lxTRUE, "TRUE"); |
619 | enterkw(i, "TYPE"); |
599 | enterkw(lxTYPE, "TYPE"); |
620 | enterkw(i, "UNTIL"); |
600 | enterkw(lxUNTIL, "UNTIL"); |
621 | enterkw(i, "VAR"); |
601 | enterkw(lxVAR, "VAR"); |
622 | enterkw(i, "WHILE"); |
602 | enterkw(lxWHILE, "WHILE") |
623 | - | ||
624 | NEW(vocabulary.ident); |
- | |
625 | vocabulary.ident.s := ""; |
- | |
626 | vocabulary.ident.offset := -1; |
- | |
627 | vocabulary.ident.offsetW := -1; |
- | |
628 | vocabulary.idents := NIL |
603 | |
629 | END init; |
604 | END init; |
630 | 605 | ||
631 | 606 | ||
632 | BEGIN |
607 | BEGIN |
633 | init |
608 | init |
634 | END SCAN.>>": |
609 | END SCAN.>>": |
635 | >>> |
610 | >>> |