Rev 7983 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 7983 | Rev 8097 | ||
---|---|---|---|
1 | (* |
1 | (* |
2 | BSD 2-Clause License |
2 | BSD 2-Clause License |
3 | 3 | ||
4 | Copyright (c) 2018-2019, Anton Krotov |
4 | Copyright (c) 2018-2020, Anton Krotov |
5 | All rights reserved. |
5 | All rights reserved. |
6 | *) |
6 | *) |
7 | 7 | ||
8 | MODULE SCAN; |
8 | MODULE SCAN; |
9 | 9 | ||
10 | IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS; |
10 | IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, ERRORS, LISTS; |
11 | 11 | ||
12 | 12 | ||
13 | CONST |
13 | CONST |
14 | 14 | ||
15 | LEXLEN = 1024; |
15 | LEXLEN = 1024; |
16 | 16 | ||
17 | lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; |
17 | lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; |
18 | lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; |
18 | lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; |
19 | lxEOF* = 8; |
19 | lxEOF* = 8; |
20 | 20 | ||
21 | lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; |
21 | lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; |
22 | lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; |
22 | lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; |
23 | lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; |
23 | lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; |
24 | lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; |
24 | lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; |
25 | lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; |
25 | lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; |
26 | lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; |
26 | lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; |
27 | lxASSIGN* = 45; lxRANGE* = 46; |
27 | lxASSIGN* = 45; lxRANGE* = 46; |
28 | 28 | ||
29 | lxKW = 51; |
29 | lxKW = 51; |
30 | 30 | ||
31 | lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; |
31 | lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; |
32 | lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; |
32 | lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; |
33 | lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; |
33 | lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; |
34 | lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; |
34 | lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; |
35 | lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; |
35 | lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; |
36 | lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; |
36 | lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; |
37 | lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; |
37 | lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; |
38 | lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; |
38 | lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; |
39 | lxWHILE* = 83; |
39 | lxWHILE* = 83; |
40 | 40 | ||
41 | lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; |
41 | lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; |
42 | lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; |
42 | lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; |
43 | lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; |
43 | lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; |
44 | lxERROR13* = -13; |
44 | lxERROR13* = -13; |
45 | 45 | ||
46 | 46 | ||
47 | TYPE |
47 | TYPE |
48 | 48 | ||
49 | LEXSTR* = ARRAY LEXLEN OF CHAR; |
49 | LEXSTR* = ARRAY LEXLEN OF CHAR; |
- | 50 | ||
- | 51 | DEF = POINTER TO RECORD (LISTS.ITEM) |
|
- | 52 | ||
- | 53 | ident: LEXSTR |
|
- | 54 | ||
- | 55 | END; |
|
50 | 56 | ||
51 | IDENT* = POINTER TO RECORD (AVL.DATA) |
57 | IDENT* = POINTER TO RECORD (AVL.DATA) |
52 | 58 | ||
53 | s*: LEXSTR; |
59 | s*: LEXSTR; |
54 | offset*, offsetW*: INTEGER; |
60 | offset*, offsetW*: INTEGER; |
55 | key: INTEGER |
61 | key: INTEGER |
56 | 62 | ||
57 | END; |
63 | END; |
58 | 64 | ||
59 | POSITION* = RECORD |
65 | POSITION* = RECORD |
60 | 66 | ||
61 | line*, col*: INTEGER |
67 | line*, col*: INTEGER |
62 | 68 | ||
63 | END; |
69 | END; |
64 | 70 | ||
65 | LEX* = RECORD |
71 | LEX* = RECORD |
66 | 72 | ||
67 | s*: LEXSTR; |
73 | s*: LEXSTR; |
68 | length*: INTEGER; |
74 | length*: INTEGER; |
69 | sym*: INTEGER; |
75 | sym*: INTEGER; |
70 | pos*: POSITION; |
76 | pos*: POSITION; |
71 | ident*: IDENT; |
77 | ident*: IDENT; |
72 | string*: IDENT; |
78 | string*: IDENT; |
73 | value*: ARITH.VALUE; |
79 | value*: ARITH.VALUE; |
74 | error*: INTEGER; |
80 | error*: INTEGER; |
75 | 81 | ||
76 | over: BOOLEAN |
82 | over: BOOLEAN |
77 | 83 | ||
78 | END; |
84 | END; |
79 | 85 | ||
80 | SCANNER* = TXT.TEXT; |
86 | SCANNER* = TXT.TEXT; |
81 | 87 | ||
82 | 88 | ||
83 | VAR |
89 | VAR |
84 | 90 | ||
85 | idents: AVL.NODE; |
91 | idents: AVL.NODE; |
86 | 92 | ||
87 | delimiters: ARRAY 256 OF BOOLEAN; |
93 | delimiters: ARRAY 256 OF BOOLEAN; |
88 | 94 | ||
89 | NewIdent: IDENT; |
95 | NewIdent: IDENT; |
90 | 96 | ||
- | 97 | upto, LowerCase, _if: BOOLEAN; |
|
- | 98 | ||
91 | upto: BOOLEAN; |
99 | def: LISTS.LIST; |
92 | 100 | ||
93 | 101 | ||
94 | PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; |
102 | PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; |
95 | RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) |
103 | RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) |
96 | END nodecmp; |
104 | END nodecmp; |
97 | 105 | ||
98 | 106 | ||
99 | PROCEDURE enterid* (s: LEXSTR): IDENT; |
107 | PROCEDURE enterid* (s: LEXSTR): IDENT; |
100 | VAR |
108 | VAR |
101 | newnode: BOOLEAN; |
109 | newnode: BOOLEAN; |
102 | node: AVL.NODE; |
110 | node: AVL.NODE; |
103 | 111 | ||
104 | BEGIN |
112 | BEGIN |
105 | NewIdent.s := s; |
113 | NewIdent.s := s; |
106 | idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node); |
114 | idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node); |
107 | 115 | ||
108 | IF newnode THEN |
116 | IF newnode THEN |
109 | NEW(NewIdent); |
117 | NEW(NewIdent); |
110 | NewIdent.offset := -1; |
118 | NewIdent.offset := -1; |
111 | NewIdent.offsetW := -1; |
119 | NewIdent.offsetW := -1; |
112 | NewIdent.key := 0 |
120 | NewIdent.key := 0 |
113 | END |
121 | END |
114 | 122 | ||
115 | RETURN node.data(IDENT) |
123 | RETURN node.data(IDENT) |
116 | END enterid; |
124 | END enterid; |
117 | 125 | ||
118 | 126 | ||
119 | PROCEDURE putchar (VAR lex: LEX; c: CHAR); |
127 | PROCEDURE putchar (VAR lex: LEX; c: CHAR); |
120 | BEGIN |
128 | BEGIN |
121 | IF lex.length < LEXLEN - 1 THEN |
129 | IF lex.length < LEXLEN - 1 THEN |
122 | lex.s[lex.length] := c; |
130 | lex.s[lex.length] := c; |
123 | INC(lex.length); |
131 | INC(lex.length); |
124 | lex.s[lex.length] := 0X |
132 | lex.s[lex.length] := 0X |
125 | ELSE |
133 | ELSE |
126 | lex.over := TRUE |
134 | lex.over := TRUE |
127 | END |
135 | END |
128 | END putchar; |
136 | END putchar; |
129 | 137 | ||
130 | 138 | ||
131 | PROCEDURE nextc (text: TXT.TEXT): CHAR; |
139 | PROCEDURE nextc (text: TXT.TEXT): CHAR; |
132 | BEGIN |
140 | BEGIN |
133 | TXT.next(text) |
141 | TXT.next(text) |
134 | RETURN text.peak |
142 | RETURN text.peak |
135 | END nextc; |
143 | END nextc; |
136 | 144 | ||
137 | 145 | ||
138 | PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); |
146 | PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); |
139 | VAR |
147 | VAR |
140 | c: CHAR; |
148 | c: CHAR; |
141 | 149 | ||
142 | BEGIN |
150 | BEGIN |
143 | c := text.peak; |
151 | c := text.peak; |
144 | ASSERT(S.letter(c)); |
152 | ASSERT(S.letter(c)); |
145 | 153 | ||
146 | WHILE S.letter(c) OR S.digit(c) DO |
154 | WHILE S.letter(c) OR S.digit(c) DO |
147 | putchar(lex, c); |
155 | putchar(lex, c); |
148 | c := nextc(text) |
156 | c := nextc(text) |
149 | END; |
157 | END; |
150 | 158 | ||
151 | IF lex.over THEN |
159 | IF lex.over THEN |
152 | lex.sym := lxERROR06 |
160 | lex.sym := lxERROR06 |
153 | ELSE |
161 | ELSE |
154 | lex.ident := enterid(lex.s); |
162 | lex.ident := enterid(lex.s); |
155 | IF lex.ident.key # 0 THEN |
163 | IF lex.ident.key # 0 THEN |
156 | lex.sym := lex.ident.key |
164 | lex.sym := lex.ident.key |
157 | ELSE |
165 | ELSE |
158 | lex.sym := lxIDENT |
166 | lex.sym := lxIDENT |
159 | END |
167 | END |
160 | END |
168 | END |
161 | 169 | ||
162 | END ident; |
170 | END ident; |
163 | 171 | ||
164 | 172 | ||
165 | PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); |
173 | PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); |
166 | VAR |
174 | VAR |
167 | c: CHAR; |
175 | c: CHAR; |
168 | hex: BOOLEAN; |
176 | hex: BOOLEAN; |
169 | error: INTEGER; |
177 | error, sym: INTEGER; |
170 | 178 | ||
171 | BEGIN |
179 | BEGIN |
172 | c := text.peak; |
180 | c := text.peak; |
173 | ASSERT(S.digit(c)); |
181 | ASSERT(S.digit(c)); |
174 | 182 | ||
175 | error := 0; |
183 | error := 0; |
176 | 184 | ||
177 | lex.sym := lxINTEGER; |
185 | sym := lxINTEGER; |
178 | hex := FALSE; |
186 | hex := FALSE; |
179 | 187 | ||
180 | WHILE S.digit(c) DO |
188 | WHILE S.digit(c) DO |
181 | putchar(lex, c); |
189 | putchar(lex, c); |
182 | c := nextc(text) |
190 | c := nextc(text) |
183 | END; |
191 | END; |
184 | 192 | ||
185 | WHILE S.hexdigit(c) DO |
193 | WHILE S.hexdigit(c) DO |
186 | putchar(lex, c); |
194 | putchar(lex, c); |
187 | c := nextc(text); |
195 | c := nextc(text); |
188 | hex := TRUE |
196 | hex := TRUE |
189 | END; |
197 | END; |
190 | 198 | ||
191 | IF c = "H" THEN |
199 | IF c = "H" THEN |
192 | putchar(lex, c); |
200 | putchar(lex, c); |
193 | TXT.next(text); |
201 | TXT.next(text); |
194 | lex.sym := lxHEX |
202 | sym := lxHEX |
195 | 203 | ||
196 | ELSIF c = "X" THEN |
204 | ELSIF c = "X" THEN |
197 | putchar(lex, c); |
205 | putchar(lex, c); |
198 | TXT.next(text); |
206 | TXT.next(text); |
199 | lex.sym := lxCHAR |
207 | sym := lxCHAR |
200 | 208 | ||
201 | ELSIF c = "." THEN |
209 | ELSIF c = "." THEN |
202 | 210 | ||
203 | IF hex THEN |
211 | IF hex THEN |
204 | lex.sym := lxERROR01 |
212 | sym := lxERROR01 |
205 | ELSE |
213 | ELSE |
206 | 214 | ||
207 | c := nextc(text); |
215 | c := nextc(text); |
208 | 216 | ||
209 | IF c # "." THEN |
217 | IF c # "." THEN |
210 | putchar(lex, "."); |
218 | putchar(lex, "."); |
211 | lex.sym := lxFLOAT |
219 | sym := lxFLOAT |
212 | ELSE |
220 | ELSE |
213 | lex.sym := lxINTEGER; |
221 | sym := lxINTEGER; |
214 | text.peak := 7FX; |
222 | text.peak := 7FX; |
215 | upto := TRUE |
223 | upto := TRUE |
216 | END; |
224 | END; |
217 | 225 | ||
218 | WHILE S.digit(c) DO |
226 | WHILE S.digit(c) DO |
219 | putchar(lex, c); |
227 | putchar(lex, c); |
220 | c := nextc(text) |
228 | c := nextc(text) |
221 | END; |
229 | END; |
222 | 230 | ||
223 | IF c = "E" THEN |
231 | IF c = "E" THEN |
224 | 232 | ||
225 | putchar(lex, c); |
233 | putchar(lex, c); |
226 | c := nextc(text); |
234 | c := nextc(text); |
227 | IF (c = "+") OR (c = "-") THEN |
235 | IF (c = "+") OR (c = "-") THEN |
228 | putchar(lex, c); |
236 | putchar(lex, c); |
229 | c := nextc(text) |
237 | c := nextc(text) |
230 | END; |
238 | END; |
231 | 239 | ||
232 | IF S.digit(c) THEN |
240 | IF S.digit(c) THEN |
233 | WHILE S.digit(c) DO |
241 | WHILE S.digit(c) DO |
234 | putchar(lex, c); |
242 | putchar(lex, c); |
235 | c := nextc(text) |
243 | c := nextc(text) |
236 | END |
244 | END |
237 | ELSE |
245 | ELSE |
238 | lex.sym := lxERROR02 |
246 | sym := lxERROR02 |
239 | END |
247 | END |
240 | 248 | ||
241 | END |
249 | END |
242 | 250 | ||
243 | END |
251 | END |
244 | 252 | ||
245 | ELSIF hex THEN |
253 | ELSIF hex THEN |
246 | lex.sym := lxERROR01 |
254 | sym := lxERROR01 |
247 | 255 | ||
248 | END; |
256 | END; |
249 | 257 | ||
250 | IF lex.over & (lex.sym >= 0) THEN |
258 | IF lex.over & (sym >= 0) THEN |
251 | lex.sym := lxERROR07 |
259 | sym := lxERROR07 |
252 | END; |
260 | END; |
253 | 261 | ||
254 | IF lex.sym = lxINTEGER THEN |
262 | IF sym = lxINTEGER THEN |
255 | ARITH.iconv(lex.s, lex.value, error) |
263 | ARITH.iconv(lex.s, lex.value, error) |
256 | ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN |
264 | ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN |
257 | ARITH.hconv(lex.s, lex.value, error) |
265 | ARITH.hconv(lex.s, lex.value, error) |
258 | ELSIF lex.sym = lxFLOAT THEN |
266 | ELSIF sym = lxFLOAT THEN |
259 | ARITH.fconv(lex.s, lex.value, error) |
267 | ARITH.fconv(lex.s, lex.value, error) |
260 | END; |
268 | END; |
261 | 269 | ||
262 | CASE error OF |
270 | CASE error OF |
263 | |0: |
271 | |0: |
264 | |1: lex.sym := lxERROR08 |
272 | |1: sym := lxERROR08 |
265 | |2: lex.sym := lxERROR09 |
273 | |2: sym := lxERROR09 |
266 | |3: lex.sym := lxERROR10 |
274 | |3: sym := lxERROR10 |
267 | |4: lex.sym := lxERROR11 |
275 | |4: sym := lxERROR11 |
268 | |5: lex.sym := lxERROR12 |
276 | |5: sym := lxERROR12 |
269 | END |
277 | END; |
- | 278 | ||
270 | 279 | lex.sym := sym |
|
271 | END number; |
280 | END number; |
272 | 281 | ||
273 | 282 | ||
274 | PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); |
283 | PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); |
275 | VAR |
284 | VAR |
276 | c: CHAR; |
285 | c: CHAR; |
277 | n: INTEGER; |
286 | n: INTEGER; |
278 | 287 | ||
279 | BEGIN |
288 | BEGIN |
280 | c := nextc(text); |
289 | c := nextc(text); |
281 | n := 0; |
290 | n := 0; |
282 | 291 | ||
283 | WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO |
292 | WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO |
284 | putchar(lex, c); |
293 | putchar(lex, c); |
285 | c := nextc(text); |
294 | c := nextc(text); |
286 | INC(n) |
295 | INC(n) |
287 | END; |
296 | END; |
288 | 297 | ||
289 | IF c = quot THEN |
298 | IF c = quot THEN |
290 | TXT.next(text); |
299 | TXT.next(text); |
291 | IF lex.over THEN |
300 | IF lex.over THEN |
292 | lex.sym := lxERROR05 |
301 | lex.sym := lxERROR05 |
293 | ELSE |
302 | ELSE |
294 | IF n # 1 THEN |
303 | IF n # 1 THEN |
295 | lex.sym := lxSTRING |
304 | lex.sym := lxSTRING |
296 | ELSE |
305 | ELSE |
297 | lex.sym := lxCHAR; |
306 | lex.sym := lxCHAR; |
298 | ARITH.setChar(lex.value, ORD(lex.s[0])) |
307 | ARITH.setChar(lex.value, ORD(lex.s[0])) |
299 | END |
308 | END |
300 | END |
309 | END |
301 | ELSE |
310 | ELSE |
302 | lex.sym := lxERROR03 |
311 | lex.sym := lxERROR03 |
303 | END; |
312 | END; |
304 | 313 | ||
305 | IF lex.sym = lxSTRING THEN |
314 | IF lex.sym = lxSTRING THEN |
306 | lex.string := enterid(lex.s); |
315 | lex.string := enterid(lex.s); |
307 | lex.value.typ := ARITH.tSTRING; |
316 | lex.value.typ := ARITH.tSTRING; |
308 | lex.value.string := lex.string |
317 | lex.value.string := lex.string |
309 | END |
318 | END |
310 | 319 | ||
311 | END string; |
320 | END string; |
312 | 321 | ||
313 | 322 | ||
314 | PROCEDURE comment (text: TXT.TEXT); |
323 | PROCEDURE comment (text: TXT.TEXT); |
315 | VAR |
324 | VAR |
316 | c: CHAR; |
325 | c: CHAR; |
317 | cond, depth: INTEGER; |
326 | cond, depth: INTEGER; |
318 | 327 | ||
319 | BEGIN |
328 | BEGIN |
320 | cond := 0; |
329 | cond := 0; |
321 | depth := 1; |
330 | depth := 1; |
322 | 331 | ||
323 | REPEAT |
332 | REPEAT |
324 | 333 | ||
325 | c := text.peak; |
334 | c := text.peak; |
326 | TXT.next(text); |
335 | TXT.next(text); |
327 | 336 | ||
328 | IF c = "*" THEN |
337 | IF c = "*" THEN |
329 | IF cond = 1 THEN |
338 | IF cond = 1 THEN |
330 | cond := 0; |
339 | cond := 0; |
331 | INC(depth) |
340 | INC(depth) |
332 | ELSE |
341 | ELSE |
333 | cond := 2 |
342 | cond := 2 |
334 | END |
343 | END |
335 | ELSIF c = ")" THEN |
344 | ELSIF c = ")" THEN |
336 | IF cond = 2 THEN |
345 | IF cond = 2 THEN |
337 | DEC(depth) |
346 | DEC(depth) |
338 | END; |
347 | END; |
339 | cond := 0 |
348 | cond := 0 |
340 | ELSIF c = "(" THEN |
349 | ELSIF c = "(" THEN |
341 | cond := 1 |
350 | cond := 1 |
342 | ELSE |
351 | ELSE |
343 | cond := 0 |
352 | cond := 0 |
344 | END |
353 | END |
345 | 354 | ||
346 | UNTIL (depth = 0) OR text.eof |
355 | UNTIL (depth = 0) OR text.eof |
347 | 356 | ||
348 | END comment; |
357 | END comment; |
349 | 358 | ||
350 | 359 | ||
351 | PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); |
360 | PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); |
- | 361 | VAR |
|
- | 362 | sym: INTEGER; |
|
- | 363 | ||
352 | BEGIN |
364 | BEGIN |
353 | putchar(lex, c); |
365 | putchar(lex, c); |
354 | c := nextc(text); |
366 | c := nextc(text); |
355 | 367 | ||
356 | CASE lex.s[0] OF |
368 | CASE lex.s[0] OF |
357 | |"+": |
369 | |"+": |
358 | lex.sym := lxPLUS |
370 | sym := lxPLUS |
359 | 371 | ||
360 | |"-": |
372 | |"-": |
361 | lex.sym := lxMINUS |
373 | sym := lxMINUS |
362 | 374 | ||
363 | |"*": |
375 | |"*": |
364 | lex.sym := lxMUL |
376 | sym := lxMUL |
365 | 377 | ||
366 | |"/": |
378 | |"/": |
367 | lex.sym := lxSLASH; |
379 | sym := lxSLASH; |
368 | 380 | ||
369 | IF c = "/" THEN |
381 | IF c = "/" THEN |
370 | lex.sym := lxCOMMENT; |
382 | sym := lxCOMMENT; |
371 | REPEAT |
383 | REPEAT |
372 | TXT.next(text) |
384 | TXT.next(text) |
373 | UNTIL text.eol OR text.eof |
385 | UNTIL text.eol OR text.eof |
374 | END |
386 | END |
375 | 387 | ||
376 | |"~": |
388 | |"~": |
377 | lex.sym := lxNOT |
389 | sym := lxNOT |
378 | 390 | ||
379 | |"&": |
391 | |"&": |
380 | lex.sym := lxAND |
392 | sym := lxAND |
381 | 393 | ||
382 | |".": |
394 | |".": |
383 | lex.sym := lxPOINT; |
395 | sym := lxPOINT; |
384 | 396 | ||
385 | IF c = "." THEN |
397 | IF c = "." THEN |
386 | lex.sym := lxRANGE; |
398 | sym := lxRANGE; |
387 | putchar(lex, c); |
399 | putchar(lex, c); |
388 | TXT.next(text) |
400 | TXT.next(text) |
389 | END |
401 | END |
390 | 402 | ||
391 | |",": |
403 | |",": |
392 | lex.sym := lxCOMMA |
404 | sym := lxCOMMA |
393 | 405 | ||
394 | |";": |
406 | |";": |
395 | lex.sym := lxSEMI |
407 | sym := lxSEMI |
396 | 408 | ||
397 | |"|": |
409 | |"|": |
398 | lex.sym := lxBAR |
410 | sym := lxBAR |
399 | 411 | ||
400 | |"(": |
412 | |"(": |
401 | lex.sym := lxLROUND; |
413 | sym := lxLROUND; |
402 | 414 | ||
403 | IF c = "*" THEN |
415 | IF c = "*" THEN |
404 | lex.sym := lxCOMMENT; |
416 | sym := lxCOMMENT; |
405 | TXT.next(text); |
417 | TXT.next(text); |
406 | comment(text) |
418 | comment(text) |
407 | END |
419 | END |
408 | 420 | ||
409 | |"[": |
421 | |"[": |
410 | lex.sym := lxLSQUARE |
422 | sym := lxLSQUARE |
411 | 423 | ||
412 | |"{": |
424 | |"{": |
413 | lex.sym := lxLCURLY |
425 | sym := lxLCURLY |
414 | 426 | ||
415 | |"^": |
427 | |"^": |
416 | lex.sym := lxCARET |
428 | sym := lxCARET |
417 | 429 | ||
418 | |"=": |
430 | |"=": |
419 | lex.sym := lxEQ |
431 | sym := lxEQ |
420 | 432 | ||
421 | |"#": |
433 | |"#": |
422 | lex.sym := lxNE |
434 | sym := lxNE |
423 | 435 | ||
424 | |"<": |
436 | |"<": |
425 | lex.sym := lxLT; |
437 | sym := lxLT; |
426 | 438 | ||
427 | IF c = "=" THEN |
439 | IF c = "=" THEN |
428 | lex.sym := lxLE; |
440 | sym := lxLE; |
429 | putchar(lex, c); |
441 | putchar(lex, c); |
430 | TXT.next(text) |
442 | TXT.next(text) |
431 | END |
443 | END |
432 | 444 | ||
433 | |">": |
445 | |">": |
434 | lex.sym := lxGT; |
446 | sym := lxGT; |
435 | 447 | ||
436 | IF c = "=" THEN |
448 | IF c = "=" THEN |
437 | lex.sym := lxGE; |
449 | sym := lxGE; |
438 | putchar(lex, c); |
450 | putchar(lex, c); |
439 | TXT.next(text) |
451 | TXT.next(text) |
440 | END |
452 | END |
441 | 453 | ||
442 | |":": |
454 | |":": |
443 | lex.sym := lxCOLON; |
455 | sym := lxCOLON; |
444 | 456 | ||
445 | IF c = "=" THEN |
457 | IF c = "=" THEN |
446 | lex.sym := lxASSIGN; |
458 | sym := lxASSIGN; |
447 | putchar(lex, c); |
459 | putchar(lex, c); |
448 | TXT.next(text) |
460 | TXT.next(text) |
449 | END |
461 | END |
450 | 462 | ||
451 | |")": |
463 | |")": |
452 | lex.sym := lxRROUND |
464 | sym := lxRROUND |
453 | 465 | ||
454 | |"]": |
466 | |"]": |
455 | lex.sym := lxRSQUARE |
467 | sym := lxRSQUARE |
456 | 468 | ||
457 | |"}": |
469 | |"}": |
- | 470 | sym := lxRCURLY |
|
- | 471 | ||
458 | lex.sym := lxRCURLY |
472 | END; |
459 | 473 | ||
460 | END |
474 | lex.sym := sym |
461 | 475 | ||
462 | END delimiter; |
476 | END delimiter; |
463 | 477 | ||
464 | 478 | ||
465 | PROCEDURE Next* (text: SCANNER; VAR lex: LEX); |
479 | PROCEDURE Next* (text: SCANNER; VAR lex: LEX); |
466 | VAR |
480 | VAR |
467 | c: CHAR; |
481 | c: CHAR; |
- | 482 | ||
- | 483 | ||
- | 484 | PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER); |
|
- | 485 | BEGIN |
|
- | 486 | IF ~cond THEN |
|
- | 487 | ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno) |
|
- | 488 | END |
|
- | 489 | END check; |
|
- | 490 | ||
- | 491 | ||
- | 492 | PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN; |
|
- | 493 | VAR |
|
- | 494 | cur: DEF; |
|
- | 495 | ||
- | 496 | BEGIN |
|
- | 497 | cur := def.first(DEF); |
|
- | 498 | WHILE (cur # NIL) & (cur.ident # str) DO |
|
- | 499 | cur := cur.next(DEF) |
|
- | 500 | END |
|
- | 501 | ||
- | 502 | RETURN cur # NIL |
|
- | 503 | END IsDef; |
|
- | 504 | ||
- | 505 | ||
- | 506 | PROCEDURE Skip (text: SCANNER); |
|
- | 507 | VAR |
|
- | 508 | i: INTEGER; |
|
- | 509 | ||
- | 510 | BEGIN |
|
- | 511 | i := 0; |
|
- | 512 | WHILE (i <= text.ifc) & ~text._skip[i] DO |
|
- | 513 | INC(i) |
|
- | 514 | END; |
|
- | 515 | text.skip := i <= text.ifc |
|
- | 516 | END Skip; |
|
- | 517 | ||
- | 518 | ||
- | 519 | PROCEDURE prep_if (text: SCANNER; VAR lex: LEX); |
|
- | 520 | VAR |
|
- | 521 | skip: BOOLEAN; |
|
- | 522 | ||
- | 523 | BEGIN |
|
- | 524 | INC(text.ifc); |
|
- | 525 | text._elsif[text.ifc] := lex.sym = lxELSIF; |
|
- | 526 | IF lex.sym = lxIF THEN |
|
- | 527 | INC(text.elsec); |
|
- | 528 | text._else[text.elsec] := FALSE |
|
- | 529 | END; |
|
- | 530 | _if := TRUE; |
|
- | 531 | skip := TRUE; |
|
- | 532 | text.skip := FALSE; |
|
- | 533 | ||
- | 534 | Next(text, lex); |
|
- | 535 | check(lex.sym = lxLROUND, text, lex, 64); |
|
- | 536 | ||
- | 537 | Next(text, lex); |
|
- | 538 | check(lex.sym = lxIDENT, text, lex, 22); |
|
- | 539 | ||
- | 540 | REPEAT |
|
- | 541 | IF IsDef(lex.s) THEN |
|
- | 542 | skip := FALSE |
|
- | 543 | END; |
|
- | 544 | ||
- | 545 | Next(text, lex); |
|
- | 546 | IF lex.sym = lxBAR THEN |
|
- | 547 | Next(text, lex); |
|
- | 548 | check(lex.sym = lxIDENT, text, lex, 22) |
|
- | 549 | ELSE |
|
- | 550 | check(lex.sym = lxRROUND, text, lex, 33) |
|
- | 551 | END |
|
- | 552 | UNTIL lex.sym = lxRROUND; |
|
- | 553 | ||
- | 554 | _if := FALSE; |
|
- | 555 | text._skip[text.ifc] := skip; |
|
- | 556 | Skip(text); |
|
- | 557 | Next(text, lex) |
|
- | 558 | END prep_if; |
|
- | 559 | ||
- | 560 | ||
- | 561 | PROCEDURE prep_end (text: SCANNER; VAR lex: LEX); |
|
- | 562 | BEGIN |
|
- | 563 | check(text.ifc > 0, text, lex, 118); |
|
- | 564 | IF lex.sym = lxEND THEN |
|
- | 565 | WHILE text._elsif[text.ifc] DO |
|
- | 566 | DEC(text.ifc) |
|
- | 567 | END; |
|
- | 568 | DEC(text.ifc); |
|
- | 569 | DEC(text.elsec) |
|
- | 570 | ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN |
|
- | 571 | check(~text._else[text.elsec], text, lex, 118); |
|
- | 572 | text._skip[text.ifc] := ~text._skip[text.ifc]; |
|
- | 573 | text._else[text.elsec] := lex.sym = lxELSE |
|
- | 574 | END; |
|
- | 575 | Skip(text); |
|
- | 576 | IF lex.sym = lxELSIF THEN |
|
- | 577 | prep_if(text, lex) |
|
- | 578 | ELSE |
|
- | 579 | Next(text, lex) |
|
- | 580 | END |
|
- | 581 | END prep_end; |
|
- | 582 | ||
468 | 583 | ||
469 | BEGIN |
584 | BEGIN |
470 | 585 | ||
471 | REPEAT |
586 | REPEAT |
472 | c := text.peak; |
587 | c := text.peak; |
473 | 588 | ||
474 | WHILE S.space(c) DO |
589 | WHILE S.space(c) DO |
475 | c := nextc(text) |
590 | c := nextc(text) |
476 | END; |
591 | END; |
477 | 592 | ||
478 | lex.s[0] := 0X; |
593 | lex.s[0] := 0X; |
479 | lex.length := 0; |
594 | lex.length := 0; |
480 | lex.pos.line := text.line; |
595 | lex.pos.line := text.line; |
481 | lex.pos.col := text.col; |
596 | lex.pos.col := text.col; |
482 | lex.ident := NIL; |
597 | lex.ident := NIL; |
483 | lex.over := FALSE; |
598 | lex.over := FALSE; |
484 | 599 | ||
485 | IF S.letter(c) THEN |
600 | IF S.letter(c) THEN |
486 | ident(text, lex) |
601 | ident(text, lex) |
487 | ELSIF S.digit(c) THEN |
602 | ELSIF S.digit(c) THEN |
488 | number(text, lex) |
603 | number(text, lex) |
489 | ELSIF (c = '"') OR (c = "'") THEN |
604 | ELSIF (c = '"') OR (c = "'") THEN |
490 | string(text, lex, c) |
605 | string(text, lex, c) |
491 | ELSIF delimiters[ORD(c)] THEN |
606 | ELSIF delimiters[ORD(c)] THEN |
492 | delimiter(text, lex, c) |
607 | delimiter(text, lex, c) |
- | 608 | ELSIF c = "$" THEN |
|
- | 609 | IF S.letter(nextc(text)) THEN |
|
- | 610 | ident(text, lex); |
|
- | 611 | IF lex.sym = lxIF THEN |
|
- | 612 | IF ~_if THEN |
|
- | 613 | prep_if(text, lex) |
|
- | 614 | END |
|
- | 615 | ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN |
|
- | 616 | IF ~_if THEN |
|
- | 617 | prep_end(text, lex) |
|
- | 618 | END |
|
- | 619 | ELSE |
|
- | 620 | check(FALSE, text, lex, 119) |
|
- | 621 | END |
|
- | 622 | ELSE |
|
- | 623 | check(FALSE, text, lex, 119) |
|
- | 624 | END |
|
493 | ELSIF c = 0X THEN |
625 | ELSIF c = 0X THEN |
494 | lex.sym := lxEOF; |
626 | lex.sym := lxEOF; |
- | 627 | text.skip := FALSE; |
|
495 | IF text.eof THEN |
628 | IF text.eof THEN |
496 | INC(lex.pos.col) |
629 | INC(lex.pos.col) |
497 | END |
630 | END |
498 | ELSIF (c = 7FX) & upto THEN |
631 | ELSIF (c = 7FX) & upto THEN |
499 | upto := FALSE; |
632 | upto := FALSE; |
500 | lex.sym := lxRANGE; |
633 | lex.sym := lxRANGE; |
501 | putchar(lex, "."); |
634 | putchar(lex, "."); |
502 | putchar(lex, "."); |
635 | putchar(lex, "."); |
503 | DEC(lex.pos.col); |
636 | DEC(lex.pos.col); |
504 | TXT.next(text) |
637 | TXT.next(text) |
505 | ELSE |
638 | ELSE |
506 | putchar(lex, c); |
639 | putchar(lex, c); |
507 | TXT.next(text); |
640 | TXT.next(text); |
508 | lex.sym := lxERROR04 |
641 | lex.sym := lxERROR04 |
509 | END; |
642 | END; |
510 | 643 | ||
511 | IF lex.sym < 0 THEN |
644 | IF lex.sym < 0 THEN |
512 | lex.error := -lex.sym |
645 | lex.error := -lex.sym |
513 | ELSE |
646 | ELSE |
514 | lex.error := 0 |
647 | lex.error := 0 |
515 | END |
648 | END |
516 | 649 | ||
517 | UNTIL lex.sym # lxCOMMENT |
650 | UNTIL (lex.sym # lxCOMMENT) & ~text.skip |
518 | 651 | ||
519 | END Next; |
652 | END Next; |
520 | 653 | ||
521 | 654 | ||
522 | PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; |
655 | PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; |
523 | RETURN TXT.open(name) |
656 | RETURN TXT.open(name) |
524 | END open; |
657 | END open; |
525 | 658 | ||
526 | 659 | ||
527 | PROCEDURE close* (VAR scanner: SCANNER); |
660 | PROCEDURE close* (VAR scanner: SCANNER); |
528 | BEGIN |
661 | BEGIN |
529 | TXT.close(scanner) |
662 | TXT.close(scanner) |
530 | END close; |
663 | END close; |
531 | 664 | ||
532 | 665 | ||
533 | PROCEDURE init; |
666 | PROCEDURE init* (lower: BOOLEAN); |
534 | VAR |
667 | VAR |
535 | i: INTEGER; |
668 | i: INTEGER; |
536 | delim: ARRAY 23 OF CHAR; |
669 | delim: ARRAY 23 OF CHAR; |
537 | 670 | ||
538 | 671 | ||
539 | PROCEDURE enterkw (key: INTEGER; kw: LEXSTR); |
672 | PROCEDURE enterkw (key: INTEGER; kw: LEXSTR); |
540 | VAR |
673 | VAR |
541 | id: IDENT; |
674 | id: IDENT; |
- | 675 | upper: LEXSTR; |
|
542 | 676 | ||
- | 677 | BEGIN |
|
543 | BEGIN |
678 | IF LowerCase THEN |
544 | id := enterid(kw); |
679 | id := enterid(kw); |
- | 680 | id.key := key |
|
- | 681 | END; |
|
- | 682 | upper := kw; |
|
- | 683 | S.UpCase(upper); |
|
- | 684 | id := enterid(upper); |
|
545 | id.key := key |
685 | id.key := key |
546 | END enterkw; |
686 | END enterkw; |
547 | 687 | ||
548 | 688 | ||
549 | BEGIN |
689 | BEGIN |
550 | upto := FALSE; |
690 | upto := FALSE; |
- | 691 | LowerCase := lower; |
|
551 | 692 | ||
552 | FOR i := 0 TO 255 DO |
693 | FOR i := 0 TO 255 DO |
553 | delimiters[i] := FALSE |
694 | delimiters[i] := FALSE |
554 | END; |
695 | END; |
555 | 696 | ||
556 | delim := "+-*/~&.,;|([{^=#<>:)]}"; |
697 | delim := "+-*/~&.,;|([{^=#<>:)]}"; |
557 | 698 | ||
558 | FOR i := 0 TO LEN(delim) - 2 DO |
699 | FOR i := 0 TO LEN(delim) - 2 DO |
559 | delimiters[ORD(delim[i])] := TRUE |
700 | delimiters[ORD(delim[i])] := TRUE |
560 | END; |
701 | END; |
561 | 702 | ||
562 | NEW(NewIdent); |
703 | NEW(NewIdent); |
563 | NewIdent.s := ""; |
704 | NewIdent.s := ""; |
564 | NewIdent.offset := -1; |
705 | NewIdent.offset := -1; |
565 | NewIdent.offsetW := -1; |
706 | NewIdent.offsetW := -1; |
566 | NewIdent.key := 0; |
707 | NewIdent.key := 0; |
567 | 708 | ||
568 | idents := NIL; |
709 | idents := NIL; |
569 | 710 | ||
570 | enterkw(lxARRAY, "ARRAY"); |
711 | enterkw(lxARRAY, "array"); |
571 | enterkw(lxBEGIN, "BEGIN"); |
712 | enterkw(lxBEGIN, "begin"); |
572 | enterkw(lxBY, "BY"); |
713 | enterkw(lxBY, "by"); |
573 | enterkw(lxCASE, "CASE"); |
714 | enterkw(lxCASE, "case"); |
574 | enterkw(lxCONST, "CONST"); |
715 | enterkw(lxCONST, "const"); |
575 | enterkw(lxDIV, "DIV"); |
716 | enterkw(lxDIV, "div"); |
576 | enterkw(lxDO, "DO"); |
717 | enterkw(lxDO, "do"); |
577 | enterkw(lxELSE, "ELSE"); |
718 | enterkw(lxELSE, "else"); |
578 | enterkw(lxELSIF, "ELSIF"); |
719 | enterkw(lxELSIF, "elsif"); |
579 | enterkw(lxEND, "END"); |
720 | enterkw(lxEND, "end"); |
580 | enterkw(lxFALSE, "FALSE"); |
721 | enterkw(lxFALSE, "false"); |
581 | enterkw(lxFOR, "FOR"); |
722 | enterkw(lxFOR, "for"); |
582 | enterkw(lxIF, "IF"); |
723 | enterkw(lxIF, "if"); |
583 | enterkw(lxIMPORT, "IMPORT"); |
724 | enterkw(lxIMPORT, "import"); |
584 | enterkw(lxIN, "IN"); |
725 | enterkw(lxIN, "in"); |
585 | enterkw(lxIS, "IS"); |
726 | enterkw(lxIS, "is"); |
586 | enterkw(lxMOD, "MOD"); |
727 | enterkw(lxMOD, "mod"); |
587 | enterkw(lxMODULE, "MODULE"); |
728 | enterkw(lxMODULE, "module"); |
588 | enterkw(lxNIL, "NIL"); |
729 | enterkw(lxNIL, "nil"); |
589 | enterkw(lxOF, "OF"); |
730 | enterkw(lxOF, "of"); |
590 | enterkw(lxOR, "OR"); |
731 | enterkw(lxOR, "or"); |
591 | enterkw(lxPOINTER, "POINTER"); |
732 | enterkw(lxPOINTER, "pointer"); |
592 | enterkw(lxPROCEDURE, "PROCEDURE"); |
733 | enterkw(lxPROCEDURE, "procedure"); |
593 | enterkw(lxRECORD, "RECORD"); |
734 | enterkw(lxRECORD, "record"); |
594 | enterkw(lxREPEAT, "REPEAT"); |
735 | enterkw(lxREPEAT, "repeat"); |
595 | enterkw(lxRETURN, "RETURN"); |
736 | enterkw(lxRETURN, "return"); |
596 | enterkw(lxTHEN, "THEN"); |
737 | enterkw(lxTHEN, "then"); |
597 | enterkw(lxTO, "TO"); |
738 | enterkw(lxTO, "to"); |
598 | enterkw(lxTRUE, "TRUE"); |
739 | enterkw(lxTRUE, "true"); |
599 | enterkw(lxTYPE, "TYPE"); |
740 | enterkw(lxTYPE, "type"); |
600 | enterkw(lxUNTIL, "UNTIL"); |
741 | enterkw(lxUNTIL, "until"); |
601 | enterkw(lxVAR, "VAR"); |
742 | enterkw(lxVAR, "var"); |
602 | enterkw(lxWHILE, "WHILE") |
743 | enterkw(lxWHILE, "while") |
603 | 744 | ||
604 | END init; |
745 | END init; |
605 | 746 | ||
- | 747 | ||
- | 748 | PROCEDURE NewDef* (str: ARRAY OF CHAR); |
|
- | 749 | VAR |
|
- | 750 | item: DEF; |
|
- | 751 | ||
- | 752 | BEGIN |
|
- | 753 | NEW(item); |
|
- | 754 | COPY(str, item.ident); |
|
- | 755 | LISTS.push(def, item) |
|
- | 756 | END NewDef; |
|
- | 757 | ||
606 | 758 | ||
607 | BEGIN |
759 | BEGIN |
608 | init |
760 | def := LISTS.create(NIL) |
609 | END SCAN.>>": |
761 | END SCAN.>>=>=>": |
610 | >>> |
762 | >>> |