Subversion Repositories Kolibri OS

Rev

Rev 7983 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 7983 Rev 8097
1
(*
1
(*
2
    BSD 2-Clause License
2
    BSD 2-Clause License
3
 
3
 
4
    Copyright (c) 2018-2019, Anton Krotov
4
    Copyright (c) 2018-2020, Anton Krotov
5
    All rights reserved.
5
    All rights reserved.
6
*)
6
*)
7
 
7
 
8
MODULE SCAN;
8
MODULE SCAN;
9
 
9
 
10
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS;
10
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, ERRORS, LISTS;
11
 
11
 
12
 
12
 
13
CONST
13
CONST
14
 
14
 
15
    LEXLEN = 1024;
15
    LEXLEN = 1024;
16
 
16
 
17
    lxUNDEF*   =  0;  lxIDENT*   =  1;  lxINTEGER*   =  2;  lxHEX*     =  3;
17
    lxUNDEF*   =  0;  lxIDENT*   =  1;  lxINTEGER*   =  2;  lxHEX*     =  3;
18
    lxCHAR*    =  4;  lxFLOAT*   =  5;  lxSTRING*    =  6;  lxCOMMENT* =  7;
18
    lxCHAR*    =  4;  lxFLOAT*   =  5;  lxSTRING*    =  6;  lxCOMMENT* =  7;
19
    lxEOF*     =  8;
19
    lxEOF*     =  8;
20
 
20
 
21
    lxPLUS*    = 21;  lxMINUS*   = 22;  lxMUL*       = 23;  lxSLASH*   = 24;
21
    lxPLUS*    = 21;  lxMINUS*   = 22;  lxMUL*       = 23;  lxSLASH*   = 24;
22
    lxNOT*     = 25;  lxAND*     = 26;  lxPOINT*     = 27;  lxCOMMA*   = 28;
22
    lxNOT*     = 25;  lxAND*     = 26;  lxPOINT*     = 27;  lxCOMMA*   = 28;
23
    lxSEMI*    = 29;  lxBAR*     = 30;  lxLROUND*    = 31;  lxLSQUARE* = 32;
23
    lxSEMI*    = 29;  lxBAR*     = 30;  lxLROUND*    = 31;  lxLSQUARE* = 32;
24
    lxLCURLY*  = 33;  lxCARET*   = 34;  lxEQ*        = 35;  lxNE*      = 36;
24
    lxLCURLY*  = 33;  lxCARET*   = 34;  lxEQ*        = 35;  lxNE*      = 36;
25
    lxLT*      = 37;  lxGT*      = 38;  lxCOLON*     = 39;  lxRROUND*  = 40;
25
    lxLT*      = 37;  lxGT*      = 38;  lxCOLON*     = 39;  lxRROUND*  = 40;
26
    lxRSQUARE* = 41;  lxRCURLY*  = 42;  lxLE*        = 43;  lxGE*      = 44;
26
    lxRSQUARE* = 41;  lxRCURLY*  = 42;  lxLE*        = 43;  lxGE*      = 44;
27
    lxASSIGN*  = 45;  lxRANGE*   = 46;
27
    lxASSIGN*  = 45;  lxRANGE*   = 46;
28
 
28
 
29
    lxKW       = 51;
29
    lxKW       = 51;
30
 
30
 
31
    lxARRAY*   = 51;  lxBEGIN*   = 52;  lxBY*        = 53;  lxCASE*    = 54;
31
    lxARRAY*   = 51;  lxBEGIN*   = 52;  lxBY*        = 53;  lxCASE*    = 54;
32
    lxCONST*   = 55;  lxDIV*     = 56;  lxDO*        = 57;  lxELSE*    = 58;
32
    lxCONST*   = 55;  lxDIV*     = 56;  lxDO*        = 57;  lxELSE*    = 58;
33
    lxELSIF*   = 59;  lxEND*     = 60;  lxFALSE*     = 61;  lxFOR*     = 62;
33
    lxELSIF*   = 59;  lxEND*     = 60;  lxFALSE*     = 61;  lxFOR*     = 62;
34
    lxIF*      = 63;  lxIMPORT*  = 64;  lxIN*        = 65;  lxIS*      = 66;
34
    lxIF*      = 63;  lxIMPORT*  = 64;  lxIN*        = 65;  lxIS*      = 66;
35
    lxMOD*     = 67;  lxMODULE*  = 68;  lxNIL*       = 69;  lxOF*      = 70;
35
    lxMOD*     = 67;  lxMODULE*  = 68;  lxNIL*       = 69;  lxOF*      = 70;
36
    lxOR*      = 71;  lxPOINTER* = 72;  lxPROCEDURE* = 73;  lxRECORD*  = 74;
36
    lxOR*      = 71;  lxPOINTER* = 72;  lxPROCEDURE* = 73;  lxRECORD*  = 74;
37
    lxREPEAT*  = 75;  lxRETURN*  = 76;  lxTHEN*      = 77;  lxTO*      = 78;
37
    lxREPEAT*  = 75;  lxRETURN*  = 76;  lxTHEN*      = 77;  lxTO*      = 78;
38
    lxTRUE*    = 79;  lxTYPE*    = 80;  lxUNTIL*     = 81;  lxVAR*     = 82;
38
    lxTRUE*    = 79;  lxTYPE*    = 80;  lxUNTIL*     = 81;  lxVAR*     = 82;
39
    lxWHILE*   = 83;
39
    lxWHILE*   = 83;
40
 
40
 
41
    lxERROR01* =  -1;  lxERROR02* =  -2;  lxERROR03* =  -3;  lxERROR04* =  -4;
41
    lxERROR01* =  -1;  lxERROR02* =  -2;  lxERROR03* =  -3;  lxERROR04* =  -4;
42
    lxERROR05* =  -5;  lxERROR06* =  -6;  lxERROR07* =  -7;  lxERROR08* =  -8;
42
    lxERROR05* =  -5;  lxERROR06* =  -6;  lxERROR07* =  -7;  lxERROR08* =  -8;
43
    lxERROR09* =  -9;  lxERROR10* = -10;  lxERROR11* = -11;  lxERROR12* = -12;
43
    lxERROR09* =  -9;  lxERROR10* = -10;  lxERROR11* = -11;  lxERROR12* = -12;
44
    lxERROR13* = -13;
44
    lxERROR13* = -13;
45
 
45
 
46
 
46
 
47
TYPE
47
TYPE
48
 
48
 
49
    LEXSTR* = ARRAY LEXLEN OF CHAR;
49
    LEXSTR* = ARRAY LEXLEN OF CHAR;
-
 
50
 
-
 
51
    DEF = POINTER TO RECORD (LISTS.ITEM)
-
 
52
 
-
 
53
        ident: LEXSTR
-
 
54
 
-
 
55
    END;
50
 
56
 
51
    IDENT* = POINTER TO RECORD (AVL.DATA)
57
    IDENT* = POINTER TO RECORD (AVL.DATA)
52
 
58
 
53
        s*: LEXSTR;
59
        s*: LEXSTR;
54
        offset*, offsetW*: INTEGER;
60
        offset*, offsetW*: INTEGER;
55
        key: INTEGER
61
        key: INTEGER
56
 
62
 
57
    END;
63
    END;
58
 
64
 
59
    POSITION* = RECORD
65
    POSITION* = RECORD
60
 
66
 
61
        line*, col*: INTEGER
67
        line*, col*: INTEGER
62
 
68
 
63
    END;
69
    END;
64
 
70
 
65
    LEX* = RECORD
71
    LEX* = RECORD
66
 
72
 
67
        s*:         LEXSTR;
73
        s*:         LEXSTR;
68
        length*:    INTEGER;
74
        length*:    INTEGER;
69
        sym*:       INTEGER;
75
        sym*:       INTEGER;
70
        pos*:       POSITION;
76
        pos*:       POSITION;
71
        ident*:     IDENT;
77
        ident*:     IDENT;
72
        string*:    IDENT;
78
        string*:    IDENT;
73
        value*:     ARITH.VALUE;
79
        value*:     ARITH.VALUE;
74
        error*:     INTEGER;
80
        error*:     INTEGER;
75
 
81
 
76
        over:       BOOLEAN
82
        over:       BOOLEAN
77
 
83
 
78
    END;
84
    END;
79
 
85
 
80
    SCANNER* = TXT.TEXT;
86
    SCANNER* = TXT.TEXT;
81
 
87
 
82
 
88
 
83
VAR
89
VAR
84
 
90
 
85
    idents: AVL.NODE;
91
    idents: AVL.NODE;
86
 
92
 
87
    delimiters: ARRAY 256 OF BOOLEAN;
93
    delimiters: ARRAY 256 OF BOOLEAN;
88
 
94
 
89
    NewIdent: IDENT;
95
    NewIdent: IDENT;
90
 
96
 
-
 
97
    upto, LowerCase, _if: BOOLEAN;
-
 
98
 
91
    upto: BOOLEAN;
99
    def: LISTS.LIST;
92
 
100
 
93
 
101
 
94
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
102
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
95
    RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
103
    RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
96
END nodecmp;
104
END nodecmp;
97
 
105
 
98
 
106
 
99
PROCEDURE enterid* (s: LEXSTR): IDENT;
107
PROCEDURE enterid* (s: LEXSTR): IDENT;
100
VAR
108
VAR
101
    newnode: BOOLEAN;
109
    newnode: BOOLEAN;
102
    node:    AVL.NODE;
110
    node:    AVL.NODE;
103
 
111
 
104
BEGIN
112
BEGIN
105
    NewIdent.s := s;
113
    NewIdent.s := s;
106
    idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node);
114
    idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node);
107
 
115
 
108
    IF newnode THEN
116
    IF newnode THEN
109
        NEW(NewIdent);
117
        NEW(NewIdent);
110
        NewIdent.offset  := -1;
118
        NewIdent.offset  := -1;
111
        NewIdent.offsetW := -1;
119
        NewIdent.offsetW := -1;
112
        NewIdent.key := 0
120
        NewIdent.key := 0
113
    END
121
    END
114
 
122
 
115
    RETURN node.data(IDENT)
123
    RETURN node.data(IDENT)
116
END enterid;
124
END enterid;
117
 
125
 
118
 
126
 
119
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
127
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
120
BEGIN
128
BEGIN
121
    IF lex.length < LEXLEN - 1 THEN
129
    IF lex.length < LEXLEN - 1 THEN
122
        lex.s[lex.length] := c;
130
        lex.s[lex.length] := c;
123
        INC(lex.length);
131
        INC(lex.length);
124
        lex.s[lex.length] := 0X
132
        lex.s[lex.length] := 0X
125
    ELSE
133
    ELSE
126
        lex.over := TRUE
134
        lex.over := TRUE
127
    END
135
    END
128
END putchar;
136
END putchar;
129
 
137
 
130
 
138
 
131
PROCEDURE nextc (text: TXT.TEXT): CHAR;
139
PROCEDURE nextc (text: TXT.TEXT): CHAR;
132
BEGIN
140
BEGIN
133
    TXT.next(text)
141
    TXT.next(text)
134
    RETURN text.peak
142
    RETURN text.peak
135
END nextc;
143
END nextc;
136
 
144
 
137
 
145
 
138
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
146
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
139
VAR
147
VAR
140
    c: CHAR;
148
    c: CHAR;
141
 
149
 
142
BEGIN
150
BEGIN
143
    c := text.peak;
151
    c := text.peak;
144
    ASSERT(S.letter(c));
152
    ASSERT(S.letter(c));
145
 
153
 
146
    WHILE S.letter(c) OR S.digit(c) DO
154
    WHILE S.letter(c) OR S.digit(c) DO
147
        putchar(lex, c);
155
        putchar(lex, c);
148
        c := nextc(text)
156
        c := nextc(text)
149
    END;
157
    END;
150
 
158
 
151
    IF lex.over THEN
159
    IF lex.over THEN
152
        lex.sym := lxERROR06
160
        lex.sym := lxERROR06
153
    ELSE
161
    ELSE
154
        lex.ident := enterid(lex.s);
162
        lex.ident := enterid(lex.s);
155
        IF lex.ident.key # 0 THEN
163
        IF lex.ident.key # 0 THEN
156
            lex.sym := lex.ident.key
164
            lex.sym := lex.ident.key
157
        ELSE
165
        ELSE
158
            lex.sym := lxIDENT
166
            lex.sym := lxIDENT
159
        END
167
        END
160
    END
168
    END
161
 
169
 
162
END ident;
170
END ident;
163
 
171
 
164
 
172
 
165
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
173
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
166
VAR
174
VAR
167
    c: CHAR;
175
    c: CHAR;
168
    hex: BOOLEAN;
176
    hex: BOOLEAN;
169
    error: INTEGER;
177
    error, sym: INTEGER;
170
 
178
 
171
BEGIN
179
BEGIN
172
    c := text.peak;
180
    c := text.peak;
173
    ASSERT(S.digit(c));
181
    ASSERT(S.digit(c));
174
 
182
 
175
    error := 0;
183
    error := 0;
176
 
184
 
177
    lex.sym := lxINTEGER;
185
    sym := lxINTEGER;
178
    hex := FALSE;
186
    hex := FALSE;
179
 
187
 
180
    WHILE S.digit(c) DO
188
    WHILE S.digit(c) DO
181
        putchar(lex, c);
189
        putchar(lex, c);
182
        c := nextc(text)
190
        c := nextc(text)
183
    END;
191
    END;
184
 
192
 
185
    WHILE S.hexdigit(c) DO
193
    WHILE S.hexdigit(c) DO
186
        putchar(lex, c);
194
        putchar(lex, c);
187
        c := nextc(text);
195
        c := nextc(text);
188
        hex := TRUE
196
        hex := TRUE
189
    END;
197
    END;
190
 
198
 
191
    IF c = "H" THEN
199
    IF c = "H" THEN
192
        putchar(lex, c);
200
        putchar(lex, c);
193
        TXT.next(text);
201
        TXT.next(text);
194
        lex.sym := lxHEX
202
        sym := lxHEX
195
 
203
 
196
    ELSIF c = "X" THEN
204
    ELSIF c = "X" THEN
197
        putchar(lex, c);
205
        putchar(lex, c);
198
        TXT.next(text);
206
        TXT.next(text);
199
        lex.sym := lxCHAR
207
        sym := lxCHAR
200
 
208
 
201
    ELSIF c = "." THEN
209
    ELSIF c = "." THEN
202
 
210
 
203
        IF hex THEN
211
        IF hex THEN
204
            lex.sym := lxERROR01
212
            sym := lxERROR01
205
        ELSE
213
        ELSE
206
 
214
 
207
            c := nextc(text);
215
            c := nextc(text);
208
 
216
 
209
            IF c # "." THEN
217
            IF c # "." THEN
210
                putchar(lex, ".");
218
                putchar(lex, ".");
211
                lex.sym := lxFLOAT
219
                sym := lxFLOAT
212
            ELSE
220
            ELSE
213
                lex.sym := lxINTEGER;
221
                sym := lxINTEGER;
214
                text.peak := 7FX;
222
                text.peak := 7FX;
215
                upto := TRUE
223
                upto := TRUE
216
            END;
224
            END;
217
 
225
 
218
            WHILE S.digit(c) DO
226
            WHILE S.digit(c) DO
219
                putchar(lex, c);
227
                putchar(lex, c);
220
                c := nextc(text)
228
                c := nextc(text)
221
            END;
229
            END;
222
 
230
 
223
            IF c = "E" THEN
231
            IF c = "E" THEN
224
 
232
 
225
                putchar(lex, c);
233
                putchar(lex, c);
226
                c := nextc(text);
234
                c := nextc(text);
227
                IF (c = "+") OR (c = "-") THEN
235
                IF (c = "+") OR (c = "-") THEN
228
                    putchar(lex, c);
236
                    putchar(lex, c);
229
                    c := nextc(text)
237
                    c := nextc(text)
230
                END;
238
                END;
231
 
239
 
232
                IF S.digit(c) THEN
240
                IF S.digit(c) THEN
233
                    WHILE S.digit(c) DO
241
                    WHILE S.digit(c) DO
234
                        putchar(lex, c);
242
                        putchar(lex, c);
235
                        c := nextc(text)
243
                        c := nextc(text)
236
                    END
244
                    END
237
                ELSE
245
                ELSE
238
                    lex.sym := lxERROR02
246
                    sym := lxERROR02
239
                END
247
                END
240
 
248
 
241
            END
249
            END
242
 
250
 
243
        END
251
        END
244
 
252
 
245
    ELSIF hex THEN
253
    ELSIF hex THEN
246
        lex.sym := lxERROR01
254
        sym := lxERROR01
247
 
255
 
248
    END;
256
    END;
249
 
257
 
250
    IF lex.over & (lex.sym >= 0) THEN
258
    IF lex.over & (sym >= 0) THEN
251
        lex.sym := lxERROR07
259
        sym := lxERROR07
252
    END;
260
    END;
253
 
261
 
254
    IF lex.sym = lxINTEGER THEN
262
    IF sym = lxINTEGER THEN
255
        ARITH.iconv(lex.s, lex.value, error)
263
        ARITH.iconv(lex.s, lex.value, error)
256
    ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN
264
    ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN
257
        ARITH.hconv(lex.s, lex.value, error)
265
        ARITH.hconv(lex.s, lex.value, error)
258
    ELSIF lex.sym = lxFLOAT THEN
266
    ELSIF sym = lxFLOAT THEN
259
        ARITH.fconv(lex.s, lex.value, error)
267
        ARITH.fconv(lex.s, lex.value, error)
260
    END;
268
    END;
261
 
269
 
262
    CASE error OF
270
    CASE error OF
263
    |0:
271
    |0:
264
    |1: lex.sym := lxERROR08
272
    |1: sym := lxERROR08
265
    |2: lex.sym := lxERROR09
273
    |2: sym := lxERROR09
266
    |3: lex.sym := lxERROR10
274
    |3: sym := lxERROR10
267
    |4: lex.sym := lxERROR11
275
    |4: sym := lxERROR11
268
    |5: lex.sym := lxERROR12
276
    |5: sym := lxERROR12
269
    END
277
    END;
-
 
278
 
270
 
279
    lex.sym := sym
271
END number;
280
END number;
272
 
281
 
273
 
282
 
274
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
283
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
275
VAR
284
VAR
276
    c: CHAR;
285
    c: CHAR;
277
    n: INTEGER;
286
    n: INTEGER;
278
 
287
 
279
BEGIN
288
BEGIN
280
    c  := nextc(text);
289
    c  := nextc(text);
281
    n  := 0;
290
    n  := 0;
282
 
291
 
283
    WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
292
    WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
284
        putchar(lex, c);
293
        putchar(lex, c);
285
        c := nextc(text);
294
        c := nextc(text);
286
        INC(n)
295
        INC(n)
287
    END;
296
    END;
288
 
297
 
289
    IF c = quot THEN
298
    IF c = quot THEN
290
        TXT.next(text);
299
        TXT.next(text);
291
        IF lex.over THEN
300
        IF lex.over THEN
292
            lex.sym := lxERROR05
301
            lex.sym := lxERROR05
293
        ELSE
302
        ELSE
294
            IF n # 1 THEN
303
            IF n # 1 THEN
295
                lex.sym := lxSTRING
304
                lex.sym := lxSTRING
296
            ELSE
305
            ELSE
297
                lex.sym := lxCHAR;
306
                lex.sym := lxCHAR;
298
                ARITH.setChar(lex.value, ORD(lex.s[0]))
307
                ARITH.setChar(lex.value, ORD(lex.s[0]))
299
            END
308
            END
300
        END
309
        END
301
    ELSE
310
    ELSE
302
        lex.sym := lxERROR03
311
        lex.sym := lxERROR03
303
    END;
312
    END;
304
 
313
 
305
    IF lex.sym = lxSTRING THEN
314
    IF lex.sym = lxSTRING THEN
306
        lex.string := enterid(lex.s);
315
        lex.string := enterid(lex.s);
307
        lex.value.typ := ARITH.tSTRING;
316
        lex.value.typ := ARITH.tSTRING;
308
        lex.value.string := lex.string
317
        lex.value.string := lex.string
309
    END
318
    END
310
 
319
 
311
END string;
320
END string;
312
 
321
 
313
 
322
 
314
PROCEDURE comment (text: TXT.TEXT);
323
PROCEDURE comment (text: TXT.TEXT);
315
VAR
324
VAR
316
    c: CHAR;
325
    c: CHAR;
317
    cond, depth: INTEGER;
326
    cond, depth: INTEGER;
318
 
327
 
319
BEGIN
328
BEGIN
320
    cond  := 0;
329
    cond  := 0;
321
    depth := 1;
330
    depth := 1;
322
 
331
 
323
    REPEAT
332
    REPEAT
324
 
333
 
325
        c := text.peak;
334
        c := text.peak;
326
        TXT.next(text);
335
        TXT.next(text);
327
 
336
 
328
        IF c = "*" THEN
337
        IF c = "*" THEN
329
            IF cond = 1 THEN
338
            IF cond = 1 THEN
330
                cond := 0;
339
                cond := 0;
331
                INC(depth)
340
                INC(depth)
332
            ELSE
341
            ELSE
333
               cond := 2
342
               cond := 2
334
            END
343
            END
335
        ELSIF c = ")" THEN
344
        ELSIF c = ")" THEN
336
            IF cond = 2 THEN
345
            IF cond = 2 THEN
337
                DEC(depth)
346
                DEC(depth)
338
            END;
347
            END;
339
            cond := 0
348
            cond := 0
340
        ELSIF c = "(" THEN
349
        ELSIF c = "(" THEN
341
            cond := 1
350
            cond := 1
342
        ELSE
351
        ELSE
343
            cond := 0
352
            cond := 0
344
        END
353
        END
345
 
354
 
346
    UNTIL (depth = 0) OR text.eof
355
    UNTIL (depth = 0) OR text.eof
347
 
356
 
348
END comment;
357
END comment;
349
 
358
 
350
 
359
 
351
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
360
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
-
 
361
VAR
-
 
362
    sym: INTEGER;
-
 
363
 
352
BEGIN
364
BEGIN
353
    putchar(lex, c);
365
    putchar(lex, c);
354
    c := nextc(text);
366
    c := nextc(text);
355
 
367
 
356
    CASE lex.s[0] OF
368
    CASE lex.s[0] OF
357
    |"+":
369
    |"+":
358
        lex.sym := lxPLUS
370
        sym := lxPLUS
359
 
371
 
360
    |"-":
372
    |"-":
361
        lex.sym := lxMINUS
373
        sym := lxMINUS
362
 
374
 
363
    |"*":
375
    |"*":
364
        lex.sym := lxMUL
376
        sym := lxMUL
365
 
377
 
366
    |"/":
378
    |"/":
367
        lex.sym := lxSLASH;
379
        sym := lxSLASH;
368
 
380
 
369
        IF c = "/" THEN
381
        IF c = "/" THEN
370
            lex.sym := lxCOMMENT;
382
            sym := lxCOMMENT;
371
            REPEAT
383
            REPEAT
372
                TXT.next(text)
384
                TXT.next(text)
373
            UNTIL text.eol OR text.eof
385
            UNTIL text.eol OR text.eof
374
        END
386
        END
375
 
387
 
376
    |"~":
388
    |"~":
377
        lex.sym := lxNOT
389
        sym := lxNOT
378
 
390
 
379
    |"&":
391
    |"&":
380
        lex.sym := lxAND
392
        sym := lxAND
381
 
393
 
382
    |".":
394
    |".":
383
        lex.sym := lxPOINT;
395
        sym := lxPOINT;
384
 
396
 
385
        IF c = "." THEN
397
        IF c = "." THEN
386
            lex.sym := lxRANGE;
398
            sym := lxRANGE;
387
            putchar(lex, c);
399
            putchar(lex, c);
388
            TXT.next(text)
400
            TXT.next(text)
389
        END
401
        END
390
 
402
 
391
    |",":
403
    |",":
392
        lex.sym := lxCOMMA
404
        sym := lxCOMMA
393
 
405
 
394
    |";":
406
    |";":
395
        lex.sym := lxSEMI
407
        sym := lxSEMI
396
 
408
 
397
    |"|":
409
    |"|":
398
        lex.sym := lxBAR
410
        sym := lxBAR
399
 
411
 
400
    |"(":
412
    |"(":
401
        lex.sym := lxLROUND;
413
        sym := lxLROUND;
402
 
414
 
403
        IF c = "*" THEN
415
        IF c = "*" THEN
404
            lex.sym := lxCOMMENT;
416
            sym := lxCOMMENT;
405
            TXT.next(text);
417
            TXT.next(text);
406
            comment(text)
418
            comment(text)
407
        END
419
        END
408
 
420
 
409
    |"[":
421
    |"[":
410
        lex.sym := lxLSQUARE
422
        sym := lxLSQUARE
411
 
423
 
412
    |"{":
424
    |"{":
413
        lex.sym := lxLCURLY
425
        sym := lxLCURLY
414
 
426
 
415
    |"^":
427
    |"^":
416
        lex.sym := lxCARET
428
        sym := lxCARET
417
 
429
 
418
    |"=":
430
    |"=":
419
        lex.sym := lxEQ
431
        sym := lxEQ
420
 
432
 
421
    |"#":
433
    |"#":
422
        lex.sym := lxNE
434
        sym := lxNE
423
 
435
 
424
    |"<":
436
    |"<":
425
        lex.sym := lxLT;
437
        sym := lxLT;
426
 
438
 
427
        IF c = "=" THEN
439
        IF c = "=" THEN
428
            lex.sym := lxLE;
440
            sym := lxLE;
429
            putchar(lex, c);
441
            putchar(lex, c);
430
            TXT.next(text)
442
            TXT.next(text)
431
        END
443
        END
432
 
444
 
433
    |">":
445
    |">":
434
        lex.sym := lxGT;
446
        sym := lxGT;
435
 
447
 
436
        IF c = "=" THEN
448
        IF c = "=" THEN
437
            lex.sym := lxGE;
449
            sym := lxGE;
438
            putchar(lex, c);
450
            putchar(lex, c);
439
            TXT.next(text)
451
            TXT.next(text)
440
        END
452
        END
441
 
453
 
442
    |":":
454
    |":":
443
        lex.sym := lxCOLON;
455
        sym := lxCOLON;
444
 
456
 
445
        IF c = "=" THEN
457
        IF c = "=" THEN
446
            lex.sym := lxASSIGN;
458
            sym := lxASSIGN;
447
            putchar(lex, c);
459
            putchar(lex, c);
448
            TXT.next(text)
460
            TXT.next(text)
449
        END
461
        END
450
 
462
 
451
    |")":
463
    |")":
452
        lex.sym := lxRROUND
464
        sym := lxRROUND
453
 
465
 
454
    |"]":
466
    |"]":
455
        lex.sym := lxRSQUARE
467
        sym := lxRSQUARE
456
 
468
 
457
    |"}":
469
    |"}":
-
 
470
        sym := lxRCURLY
-
 
471
 
458
        lex.sym := lxRCURLY
472
    END;
459
 
473
 
460
    END
474
    lex.sym := sym
461
 
475
 
462
END delimiter;
476
END delimiter;
463
 
477
 
464
 
478
 
465
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
479
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
466
VAR
480
VAR
467
    c: CHAR;
481
    c: CHAR;
-
 
482
 
-
 
483
 
-
 
484
    PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER);
-
 
485
    BEGIN
-
 
486
        IF ~cond THEN
-
 
487
            ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno)
-
 
488
        END
-
 
489
    END check;
-
 
490
 
-
 
491
 
-
 
492
    PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN;
-
 
493
    VAR
-
 
494
        cur: DEF;
-
 
495
 
-
 
496
    BEGIN
-
 
497
        cur := def.first(DEF);
-
 
498
        WHILE (cur # NIL) & (cur.ident # str) DO
-
 
499
            cur := cur.next(DEF)
-
 
500
        END
-
 
501
 
-
 
502
        RETURN cur # NIL
-
 
503
    END IsDef;
-
 
504
 
-
 
505
 
-
 
506
    PROCEDURE Skip (text: SCANNER);
-
 
507
    VAR
-
 
508
        i: INTEGER;
-
 
509
 
-
 
510
    BEGIN
-
 
511
        i := 0;
-
 
512
        WHILE (i <= text.ifc) & ~text._skip[i] DO
-
 
513
            INC(i)
-
 
514
        END;
-
 
515
        text.skip := i <= text.ifc
-
 
516
    END Skip;
-
 
517
 
-
 
518
 
-
 
519
    PROCEDURE prep_if (text: SCANNER; VAR lex: LEX);
-
 
520
    VAR
-
 
521
        skip: BOOLEAN;
-
 
522
 
-
 
523
    BEGIN
-
 
524
        INC(text.ifc);
-
 
525
        text._elsif[text.ifc] := lex.sym = lxELSIF;
-
 
526
        IF lex.sym = lxIF THEN
-
 
527
            INC(text.elsec);
-
 
528
            text._else[text.elsec] := FALSE
-
 
529
        END;
-
 
530
        _if := TRUE;
-
 
531
        skip := TRUE;
-
 
532
        text.skip := FALSE;
-
 
533
 
-
 
534
        Next(text, lex);
-
 
535
        check(lex.sym = lxLROUND, text, lex, 64);
-
 
536
 
-
 
537
        Next(text, lex);
-
 
538
        check(lex.sym = lxIDENT, text, lex, 22);
-
 
539
 
-
 
540
        REPEAT
-
 
541
            IF IsDef(lex.s) THEN
-
 
542
                skip := FALSE
-
 
543
            END;
-
 
544
 
-
 
545
            Next(text, lex);
-
 
546
            IF lex.sym = lxBAR THEN
-
 
547
                Next(text, lex);
-
 
548
                check(lex.sym = lxIDENT, text, lex, 22)
-
 
549
            ELSE
-
 
550
                check(lex.sym = lxRROUND, text, lex, 33)
-
 
551
            END
-
 
552
        UNTIL lex.sym = lxRROUND;
-
 
553
 
-
 
554
        _if := FALSE;
-
 
555
        text._skip[text.ifc] := skip;
-
 
556
        Skip(text);
-
 
557
        Next(text, lex)
-
 
558
    END prep_if;
-
 
559
 
-
 
560
 
-
 
561
    PROCEDURE prep_end (text: SCANNER; VAR lex: LEX);
-
 
562
    BEGIN
-
 
563
        check(text.ifc > 0, text, lex, 118);
-
 
564
        IF lex.sym = lxEND THEN
-
 
565
            WHILE text._elsif[text.ifc] DO
-
 
566
                DEC(text.ifc)
-
 
567
            END;
-
 
568
            DEC(text.ifc);
-
 
569
            DEC(text.elsec)
-
 
570
        ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
-
 
571
            check(~text._else[text.elsec], text, lex, 118);
-
 
572
            text._skip[text.ifc] := ~text._skip[text.ifc];
-
 
573
            text._else[text.elsec] := lex.sym = lxELSE
-
 
574
        END;
-
 
575
        Skip(text);
-
 
576
        IF lex.sym = lxELSIF THEN
-
 
577
            prep_if(text, lex)
-
 
578
        ELSE
-
 
579
            Next(text, lex)
-
 
580
        END
-
 
581
    END prep_end;
-
 
582
 
468
 
583
 
469
BEGIN
584
BEGIN
470
 
585
 
471
    REPEAT
586
    REPEAT
472
        c := text.peak;
587
        c := text.peak;
473
 
588
 
474
        WHILE S.space(c) DO
589
        WHILE S.space(c) DO
475
            c := nextc(text)
590
            c := nextc(text)
476
        END;
591
        END;
477
 
592
 
478
        lex.s[0]     := 0X;
593
        lex.s[0]     := 0X;
479
        lex.length   := 0;
594
        lex.length   := 0;
480
        lex.pos.line := text.line;
595
        lex.pos.line := text.line;
481
        lex.pos.col  := text.col;
596
        lex.pos.col  := text.col;
482
        lex.ident    := NIL;
597
        lex.ident    := NIL;
483
        lex.over     := FALSE;
598
        lex.over     := FALSE;
484
 
599
 
485
        IF S.letter(c) THEN
600
        IF S.letter(c) THEN
486
            ident(text, lex)
601
            ident(text, lex)
487
        ELSIF S.digit(c) THEN
602
        ELSIF S.digit(c) THEN
488
            number(text, lex)
603
            number(text, lex)
489
        ELSIF (c = '"') OR (c = "'") THEN
604
        ELSIF (c = '"') OR (c = "'") THEN
490
            string(text, lex, c)
605
            string(text, lex, c)
491
        ELSIF delimiters[ORD(c)] THEN
606
        ELSIF delimiters[ORD(c)] THEN
492
            delimiter(text, lex, c)
607
            delimiter(text, lex, c)
-
 
608
        ELSIF c = "$" THEN
-
 
609
            IF S.letter(nextc(text)) THEN
-
 
610
                ident(text, lex);
-
 
611
                IF lex.sym = lxIF THEN
-
 
612
                    IF ~_if THEN
-
 
613
                        prep_if(text, lex)
-
 
614
                    END
-
 
615
                ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
-
 
616
                    IF ~_if THEN
-
 
617
                        prep_end(text, lex)
-
 
618
                    END
-
 
619
                ELSE
-
 
620
                    check(FALSE, text, lex, 119)
-
 
621
                END
-
 
622
            ELSE
-
 
623
                check(FALSE, text, lex, 119)
-
 
624
            END
493
        ELSIF c = 0X THEN
625
        ELSIF c = 0X THEN
494
            lex.sym := lxEOF;
626
            lex.sym := lxEOF;
-
 
627
            text.skip := FALSE;
495
            IF text.eof THEN
628
            IF text.eof THEN
496
                INC(lex.pos.col)
629
                INC(lex.pos.col)
497
            END
630
            END
498
        ELSIF (c = 7FX) & upto THEN
631
        ELSIF (c = 7FX) & upto THEN
499
            upto := FALSE;
632
            upto := FALSE;
500
            lex.sym := lxRANGE;
633
            lex.sym := lxRANGE;
501
            putchar(lex, ".");
634
            putchar(lex, ".");
502
            putchar(lex, ".");
635
            putchar(lex, ".");
503
            DEC(lex.pos.col);
636
            DEC(lex.pos.col);
504
            TXT.next(text)
637
            TXT.next(text)
505
        ELSE
638
        ELSE
506
            putchar(lex, c);
639
            putchar(lex, c);
507
            TXT.next(text);
640
            TXT.next(text);
508
            lex.sym := lxERROR04
641
            lex.sym := lxERROR04
509
        END;
642
        END;
510
 
643
 
511
        IF lex.sym < 0 THEN
644
        IF lex.sym < 0 THEN
512
            lex.error := -lex.sym
645
            lex.error := -lex.sym
513
        ELSE
646
        ELSE
514
            lex.error := 0
647
            lex.error := 0
515
        END
648
        END
516
 
649
 
517
    UNTIL lex.sym # lxCOMMENT
650
    UNTIL (lex.sym # lxCOMMENT) & ~text.skip
518
 
651
 
519
END Next;
652
END Next;
520
 
653
 
521
 
654
 
522
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
655
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
523
    RETURN TXT.open(name)
656
    RETURN TXT.open(name)
524
END open;
657
END open;
525
 
658
 
526
 
659
 
527
PROCEDURE close* (VAR scanner: SCANNER);
660
PROCEDURE close* (VAR scanner: SCANNER);
528
BEGIN
661
BEGIN
529
    TXT.close(scanner)
662
    TXT.close(scanner)
530
END close;
663
END close;
531
 
664
 
532
 
665
 
533
PROCEDURE init;
666
PROCEDURE init* (lower: BOOLEAN);
534
VAR
667
VAR
535
    i: INTEGER;
668
    i: INTEGER;
536
    delim: ARRAY 23 OF CHAR;
669
    delim: ARRAY 23 OF CHAR;
537
 
670
 
538
 
671
 
539
    PROCEDURE enterkw (key: INTEGER; kw: LEXSTR);
672
    PROCEDURE enterkw (key: INTEGER; kw: LEXSTR);
540
    VAR
673
    VAR
541
        id: IDENT;
674
        id: IDENT;
-
 
675
        upper: LEXSTR;
542
 
676
 
-
 
677
    BEGIN
543
    BEGIN
678
        IF LowerCase THEN
544
        id := enterid(kw);
679
            id := enterid(kw);
-
 
680
            id.key := key
-
 
681
        END;
-
 
682
        upper := kw;
-
 
683
        S.UpCase(upper);
-
 
684
        id := enterid(upper);
545
        id.key := key
685
        id.key := key
546
    END enterkw;
686
    END enterkw;
547
 
687
 
548
 
688
 
549
BEGIN
689
BEGIN
550
    upto := FALSE;
690
    upto := FALSE;
-
 
691
    LowerCase := lower;
551
 
692
 
552
    FOR i := 0 TO 255 DO
693
    FOR i := 0 TO 255 DO
553
        delimiters[i] := FALSE
694
        delimiters[i] := FALSE
554
    END;
695
    END;
555
 
696
 
556
    delim := "+-*/~&.,;|([{^=#<>:)]}";
697
    delim := "+-*/~&.,;|([{^=#<>:)]}";
557
 
698
 
558
    FOR i := 0 TO LEN(delim) - 2 DO
699
    FOR i := 0 TO LEN(delim) - 2 DO
559
        delimiters[ORD(delim[i])] := TRUE
700
        delimiters[ORD(delim[i])] := TRUE
560
    END;
701
    END;
561
 
702
 
562
    NEW(NewIdent);
703
    NEW(NewIdent);
563
    NewIdent.s       := "";
704
    NewIdent.s       := "";
564
    NewIdent.offset  := -1;
705
    NewIdent.offset  := -1;
565
    NewIdent.offsetW := -1;
706
    NewIdent.offsetW := -1;
566
    NewIdent.key     := 0;
707
    NewIdent.key     := 0;
567
 
708
 
568
    idents := NIL;
709
    idents := NIL;
569
 
710
 
570
    enterkw(lxARRAY, "ARRAY");
711
    enterkw(lxARRAY, "array");
571
    enterkw(lxBEGIN, "BEGIN");
712
    enterkw(lxBEGIN, "begin");
572
    enterkw(lxBY, "BY");
713
    enterkw(lxBY, "by");
573
    enterkw(lxCASE, "CASE");
714
    enterkw(lxCASE, "case");
574
    enterkw(lxCONST, "CONST");
715
    enterkw(lxCONST, "const");
575
    enterkw(lxDIV, "DIV");
716
    enterkw(lxDIV, "div");
576
    enterkw(lxDO, "DO");
717
    enterkw(lxDO, "do");
577
    enterkw(lxELSE, "ELSE");
718
    enterkw(lxELSE, "else");
578
    enterkw(lxELSIF, "ELSIF");
719
    enterkw(lxELSIF, "elsif");
579
    enterkw(lxEND, "END");
720
    enterkw(lxEND, "end");
580
    enterkw(lxFALSE, "FALSE");
721
    enterkw(lxFALSE, "false");
581
    enterkw(lxFOR, "FOR");
722
    enterkw(lxFOR, "for");
582
    enterkw(lxIF, "IF");
723
    enterkw(lxIF, "if");
583
    enterkw(lxIMPORT, "IMPORT");
724
    enterkw(lxIMPORT, "import");
584
    enterkw(lxIN, "IN");
725
    enterkw(lxIN, "in");
585
    enterkw(lxIS, "IS");
726
    enterkw(lxIS, "is");
586
    enterkw(lxMOD, "MOD");
727
    enterkw(lxMOD, "mod");
587
    enterkw(lxMODULE, "MODULE");
728
    enterkw(lxMODULE, "module");
588
    enterkw(lxNIL, "NIL");
729
    enterkw(lxNIL, "nil");
589
    enterkw(lxOF, "OF");
730
    enterkw(lxOF, "of");
590
    enterkw(lxOR, "OR");
731
    enterkw(lxOR, "or");
591
    enterkw(lxPOINTER, "POINTER");
732
    enterkw(lxPOINTER, "pointer");
592
    enterkw(lxPROCEDURE, "PROCEDURE");
733
    enterkw(lxPROCEDURE, "procedure");
593
    enterkw(lxRECORD, "RECORD");
734
    enterkw(lxRECORD, "record");
594
    enterkw(lxREPEAT, "REPEAT");
735
    enterkw(lxREPEAT, "repeat");
595
    enterkw(lxRETURN, "RETURN");
736
    enterkw(lxRETURN, "return");
596
    enterkw(lxTHEN, "THEN");
737
    enterkw(lxTHEN, "then");
597
    enterkw(lxTO, "TO");
738
    enterkw(lxTO, "to");
598
    enterkw(lxTRUE, "TRUE");
739
    enterkw(lxTRUE, "true");
599
    enterkw(lxTYPE, "TYPE");
740
    enterkw(lxTYPE, "type");
600
    enterkw(lxUNTIL, "UNTIL");
741
    enterkw(lxUNTIL, "until");
601
    enterkw(lxVAR, "VAR");
742
    enterkw(lxVAR, "var");
602
    enterkw(lxWHILE, "WHILE")
743
    enterkw(lxWHILE, "while")
603
 
744
 
604
END init;
745
END init;
605
 
746
 
-
 
747
 
-
 
748
PROCEDURE NewDef* (str: ARRAY OF CHAR);
-
 
749
VAR
-
 
750
    item: DEF;
-
 
751
 
-
 
752
BEGIN
-
 
753
    NEW(item);
-
 
754
    COPY(str, item.ident);
-
 
755
    LISTS.push(def, item)
-
 
756
END NewDef;
-
 
757
 
606
 
758
 
607
BEGIN
759
BEGIN
608
    init
760
    def := LISTS.create(NIL)
609
END SCAN.
761
END SCAN.
610
>
762
>