Subversion Repositories Kolibri OS

Rev

Rev 7693 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
7696 akron1 1
(*
7597 akron1 2
    BSD 2-Clause License
6613 leency 3
 
7696 akron1 4
    Copyright (c) 2018-2019, Anton Krotov
7597 akron1 5
    All rights reserved.
6613 leency 6
*)
7
 
8
MODULE SCAN;
9
 
7693 akron1 10
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS;
6613 leency 11
 
7597 akron1 12
 
6613 leency 13
CONST
14
 
7597 akron1 15
    LEXLEN = 1024;
6613 leency 16
 
7693 akron1 17
    lxUNDEF*   =  0;  lxIDENT*   =  1;  lxINTEGER*   =  2;  lxHEX*     =  3;
18
    lxCHAR*    =  4;  lxFLOAT*   =  5;  lxSTRING*    =  6;  lxCOMMENT* =  7;
19
    lxEOF*     =  8;
6613 leency 20
 
7693 akron1 21
    lxPLUS*    = 21;  lxMINUS*   = 22;  lxMUL*       = 23;  lxSLASH*   = 24;
22
    lxNOT*     = 25;  lxAND*     = 26;  lxPOINT*     = 27;  lxCOMMA*   = 28;
23
    lxSEMI*    = 29;  lxBAR*     = 30;  lxLROUND*    = 31;  lxLSQUARE* = 32;
24
    lxLCURLY*  = 33;  lxCARET*   = 34;  lxEQ*        = 35;  lxNE*      = 36;
25
    lxLT*      = 37;  lxGT*      = 38;  lxCOLON*     = 39;  lxRROUND*  = 40;
26
    lxRSQUARE* = 41;  lxRCURLY*  = 42;  lxLE*        = 43;  lxGE*      = 44;
27
    lxASSIGN*  = 45;  lxRANGE*   = 46;
6613 leency 28
 
7693 akron1 29
    lxKW       = 51;
6613 leency 30
 
7693 akron1 31
    lxARRAY*   = 51;  lxBEGIN*   = 52;  lxBY*        = 53;  lxCASE*    = 54;
32
    lxCONST*   = 55;  lxDIV*     = 56;  lxDO*        = 57;  lxELSE*    = 58;
33
    lxELSIF*   = 59;  lxEND*     = 60;  lxFALSE*     = 61;  lxFOR*     = 62;
34
    lxIF*      = 63;  lxIMPORT*  = 64;  lxIN*        = 65;  lxIS*      = 66;
35
    lxMOD*     = 67;  lxMODULE*  = 68;  lxNIL*       = 69;  lxOF*      = 70;
36
    lxOR*      = 71;  lxPOINTER* = 72;  lxPROCEDURE* = 73;  lxRECORD*  = 74;
37
    lxREPEAT*  = 75;  lxRETURN*  = 76;  lxTHEN*      = 77;  lxTO*      = 78;
38
    lxTRUE*    = 79;  lxTYPE*    = 80;  lxUNTIL*     = 81;  lxVAR*     = 82;
39
    lxWHILE*   = 83;
7597 akron1 40
 
7693 akron1 41
    lxERROR01* =  -1;  lxERROR02* =  -2;  lxERROR03* =  -3;  lxERROR04* =  -4;
42
    lxERROR05* =  -5;  lxERROR06* =  -6;  lxERROR07* =  -7;  lxERROR08* =  -8;
43
    lxERROR09* =  -9;  lxERROR10* = -10;  lxERROR11* = -11;  lxERROR12* = -12;
44
    lxERROR13* = -13;
7597 akron1 45
 
46
 
6613 leency 47
TYPE
48
 
7597 akron1 49
    LEXSTR* = ARRAY LEXLEN OF CHAR;
6613 leency 50
 
7597 akron1 51
    IDENT* = POINTER TO RECORD (AVL.DATA)
6613 leency 52
 
7597 akron1 53
        s*: LEXSTR;
7696 akron1 54
        offset*, offsetW*: INTEGER;
55
        key: INTEGER
6613 leency 56
 
7597 akron1 57
    END;
58
 
59
    POSITION* = RECORD
60
 
61
        line*, col*: INTEGER
62
 
63
    END;
64
 
65
    LEX* = RECORD
66
 
7693 akron1 67
        s*:         LEXSTR;
68
        length*:    INTEGER;
69
        sym*:       INTEGER;
70
        pos*:       POSITION;
71
        ident*:     IDENT;
72
        string*:    IDENT;
73
        value*:     ARITH.VALUE;
74
        error*:     INTEGER;
7597 akron1 75
 
7693 akron1 76
        over:       BOOLEAN
7597 akron1 77
 
78
    END;
79
 
7693 akron1 80
    SCANNER* = TXT.TEXT;
7597 akron1 81
 
82
 
6613 leency 83
VAR
84
 
7696 akron1 85
    idents: AVL.NODE;
6613 leency 86
 
7696 akron1 87
    delimiters: ARRAY 256 OF BOOLEAN;
6613 leency 88
 
7696 akron1 89
    NewIdent: IDENT;
6613 leency 90
 
7693 akron1 91
    upto: BOOLEAN;
6613 leency 92
 
7597 akron1 93
 
94
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
95
    RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
96
END nodecmp;
97
 
98
 
99
PROCEDURE enterid* (s: LEXSTR): IDENT;
100
VAR
101
    newnode: BOOLEAN;
102
    node:    AVL.NODE;
6613 leency 103
 
104
BEGIN
7696 akron1 105
    NewIdent.s := s;
106
    idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node);
6613 leency 107
 
7597 akron1 108
    IF newnode THEN
7696 akron1 109
        NEW(NewIdent);
110
        NewIdent.offset  := -1;
111
        NewIdent.offsetW := -1;
112
        NewIdent.key := 0
7597 akron1 113
    END
6613 leency 114
 
7597 akron1 115
    RETURN node.data(IDENT)
116
END enterid;
6613 leency 117
 
7597 akron1 118
 
119
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
6613 leency 120
BEGIN
7597 akron1 121
    IF lex.length < LEXLEN - 1 THEN
122
        lex.s[lex.length] := c;
123
        INC(lex.length);
124
        lex.s[lex.length] := 0X
6613 leency 125
    ELSE
7597 akron1 126
        lex.over := TRUE
6613 leency 127
    END
7597 akron1 128
END putchar;
6613 leency 129
 
7597 akron1 130
 
7693 akron1 131
PROCEDURE nextc (text: TXT.TEXT): CHAR;
132
BEGIN
133
    TXT.next(text)
134
    RETURN text.peak
135
END nextc;
136
 
137
 
138
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
7597 akron1 139
VAR
140
    c: CHAR;
141
 
6613 leency 142
BEGIN
7693 akron1 143
    c := text.peak;
7597 akron1 144
    ASSERT(S.letter(c));
6613 leency 145
 
7597 akron1 146
    WHILE S.letter(c) OR S.digit(c) DO
147
        putchar(lex, c);
7693 akron1 148
        c := nextc(text)
7597 akron1 149
    END;
150
 
151
    IF lex.over THEN
152
        lex.sym := lxERROR06
6613 leency 153
    ELSE
7696 akron1 154
        lex.ident := enterid(lex.s);
155
        IF lex.ident.key # 0 THEN
156
            lex.sym := lex.ident.key
157
        ELSE
158
            lex.sym := lxIDENT
159
        END
6613 leency 160
    END
161
 
7597 akron1 162
END ident;
6613 leency 163
 
164
 
7693 akron1 165
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
7597 akron1 166
VAR
167
    c: CHAR;
168
    hex: BOOLEAN;
169
    error: INTEGER;
6613 leency 170
 
7597 akron1 171
BEGIN
7693 akron1 172
    c := text.peak;
7597 akron1 173
    ASSERT(S.digit(c));
6613 leency 174
 
7597 akron1 175
    error := 0;
6613 leency 176
 
7597 akron1 177
    lex.sym := lxINTEGER;
178
    hex := FALSE;
179
 
180
    WHILE S.digit(c) DO
181
        putchar(lex, c);
7693 akron1 182
        c := nextc(text)
6613 leency 183
    END;
7597 akron1 184
 
185
    WHILE S.hexdigit(c) DO
186
        putchar(lex, c);
7693 akron1 187
        c := nextc(text);
7597 akron1 188
        hex := TRUE
6613 leency 189
    END;
7597 akron1 190
 
191
    IF c = "H" THEN
192
        putchar(lex, c);
7693 akron1 193
        TXT.next(text);
7597 akron1 194
        lex.sym := lxHEX
195
 
196
    ELSIF c = "X" THEN
197
        putchar(lex, c);
7693 akron1 198
        TXT.next(text);
7597 akron1 199
        lex.sym := lxCHAR
200
 
201
    ELSIF c = "." THEN
202
 
203
        IF hex THEN
204
            lex.sym := lxERROR01
6613 leency 205
        ELSE
7597 akron1 206
 
7693 akron1 207
            c := nextc(text);
7597 akron1 208
 
209
            IF c # "." THEN
210
                putchar(lex, ".");
211
                lex.sym := lxFLOAT
212
            ELSE
213
                lex.sym := lxINTEGER;
7693 akron1 214
                text.peak := 7FX;
215
                upto := TRUE
7597 akron1 216
            END;
217
 
218
            WHILE S.digit(c) DO
219
                putchar(lex, c);
7693 akron1 220
                c := nextc(text)
7597 akron1 221
            END;
222
 
223
            IF c = "E" THEN
224
 
225
                putchar(lex, c);
7693 akron1 226
                c := nextc(text);
7597 akron1 227
                IF (c = "+") OR (c = "-") THEN
228
                    putchar(lex, c);
7693 akron1 229
                    c := nextc(text)
7597 akron1 230
                END;
231
 
232
                IF S.digit(c) THEN
233
                    WHILE S.digit(c) DO
234
                        putchar(lex, c);
7693 akron1 235
                        c := nextc(text)
7597 akron1 236
                    END
237
                ELSE
238
                    lex.sym := lxERROR02
239
                END
240
 
241
            END
242
 
6613 leency 243
        END
7597 akron1 244
 
7693 akron1 245
    ELSIF hex THEN
246
        lex.sym := lxERROR01
7597 akron1 247
 
248
    END;
249
 
250
    IF lex.over & (lex.sym >= 0) THEN
251
        lex.sym := lxERROR07
252
    END;
253
 
254
    IF lex.sym = lxINTEGER THEN
255
        ARITH.iconv(lex.s, lex.value, error)
256
    ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN
257
        ARITH.hconv(lex.s, lex.value, error)
258
    ELSIF lex.sym = lxFLOAT THEN
259
        ARITH.fconv(lex.s, lex.value, error)
260
    END;
261
 
262
    CASE error OF
263
    |0:
264
    |1: lex.sym := lxERROR08
265
    |2: lex.sym := lxERROR09
266
    |3: lex.sym := lxERROR10
267
    |4: lex.sym := lxERROR11
268
    |5: lex.sym := lxERROR12
6613 leency 269
    END
270
 
7597 akron1 271
END number;
272
 
273
 
7693 akron1 274
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
7597 akron1 275
VAR
7693 akron1 276
    c: CHAR;
277
    n: INTEGER;
7597 akron1 278
 
279
BEGIN
7693 akron1 280
    c  := nextc(text);
281
    n  := 0;
7597 akron1 282
 
283
    WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
284
        putchar(lex, c);
7693 akron1 285
        c := nextc(text);
7597 akron1 286
        INC(n)
6613 leency 287
    END;
7597 akron1 288
 
289
    IF c = quot THEN
7693 akron1 290
        TXT.next(text);
7597 akron1 291
        IF lex.over THEN
292
            lex.sym := lxERROR05
293
        ELSE
294
            IF n # 1 THEN
295
                lex.sym := lxSTRING
296
            ELSE
297
                lex.sym := lxCHAR;
7693 akron1 298
                ARITH.setChar(lex.value, ORD(lex.s[0]))
7597 akron1 299
            END
300
        END
301
    ELSE
302
        lex.sym := lxERROR03
6613 leency 303
    END;
7597 akron1 304
 
305
    IF lex.sym = lxSTRING THEN
306
        lex.string := enterid(lex.s);
307
        lex.value.typ := ARITH.tSTRING;
308
        lex.value.string := lex.string
6613 leency 309
    END
310
 
7597 akron1 311
END string;
312
 
313
 
7693 akron1 314
PROCEDURE comment (text: TXT.TEXT);
7597 akron1 315
VAR
316
    c: CHAR;
317
    cond, depth: INTEGER;
318
 
6613 leency 319
BEGIN
7693 akron1 320
    cond  := 0;
7597 akron1 321
    depth := 1;
6613 leency 322
 
7597 akron1 323
    REPEAT
324
 
7693 akron1 325
        c := text.peak;
326
        TXT.next(text);
7597 akron1 327
 
328
        IF c = "*" THEN
329
            IF cond = 1 THEN
330
                cond := 0;
331
                INC(depth)
332
            ELSE
333
               cond := 2
334
            END
335
        ELSIF c = ")" THEN
336
            IF cond = 2 THEN
337
                DEC(depth)
338
            END;
339
            cond := 0
340
        ELSIF c = "(" THEN
341
            cond := 1
342
        ELSE
343
            cond := 0
344
        END
345
 
346
    UNTIL (depth = 0) OR text.eof
347
 
348
END comment;
349
 
350
 
7693 akron1 351
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
6613 leency 352
BEGIN
7597 akron1 353
    putchar(lex, c);
7693 akron1 354
    c := nextc(text);
7597 akron1 355
 
7693 akron1 356
    CASE lex.s[0] OF
7597 akron1 357
    |"+":
358
        lex.sym := lxPLUS
359
 
360
    |"-":
361
        lex.sym := lxMINUS
362
 
363
    |"*":
364
        lex.sym := lxMUL
365
 
366
    |"/":
367
        lex.sym := lxSLASH;
368
 
7693 akron1 369
        IF c = "/" THEN
7597 akron1 370
            lex.sym := lxCOMMENT;
371
            REPEAT
7693 akron1 372
                TXT.next(text)
7597 akron1 373
            UNTIL text.eol OR text.eof
374
        END
375
 
376
    |"~":
377
        lex.sym := lxNOT
378
 
379
    |"&":
380
        lex.sym := lxAND
381
 
382
    |".":
7693 akron1 383
        lex.sym := lxPOINT;
7597 akron1 384
 
7693 akron1 385
        IF c = "." THEN
7597 akron1 386
            lex.sym := lxRANGE;
7693 akron1 387
            putchar(lex, c);
388
            TXT.next(text)
6613 leency 389
        END
7597 akron1 390
 
391
    |",":
392
        lex.sym := lxCOMMA
393
 
394
    |";":
395
        lex.sym := lxSEMI
396
 
397
    |"|":
398
        lex.sym := lxBAR
399
 
400
    |"(":
401
        lex.sym := lxLROUND;
402
 
403
        IF c = "*" THEN
404
            lex.sym := lxCOMMENT;
7693 akron1 405
            TXT.next(text);
7597 akron1 406
            comment(text)
407
        END
408
 
409
    |"[":
410
        lex.sym := lxLSQUARE
411
 
412
    |"{":
413
        lex.sym := lxLCURLY
414
 
415
    |"^":
416
        lex.sym := lxCARET
417
 
418
    |"=":
419
        lex.sym := lxEQ
420
 
421
    |"#":
422
        lex.sym := lxNE
423
 
424
    |"<":
425
        lex.sym := lxLT;
426
 
427
        IF c = "=" THEN
428
            lex.sym := lxLE;
429
            putchar(lex, c);
7693 akron1 430
            TXT.next(text)
7597 akron1 431
        END
432
 
433
    |">":
434
        lex.sym := lxGT;
435
 
436
        IF c = "=" THEN
437
            lex.sym := lxGE;
438
            putchar(lex, c);
7693 akron1 439
            TXT.next(text)
7597 akron1 440
        END
441
 
442
    |":":
443
        lex.sym := lxCOLON;
444
 
445
        IF c = "=" THEN
446
            lex.sym := lxASSIGN;
447
            putchar(lex, c);
7693 akron1 448
            TXT.next(text)
7597 akron1 449
        END
450
 
451
    |")":
452
        lex.sym := lxRROUND
453
 
454
    |"]":
455
        lex.sym := lxRSQUARE
456
 
457
    |"}":
458
        lex.sym := lxRCURLY
459
 
6613 leency 460
    END
461
 
7597 akron1 462
END delimiter;
463
 
464
 
7693 akron1 465
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
7597 akron1 466
VAR
467
    c: CHAR;
468
 
6613 leency 469
BEGIN
470
 
7597 akron1 471
    REPEAT
7693 akron1 472
        c := text.peak;
7597 akron1 473
 
474
        WHILE S.space(c) DO
7693 akron1 475
            c := nextc(text)
7597 akron1 476
        END;
477
 
478
        lex.s[0]     := 0X;
479
        lex.length   := 0;
480
        lex.pos.line := text.line;
481
        lex.pos.col  := text.col;
482
        lex.ident    := NIL;
483
        lex.over     := FALSE;
484
 
485
        IF S.letter(c) THEN
486
            ident(text, lex)
487
        ELSIF S.digit(c) THEN
7693 akron1 488
            number(text, lex)
7597 akron1 489
        ELSIF (c = '"') OR (c = "'") THEN
7693 akron1 490
            string(text, lex, c)
7696 akron1 491
        ELSIF delimiters[ORD(c)] THEN
7693 akron1 492
            delimiter(text, lex, c)
7597 akron1 493
        ELSIF c = 0X THEN
494
            lex.sym := lxEOF;
495
            IF text.eof THEN
496
                INC(lex.pos.col)
497
            END
7693 akron1 498
        ELSIF (c = 7FX) & upto THEN
499
            upto := FALSE;
500
            lex.sym := lxRANGE;
501
            putchar(lex, ".");
502
            putchar(lex, ".");
503
            DEC(lex.pos.col);
504
            TXT.next(text)
7597 akron1 505
        ELSE
506
            putchar(lex, c);
7693 akron1 507
            TXT.next(text);
7597 akron1 508
            lex.sym := lxERROR04
509
        END;
510
 
511
        IF lex.sym < 0 THEN
512
            lex.error := -lex.sym
513
        ELSE
514
            lex.error := 0
515
        END
516
 
517
    UNTIL lex.sym # lxCOMMENT
518
 
519
END Next;
520
 
521
 
522
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
7693 akron1 523
    RETURN TXT.open(name)
7597 akron1 524
END open;
525
 
526
 
527
PROCEDURE close* (VAR scanner: SCANNER);
528
BEGIN
7693 akron1 529
    TXT.close(scanner)
7597 akron1 530
END close;
531
 
532
 
533
PROCEDURE init;
534
VAR
535
    i: INTEGER;
536
    delim: ARRAY 23 OF CHAR;
537
 
7693 akron1 538
 
7696 akron1 539
    PROCEDURE enterkw (key: INTEGER; kw: LEXSTR);
540
    VAR
541
        id: IDENT;
542
 
7597 akron1 543
    BEGIN
7696 akron1 544
        id := enterid(kw);
545
        id.key := key
7597 akron1 546
    END enterkw;
547
 
7693 akron1 548
 
7597 akron1 549
BEGIN
7693 akron1 550
    upto := FALSE;
7597 akron1 551
 
552
    FOR i := 0 TO 255 DO
7696 akron1 553
        delimiters[i] := FALSE
6613 leency 554
    END;
7597 akron1 555
 
556
    delim := "+-*/~&.,;|([{^=#<>:)]}";
557
 
558
    FOR i := 0 TO LEN(delim) - 2 DO
7696 akron1 559
        delimiters[ORD(delim[i])] := TRUE
6613 leency 560
    END;
561
 
7696 akron1 562
    NEW(NewIdent);
563
    NewIdent.s       := "";
564
    NewIdent.offset  := -1;
565
    NewIdent.offsetW := -1;
566
    NewIdent.key     := 0;
6613 leency 567
 
7696 akron1 568
    idents := NIL;
569
 
570
    enterkw(lxARRAY, "ARRAY");
571
    enterkw(lxBEGIN, "BEGIN");
572
    enterkw(lxBY, "BY");
573
    enterkw(lxCASE, "CASE");
574
    enterkw(lxCONST, "CONST");
575
    enterkw(lxDIV, "DIV");
576
    enterkw(lxDO, "DO");
577
    enterkw(lxELSE, "ELSE");
578
    enterkw(lxELSIF, "ELSIF");
579
    enterkw(lxEND, "END");
580
    enterkw(lxFALSE, "FALSE");
581
    enterkw(lxFOR, "FOR");
582
    enterkw(lxIF, "IF");
583
    enterkw(lxIMPORT, "IMPORT");
584
    enterkw(lxIN, "IN");
585
    enterkw(lxIS, "IS");
586
    enterkw(lxMOD, "MOD");
587
    enterkw(lxMODULE, "MODULE");
588
    enterkw(lxNIL, "NIL");
589
    enterkw(lxOF, "OF");
590
    enterkw(lxOR, "OR");
591
    enterkw(lxPOINTER, "POINTER");
592
    enterkw(lxPROCEDURE, "PROCEDURE");
593
    enterkw(lxRECORD, "RECORD");
594
    enterkw(lxREPEAT, "REPEAT");
595
    enterkw(lxRETURN, "RETURN");
596
    enterkw(lxTHEN, "THEN");
597
    enterkw(lxTO, "TO");
598
    enterkw(lxTRUE, "TRUE");
599
    enterkw(lxTYPE, "TYPE");
600
    enterkw(lxUNTIL, "UNTIL");
601
    enterkw(lxVAR, "VAR");
602
    enterkw(lxWHILE, "WHILE")
603
 
7597 akron1 604
END init;
6613 leency 605
 
7597 akron1 606
 
6613 leency 607
BEGIN
7597 akron1 608
    init
7696 akron1 609
END SCAN.