Subversion Repositories Kolibri OS

Rev

Rev 7597 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
7597 akron1 1
(*
2
    BSD 2-Clause License
6613 leency 3
 
7693 akron1 4
    Copyright (c) 2018, 2019, Anton Krotov
7597 akron1 5
    All rights reserved.
6613 leency 6
*)
7
 
8
MODULE SCAN;
9
 
7693 akron1 10
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS;
6613 leency 11
 
7597 akron1 12
 
6613 leency 13
CONST
14
 
7597 akron1 15
    LEXLEN = 1024;
6613 leency 16
 
7693 akron1 17
    lxUNDEF*   =  0;  lxIDENT*   =  1;  lxINTEGER*   =  2;  lxHEX*     =  3;
18
    lxCHAR*    =  4;  lxFLOAT*   =  5;  lxSTRING*    =  6;  lxCOMMENT* =  7;
19
    lxEOF*     =  8;
6613 leency 20
 
7693 akron1 21
    lxPLUS*    = 21;  lxMINUS*   = 22;  lxMUL*       = 23;  lxSLASH*   = 24;
22
    lxNOT*     = 25;  lxAND*     = 26;  lxPOINT*     = 27;  lxCOMMA*   = 28;
23
    lxSEMI*    = 29;  lxBAR*     = 30;  lxLROUND*    = 31;  lxLSQUARE* = 32;
24
    lxLCURLY*  = 33;  lxCARET*   = 34;  lxEQ*        = 35;  lxNE*      = 36;
25
    lxLT*      = 37;  lxGT*      = 38;  lxCOLON*     = 39;  lxRROUND*  = 40;
26
    lxRSQUARE* = 41;  lxRCURLY*  = 42;  lxLE*        = 43;  lxGE*      = 44;
27
    lxASSIGN*  = 45;  lxRANGE*   = 46;
6613 leency 28
 
7693 akron1 29
    lxKW       = 51;
6613 leency 30
 
7693 akron1 31
    lxARRAY*   = 51;  lxBEGIN*   = 52;  lxBY*        = 53;  lxCASE*    = 54;
32
    lxCONST*   = 55;  lxDIV*     = 56;  lxDO*        = 57;  lxELSE*    = 58;
33
    lxELSIF*   = 59;  lxEND*     = 60;  lxFALSE*     = 61;  lxFOR*     = 62;
34
    lxIF*      = 63;  lxIMPORT*  = 64;  lxIN*        = 65;  lxIS*      = 66;
35
    lxMOD*     = 67;  lxMODULE*  = 68;  lxNIL*       = 69;  lxOF*      = 70;
36
    lxOR*      = 71;  lxPOINTER* = 72;  lxPROCEDURE* = 73;  lxRECORD*  = 74;
37
    lxREPEAT*  = 75;  lxRETURN*  = 76;  lxTHEN*      = 77;  lxTO*      = 78;
38
    lxTRUE*    = 79;  lxTYPE*    = 80;  lxUNTIL*     = 81;  lxVAR*     = 82;
39
    lxWHILE*   = 83;
7597 akron1 40
 
7693 akron1 41
    lxERROR01* =  -1;  lxERROR02* =  -2;  lxERROR03* =  -3;  lxERROR04* =  -4;
42
    lxERROR05* =  -5;  lxERROR06* =  -6;  lxERROR07* =  -7;  lxERROR08* =  -8;
43
    lxERROR09* =  -9;  lxERROR10* = -10;  lxERROR11* = -11;  lxERROR12* = -12;
44
    lxERROR13* = -13;
7597 akron1 45
 
46
 
6613 leency 47
TYPE
48
 
7597 akron1 49
    LEXSTR* = ARRAY LEXLEN OF CHAR;
6613 leency 50
 
7597 akron1 51
    IDENT* = POINTER TO RECORD (AVL.DATA)
6613 leency 52
 
7597 akron1 53
        s*: LEXSTR;
54
        offset*, offsetW*: INTEGER
6613 leency 55
 
7597 akron1 56
    END;
57
 
58
    POSITION* = RECORD
59
 
60
        line*, col*: INTEGER
61
 
62
    END;
63
 
64
    LEX* = RECORD
65
 
7693 akron1 66
        s*:         LEXSTR;
67
        length*:    INTEGER;
68
        sym*:       INTEGER;
69
        pos*:       POSITION;
70
        ident*:     IDENT;
71
        string*:    IDENT;
72
        value*:     ARITH.VALUE;
73
        error*:     INTEGER;
7597 akron1 74
 
7693 akron1 75
        over:       BOOLEAN
7597 akron1 76
 
77
    END;
78
 
7693 akron1 79
    SCANNER* = TXT.TEXT;
7597 akron1 80
 
81
    KEYWORD = ARRAY 10 OF CHAR;
82
 
83
 
6613 leency 84
VAR
85
 
7597 akron1 86
    vocabulary: RECORD
6613 leency 87
 
7693 akron1 88
        KW:         ARRAY 33 OF KEYWORD;
89
        delimiters: ARRAY 256 OF BOOLEAN;
90
        idents:     AVL.NODE;
91
        ident:      IDENT
6613 leency 92
 
7597 akron1 93
    END;
6613 leency 94
 
7693 akron1 95
    upto: BOOLEAN;
6613 leency 96
 
7597 akron1 97
 
98
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
99
    RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
100
END nodecmp;
101
 
102
 
103
PROCEDURE key (VAR lex: LEX);
104
VAR
105
    L, R, M: INTEGER;
7693 akron1 106
    found: BOOLEAN;
7597 akron1 107
 
6613 leency 108
BEGIN
7597 akron1 109
    L := 0;
110
    R := LEN(vocabulary.KW) - 1;
7693 akron1 111
    found := FALSE;
6613 leency 112
 
7693 akron1 113
    REPEAT
114
        M := (L + R) DIV 2;
115
 
116
        IF lex.s # vocabulary.KW[M] THEN
117
            IF lex.s > vocabulary.KW[M] THEN
118
                L := M + 1
119
            ELSE
120
                R := M - 1
121
            END
7597 akron1 122
        ELSE
7693 akron1 123
            found := TRUE;
124
            lex.sym := lxKW + M
6613 leency 125
        END
7693 akron1 126
    UNTIL found OR (L > R)
7597 akron1 127
END key;
6613 leency 128
 
129
 
7597 akron1 130
PROCEDURE enterid* (s: LEXSTR): IDENT;
131
VAR
132
    newnode: BOOLEAN;
133
    node:    AVL.NODE;
6613 leency 134
 
135
BEGIN
7597 akron1 136
    vocabulary.ident.s := s;
137
    vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node);
6613 leency 138
 
7597 akron1 139
    IF newnode THEN
140
        NEW(vocabulary.ident);
141
        vocabulary.ident.offset  := -1;
142
        vocabulary.ident.offsetW := -1
143
    END
6613 leency 144
 
7597 akron1 145
    RETURN node.data(IDENT)
146
END enterid;
6613 leency 147
 
7597 akron1 148
 
149
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
6613 leency 150
BEGIN
7597 akron1 151
    IF lex.length < LEXLEN - 1 THEN
152
        lex.s[lex.length] := c;
153
        INC(lex.length);
154
        lex.s[lex.length] := 0X
6613 leency 155
    ELSE
7597 akron1 156
        lex.over := TRUE
6613 leency 157
    END
7597 akron1 158
END putchar;
6613 leency 159
 
7597 akron1 160
 
7693 akron1 161
PROCEDURE nextc (text: TXT.TEXT): CHAR;
162
BEGIN
163
    TXT.next(text)
164
    RETURN text.peak
165
END nextc;
166
 
167
 
168
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
7597 akron1 169
VAR
170
    c: CHAR;
171
 
6613 leency 172
BEGIN
7693 akron1 173
    c := text.peak;
7597 akron1 174
    ASSERT(S.letter(c));
6613 leency 175
 
7597 akron1 176
    WHILE S.letter(c) OR S.digit(c) DO
177
        putchar(lex, c);
7693 akron1 178
        c := nextc(text)
7597 akron1 179
    END;
180
 
181
    IF lex.over THEN
182
        lex.sym := lxERROR06
6613 leency 183
    ELSE
7597 akron1 184
        lex.sym := lxIDENT;
185
        key(lex)
186
    END;
187
 
188
    IF lex.sym = lxIDENT THEN
189
        lex.ident := enterid(lex.s)
6613 leency 190
    END
191
 
7597 akron1 192
END ident;
6613 leency 193
 
194
 
7693 akron1 195
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
7597 akron1 196
VAR
197
    c: CHAR;
198
    hex: BOOLEAN;
199
    error: INTEGER;
6613 leency 200
 
7597 akron1 201
BEGIN
7693 akron1 202
    c := text.peak;
7597 akron1 203
    ASSERT(S.digit(c));
6613 leency 204
 
7597 akron1 205
    error := 0;
6613 leency 206
 
7597 akron1 207
    lex.sym := lxINTEGER;
208
    hex := FALSE;
209
 
210
    WHILE S.digit(c) DO
211
        putchar(lex, c);
7693 akron1 212
        c := nextc(text)
6613 leency 213
    END;
7597 akron1 214
 
215
    WHILE S.hexdigit(c) DO
216
        putchar(lex, c);
7693 akron1 217
        c := nextc(text);
7597 akron1 218
        hex := TRUE
6613 leency 219
    END;
7597 akron1 220
 
221
    IF c = "H" THEN
222
        putchar(lex, c);
7693 akron1 223
        TXT.next(text);
7597 akron1 224
        lex.sym := lxHEX
225
 
226
    ELSIF c = "X" THEN
227
        putchar(lex, c);
7693 akron1 228
        TXT.next(text);
7597 akron1 229
        lex.sym := lxCHAR
230
 
231
    ELSIF c = "." THEN
232
 
233
        IF hex THEN
234
            lex.sym := lxERROR01
6613 leency 235
        ELSE
7597 akron1 236
 
7693 akron1 237
            c := nextc(text);
7597 akron1 238
 
239
            IF c # "." THEN
240
                putchar(lex, ".");
241
                lex.sym := lxFLOAT
242
            ELSE
243
                lex.sym := lxINTEGER;
7693 akron1 244
                text.peak := 7FX;
245
                upto := TRUE
7597 akron1 246
            END;
247
 
248
            WHILE S.digit(c) DO
249
                putchar(lex, c);
7693 akron1 250
                c := nextc(text)
7597 akron1 251
            END;
252
 
253
            IF c = "E" THEN
254
 
255
                putchar(lex, c);
7693 akron1 256
                c := nextc(text);
7597 akron1 257
                IF (c = "+") OR (c = "-") THEN
258
                    putchar(lex, c);
7693 akron1 259
                    c := nextc(text)
7597 akron1 260
                END;
261
 
262
                IF S.digit(c) THEN
263
                    WHILE S.digit(c) DO
264
                        putchar(lex, c);
7693 akron1 265
                        c := nextc(text)
7597 akron1 266
                    END
267
                ELSE
268
                    lex.sym := lxERROR02
269
                END
270
 
271
            END
272
 
6613 leency 273
        END
7597 akron1 274
 
7693 akron1 275
    ELSIF hex THEN
276
        lex.sym := lxERROR01
7597 akron1 277
 
278
    END;
279
 
280
    IF lex.over & (lex.sym >= 0) THEN
281
        lex.sym := lxERROR07
282
    END;
283
 
284
    IF lex.sym = lxINTEGER THEN
285
        ARITH.iconv(lex.s, lex.value, error)
286
    ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN
287
        ARITH.hconv(lex.s, lex.value, error)
288
    ELSIF lex.sym = lxFLOAT THEN
289
        ARITH.fconv(lex.s, lex.value, error)
290
    END;
291
 
292
    CASE error OF
293
    |0:
294
    |1: lex.sym := lxERROR08
295
    |2: lex.sym := lxERROR09
296
    |3: lex.sym := lxERROR10
297
    |4: lex.sym := lxERROR11
298
    |5: lex.sym := lxERROR12
6613 leency 299
    END
300
 
7597 akron1 301
END number;
302
 
303
 
7693 akron1 304
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
7597 akron1 305
VAR
7693 akron1 306
    c: CHAR;
307
    n: INTEGER;
7597 akron1 308
 
309
BEGIN
7693 akron1 310
    c  := nextc(text);
311
    n  := 0;
7597 akron1 312
 
313
    WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
314
        putchar(lex, c);
7693 akron1 315
        c := nextc(text);
7597 akron1 316
        INC(n)
6613 leency 317
    END;
7597 akron1 318
 
319
    IF c = quot THEN
7693 akron1 320
        TXT.next(text);
7597 akron1 321
        IF lex.over THEN
322
            lex.sym := lxERROR05
323
        ELSE
324
            IF n # 1 THEN
325
                lex.sym := lxSTRING
326
            ELSE
327
                lex.sym := lxCHAR;
7693 akron1 328
                ARITH.setChar(lex.value, ORD(lex.s[0]))
7597 akron1 329
            END
330
        END
331
    ELSE
332
        lex.sym := lxERROR03
6613 leency 333
    END;
7597 akron1 334
 
335
    IF lex.sym = lxSTRING THEN
336
        lex.string := enterid(lex.s);
337
        lex.value.typ := ARITH.tSTRING;
338
        lex.value.string := lex.string
6613 leency 339
    END
340
 
7597 akron1 341
END string;
342
 
343
 
7693 akron1 344
PROCEDURE comment (text: TXT.TEXT);
7597 akron1 345
VAR
346
    c: CHAR;
347
    cond, depth: INTEGER;
348
 
6613 leency 349
BEGIN
7693 akron1 350
    cond  := 0;
7597 akron1 351
    depth := 1;
6613 leency 352
 
7597 akron1 353
    REPEAT
354
 
7693 akron1 355
        c := text.peak;
356
        TXT.next(text);
7597 akron1 357
 
358
        IF c = "*" THEN
359
            IF cond = 1 THEN
360
                cond := 0;
361
                INC(depth)
362
            ELSE
363
               cond := 2
364
            END
365
        ELSIF c = ")" THEN
366
            IF cond = 2 THEN
367
                DEC(depth)
368
            END;
369
            cond := 0
370
        ELSIF c = "(" THEN
371
            cond := 1
372
        ELSE
373
            cond := 0
374
        END
375
 
376
    UNTIL (depth = 0) OR text.eof
377
 
378
END comment;
379
 
380
 
7693 akron1 381
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
6613 leency 382
BEGIN
7597 akron1 383
    putchar(lex, c);
7693 akron1 384
    c := nextc(text);
7597 akron1 385
 
7693 akron1 386
    CASE lex.s[0] OF
7597 akron1 387
    |"+":
388
        lex.sym := lxPLUS
389
 
390
    |"-":
391
        lex.sym := lxMINUS
392
 
393
    |"*":
394
        lex.sym := lxMUL
395
 
396
    |"/":
397
        lex.sym := lxSLASH;
398
 
7693 akron1 399
        IF c = "/" THEN
7597 akron1 400
            lex.sym := lxCOMMENT;
401
            REPEAT
7693 akron1 402
                TXT.next(text)
7597 akron1 403
            UNTIL text.eol OR text.eof
404
        END
405
 
406
    |"~":
407
        lex.sym := lxNOT
408
 
409
    |"&":
410
        lex.sym := lxAND
411
 
412
    |".":
7693 akron1 413
        lex.sym := lxPOINT;
7597 akron1 414
 
7693 akron1 415
        IF c = "." THEN
7597 akron1 416
            lex.sym := lxRANGE;
7693 akron1 417
            putchar(lex, c);
418
            TXT.next(text)
6613 leency 419
        END
7597 akron1 420
 
421
    |",":
422
        lex.sym := lxCOMMA
423
 
424
    |";":
425
        lex.sym := lxSEMI
426
 
427
    |"|":
428
        lex.sym := lxBAR
429
 
430
    |"(":
431
        lex.sym := lxLROUND;
432
 
433
        IF c = "*" THEN
434
            lex.sym := lxCOMMENT;
7693 akron1 435
            TXT.next(text);
7597 akron1 436
            comment(text)
437
        END
438
 
439
    |"[":
440
        lex.sym := lxLSQUARE
441
 
442
    |"{":
443
        lex.sym := lxLCURLY
444
 
445
    |"^":
446
        lex.sym := lxCARET
447
 
448
    |"=":
449
        lex.sym := lxEQ
450
 
451
    |"#":
452
        lex.sym := lxNE
453
 
454
    |"<":
455
        lex.sym := lxLT;
456
 
457
        IF c = "=" THEN
458
            lex.sym := lxLE;
459
            putchar(lex, c);
7693 akron1 460
            TXT.next(text)
7597 akron1 461
        END
462
 
463
    |">":
464
        lex.sym := lxGT;
465
 
466
        IF c = "=" THEN
467
            lex.sym := lxGE;
468
            putchar(lex, c);
7693 akron1 469
            TXT.next(text)
7597 akron1 470
        END
471
 
472
    |":":
473
        lex.sym := lxCOLON;
474
 
475
        IF c = "=" THEN
476
            lex.sym := lxASSIGN;
477
            putchar(lex, c);
7693 akron1 478
            TXT.next(text)
7597 akron1 479
        END
480
 
481
    |")":
482
        lex.sym := lxRROUND
483
 
484
    |"]":
485
        lex.sym := lxRSQUARE
486
 
487
    |"}":
488
        lex.sym := lxRCURLY
489
 
6613 leency 490
    END
491
 
7597 akron1 492
END delimiter;
493
 
494
 
7693 akron1 495
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
7597 akron1 496
VAR
497
    c: CHAR;
498
 
6613 leency 499
BEGIN
500
 
7597 akron1 501
    REPEAT
7693 akron1 502
        c := text.peak;
7597 akron1 503
 
504
        WHILE S.space(c) DO
7693 akron1 505
            c := nextc(text)
7597 akron1 506
        END;
507
 
508
        lex.s[0]     := 0X;
509
        lex.length   := 0;
510
        lex.pos.line := text.line;
511
        lex.pos.col  := text.col;
512
        lex.ident    := NIL;
513
        lex.over     := FALSE;
514
 
515
        IF S.letter(c) THEN
516
            ident(text, lex)
517
        ELSIF S.digit(c) THEN
7693 akron1 518
            number(text, lex)
7597 akron1 519
        ELSIF (c = '"') OR (c = "'") THEN
7693 akron1 520
            string(text, lex, c)
7597 akron1 521
        ELSIF vocabulary.delimiters[ORD(c)] THEN
7693 akron1 522
            delimiter(text, lex, c)
7597 akron1 523
        ELSIF c = 0X THEN
524
            lex.sym := lxEOF;
525
            IF text.eof THEN
526
                INC(lex.pos.col)
527
            END
7693 akron1 528
        ELSIF (c = 7FX) & upto THEN
529
            upto := FALSE;
530
            lex.sym := lxRANGE;
531
            putchar(lex, ".");
532
            putchar(lex, ".");
533
            DEC(lex.pos.col);
534
            TXT.next(text)
7597 akron1 535
        ELSE
536
            putchar(lex, c);
7693 akron1 537
            TXT.next(text);
7597 akron1 538
            lex.sym := lxERROR04
539
        END;
540
 
541
        IF lex.sym < 0 THEN
542
            lex.error := -lex.sym
543
        ELSE
544
            lex.error := 0
545
        END
546
 
547
    UNTIL lex.sym # lxCOMMENT
548
 
549
END Next;
550
 
551
 
552
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
7693 akron1 553
    RETURN TXT.open(name)
7597 akron1 554
END open;
555
 
556
 
557
PROCEDURE close* (VAR scanner: SCANNER);
558
BEGIN
7693 akron1 559
    TXT.close(scanner)
7597 akron1 560
END close;
561
 
562
 
563
PROCEDURE init;
564
VAR
565
    i: INTEGER;
566
    delim: ARRAY 23 OF CHAR;
567
 
7693 akron1 568
 
7597 akron1 569
    PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD);
570
    BEGIN
571
        vocabulary.KW[i] := kw;
572
        INC(i)
573
    END enterkw;
574
 
7693 akron1 575
 
7597 akron1 576
BEGIN
7693 akron1 577
    upto := FALSE;
7597 akron1 578
 
579
    FOR i := 0 TO 255 DO
580
        vocabulary.delimiters[i] := FALSE
6613 leency 581
    END;
7597 akron1 582
 
583
    delim := "+-*/~&.,;|([{^=#<>:)]}";
584
 
585
    FOR i := 0 TO LEN(delim) - 2 DO
586
        vocabulary.delimiters[ORD(delim[i])] := TRUE
6613 leency 587
    END;
588
 
7597 akron1 589
    i := 0;
590
    enterkw(i, "ARRAY");
591
    enterkw(i, "BEGIN");
592
    enterkw(i, "BY");
593
    enterkw(i, "CASE");
594
    enterkw(i, "CONST");
595
    enterkw(i, "DIV");
596
    enterkw(i, "DO");
597
    enterkw(i, "ELSE");
598
    enterkw(i, "ELSIF");
599
    enterkw(i, "END");
600
    enterkw(i, "FALSE");
601
    enterkw(i, "FOR");
602
    enterkw(i, "IF");
603
    enterkw(i, "IMPORT");
604
    enterkw(i, "IN");
605
    enterkw(i, "IS");
606
    enterkw(i, "MOD");
607
    enterkw(i, "MODULE");
608
    enterkw(i, "NIL");
609
    enterkw(i, "OF");
610
    enterkw(i, "OR");
611
    enterkw(i, "POINTER");
612
    enterkw(i, "PROCEDURE");
613
    enterkw(i, "RECORD");
614
    enterkw(i, "REPEAT");
615
    enterkw(i, "RETURN");
616
    enterkw(i, "THEN");
617
    enterkw(i, "TO");
618
    enterkw(i, "TRUE");
619
    enterkw(i, "TYPE");
620
    enterkw(i, "UNTIL");
621
    enterkw(i, "VAR");
622
    enterkw(i, "WHILE");
6613 leency 623
 
7597 akron1 624
    NEW(vocabulary.ident);
625
    vocabulary.ident.s       := "";
626
    vocabulary.ident.offset  := -1;
627
    vocabulary.ident.offsetW := -1;
7693 akron1 628
    vocabulary.idents := NIL
7597 akron1 629
END init;
6613 leency 630
 
7597 akron1 631
 
6613 leency 632
BEGIN
7597 akron1 633
    init
6613 leency 634
END SCAN.