Subversion Repositories Kolibri OS

Rev

Rev 6613 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
7597 akron1 1
(*
2
    BSD 2-Clause License
6613 leency 3
 
7597 akron1 4
    Copyright (c) 2018, Anton Krotov
5
    All rights reserved.
6613 leency 6
*)
7
 
8
MODULE SCAN;
9
 
7597 akron1 10
IMPORT TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, C := COLLECTIONS;
6613 leency 11
 
7597 akron1 12
 
6613 leency 13
CONST
14
 
7597 akron1 15
    LEXLEN = 1024;
6613 leency 16
 
7597 akron1 17
    lxUNDEF*   =   0;  lxIDENT*   =   1;  lxINTEGER*   =   2;  lxHEX*     =   3;
18
    lxCHAR*    =   4;  lxFLOAT*   =   5;  lxSTRING*    =   6;  lxCOMMENT* =   7;
19
    lxEOF*     =   8;
6613 leency 20
 
7597 akron1 21
    lxKW       = 101;
6613 leency 22
 
7597 akron1 23
    lxARRAY*   = 101;  lxBEGIN*   = 102;  lxBY*        = 103;  lxCASE*    = 104;
24
    lxCONST*   = 105;  lxDIV*     = 106;  lxDO*        = 107;  lxELSE*    = 108;
25
    lxELSIF*   = 109;  lxEND*     = 110;  lxFALSE*     = 111;  lxFOR*     = 112;
26
    lxIF*      = 113;  lxIMPORT*  = 114;  lxIN*        = 115;  lxIS*      = 116;
27
    lxMOD*     = 117;  lxMODULE*  = 118;  lxNIL*       = 119;  lxOF*      = 120;
28
    lxOR*      = 121;  lxPOINTER* = 122;  lxPROCEDURE* = 123;  lxRECORD*  = 124;
29
    lxREPEAT*  = 125;  lxRETURN*  = 126;  lxTHEN*      = 127;  lxTO*      = 128;
30
    lxTRUE*    = 129;  lxTYPE*    = 130;  lxUNTIL*     = 131;  lxVAR*     = 132;
31
    lxWHILE*   = 133;
6613 leency 32
 
7597 akron1 33
    lxPLUS*    = 201;  lxMINUS*   = 202;  lxMUL*       = 203;  lxSLASH*   = 204;
34
    lxNOT*     = 205;  lxAND*     = 206;  lxPOINT*     = 207;  lxCOMMA*   = 208;
35
    lxSEMI*    = 209;  lxBAR*     = 210;  lxLROUND*    = 211;  lxLSQUARE* = 212;
36
    lxLCURLY*  = 213;  lxCARET*   = 214;  lxEQ*        = 215;  lxNE*      = 216;
37
    lxLT*      = 217;  lxGT*      = 218;  lxCOLON*     = 219;  lxRROUND*  = 220;
38
    lxRSQUARE* = 221;  lxRCURLY*  = 222;  lxLE*        = 223;  lxGE*      = 224;
39
    lxASSIGN*  = 225;  lxRANGE*   = 226;
40
 
41
    lxERROR01 =  -1;  lxERROR02 =  -2;  lxERROR03   =  -3;  lxERROR04 =  -4;
42
    lxERROR05 =  -5;  lxERROR06 =  -6;  lxERROR07   =  -7;  lxERROR08 =  -8;
43
    lxERROR09 =  -9;  lxERROR10 = -10;  lxERROR11   = -11;  lxERROR12 = -12;
44
 
45
 
6613 leency 46
TYPE
47
 
7597 akron1 48
    LEXSTR* = ARRAY LEXLEN OF CHAR;
6613 leency 49
 
7597 akron1 50
    IDENT* = POINTER TO RECORD (AVL.DATA)
6613 leency 51
 
7597 akron1 52
        s*: LEXSTR;
53
        offset*, offsetW*: INTEGER
6613 leency 54
 
7597 akron1 55
    END;
56
 
57
    POSITION* = RECORD
58
 
59
        line*, col*: INTEGER
60
 
61
    END;
62
 
63
    LEX* = RECORD
64
 
65
        s*:              LEXSTR;
66
        length*:         INTEGER;
67
        sym*:            INTEGER;
68
        pos*:            POSITION;
69
        ident*:          IDENT;
70
        string*:         IDENT;
71
        value*:          ARITH.VALUE;
72
        error*:          INTEGER;
73
 
74
        over:            BOOLEAN
75
 
76
    END;
77
 
78
    SCANNER* = POINTER TO RECORD (C.ITEM)
79
 
80
        text:            TEXTDRV.TEXT;
81
        range:           BOOLEAN
82
 
83
    END;
84
 
85
    KEYWORD = ARRAY 10 OF CHAR;
86
 
87
 
6613 leency 88
VAR
89
 
7597 akron1 90
    vocabulary: RECORD
6613 leency 91
 
7597 akron1 92
        KW:          ARRAY 33 OF KEYWORD;
6613 leency 93
 
7597 akron1 94
        delimiters:  ARRAY 256 OF BOOLEAN;
6613 leency 95
 
7597 akron1 96
        idents:      AVL.NODE;
97
        ident:       IDENT
6613 leency 98
 
7597 akron1 99
    END;
6613 leency 100
 
7597 akron1 101
    scanners: C.COLLECTION;
6613 leency 102
 
7597 akron1 103
 
104
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
105
    RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
106
END nodecmp;
107
 
108
 
109
PROCEDURE key (VAR lex: LEX);
110
VAR
111
    L, R, M: INTEGER;
112
 
6613 leency 113
BEGIN
7597 akron1 114
    L := 0;
115
    R := LEN(vocabulary.KW) - 1;
116
    M := (L + R) DIV 2;
6613 leency 117
 
7597 akron1 118
    WHILE L # M DO
119
        IF lex.s > vocabulary.KW[M] THEN
120
            L := M;
121
            M := (L + R) DIV 2
122
        ELSIF lex.s < vocabulary.KW[M] THEN
123
            R := M;
124
            M := (L + R) DIV 2
125
        ELSE
126
            lex.sym := lxKW + M;
127
            L := M;
128
            R := M
6613 leency 129
        END
130
    END;
131
 
7597 akron1 132
    IF L # R THEN
133
        IF lex.s = vocabulary.KW[L] THEN
134
            lex.sym := lxKW + L
135
        END;
6613 leency 136
 
7597 akron1 137
        IF lex.s = vocabulary.KW[R] THEN
138
            lex.sym := lxKW + R
139
        END
140
    END
6613 leency 141
 
7597 akron1 142
END key;
6613 leency 143
 
144
 
7597 akron1 145
PROCEDURE enterid* (s: LEXSTR): IDENT;
146
VAR
147
    newnode: BOOLEAN;
148
    node:    AVL.NODE;
6613 leency 149
 
150
BEGIN
7597 akron1 151
    vocabulary.ident.s := s;
152
    vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node);
6613 leency 153
 
7597 akron1 154
    IF newnode THEN
155
        NEW(vocabulary.ident);
156
        vocabulary.ident.offset  := -1;
157
        vocabulary.ident.offsetW := -1
158
    END
6613 leency 159
 
7597 akron1 160
    RETURN node.data(IDENT)
161
END enterid;
6613 leency 162
 
7597 akron1 163
 
164
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
6613 leency 165
BEGIN
7597 akron1 166
    IF lex.length < LEXLEN - 1 THEN
167
        lex.s[lex.length] := c;
168
        INC(lex.length);
169
        lex.s[lex.length] := 0X
6613 leency 170
    ELSE
7597 akron1 171
        lex.over := TRUE
6613 leency 172
    END
7597 akron1 173
END putchar;
6613 leency 174
 
7597 akron1 175
 
176
PROCEDURE ident (text: TEXTDRV.TEXT; VAR lex: LEX);
177
VAR
178
    c: CHAR;
179
 
6613 leency 180
BEGIN
7597 akron1 181
    c := text.peak(text);
182
    ASSERT(S.letter(c));
6613 leency 183
 
7597 akron1 184
    WHILE S.letter(c) OR S.digit(c) DO
185
        putchar(lex, c);
186
        text.nextc(text);
187
        c := text.peak(text)
188
    END;
189
 
190
    IF lex.over THEN
191
        lex.sym := lxERROR06
6613 leency 192
    ELSE
7597 akron1 193
        lex.sym := lxIDENT;
194
        key(lex)
195
    END;
196
 
197
    IF lex.sym = lxIDENT THEN
198
        lex.ident := enterid(lex.s)
6613 leency 199
    END
200
 
7597 akron1 201
END ident;
6613 leency 202
 
203
 
7597 akron1 204
PROCEDURE number (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
205
VAR
206
    c: CHAR;
207
    hex: BOOLEAN;
208
    error: INTEGER;
6613 leency 209
 
7597 akron1 210
BEGIN
211
    c := text.peak(text);
212
    ASSERT(S.digit(c));
6613 leency 213
 
7597 akron1 214
    error := 0;
6613 leency 215
 
7597 akron1 216
    range := FALSE;
6613 leency 217
 
7597 akron1 218
    lex.sym := lxINTEGER;
219
    hex := FALSE;
220
 
221
    WHILE S.digit(c) DO
222
        putchar(lex, c);
223
        text.nextc(text);
224
        c := text.peak(text)
6613 leency 225
    END;
7597 akron1 226
 
227
    WHILE S.hexdigit(c) DO
228
        putchar(lex, c);
229
        text.nextc(text);
230
        c := text.peak(text);
231
        hex := TRUE
6613 leency 232
    END;
7597 akron1 233
 
234
    IF c = "H" THEN
235
        putchar(lex, c);
236
        text.nextc(text);
237
        lex.sym := lxHEX
238
 
239
    ELSIF c = "X" THEN
240
        putchar(lex, c);
241
        text.nextc(text);
242
        lex.sym := lxCHAR
243
 
244
    ELSIF c = "." THEN
245
 
246
        IF hex THEN
247
            lex.sym := lxERROR01
6613 leency 248
        ELSE
7597 akron1 249
 
250
            text.nextc(text);
251
            c := text.peak(text);
252
 
253
            IF c # "." THEN
254
                putchar(lex, ".");
255
                lex.sym := lxFLOAT
256
            ELSE
257
                lex.sym := lxINTEGER;
258
                range := TRUE
259
            END;
260
 
261
            WHILE S.digit(c) DO
262
                putchar(lex, c);
263
                text.nextc(text);
264
                c := text.peak(text)
265
            END;
266
 
267
            IF c = "E" THEN
268
 
269
                putchar(lex, c);
270
                text.nextc(text);
271
                c := text.peak(text);
272
                IF (c = "+") OR (c = "-") THEN
273
                    putchar(lex, c);
274
                    text.nextc(text);
275
                    c := text.peak(text)
276
                END;
277
 
278
                IF S.digit(c) THEN
279
                    WHILE S.digit(c) DO
280
                        putchar(lex, c);
281
                        text.nextc(text);
282
                        c := text.peak(text)
283
                    END
284
                ELSE
285
                    lex.sym := lxERROR02
286
                END
287
 
288
            END
289
 
6613 leency 290
        END
7597 akron1 291
 
292
    ELSE
293
 
294
        IF hex THEN
295
            lex.sym := lxERROR01
296
        END
297
 
298
    END;
299
 
300
    IF lex.over & (lex.sym >= 0) THEN
301
        lex.sym := lxERROR07
302
    END;
303
 
304
    IF lex.sym = lxINTEGER THEN
305
        ARITH.iconv(lex.s, lex.value, error)
306
    ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN
307
        ARITH.hconv(lex.s, lex.value, error)
308
    ELSIF lex.sym = lxFLOAT THEN
309
        ARITH.fconv(lex.s, lex.value, error)
310
    END;
311
 
312
    CASE error OF
313
    |0:
314
    |1: lex.sym := lxERROR08
315
    |2: lex.sym := lxERROR09
316
    |3: lex.sym := lxERROR10
317
    |4: lex.sym := lxERROR11
318
    |5: lex.sym := lxERROR12
6613 leency 319
    END
320
 
7597 akron1 321
END number;
322
 
323
 
324
PROCEDURE string (text: TEXTDRV.TEXT; VAR lex: LEX);
325
VAR
326
    c, c1: CHAR;
327
    n:     INTEGER;
328
    quot:  CHAR;
329
 
330
BEGIN
331
    quot := text.peak(text);
332
 
333
    ASSERT((quot = '"') OR (quot = "'"));
334
 
335
    text.nextc(text);
336
    c := text.peak(text);
337
    c1 := c;
338
    n := 0;
339
 
340
    WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
341
        putchar(lex, c);
342
        text.nextc(text);
343
        c := text.peak(text);
344
        INC(n)
6613 leency 345
    END;
7597 akron1 346
 
347
    IF c = quot THEN
348
        text.nextc(text);
349
        IF lex.over THEN
350
            lex.sym := lxERROR05
351
        ELSE
352
            IF n # 1 THEN
353
                lex.sym := lxSTRING
354
            ELSE
355
                lex.sym := lxCHAR;
356
                ARITH.setChar(lex.value, ORD(c1))
357
            END
358
        END
359
    ELSE
360
        lex.sym := lxERROR03
6613 leency 361
    END;
7597 akron1 362
 
363
    IF lex.sym = lxSTRING THEN
364
        lex.string := enterid(lex.s);
365
        lex.value.typ := ARITH.tSTRING;
366
        lex.value.string := lex.string
6613 leency 367
    END
368
 
7597 akron1 369
END string;
370
 
371
 
372
PROCEDURE comment (text: TEXTDRV.TEXT);
373
VAR
374
    c: CHAR;
375
    cond, depth: INTEGER;
376
 
6613 leency 377
BEGIN
7597 akron1 378
    cond := 0;
379
    depth := 1;
6613 leency 380
 
7597 akron1 381
    REPEAT
382
 
383
        c := text.peak(text);
384
        text.nextc(text);
385
 
386
        IF c = "*" THEN
387
            IF cond = 1 THEN
388
                cond := 0;
389
                INC(depth)
390
            ELSE
391
               cond := 2
392
            END
393
        ELSIF c = ")" THEN
394
            IF cond = 2 THEN
395
                DEC(depth)
396
            END;
397
            cond := 0
398
        ELSIF c = "(" THEN
399
            cond := 1
400
        ELSE
401
            cond := 0
402
        END
403
 
404
    UNTIL (depth = 0) OR text.eof
405
 
406
END comment;
407
 
408
 
409
PROCEDURE delimiter (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
410
VAR
411
    c: CHAR;
412
 
6613 leency 413
BEGIN
7597 akron1 414
    c := text.peak(text);
415
 
416
    IF range THEN
417
        ASSERT(c = ".")
6613 leency 418
    END;
7597 akron1 419
 
420
    putchar(lex, c);
421
    text.nextc(text);
422
 
423
    CASE c OF
424
    |"+":
425
        lex.sym := lxPLUS
426
 
427
    |"-":
428
        lex.sym := lxMINUS
429
 
430
    |"*":
431
        lex.sym := lxMUL
432
 
433
    |"/":
434
        lex.sym := lxSLASH;
435
 
436
        IF text.peak(text) = "/" THEN
437
            lex.sym := lxCOMMENT;
438
            REPEAT
439
                text.nextc(text)
440
            UNTIL text.eol OR text.eof
441
        END
442
 
443
    |"~":
444
        lex.sym := lxNOT
445
 
446
    |"&":
447
        lex.sym := lxAND
448
 
449
    |".":
450
        IF range THEN
451
 
452
            putchar(lex, ".");
453
            lex.sym := lxRANGE;
454
            range := FALSE;
455
            DEC(lex.pos.col)
456
 
457
        ELSE
458
 
459
            lex.sym := lxPOINT;
460
            c := text.peak(text);
461
 
462
            IF c = "." THEN
463
                lex.sym := lxRANGE;
464
                putchar(lex, c);
465
                text.nextc(text)
6613 leency 466
            END
7597 akron1 467
 
6613 leency 468
        END
7597 akron1 469
 
470
    |",":
471
        lex.sym := lxCOMMA
472
 
473
    |";":
474
        lex.sym := lxSEMI
475
 
476
    |"|":
477
        lex.sym := lxBAR
478
 
479
    |"(":
480
        lex.sym := lxLROUND;
481
        c := text.peak(text);
482
 
483
        IF c = "*" THEN
484
            lex.sym := lxCOMMENT;
485
            putchar(lex, c);
486
            text.nextc(text);
487
            comment(text)
488
        END
489
 
490
    |"[":
491
        lex.sym := lxLSQUARE
492
 
493
    |"{":
494
        lex.sym := lxLCURLY
495
 
496
    |"^":
497
        lex.sym := lxCARET
498
 
499
    |"=":
500
        lex.sym := lxEQ
501
 
502
    |"#":
503
        lex.sym := lxNE
504
 
505
    |"<":
506
        lex.sym := lxLT;
507
        c := text.peak(text);
508
 
509
        IF c = "=" THEN
510
            lex.sym := lxLE;
511
            putchar(lex, c);
512
            text.nextc(text)
513
        END
514
 
515
    |">":
516
        lex.sym := lxGT;
517
        c := text.peak(text);
518
 
519
        IF c = "=" THEN
520
            lex.sym := lxGE;
521
            putchar(lex, c);
522
            text.nextc(text)
523
        END
524
 
525
    |":":
526
        lex.sym := lxCOLON;
527
        c := text.peak(text);
528
 
529
        IF c = "=" THEN
530
            lex.sym := lxASSIGN;
531
            putchar(lex, c);
532
            text.nextc(text)
533
        END
534
 
535
    |")":
536
        lex.sym := lxRROUND
537
 
538
    |"]":
539
        lex.sym := lxRSQUARE
540
 
541
    |"}":
542
        lex.sym := lxRCURLY
543
 
6613 leency 544
    END
545
 
7597 akron1 546
END delimiter;
547
 
548
 
549
PROCEDURE Next* (scanner: SCANNER; VAR lex: LEX);
550
VAR
551
    c: CHAR;
552
    text: TEXTDRV.TEXT;
553
 
6613 leency 554
BEGIN
7597 akron1 555
    text := scanner.text;
6613 leency 556
 
7597 akron1 557
    REPEAT
558
 
559
        c := text.peak(text);
560
 
561
        WHILE S.space(c) DO
562
            text.nextc(text);
563
            c := text.peak(text)
564
        END;
565
 
566
        lex.s[0]     := 0X;
567
        lex.length   := 0;
568
        lex.sym      := lxUNDEF;
569
        lex.pos.line := text.line;
570
        lex.pos.col  := text.col;
571
        lex.ident    := NIL;
572
        lex.over     := FALSE;
573
 
574
        IF S.letter(c) THEN
575
            ident(text, lex)
576
        ELSIF S.digit(c) THEN
577
            number(text, lex, scanner.range)
578
        ELSIF (c = '"') OR (c = "'") THEN
579
            string(text, lex)
580
        ELSIF vocabulary.delimiters[ORD(c)] THEN
581
            delimiter(text, lex, scanner.range)
582
        ELSIF c = 0X THEN
583
            lex.sym := lxEOF;
584
            IF text.eof THEN
585
                INC(lex.pos.col)
586
            END
587
        ELSE
588
            putchar(lex, c);
589
            text.nextc(text);
590
            lex.sym := lxERROR04
591
        END;
592
 
593
        IF lex.sym < 0 THEN
594
            lex.error := -lex.sym
595
        ELSE
596
            lex.error := 0
597
        END
598
 
599
    UNTIL lex.sym # lxCOMMENT
600
 
601
END Next;
602
 
603
 
604
PROCEDURE NewScanner (): SCANNER;
605
VAR
606
    scan:  SCANNER;
607
    citem: C.ITEM;
608
 
6613 leency 609
BEGIN
7597 akron1 610
    citem := C.pop(scanners);
611
    IF citem = NIL THEN
612
        NEW(scan)
6613 leency 613
    ELSE
7597 akron1 614
        scan := citem(SCANNER)
615
    END
6613 leency 616
 
7597 akron1 617
    RETURN scan
618
END NewScanner;
619
 
620
 
621
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
622
VAR
623
    scanner: SCANNER;
624
    text:    TEXTDRV.TEXT;
625
 
6613 leency 626
BEGIN
7597 akron1 627
    text := TEXTDRV.create();
628
    IF text.open(text, name) THEN
629
        scanner := NewScanner();
630
        scanner.text   := text;
631
        scanner.range  := FALSE
6613 leency 632
    ELSE
7597 akron1 633
        scanner := NIL;
634
        TEXTDRV.destroy(text)
6613 leency 635
    END
7597 akron1 636
 
637
    RETURN scanner
638
END open;
639
 
640
 
641
PROCEDURE close* (VAR scanner: SCANNER);
642
BEGIN
643
    IF scanner # NIL THEN
644
        IF scanner.text # NIL THEN
645
            TEXTDRV.destroy(scanner.text)
646
        END;
647
 
648
        C.push(scanners, scanner);
649
        scanner := NIL
6613 leency 650
    END
7597 akron1 651
END close;
652
 
653
 
654
PROCEDURE init;
655
VAR
656
    i: INTEGER;
657
    delim: ARRAY 23 OF CHAR;
658
 
659
    PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD);
660
    BEGIN
661
        vocabulary.KW[i] := kw;
662
        INC(i)
663
    END enterkw;
664
 
665
BEGIN
666
    scanners := C.create();
667
 
668
    FOR i := 0 TO 255 DO
669
        vocabulary.delimiters[i] := FALSE
6613 leency 670
    END;
7597 akron1 671
 
672
    delim := "+-*/~&.,;|([{^=#<>:)]}";
673
 
674
    FOR i := 0 TO LEN(delim) - 2 DO
675
        vocabulary.delimiters[ORD(delim[i])] := TRUE
6613 leency 676
    END;
677
 
7597 akron1 678
    i := 0;
679
    enterkw(i, "ARRAY");
680
    enterkw(i, "BEGIN");
681
    enterkw(i, "BY");
682
    enterkw(i, "CASE");
683
    enterkw(i, "CONST");
684
    enterkw(i, "DIV");
685
    enterkw(i, "DO");
686
    enterkw(i, "ELSE");
687
    enterkw(i, "ELSIF");
688
    enterkw(i, "END");
689
    enterkw(i, "FALSE");
690
    enterkw(i, "FOR");
691
    enterkw(i, "IF");
692
    enterkw(i, "IMPORT");
693
    enterkw(i, "IN");
694
    enterkw(i, "IS");
695
    enterkw(i, "MOD");
696
    enterkw(i, "MODULE");
697
    enterkw(i, "NIL");
698
    enterkw(i, "OF");
699
    enterkw(i, "OR");
700
    enterkw(i, "POINTER");
701
    enterkw(i, "PROCEDURE");
702
    enterkw(i, "RECORD");
703
    enterkw(i, "REPEAT");
704
    enterkw(i, "RETURN");
705
    enterkw(i, "THEN");
706
    enterkw(i, "TO");
707
    enterkw(i, "TRUE");
708
    enterkw(i, "TYPE");
709
    enterkw(i, "UNTIL");
710
    enterkw(i, "VAR");
711
    enterkw(i, "WHILE");
6613 leency 712
 
7597 akron1 713
    NEW(vocabulary.ident);
714
    vocabulary.ident.s       := "";
715
    vocabulary.ident.offset  := -1;
716
    vocabulary.ident.offsetW := -1;
717
    vocabulary.idents  := NIL
718
END init;
6613 leency 719
 
7597 akron1 720
 
6613 leency 721
BEGIN
7597 akron1 722
    init
6613 leency 723
END SCAN.