Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 7597 → Rev 7693

/programs/develop/oberon07/Source/SCAN.ob07
1,13 → 1,13
(*
BSD 2-Clause License
 
Copyright (c) 2018, Anton Krotov
Copyright (c) 2018, 2019, Anton Krotov
All rights reserved.
*)
 
MODULE SCAN;
 
IMPORT TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, C := COLLECTIONS;
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS;
 
 
CONST
18,29 → 18,30
lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
lxEOF* = 8;
 
lxKW = 101;
lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24;
lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28;
lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32;
lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36;
lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40;
lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44;
lxASSIGN* = 45; lxRANGE* = 46;
 
lxARRAY* = 101; lxBEGIN* = 102; lxBY* = 103; lxCASE* = 104;
lxCONST* = 105; lxDIV* = 106; lxDO* = 107; lxELSE* = 108;
lxELSIF* = 109; lxEND* = 110; lxFALSE* = 111; lxFOR* = 112;
lxIF* = 113; lxIMPORT* = 114; lxIN* = 115; lxIS* = 116;
lxMOD* = 117; lxMODULE* = 118; lxNIL* = 119; lxOF* = 120;
lxOR* = 121; lxPOINTER* = 122; lxPROCEDURE* = 123; lxRECORD* = 124;
lxREPEAT* = 125; lxRETURN* = 126; lxTHEN* = 127; lxTO* = 128;
lxTRUE* = 129; lxTYPE* = 130; lxUNTIL* = 131; lxVAR* = 132;
lxWHILE* = 133;
lxKW = 51;
 
lxPLUS* = 201; lxMINUS* = 202; lxMUL* = 203; lxSLASH* = 204;
lxNOT* = 205; lxAND* = 206; lxPOINT* = 207; lxCOMMA* = 208;
lxSEMI* = 209; lxBAR* = 210; lxLROUND* = 211; lxLSQUARE* = 212;
lxLCURLY* = 213; lxCARET* = 214; lxEQ* = 215; lxNE* = 216;
lxLT* = 217; lxGT* = 218; lxCOLON* = 219; lxRROUND* = 220;
lxRSQUARE* = 221; lxRCURLY* = 222; lxLE* = 223; lxGE* = 224;
lxASSIGN* = 225; lxRANGE* = 226;
lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54;
lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58;
lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62;
lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66;
lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70;
lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74;
lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78;
lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82;
lxWHILE* = 83;
 
lxERROR01 = -1; lxERROR02 = -2; lxERROR03 = -3; lxERROR04 = -4;
lxERROR05 = -5; lxERROR06 = -6; lxERROR07 = -7; lxERROR08 = -8;
lxERROR09 = -9; lxERROR10 = -10; lxERROR11 = -11; lxERROR12 = -12;
lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4;
lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8;
lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12;
lxERROR13* = -13;
 
 
TYPE
75,13 → 76,8
 
END;
 
SCANNER* = POINTER TO RECORD (C.ITEM)
SCANNER* = TXT.TEXT;
 
text: TEXTDRV.TEXT;
range: BOOLEAN
 
END;
 
KEYWORD = ARRAY 10 OF CHAR;
 
 
90,15 → 86,13
vocabulary: RECORD
 
KW: ARRAY 33 OF KEYWORD;
 
delimiters: ARRAY 256 OF BOOLEAN;
 
idents: AVL.NODE;
ident: IDENT
 
END;
 
scanners: C.COLLECTION;
upto: BOOLEAN;
 
 
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
109,36 → 103,27
PROCEDURE key (VAR lex: LEX);
VAR
L, R, M: INTEGER;
found: BOOLEAN;
 
BEGIN
L := 0;
R := LEN(vocabulary.KW) - 1;
found := FALSE;
 
REPEAT
M := (L + R) DIV 2;
 
WHILE L # M DO
IF lex.s # vocabulary.KW[M] THEN
IF lex.s > vocabulary.KW[M] THEN
L := M;
M := (L + R) DIV 2
ELSIF lex.s < vocabulary.KW[M] THEN
R := M;
M := (L + R) DIV 2
L := M + 1
ELSE
lex.sym := lxKW + M;
L := M;
R := M
R := M - 1
END
END;
 
IF L # R THEN
IF lex.s = vocabulary.KW[L] THEN
lex.sym := lxKW + L
END;
 
IF lex.s = vocabulary.KW[R] THEN
lex.sym := lxKW + R
ELSE
found := TRUE;
lex.sym := lxKW + M
END
END
 
UNTIL found OR (L > R)
END key;
 
 
173,18 → 158,24
END putchar;
 
 
PROCEDURE ident (text: TEXTDRV.TEXT; VAR lex: LEX);
(*
    Steps the text driver forward and reports the character that is current
    afterwards: calls TXT.next(text), then returns the text.peak field.
    The scanning procedures in this listing use it as "c := nextc(text)".

    text - text driver object being scanned.
    Returns the value of text.peak as read after the TXT.next call.

    NOTE(review): TXT.next presumably moves one character ahead and
    refreshes text.peak - confirm in TEXTDRV.
*)
PROCEDURE nextc (text: TXT.TEXT): CHAR;
BEGIN
TXT.next(text)
(* no ";" required above: in Oberon-07 the RETURN clause is part of the procedure body, not a statement *)
RETURN text.peak
END nextc;
 
 
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
VAR
c: CHAR;
 
BEGIN
c := text.peak(text);
c := text.peak;
ASSERT(S.letter(c));
 
WHILE S.letter(c) OR S.digit(c) DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END;
 
IF lex.over THEN
201,7 → 192,7
END ident;
 
 
PROCEDURE number (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
VAR
c: CHAR;
hex: BOOLEAN;
208,37 → 199,33
error: INTEGER;
 
BEGIN
c := text.peak(text);
c := text.peak;
ASSERT(S.digit(c));
 
error := 0;
 
range := FALSE;
 
lex.sym := lxINTEGER;
hex := FALSE;
 
WHILE S.digit(c) DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END;
 
WHILE S.hexdigit(c) DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text);
c := nextc(text);
hex := TRUE
END;
 
IF c = "H" THEN
putchar(lex, c);
text.nextc(text);
TXT.next(text);
lex.sym := lxHEX
 
ELSIF c = "X" THEN
putchar(lex, c);
text.nextc(text);
TXT.next(text);
lex.sym := lxCHAR
 
ELSIF c = "." THEN
247,8 → 234,7
lex.sym := lxERROR01
ELSE
 
text.nextc(text);
c := text.peak(text);
c := nextc(text);
 
IF c # "." THEN
putchar(lex, ".");
255,31 → 241,28
lex.sym := lxFLOAT
ELSE
lex.sym := lxINTEGER;
range := TRUE
text.peak := 7FX;
upto := TRUE
END;
 
WHILE S.digit(c) DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END;
 
IF c = "E" THEN
 
putchar(lex, c);
text.nextc(text);
c := text.peak(text);
c := nextc(text);
IF (c = "+") OR (c = "-") THEN
putchar(lex, c);
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END;
 
IF S.digit(c) THEN
WHILE S.digit(c) DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END
ELSE
lex.sym := lxERROR02
289,11 → 272,8
 
END
 
ELSE
 
IF hex THEN
ELSIF hex THEN
lex.sym := lxERROR01
END
 
END;
 
321,31 → 301,23
END number;
 
 
PROCEDURE string (text: TEXTDRV.TEXT; VAR lex: LEX);
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
VAR
c, c1: CHAR;
c: CHAR;
n: INTEGER;
quot: CHAR;
 
BEGIN
quot := text.peak(text);
 
ASSERT((quot = '"') OR (quot = "'"));
 
text.nextc(text);
c := text.peak(text);
c1 := c;
c := nextc(text);
n := 0;
 
WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
putchar(lex, c);
text.nextc(text);
c := text.peak(text);
c := nextc(text);
INC(n)
END;
 
IF c = quot THEN
text.nextc(text);
TXT.next(text);
IF lex.over THEN
lex.sym := lxERROR05
ELSE
353,7 → 325,7
lex.sym := lxSTRING
ELSE
lex.sym := lxCHAR;
ARITH.setChar(lex.value, ORD(c1))
ARITH.setChar(lex.value, ORD(lex.s[0]))
END
END
ELSE
369,7 → 341,7
END string;
 
 
PROCEDURE comment (text: TEXTDRV.TEXT);
PROCEDURE comment (text: TXT.TEXT);
VAR
c: CHAR;
cond, depth: INTEGER;
380,8 → 352,8
 
REPEAT
 
c := text.peak(text);
text.nextc(text);
c := text.peak;
TXT.next(text);
 
IF c = "*" THEN
IF cond = 1 THEN
406,21 → 378,12
END comment;
 
 
PROCEDURE delimiter (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
VAR
c: CHAR;
 
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
BEGIN
c := text.peak(text);
 
IF range THEN
ASSERT(c = ".")
END;
 
putchar(lex, c);
text.nextc(text);
c := nextc(text);
 
CASE c OF
CASE lex.s[0] OF
|"+":
lex.sym := lxPLUS
 
433,10 → 396,10
|"/":
lex.sym := lxSLASH;
 
IF text.peak(text) = "/" THEN
IF c = "/" THEN
lex.sym := lxCOMMENT;
REPEAT
text.nextc(text)
TXT.next(text)
UNTIL text.eol OR text.eof
END
 
447,26 → 410,14
lex.sym := lxAND
 
|".":
IF range THEN
 
putchar(lex, ".");
lex.sym := lxRANGE;
range := FALSE;
DEC(lex.pos.col)
 
ELSE
 
lex.sym := lxPOINT;
c := text.peak(text);
 
IF c = "." THEN
lex.sym := lxRANGE;
putchar(lex, c);
text.nextc(text)
TXT.next(text)
END
 
END
 
|",":
lex.sym := lxCOMMA
 
478,12 → 429,10
 
|"(":
lex.sym := lxLROUND;
c := text.peak(text);
 
IF c = "*" THEN
lex.sym := lxCOMMENT;
putchar(lex, c);
text.nextc(text);
TXT.next(text);
comment(text)
END
 
504,32 → 453,29
 
|"<":
lex.sym := lxLT;
c := text.peak(text);
 
IF c = "=" THEN
lex.sym := lxLE;
putchar(lex, c);
text.nextc(text)
TXT.next(text)
END
 
|">":
lex.sym := lxGT;
c := text.peak(text);
 
IF c = "=" THEN
lex.sym := lxGE;
putchar(lex, c);
text.nextc(text)
TXT.next(text)
END
 
|":":
lex.sym := lxCOLON;
c := text.peak(text);
 
IF c = "=" THEN
lex.sym := lxASSIGN;
putchar(lex, c);
text.nextc(text)
TXT.next(text)
END
 
|")":
546,26 → 492,21
END delimiter;
 
 
PROCEDURE Next* (scanner: SCANNER; VAR lex: LEX);
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
VAR
c: CHAR;
text: TEXTDRV.TEXT;
 
BEGIN
text := scanner.text;
 
REPEAT
c := text.peak;
 
c := text.peak(text);
 
WHILE S.space(c) DO
text.nextc(text);
c := text.peak(text)
c := nextc(text)
END;
 
lex.s[0] := 0X;
lex.length := 0;
lex.sym := lxUNDEF;
lex.pos.line := text.line;
lex.pos.col := text.col;
lex.ident := NIL;
574,19 → 515,26
IF S.letter(c) THEN
ident(text, lex)
ELSIF S.digit(c) THEN
number(text, lex, scanner.range)
number(text, lex)
ELSIF (c = '"') OR (c = "'") THEN
string(text, lex)
string(text, lex, c)
ELSIF vocabulary.delimiters[ORD(c)] THEN
delimiter(text, lex, scanner.range)
delimiter(text, lex, c)
ELSIF c = 0X THEN
lex.sym := lxEOF;
IF text.eof THEN
INC(lex.pos.col)
END
ELSIF (c = 7FX) & upto THEN
upto := FALSE;
lex.sym := lxRANGE;
putchar(lex, ".");
putchar(lex, ".");
DEC(lex.pos.col);
TXT.next(text)
ELSE
putchar(lex, c);
text.nextc(text);
TXT.next(text);
lex.sym := lxERROR04
END;
 
601,53 → 549,14
END Next;
 
 
(*
    Hands out a SCANNER object, taking one from the module-level
    "scanners" collection when it yields an item.

    C.pop(scanners) is tried first. A NIL result leads to a fresh allocation
    with NEW; any other result is narrowed to SCANNER with a type guard.
    "close" in this listing pushes scanners onto the same collection.

    Returns the scanner. No field is assigned here; "open" in this listing
    sets text and range after calling this procedure.

    NOTE(review): C.pop presumably removes and returns one item, or NIL when
    the collection is empty - confirm in COLLECTIONS.
    NOTE(review): this listing interleaves two revisions of the module and
    this procedure appears to belong to the older one - confirm.
*)
PROCEDURE NewScanner (): SCANNER;
VAR
scan: SCANNER;
citem: C.ITEM;
 
BEGIN
citem := C.pop(scanners);
IF citem = NIL THEN
(* nothing obtained from the collection: allocate a new record *)
NEW(scan)
ELSE
(* type guard: narrows the generic C.ITEM to SCANNER *)
scan := citem(SCANNER)
END
 
RETURN scan
END NewScanner;
 
 
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
VAR
scanner: SCANNER;
text: TEXTDRV.TEXT;
 
BEGIN
text := TEXTDRV.create();
IF text.open(text, name) THEN
scanner := NewScanner();
scanner.text := text;
scanner.range := FALSE
ELSE
scanner := NIL;
TEXTDRV.destroy(text)
END
 
RETURN scanner
RETURN TXT.open(name)
END open;
 
 
PROCEDURE close* (VAR scanner: SCANNER);
BEGIN
IF scanner # NIL THEN
IF scanner.text # NIL THEN
TEXTDRV.destroy(scanner.text)
END;
 
C.push(scanners, scanner);
scanner := NIL
END
TXT.close(scanner)
END close;
 
 
656,6 → 565,7
i: INTEGER;
delim: ARRAY 23 OF CHAR;
 
 
(*
    Stores kw in vocabulary.KW at index i, then increments i.

    i  - VAR parameter: slot to fill; the caller's variable is advanced by one.
    kw - keyword text to store.

    No explicit bounds check is made here; KW is declared as
    ARRAY 33 OF KEYWORD in this listing.
*)
PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD);
BEGIN
vocabulary.KW[i] := kw;
(* NOTE(review): the next line looks like a hunk marker of the revision listing, not Oberon code *)
662,8 → 572,9
INC(i)
END enterkw;
 
 
BEGIN
scanners := C.create();
upto := FALSE;
 
FOR i := 0 TO 255 DO
vocabulary.delimiters[i] := FALSE