Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5205 | clevermous | 1 | /* |
2 | ** $Id: llex.c,v 2.59 2011/11/30 12:43:51 roberto Exp $ |
||
3 | ** Lexical Analyzer |
||
4 | ** See Copyright Notice in lua.h |
||
5 | */ |
||
6 | |||
7 | |||
#include <locale.h>
#include <string.h>

#define llex_c
#define LUA_CORE

#include "lua.h"

#include "lctype.h"
#include "ldo.h"
#include "llex.h"
#include "lobject.h"
#include "lparser.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include "lzio.h"
||
25 | |||
26 | |||
27 | |||
/* store the result of zgetc(ls->z) in ls->current (and yield that value) */
#define next(ls) ((ls)->current = zgetc((ls)->z))


/* true when the current character is '\n' or '\r' */
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
||
33 | |||
34 | |||
/*
** Text of every multi-character token; luaX_token2str indexes this
** table with (token - FIRST_RESERVED).
** ORDER RESERVED (entries presumably follow the token enumeration
** declared in llex.h -- confirm before reordering).
*/
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
    "<number>", "<name>", "<string>"
};
||
44 | |||
45 | |||
46 | #define save_and_next(ls) (save(ls, ls->current), next(ls)) |
||
47 | |||
48 | |||
49 | static l_noret lexerror (LexState *ls, const char *msg, int token); |
||
50 | |||
51 | |||
52 | static void save (LexState *ls, int c) { |
||
53 | Mbuffer *b = ls->buff; |
||
54 | if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { |
||
55 | size_t newsize; |
||
56 | if (luaZ_sizebuffer(b) >= MAX_SIZET/2) |
||
57 | lexerror(ls, "lexical element too long", 0); |
||
58 | newsize = luaZ_sizebuffer(b) * 2; |
||
59 | luaZ_resizebuffer(ls->L, b, newsize); |
||
60 | } |
||
61 | b->buffer[luaZ_bufflen(b)++] = cast(char, c); |
||
62 | } |
||
63 | |||
64 | |||
65 | void luaX_init (lua_State *L) { |
||
66 | int i; |
||
67 | for (i=0; i |
||
68 | TString *ts = luaS_new(L, luaX_tokens[i]); |
||
69 | luaS_fix(ts); /* reserved words are never collected */ |
||
70 | ts->tsv.reserved = cast_byte(i+1); /* reserved word */ |
||
71 | } |
||
72 | } |
||
73 | |||
74 | |||
75 | const char *luaX_token2str (LexState *ls, int token) { |
||
76 | if (token < FIRST_RESERVED) { |
||
77 | lua_assert(token == cast(unsigned char, token)); |
||
78 | return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) : |
||
79 | luaO_pushfstring(ls->L, "char(%d)", token); |
||
80 | } |
||
81 | else { |
||
82 | const char *s = luaX_tokens[token - FIRST_RESERVED]; |
||
83 | if (token < TK_EOS) |
||
84 | return luaO_pushfstring(ls->L, LUA_QS, s); |
||
85 | else |
||
86 | return s; |
||
87 | } |
||
88 | } |
||
89 | |||
90 | |||
91 | static const char *txtToken (LexState *ls, int token) { |
||
92 | switch (token) { |
||
93 | case TK_NAME: |
||
94 | case TK_STRING: |
||
95 | case TK_NUMBER: |
||
96 | save(ls, '\0'); |
||
97 | return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff)); |
||
98 | default: |
||
99 | return luaX_token2str(ls, token); |
||
100 | } |
||
101 | } |
||
102 | |||
103 | |||
104 | static l_noret lexerror (LexState *ls, const char *msg, int token) { |
||
105 | char buff[LUA_IDSIZE]; |
||
106 | luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE); |
||
107 | msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); |
||
108 | if (token) |
||
109 | luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); |
||
110 | luaD_throw(ls->L, LUA_ERRSYNTAX); |
||
111 | } |
||
112 | |||
113 | |||
114 | l_noret luaX_syntaxerror (LexState *ls, const char *msg) { |
||
115 | lexerror(ls, msg, ls->t.token); |
||
116 | } |
||
117 | |||
118 | |||
119 | /* |
||
120 | ** creates a new string and anchors it in function's table so that |
||
121 | ** it will not be collected until the end of the function's compilation |
||
122 | ** (by that time it should be anchored in function's prototype) |
||
123 | */ |
||
124 | TString *luaX_newstring (LexState *ls, const char *str, size_t l) { |
||
125 | lua_State *L = ls->L; |
||
126 | TValue *o; /* entry for `str' */ |
||
127 | TString *ts = luaS_newlstr(L, str, l); /* create new string */ |
||
128 | setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ |
||
129 | o = luaH_set(L, ls->fs->h, L->top - 1); |
||
130 | if (ttisnil(o)) { /* not in use yet? (see 'addK') */ |
||
131 | /* boolean value does not need GC barrier; |
||
132 | table has no metatable, so it does not need to invalidate cache */ |
||
133 | setbvalue(o, 1); /* t[string] = true */ |
||
134 | luaC_checkGC(L); |
||
135 | } |
||
136 | L->top--; /* remove string from stack */ |
||
137 | return ts; |
||
138 | } |
||
139 | |||
140 | |||
141 | /* |
||
142 | ** increment line number and skips newline sequence (any of |
||
143 | ** \n, \r, \n\r, or \r\n) |
||
144 | */ |
||
145 | static void inclinenumber (LexState *ls) { |
||
146 | int old = ls->current; |
||
147 | lua_assert(currIsNewline(ls)); |
||
148 | next(ls); /* skip `\n' or `\r' */ |
||
149 | if (currIsNewline(ls) && ls->current != old) |
||
150 | next(ls); /* skip `\n\r' or `\r\n' */ |
||
151 | if (++ls->linenumber >= MAX_INT) |
||
152 | luaX_syntaxerror(ls, "chunk has too many lines"); |
||
153 | } |
||
154 | |||
155 | |||
156 | void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, |
||
157 | int firstchar) { |
||
158 | ls->decpoint = '.'; |
||
159 | ls->L = L; |
||
160 | ls->current = firstchar; |
||
161 | ls->lookahead.token = TK_EOS; /* no look-ahead token */ |
||
162 | ls->z = z; |
||
163 | ls->fs = NULL; |
||
164 | ls->linenumber = 1; |
||
165 | ls->lastline = 1; |
||
166 | ls->source = source; |
||
167 | ls->envn = luaS_new(L, LUA_ENV); /* create env name */ |
||
168 | luaS_fix(ls->envn); /* never collect this name */ |
||
169 | luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ |
||
170 | } |
||
171 | |||
172 | |||
173 | |||
174 | /* |
||
175 | ** ======================================================= |
||
176 | ** LEXICAL ANALYZER |
||
177 | ** ======================================================= |
||
178 | */ |
||
179 | |||
180 | |||
181 | |||
182 | static int check_next (LexState *ls, const char *set) { |
||
183 | if (ls->current == '\0' || !strchr(set, ls->current)) |
||
184 | return 0; |
||
185 | save_and_next(ls); |
||
186 | return 1; |
||
187 | } |
||
188 | |||
189 | |||
190 | /* |
||
191 | ** change all characters 'from' in buffer to 'to' |
||
192 | */ |
||
193 | static void buffreplace (LexState *ls, char from, char to) { |
||
194 | size_t n = luaZ_bufflen(ls->buff); |
||
195 | char *p = luaZ_buffer(ls->buff); |
||
196 | while (n--) |
||
197 | if (p[n] == from) p[n] = to; |
||
198 | } |
||
199 | |||
200 | |||
/* first character of the current locale's decimal point (overridable) */
#ifndef getlocaledecpoint
#define getlocaledecpoint() (localeconv()->decimal_point[0])
#endif


/*
** converts the buffer contents through luaO_str2d; the length passed
** is one less than the buffer length (callers save a trailing '\0'
** before using this macro)
*/
#define buff2d(b,e) luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
||
207 | |||
208 | /* |
||
209 | ** in case of format error, try to change decimal point separator to |
||
210 | ** the one defined in the current locale and check again |
||
211 | */ |
||
212 | static void trydecpoint (LexState *ls, SemInfo *seminfo) { |
||
213 | char old = ls->decpoint; |
||
214 | ls->decpoint = getlocaledecpoint(); |
||
215 | buffreplace(ls, old, ls->decpoint); /* try new decimal separator */ |
||
216 | if (!buff2d(ls->buff, &seminfo->r)) { |
||
217 | /* format error with correct decimal point: no more options */ |
||
218 | buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ |
||
219 | lexerror(ls, "malformed number", TK_NUMBER); |
||
220 | } |
||
221 | } |
||
222 | |||
223 | |||
224 | /* LUA_NUMBER */ |
||
225 | static void read_numeral (LexState *ls, SemInfo *seminfo) { |
||
226 | lua_assert(lisdigit(ls->current)); |
||
227 | do { |
||
228 | save_and_next(ls); |
||
229 | if (check_next(ls, "EePp")) /* exponent part? */ |
||
230 | check_next(ls, "+-"); /* optional exponent sign */ |
||
231 | } while (lislalnum(ls->current) || ls->current == '.'); |
||
232 | save(ls, '\0'); |
||
233 | buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ |
||
234 | if (!buff2d(ls->buff, &seminfo->r)) /* format error? */ |
||
235 | trydecpoint(ls, seminfo); /* try to update decimal point separator */ |
||
236 | } |
||
237 | |||
238 | |||
239 | /* |
||
240 | ** skip a sequence '[=*[' or ']=*]' and return its number of '='s or |
||
241 | ** -1 if sequence is malformed |
||
242 | */ |
||
243 | static int skip_sep (LexState *ls) { |
||
244 | int count = 0; |
||
245 | int s = ls->current; |
||
246 | lua_assert(s == '[' || s == ']'); |
||
247 | save_and_next(ls); |
||
248 | while (ls->current == '=') { |
||
249 | save_and_next(ls); |
||
250 | count++; |
||
251 | } |
||
252 | return (ls->current == s) ? count : (-count) - 1; |
||
253 | } |
||
254 | |||
255 | |||
256 | static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { |
||
257 | save_and_next(ls); /* skip 2nd `[' */ |
||
258 | if (currIsNewline(ls)) /* string starts with a newline? */ |
||
259 | inclinenumber(ls); /* skip it */ |
||
260 | for (;;) { |
||
261 | switch (ls->current) { |
||
262 | case EOZ: |
||
263 | lexerror(ls, (seminfo) ? "unfinished long string" : |
||
264 | "unfinished long comment", TK_EOS); |
||
265 | break; /* to avoid warnings */ |
||
266 | case ']': { |
||
267 | if (skip_sep(ls) == sep) { |
||
268 | save_and_next(ls); /* skip 2nd `]' */ |
||
269 | goto endloop; |
||
270 | } |
||
271 | break; |
||
272 | } |
||
273 | case '\n': case '\r': { |
||
274 | save(ls, '\n'); |
||
275 | inclinenumber(ls); |
||
276 | if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ |
||
277 | break; |
||
278 | } |
||
279 | default: { |
||
280 | if (seminfo) save_and_next(ls); |
||
281 | else next(ls); |
||
282 | } |
||
283 | } |
||
284 | } endloop: |
||
285 | if (seminfo) |
||
286 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), |
||
287 | luaZ_bufflen(ls->buff) - 2*(2 + sep)); |
||
288 | } |
||
289 | |||
290 | |||
291 | static void escerror (LexState *ls, int *c, int n, const char *msg) { |
||
292 | int i; |
||
293 | luaZ_resetbuffer(ls->buff); /* prepare error message */ |
||
294 | save(ls, '\\'); |
||
295 | for (i = 0; i < n && c[i] != EOZ; i++) |
||
296 | save(ls, c[i]); |
||
297 | lexerror(ls, msg, TK_STRING); |
||
298 | } |
||
299 | |||
300 | |||
301 | static int readhexaesc (LexState *ls) { |
||
302 | int c[3], i; /* keep input for error message */ |
||
303 | int r = 0; /* result accumulator */ |
||
304 | c[0] = 'x'; /* for error message */ |
||
305 | for (i = 1; i < 3; i++) { /* read two hexa digits */ |
||
306 | c[i] = next(ls); |
||
307 | if (!lisxdigit(c[i])) |
||
308 | escerror(ls, c, i + 1, "hexadecimal digit expected"); |
||
309 | r = (r << 4) + luaO_hexavalue(c[i]); |
||
310 | } |
||
311 | return r; |
||
312 | } |
||
313 | |||
314 | |||
315 | static int readdecesc (LexState *ls) { |
||
316 | int c[3], i; |
||
317 | int r = 0; /* result accumulator */ |
||
318 | for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ |
||
319 | c[i] = ls->current; |
||
320 | r = 10*r + c[i] - '0'; |
||
321 | next(ls); |
||
322 | } |
||
323 | if (r > UCHAR_MAX) |
||
324 | escerror(ls, c, i, "decimal escape too large"); |
||
325 | return r; |
||
326 | } |
||
327 | |||
328 | |||
329 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { |
||
330 | save_and_next(ls); /* keep delimiter (for error messages) */ |
||
331 | while (ls->current != del) { |
||
332 | switch (ls->current) { |
||
333 | case EOZ: |
||
334 | lexerror(ls, "unfinished string", TK_EOS); |
||
335 | break; /* to avoid warnings */ |
||
336 | case '\n': |
||
337 | case '\r': |
||
338 | lexerror(ls, "unfinished string", TK_STRING); |
||
339 | break; /* to avoid warnings */ |
||
340 | case '\\': { /* escape sequences */ |
||
341 | int c; /* final character to be saved */ |
||
342 | next(ls); /* do not save the `\' */ |
||
343 | switch (ls->current) { |
||
344 | case 'a': c = '\a'; goto read_save; |
||
345 | case 'b': c = '\b'; goto read_save; |
||
346 | case 'f': c = '\f'; goto read_save; |
||
347 | case 'n': c = '\n'; goto read_save; |
||
348 | case 'r': c = '\r'; goto read_save; |
||
349 | case 't': c = '\t'; goto read_save; |
||
350 | case 'v': c = '\v'; goto read_save; |
||
351 | case 'x': c = readhexaesc(ls); goto read_save; |
||
352 | case '\n': case '\r': |
||
353 | inclinenumber(ls); c = '\n'; goto only_save; |
||
354 | case '\\': case '\"': case '\'': |
||
355 | c = ls->current; goto read_save; |
||
356 | case EOZ: goto no_save; /* will raise an error next loop */ |
||
357 | case 'z': { /* zap following span of spaces */ |
||
358 | next(ls); /* skip the 'z' */ |
||
359 | while (lisspace(ls->current)) { |
||
360 | if (currIsNewline(ls)) inclinenumber(ls); |
||
361 | else next(ls); |
||
362 | } |
||
363 | goto no_save; |
||
364 | } |
||
365 | default: { |
||
366 | if (!lisdigit(ls->current)) |
||
367 | escerror(ls, &ls->current, 1, "invalid escape sequence"); |
||
368 | /* digital escape \ddd */ |
||
369 | c = readdecesc(ls); |
||
370 | goto only_save; |
||
371 | } |
||
372 | } |
||
373 | read_save: next(ls); /* read next character */ |
||
374 | only_save: save(ls, c); /* save 'c' */ |
||
375 | no_save: break; |
||
376 | } |
||
377 | default: |
||
378 | save_and_next(ls); |
||
379 | } |
||
380 | } |
||
381 | save_and_next(ls); /* skip delimiter */ |
||
382 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, |
||
383 | luaZ_bufflen(ls->buff) - 2); |
||
384 | } |
||
385 | |||
386 | |||
387 | static int llex (LexState *ls, SemInfo *seminfo) { |
||
388 | luaZ_resetbuffer(ls->buff); |
||
389 | for (;;) { |
||
390 | switch (ls->current) { |
||
391 | case '\n': case '\r': { /* line breaks */ |
||
392 | inclinenumber(ls); |
||
393 | break; |
||
394 | } |
||
395 | case ' ': case '\f': case '\t': case '\v': { /* spaces */ |
||
396 | next(ls); |
||
397 | break; |
||
398 | } |
||
399 | case '-': { /* '-' or '--' (comment) */ |
||
400 | next(ls); |
||
401 | if (ls->current != '-') return '-'; |
||
402 | /* else is a comment */ |
||
403 | next(ls); |
||
404 | if (ls->current == '[') { /* long comment? */ |
||
405 | int sep = skip_sep(ls); |
||
406 | luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ |
||
407 | if (sep >= 0) { |
||
408 | read_long_string(ls, NULL, sep); /* skip long comment */ |
||
409 | luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ |
||
410 | break; |
||
411 | } |
||
412 | } |
||
413 | /* else short comment */ |
||
414 | while (!currIsNewline(ls) && ls->current != EOZ) |
||
415 | next(ls); /* skip until end of line (or end of file) */ |
||
416 | break; |
||
417 | } |
||
418 | case '[': { /* long string or simply '[' */ |
||
419 | int sep = skip_sep(ls); |
||
420 | if (sep >= 0) { |
||
421 | read_long_string(ls, seminfo, sep); |
||
422 | return TK_STRING; |
||
423 | } |
||
424 | else if (sep == -1) return '['; |
||
425 | else lexerror(ls, "invalid long string delimiter", TK_STRING); |
||
426 | } |
||
427 | case '=': { |
||
428 | next(ls); |
||
429 | if (ls->current != '=') return '='; |
||
430 | else { next(ls); return TK_EQ; } |
||
431 | } |
||
432 | case '<': { |
||
433 | next(ls); |
||
434 | if (ls->current != '=') return '<'; |
||
435 | else { next(ls); return TK_LE; } |
||
436 | } |
||
437 | case '>': { |
||
438 | next(ls); |
||
439 | if (ls->current != '=') return '>'; |
||
440 | else { next(ls); return TK_GE; } |
||
441 | } |
||
442 | case '~': { |
||
443 | next(ls); |
||
444 | if (ls->current != '=') return '~'; |
||
445 | else { next(ls); return TK_NE; } |
||
446 | } |
||
447 | case ':': { |
||
448 | next(ls); |
||
449 | if (ls->current != ':') return ':'; |
||
450 | else { next(ls); return TK_DBCOLON; } |
||
451 | } |
||
452 | case '"': case '\'': { /* short literal strings */ |
||
453 | read_string(ls, ls->current, seminfo); |
||
454 | return TK_STRING; |
||
455 | } |
||
456 | case '.': { /* '.', '..', '...', or number */ |
||
457 | save_and_next(ls); |
||
458 | if (check_next(ls, ".")) { |
||
459 | if (check_next(ls, ".")) |
||
460 | return TK_DOTS; /* '...' */ |
||
461 | else return TK_CONCAT; /* '..' */ |
||
462 | } |
||
463 | else if (!lisdigit(ls->current)) return '.'; |
||
464 | /* else go through */ |
||
465 | } |
||
466 | case '0': case '1': case '2': case '3': case '4': |
||
467 | case '5': case '6': case '7': case '8': case '9': { |
||
468 | read_numeral(ls, seminfo); |
||
469 | return TK_NUMBER; |
||
470 | } |
||
471 | case EOZ: { |
||
472 | return TK_EOS; |
||
473 | } |
||
474 | default: { |
||
475 | if (lislalpha(ls->current)) { /* identifier or reserved word? */ |
||
476 | TString *ts; |
||
477 | do { |
||
478 | save_and_next(ls); |
||
479 | } while (lislalnum(ls->current)); |
||
480 | ts = luaX_newstring(ls, luaZ_buffer(ls->buff), |
||
481 | luaZ_bufflen(ls->buff)); |
||
482 | seminfo->ts = ts; |
||
483 | if (ts->tsv.reserved > 0) /* reserved word? */ |
||
484 | return ts->tsv.reserved - 1 + FIRST_RESERVED; |
||
485 | else { |
||
486 | return TK_NAME; |
||
487 | } |
||
488 | } |
||
489 | else { /* single-char tokens (+ - / ...) */ |
||
490 | int c = ls->current; |
||
491 | next(ls); |
||
492 | return c; |
||
493 | } |
||
494 | } |
||
495 | } |
||
496 | } |
||
497 | } |
||
498 | |||
499 | |||
500 | void luaX_next (LexState *ls) { |
||
501 | ls->lastline = ls->linenumber; |
||
502 | if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ |
||
503 | ls->t = ls->lookahead; /* use this one */ |
||
504 | ls->lookahead.token = TK_EOS; /* and discharge it */ |
||
505 | } |
||
506 | else |
||
507 | ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ |
||
508 | } |
||
509 | |||
510 | |||
511 | int luaX_lookahead (LexState *ls) { |
||
512 | lua_assert(ls->lookahead.token == TK_EOS); |
||
513 | ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); |
||
514 | return ls->lookahead.token; |
||
515 | }'; |
||
516 |