/*
** $Id: lstrlib.c,v 1.173 2011/11/30 18:24:56 roberto Exp $
** Standard library for string operations and pattern-matching
** See Copyright Notice in lua.h
*/


#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define lstrlib_c
#define LUA_LIB

#include "lua.h"

#include "lauxlib.h"
#include "lualib.h"


/*
** maximum number of captures that a pattern can do during
** pattern-matching. This limit is arbitrary.
** (May be overridden by defining LUA_MAXCAPTURES before this point.)
*/
#if !defined(LUA_MAXCAPTURES)
#define LUA_MAXCAPTURES		32
#endif


/* macro to `unsign' a character: cast to unsigned char */
#define uchar(c)	((unsigned char)(c))



37 | static int str_len (lua_State *L) { |
||
38 | size_t l; |
||
39 | luaL_checklstring(L, 1, &l); |
||
40 | lua_pushinteger(L, (lua_Integer)l); |
||
41 | return 1; |
||
42 | } |
||
43 | |||
44 | |||
/*
** translate a relative string position: negative means back from end.
** Non-negative positions are returned unchanged. A negative position
** whose magnitude exceeds 'len' yields 0; otherwise the result is
** len - |pos| + 1 (so -1 maps to 'len').
*/
static size_t posrelat (ptrdiff_t pos, size_t len) {
  if (pos >= 0) return (size_t)pos;
  /* magnitude is computed in unsigned arithmetic, before any '-pos' */
  else if (0u - (size_t)pos > len) return 0;
  else return len - ((size_t)-pos) + 1;
}


53 | static int str_sub (lua_State *L) { |
||
54 | size_t l; |
||
55 | const char *s = luaL_checklstring(L, 1, &l); |
||
56 | size_t start = posrelat(luaL_checkinteger(L, 2), l); |
||
57 | size_t end = posrelat(luaL_optinteger(L, 3, -1), l); |
||
58 | if (start < 1) start = 1; |
||
59 | if (end > l) end = l; |
||
60 | if (start <= end) |
||
61 | lua_pushlstring(L, s + start - 1, end - start + 1); |
||
62 | else lua_pushliteral(L, ""); |
||
63 | return 1; |
||
64 | } |
||
65 | |||
66 | |||
67 | static int str_reverse (lua_State *L) { |
||
68 | size_t l, i; |
||
69 | luaL_Buffer b; |
||
70 | const char *s = luaL_checklstring(L, 1, &l); |
||
71 | char *p = luaL_buffinitsize(L, &b, l); |
||
72 | for (i = 0; i < l; i++) |
||
73 | p[i] = s[l - i - 1]; |
||
74 | luaL_pushresultsize(&b, l); |
||
75 | return 1; |
||
76 | } |
||
77 | |||
78 | |||
79 | static int str_lower (lua_State *L) { |
||
80 | size_t l; |
||
81 | size_t i; |
||
82 | luaL_Buffer b; |
||
83 | const char *s = luaL_checklstring(L, 1, &l); |
||
84 | char *p = luaL_buffinitsize(L, &b, l); |
||
85 | for (i=0; i |
||
86 | p[i] = tolower(uchar(s[i])); |
||
87 | luaL_pushresultsize(&b, l); |
||
88 | return 1; |
||
89 | } |
||
90 | |||
91 | |||
92 | static int str_upper (lua_State *L) { |
||
93 | size_t l; |
||
94 | size_t i; |
||
95 | luaL_Buffer b; |
||
96 | const char *s = luaL_checklstring(L, 1, &l); |
||
97 | char *p = luaL_buffinitsize(L, &b, l); |
||
98 | for (i=0; i |
||
99 | p[i] = toupper(uchar(s[i])); |
||
100 | luaL_pushresultsize(&b, l); |
||
101 | return 1; |
||
102 | } |
||
103 | |||
104 | |||
105 | /* reasonable limit to avoid arithmetic overflow */ |
||
106 | #define MAXSIZE ((~(size_t)0) >> 1) |
||
107 | |||
108 | static int str_rep (lua_State *L) { |
||
109 | size_t l, lsep; |
||
110 | const char *s = luaL_checklstring(L, 1, &l); |
||
111 | int n = luaL_checkint(L, 2); |
||
112 | const char *sep = luaL_optlstring(L, 3, "", &lsep); |
||
113 | if (n <= 0) lua_pushliteral(L, ""); |
||
114 | else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */ |
||
115 | return luaL_error(L, "resulting string too large"); |
||
116 | else { |
||
117 | size_t totallen = n * l + (n - 1) * lsep; |
||
118 | luaL_Buffer b; |
||
119 | char *p = luaL_buffinitsize(L, &b, totallen); |
||
120 | while (n-- > 1) { /* first n-1 copies (followed by separator) */ |
||
121 | memcpy(p, s, l * sizeof(char)); p += l; |
||
122 | memcpy(p, sep, lsep * sizeof(char)); p += lsep; |
||
123 | } |
||
124 | memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */ |
||
125 | luaL_pushresultsize(&b, totallen); |
||
126 | } |
||
127 | return 1; |
||
128 | } |
||
129 | |||
130 | |||
131 | static int str_byte (lua_State *L) { |
||
132 | size_t l; |
||
133 | const char *s = luaL_checklstring(L, 1, &l); |
||
134 | size_t posi = posrelat(luaL_optinteger(L, 2, 1), l); |
||
135 | size_t pose = posrelat(luaL_optinteger(L, 3, posi), l); |
||
136 | int n, i; |
||
137 | if (posi < 1) posi = 1; |
||
138 | if (pose > l) pose = l; |
||
139 | if (posi > pose) return 0; /* empty interval; return no values */ |
||
140 | n = (int)(pose - posi + 1); |
||
141 | if (posi + n <= pose) /* (size_t -> int) overflow? */ |
||
142 | return luaL_error(L, "string slice too long"); |
||
143 | luaL_checkstack(L, n, "string slice too long"); |
||
144 | for (i=0; i |
||
145 | lua_pushinteger(L, uchar(s[posi+i-1])); |
||
146 | return n; |
||
147 | } |
||
148 | |||
149 | |||
150 | static int str_char (lua_State *L) { |
||
151 | int n = lua_gettop(L); /* number of arguments */ |
||
152 | int i; |
||
153 | luaL_Buffer b; |
||
154 | char *p = luaL_buffinitsize(L, &b, n); |
||
155 | for (i=1; i<=n; i++) { |
||
156 | int c = luaL_checkint(L, i); |
||
157 | luaL_argcheck(L, uchar(c) == c, i, "value out of range"); |
||
158 | p[i - 1] = uchar(c); |
||
159 | } |
||
160 | luaL_pushresultsize(&b, n); |
||
161 | return 1; |
||
162 | } |
||
163 | |||
164 | |||
165 | static int writer (lua_State *L, const void* b, size_t size, void* B) { |
||
166 | (void)L; |
||
167 | luaL_addlstring((luaL_Buffer*) B, (const char *)b, size); |
||
168 | return 0; |
||
169 | } |
||
170 | |||
171 | |||
172 | static int str_dump (lua_State *L) { |
||
173 | luaL_Buffer b; |
||
174 | luaL_checktype(L, 1, LUA_TFUNCTION); |
||
175 | lua_settop(L, 1); |
||
176 | luaL_buffinit(L,&b); |
||
177 | if (lua_dump(L, writer, &b) != 0) |
||
178 | return luaL_error(L, "unable to dump given function"); |
||
179 | luaL_pushresult(&b); |
||
180 | return 1; |
||
181 | } |
||
182 | |||
183 | |||
184 | |||
185 | /* |
||
186 | ** {====================================================== |
||
187 | ** PATTERN MATCHING |
||
188 | ** ======================================================= |
||
189 | */ |
||
190 | |||
191 | |||
192 | #define CAP_UNFINISHED (-1) |
||
193 | #define CAP_POSITION (-2) |
||
194 | |||
195 | typedef struct MatchState { |
||
196 | const char *src_init; /* init of source string */ |
||
197 | const char *src_end; /* end ('\0') of source string */ |
||
198 | const char *p_end; /* end ('\0') of pattern */ |
||
199 | lua_State *L; |
||
200 | int level; /* total number of captures (finished or unfinished) */ |
||
201 | struct { |
||
202 | const char *init; |
||
203 | ptrdiff_t len; |
||
204 | } capture[LUA_MAXCAPTURES]; |
||
205 | } MatchState; |
||
206 | |||
207 | |||
208 | #define L_ESC '%' |
||
209 | #define SPECIALS "^$*+?.([%-" |
||
210 | |||
211 | |||
212 | static int check_capture (MatchState *ms, int l) { |
||
213 | l -= '1'; |
||
214 | if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) |
||
215 | return luaL_error(ms->L, "invalid capture index %%%d", l + 1); |
||
216 | return l; |
||
217 | } |
||
218 | |||
219 | |||
220 | static int capture_to_close (MatchState *ms) { |
||
221 | int level = ms->level; |
||
222 | for (level--; level>=0; level--) |
||
223 | if (ms->capture[level].len == CAP_UNFINISHED) return level; |
||
224 | return luaL_error(ms->L, "invalid pattern capture"); |
||
225 | } |
||
226 | |||
227 | |||
228 | static const char *classend (MatchState *ms, const char *p) { |
||
229 | switch (*p++) { |
||
230 | case L_ESC: { |
||
231 | if (p == ms->p_end) |
||
232 | luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); |
||
233 | return p+1; |
||
234 | } |
||
235 | case '[': { |
||
236 | if (*p == '^') p++; |
||
237 | do { /* look for a `]' */ |
||
238 | if (p == ms->p_end) |
||
239 | luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); |
||
240 | if (*(p++) == L_ESC && p < ms->p_end) |
||
241 | p++; /* skip escapes (e.g. `%]') */ |
||
242 | } while (*p != ']'); |
||
243 | return p+1; |
||
244 | } |
||
245 | default: { |
||
246 | return p; |
||
247 | } |
||
248 | } |
||
249 | } |
||
250 | |||
251 | |||
/*
** Tests character 'c' against the class letter 'cl' (the character
** that followed '%' in a pattern). A lower-case letter selects the
** corresponding <ctype.h> predicate; the upper-case form negates it.
** Any character that is not a known class letter matches only itself.
** Returns non-zero on match.
*/
static int match_class (int c, int cl) {
  int res;
  switch (tolower(cl)) {
    case 'a' : res = isalpha(c); break;
    case 'c' : res = iscntrl(c); break;
    case 'd' : res = isdigit(c); break;
    case 'g' : res = isgraph(c); break;
    case 'l' : res = islower(c); break;
    case 'p' : res = ispunct(c); break;
    case 's' : res = isspace(c); break;
    case 'u' : res = isupper(c); break;
    case 'w' : res = isalnum(c); break;
    case 'x' : res = isxdigit(c); break;
    case 'z' : res = (c == 0); break;  /* deprecated option */
    default: return (cl == c);
  }
  return (islower(cl) ? res : !res);
}


272 | static int matchbracketclass (int c, const char *p, const char *ec) { |
||
273 | int sig = 1; |
||
274 | if (*(p+1) == '^') { |
||
275 | sig = 0; |
||
276 | p++; /* skip the `^' */ |
||
277 | } |
||
278 | while (++p < ec) { |
||
279 | if (*p == L_ESC) { |
||
280 | p++; |
||
281 | if (match_class(c, uchar(*p))) |
||
282 | return sig; |
||
283 | } |
||
284 | else if ((*(p+1) == '-') && (p+2 < ec)) { |
||
285 | p+=2; |
||
286 | if (uchar(*(p-2)) <= c && c <= uchar(*p)) |
||
287 | return sig; |
||
288 | } |
||
289 | else if (uchar(*p) == c) return sig; |
||
290 | } |
||
291 | return !sig; |
||
292 | } |
||
293 | |||
294 | |||
295 | static int singlematch (int c, const char *p, const char *ep) { |
||
296 | switch (*p) { |
||
297 | case '.': return 1; /* matches any char */ |
||
298 | case L_ESC: return match_class(c, uchar(*(p+1))); |
||
299 | case '[': return matchbracketclass(c, p, ep-1); |
||
300 | default: return (uchar(*p) == c); |
||
301 | } |
||
302 | } |
||
303 | |||
304 | |||
305 | static const char *match (MatchState *ms, const char *s, const char *p); |
||
306 | |||
307 | |||
308 | static const char *matchbalance (MatchState *ms, const char *s, |
||
309 | const char *p) { |
||
310 | if (p >= ms->p_end - 1) |
||
311 | luaL_error(ms->L, "malformed pattern " |
||
312 | "(missing arguments to " LUA_QL("%%b") ")"); |
||
313 | if (*s != *p) return NULL; |
||
314 | else { |
||
315 | int b = *p; |
||
316 | int e = *(p+1); |
||
317 | int cont = 1; |
||
318 | while (++s < ms->src_end) { |
||
319 | if (*s == e) { |
||
320 | if (--cont == 0) return s+1; |
||
321 | } |
||
322 | else if (*s == b) cont++; |
||
323 | } |
||
324 | } |
||
325 | return NULL; /* string ends out of balance */ |
||
326 | } |
||
327 | |||
328 | |||
329 | static const char *max_expand (MatchState *ms, const char *s, |
||
330 | const char *p, const char *ep) { |
||
331 | ptrdiff_t i = 0; /* counts maximum expand for item */ |
||
332 | while ((s+i) |
||
333 | i++; |
||
334 | /* keeps trying to match with the maximum repetitions */ |
||
335 | while (i>=0) { |
||
336 | const char *res = match(ms, (s+i), ep+1); |
||
337 | if (res) return res; |
||
338 | i--; /* else didn't match; reduce 1 repetition to try again */ |
||
339 | } |
||
340 | return NULL; |
||
341 | } |
||
342 | |||
343 | |||
344 | static const char *min_expand (MatchState *ms, const char *s, |
||
345 | const char *p, const char *ep) { |
||
346 | for (;;) { |
||
347 | const char *res = match(ms, s, ep+1); |
||
348 | if (res != NULL) |
||
349 | return res; |
||
350 | else if (s |
||
351 | s++; /* try with one more repetition */ |
||
352 | else return NULL; |
||
353 | } |
||
354 | } |
||
355 | |||
356 | |||
357 | static const char *start_capture (MatchState *ms, const char *s, |
||
358 | const char *p, int what) { |
||
359 | const char *res; |
||
360 | int level = ms->level; |
||
361 | if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures"); |
||
362 | ms->capture[level].init = s; |
||
363 | ms->capture[level].len = what; |
||
364 | ms->level = level+1; |
||
365 | if ((res=match(ms, s, p)) == NULL) /* match failed? */ |
||
366 | ms->level--; /* undo capture */ |
||
367 | return res; |
||
368 | } |
||
369 | |||
370 | |||
371 | static const char *end_capture (MatchState *ms, const char *s, |
||
372 | const char *p) { |
||
373 | int l = capture_to_close(ms); |
||
374 | const char *res; |
||
375 | ms->capture[l].len = s - ms->capture[l].init; /* close capture */ |
||
376 | if ((res = match(ms, s, p)) == NULL) /* match failed? */ |
||
377 | ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ |
||
378 | return res; |
||
379 | } |
||
380 | |||
381 | |||
382 | static const char *match_capture (MatchState *ms, const char *s, int l) { |
||
383 | size_t len; |
||
384 | l = check_capture(ms, l); |
||
385 | len = ms->capture[l].len; |
||
386 | if ((size_t)(ms->src_end-s) >= len && |
||
387 | memcmp(ms->capture[l].init, s, len) == 0) |
||
388 | return s+len; |
||
389 | else return NULL; |
||
390 | } |
||
391 | |||
392 | |||
393 | static const char *match (MatchState *ms, const char *s, const char *p) { |
||
394 | init: /* using goto's to optimize tail recursion */ |
||
395 | if (p == ms->p_end) /* end of pattern? */ |
||
396 | return s; /* match succeeded */ |
||
397 | switch (*p) { |
||
398 | case '(': { /* start capture */ |
||
399 | if (*(p+1) == ')') /* position capture? */ |
||
400 | return start_capture(ms, s, p+2, CAP_POSITION); |
||
401 | else |
||
402 | return start_capture(ms, s, p+1, CAP_UNFINISHED); |
||
403 | } |
||
404 | case ')': { /* end capture */ |
||
405 | return end_capture(ms, s, p+1); |
||
406 | } |
||
407 | case '$': { |
||
408 | if ((p+1) == ms->p_end) /* is the `$' the last char in pattern? */ |
||
409 | return (s == ms->src_end) ? s : NULL; /* check end of string */ |
||
410 | else goto dflt; |
||
411 | } |
||
412 | case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ |
||
413 | switch (*(p+1)) { |
||
414 | case 'b': { /* balanced string? */ |
||
415 | s = matchbalance(ms, s, p+2); |
||
416 | if (s == NULL) return NULL; |
||
417 | p+=4; goto init; /* else return match(ms, s, p+4); */ |
||
418 | } |
||
419 | case 'f': { /* frontier? */ |
||
420 | const char *ep; char previous; |
||
421 | p += 2; |
||
422 | if (*p != '[') |
||
423 | luaL_error(ms->L, "missing " LUA_QL("[") " after " |
||
424 | LUA_QL("%%f") " in pattern"); |
||
425 | ep = classend(ms, p); /* points to what is next */ |
||
426 | previous = (s == ms->src_init) ? '\0' : *(s-1); |
||
427 | if (matchbracketclass(uchar(previous), p, ep-1) || |
||
428 | !matchbracketclass(uchar(*s), p, ep-1)) return NULL; |
||
429 | p=ep; goto init; /* else return match(ms, s, ep); */ |
||
430 | } |
||
431 | case '0': case '1': case '2': case '3': |
||
432 | case '4': case '5': case '6': case '7': |
||
433 | case '8': case '9': { /* capture results (%0-%9)? */ |
||
434 | s = match_capture(ms, s, uchar(*(p+1))); |
||
435 | if (s == NULL) return NULL; |
||
436 | p+=2; goto init; /* else return match(ms, s, p+2) */ |
||
437 | } |
||
438 | default: goto dflt; |
||
439 | } |
||
440 | } |
||
441 | default: dflt: { /* pattern class plus optional suffix */ |
||
442 | const char *ep = classend(ms, p); /* points to what is next */ |
||
443 | int m = s < ms->src_end && singlematch(uchar(*s), p, ep); |
||
444 | switch (*ep) { |
||
445 | case '?': { /* optional */ |
||
446 | const char *res; |
||
447 | if (m && ((res=match(ms, s+1, ep+1)) != NULL)) |
||
448 | return res; |
||
449 | p=ep+1; goto init; /* else return match(ms, s, ep+1); */ |
||
450 | } |
||
451 | case '*': { /* 0 or more repetitions */ |
||
452 | return max_expand(ms, s, p, ep); |
||
453 | } |
||
454 | case '+': { /* 1 or more repetitions */ |
||
455 | return (m ? max_expand(ms, s+1, p, ep) : NULL); |
||
456 | } |
||
457 | case '-': { /* 0 or more repetitions (minimum) */ |
||
458 | return min_expand(ms, s, p, ep); |
||
459 | } |
||
460 | default: { |
||
461 | if (!m) return NULL; |
||
462 | s++; p=ep; goto init; /* else return match(ms, s+1, ep); */ |
||
463 | } |
||
464 | } |
||
465 | } |
||
466 | } |
||
467 | } |
||
468 | |||
469 | |||
470 | |||
/*
** Plain (binary-safe) substring search: looks for the 'l2' bytes at
** 's2' inside the 'l1' bytes at 's1'. Returns a pointer to the first
** occurrence, 's1' itself when 'l2' is 0, or NULL when not found.
*/
static const char *lmemfind (const char *s1, size_t l1,
                               const char *s2, size_t l2) {
  if (l2 == 0) return s1;  /* empty strings are everywhere */
  else if (l2 > l1) return NULL;  /* avoids a negative `l1' */
  else {
    const char *init;  /* to search for a `*s2' inside `s1' */
    l2--;  /* 1st char will be checked by `memchr' */
    l1 = l1-l2;  /* `s2' cannot be found after that */
    while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
      init++;   /* 1st char is already checked */
      if (memcmp(init, s2+1, l2) == 0)
        return init-1;
      else {  /* correct `l1' and `s1' to try again */
        l1 -= init-s1;
        s1 = init;
      }
    }
    return NULL;  /* not found */
  }
}


493 | static void push_onecapture (MatchState *ms, int i, const char *s, |
||
494 | const char *e) { |
||
495 | if (i >= ms->level) { |
||
496 | if (i == 0) /* ms->level == 0, too */ |
||
497 | lua_pushlstring(ms->L, s, e - s); /* add whole match */ |
||
498 | else |
||
499 | luaL_error(ms->L, "invalid capture index"); |
||
500 | } |
||
501 | else { |
||
502 | ptrdiff_t l = ms->capture[i].len; |
||
503 | if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture"); |
||
504 | if (l == CAP_POSITION) |
||
505 | lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1); |
||
506 | else |
||
507 | lua_pushlstring(ms->L, ms->capture[i].init, l); |
||
508 | } |
||
509 | } |
||
510 | |||
511 | |||
512 | static int push_captures (MatchState *ms, const char *s, const char *e) { |
||
513 | int i; |
||
514 | int nlevels = (ms->level == 0 && s) ? 1 : ms->level; |
||
515 | luaL_checkstack(ms->L, nlevels, "too many captures"); |
||
516 | for (i = 0; i < nlevels; i++) |
||
517 | push_onecapture(ms, i, s, e); |
||
518 | return nlevels; /* number of strings pushed */ |
||
519 | } |
||
520 | |||
521 | |||
522 | /* check whether pattern has no special characters */ |
||
523 | static int nospecials (const char *p, size_t l) { |
||
524 | size_t upto = 0; |
||
525 | do { |
||
526 | if (strpbrk(p + upto, SPECIALS)) |
||
527 | return 0; /* pattern has a special character */ |
||
528 | upto += strlen(p + upto) + 1; /* may have more after \0 */ |
||
529 | } while (upto <= l); |
||
530 | return 1; /* no special chars found */ |
||
531 | } |
||
532 | |||
533 | |||
534 | static int str_find_aux (lua_State *L, int find) { |
||
535 | size_t ls, lp; |
||
536 | const char *s = luaL_checklstring(L, 1, &ls); |
||
537 | const char *p = luaL_checklstring(L, 2, &lp); |
||
538 | size_t init = posrelat(luaL_optinteger(L, 3, 1), ls); |
||
539 | if (init < 1) init = 1; |
||
540 | else if (init > ls + 1) { /* start after string's end? */ |
||
541 | lua_pushnil(L); /* cannot find anything */ |
||
542 | return 1; |
||
543 | } |
||
544 | /* explicit request or no special characters? */ |
||
545 | if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { |
||
546 | /* do a plain search */ |
||
547 | const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp); |
||
548 | if (s2) { |
||
549 | lua_pushinteger(L, s2 - s + 1); |
||
550 | lua_pushinteger(L, s2 - s + lp); |
||
551 | return 2; |
||
552 | } |
||
553 | } |
||
554 | else { |
||
555 | MatchState ms; |
||
556 | const char *s1 = s + init - 1; |
||
557 | int anchor = (*p == '^'); |
||
558 | if (anchor) { |
||
559 | p++; lp--; /* skip anchor character */ |
||
560 | } |
||
561 | ms.L = L; |
||
562 | ms.src_init = s; |
||
563 | ms.src_end = s + ls; |
||
564 | ms.p_end = p + lp; |
||
565 | do { |
||
566 | const char *res; |
||
567 | ms.level = 0; |
||
568 | if ((res=match(&ms, s1, p)) != NULL) { |
||
569 | if (find) { |
||
570 | lua_pushinteger(L, s1 - s + 1); /* start */ |
||
571 | lua_pushinteger(L, res - s); /* end */ |
||
572 | return push_captures(&ms, NULL, 0) + 2; |
||
573 | } |
||
574 | else |
||
575 | return push_captures(&ms, s1, res); |
||
576 | } |
||
577 | } while (s1++ < ms.src_end && !anchor); |
||
578 | } |
||
579 | lua_pushnil(L); /* not found */ |
||
580 | return 1; |
||
581 | } |
||
582 | |||
583 | |||
584 | static int str_find (lua_State *L) { |
||
585 | return str_find_aux(L, 1); |
||
586 | } |
||
587 | |||
588 | |||
589 | static int str_match (lua_State *L) { |
||
590 | return str_find_aux(L, 0); |
||
591 | } |
||
592 | |||
593 | |||
594 | static int gmatch_aux (lua_State *L) { |
||
595 | MatchState ms; |
||
596 | size_t ls, lp; |
||
597 | const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); |
||
598 | const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp); |
||
599 | const char *src; |
||
600 | ms.L = L; |
||
601 | ms.src_init = s; |
||
602 | ms.src_end = s+ls; |
||
603 | ms.p_end = p + lp; |
||
604 | for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); |
||
605 | src <= ms.src_end; |
||
606 | src++) { |
||
607 | const char *e; |
||
608 | ms.level = 0; |
||
609 | if ((e = match(&ms, src, p)) != NULL) { |
||
610 | lua_Integer newstart = e-s; |
||
611 | if (e == src) newstart++; /* empty match? go at least one position */ |
||
612 | lua_pushinteger(L, newstart); |
||
613 | lua_replace(L, lua_upvalueindex(3)); |
||
614 | return push_captures(&ms, src, e); |
||
615 | } |
||
616 | } |
||
617 | return 0; /* not found */ |
||
618 | } |
||
619 | |||
620 | |||
621 | static int gmatch (lua_State *L) { |
||
622 | luaL_checkstring(L, 1); |
||
623 | luaL_checkstring(L, 2); |
||
624 | lua_settop(L, 2); |
||
625 | lua_pushinteger(L, 0); |
||
626 | lua_pushcclosure(L, gmatch_aux, 3); |
||
627 | return 1; |
||
628 | } |
||
629 | |||
630 | |||
631 | static void add_s (MatchState *ms, luaL_Buffer *b, const char *s, |
||
632 | const char *e) { |
||
633 | size_t l, i; |
||
634 | const char *news = lua_tolstring(ms->L, 3, &l); |
||
635 | for (i = 0; i < l; i++) { |
||
636 | if (news[i] != L_ESC) |
||
637 | luaL_addchar(b, news[i]); |
||
638 | else { |
||
639 | i++; /* skip ESC */ |
||
640 | if (!isdigit(uchar(news[i]))) { |
||
641 | if (news[i] != L_ESC) |
||
642 | luaL_error(ms->L, "invalid use of " LUA_QL("%c") |
||
643 | " in replacement string", L_ESC); |
||
644 | luaL_addchar(b, news[i]); |
||
645 | } |
||
646 | else if (news[i] == '0') |
||
647 | luaL_addlstring(b, s, e - s); |
||
648 | else { |
||
649 | push_onecapture(ms, news[i] - '1', s, e); |
||
650 | luaL_addvalue(b); /* add capture to accumulated result */ |
||
651 | } |
||
652 | } |
||
653 | } |
||
654 | } |
||
655 | |||
656 | |||
657 | static void add_value (MatchState *ms, luaL_Buffer *b, const char *s, |
||
658 | const char *e, int tr) { |
||
659 | lua_State *L = ms->L; |
||
660 | switch (tr) { |
||
661 | case LUA_TFUNCTION: { |
||
662 | int n; |
||
663 | lua_pushvalue(L, 3); |
||
664 | n = push_captures(ms, s, e); |
||
665 | lua_call(L, n, 1); |
||
666 | break; |
||
667 | } |
||
668 | case LUA_TTABLE: { |
||
669 | push_onecapture(ms, 0, s, e); |
||
670 | lua_gettable(L, 3); |
||
671 | break; |
||
672 | } |
||
673 | default: { /* LUA_TNUMBER or LUA_TSTRING */ |
||
674 | add_s(ms, b, s, e); |
||
675 | return; |
||
676 | } |
||
677 | } |
||
678 | if (!lua_toboolean(L, -1)) { /* nil or false? */ |
||
679 | lua_pop(L, 1); |
||
680 | lua_pushlstring(L, s, e - s); /* keep original text */ |
||
681 | } |
||
682 | else if (!lua_isstring(L, -1)) |
||
683 | luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1)); |
||
684 | luaL_addvalue(b); /* add result to accumulator */ |
||
685 | } |
||
686 | |||
687 | |||
688 | static int str_gsub (lua_State *L) { |
||
689 | size_t srcl, lp; |
||
690 | const char *src = luaL_checklstring(L, 1, &srcl); |
||
691 | const char *p = luaL_checklstring(L, 2, &lp); |
||
692 | int tr = lua_type(L, 3); |
||
693 | size_t max_s = luaL_optinteger(L, 4, srcl+1); |
||
694 | int anchor = (*p == '^'); |
||
695 | size_t n = 0; |
||
696 | MatchState ms; |
||
697 | luaL_Buffer b; |
||
698 | luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING || |
||
699 | tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, |
||
700 | "string/function/table expected"); |
||
701 | luaL_buffinit(L, &b); |
||
702 | if (anchor) { |
||
703 | p++; lp--; /* skip anchor character */ |
||
704 | } |
||
705 | ms.L = L; |
||
706 | ms.src_init = src; |
||
707 | ms.src_end = src+srcl; |
||
708 | ms.p_end = p + lp; |
||
709 | while (n < max_s) { |
||
710 | const char *e; |
||
711 | ms.level = 0; |
||
712 | e = match(&ms, src, p); |
||
713 | if (e) { |
||
714 | n++; |
||
715 | add_value(&ms, &b, src, e, tr); |
||
716 | } |
||
717 | if (e && e>src) /* non empty match? */ |
||
718 | src = e; /* skip it */ |
||
719 | else if (src < ms.src_end) |
||
720 | luaL_addchar(&b, *src++); |
||
721 | else break; |
||
722 | if (anchor) break; |
||
723 | } |
||
724 | luaL_addlstring(&b, src, ms.src_end-src); |
||
725 | luaL_pushresult(&b); |
||
726 | lua_pushinteger(L, n); /* number of substitutions */ |
||
727 | return 2; |
||
728 | } |
||
729 | |||
730 | /* }====================================================== */ |
||
731 | |||
732 | |||
733 | |||
/*
** {======================================================
** STRING FORMAT
** =======================================================
*/

/*
** LUA_INTFRMLEN is the length modifier for integer conversions in
** 'string.format'; LUA_INTFRM_T is the integer type corresponding to
** the previous length
*/
#if !defined(LUA_INTFRMLEN)	/* { */
#if defined(LUA_USE_LONGLONG)

#define LUA_INTFRMLEN		"ll"
#define LUA_INTFRM_T		long long

#else

#define LUA_INTFRMLEN		"l"
#define LUA_INTFRM_T		long

#endif
#endif				/* } */

/* limits of the integer conversion types, expressed as lua_Number */
#define MAX_UINTFRM	((lua_Number)(~(unsigned LUA_INTFRM_T)0))
#define MAX_INTFRM	((lua_Number)((~(unsigned LUA_INTFRM_T)0)/2))
#define MIN_INTFRM	(-(lua_Number)((~(unsigned LUA_INTFRM_T)0)/2) - 1)

/*
** LUA_FLTFRMLEN is the length modifier for float conversions in
** 'string.format'; LUA_FLTFRM_T is the float type corresponding to
** the previous length
*/
#if !defined(LUA_FLTFRMLEN)

#define LUA_FLTFRMLEN		""
#define LUA_FLTFRM_T		double

#endif


/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
#define MAX_ITEM	512
/* valid flags in a format specification */
#define FLAGS	"-+ #0"
/*
** maximum size of each format specification (such as '%-099.99d')
** (+10 accounts for %99.99x plus margin of error)
*/
#define MAX_FORMAT	(sizeof(FLAGS) + sizeof(LUA_INTFRMLEN) + 10)


787 | static void addquoted (lua_State *L, luaL_Buffer *b, int arg) { |
||
788 | size_t l; |
||
789 | const char *s = luaL_checklstring(L, arg, &l); |
||
790 | luaL_addchar(b, '"'); |
||
791 | while (l--) { |
||
792 | if (*s == '"' || *s == '\\' || *s == '\n') { |
||
793 | luaL_addchar(b, '\\'); |
||
794 | luaL_addchar(b, *s); |
||
795 | } |
||
796 | else if (*s == '\0' || iscntrl(uchar(*s))) { |
||
797 | char buff[10]; |
||
798 | if (!isdigit(uchar(*(s+1)))) |
||
799 | sprintf(buff, "\\%d", (int)uchar(*s)); |
||
800 | else |
||
801 | sprintf(buff, "\\%03d", (int)uchar(*s)); |
||
802 | luaL_addstring(b, buff); |
||
803 | } |
||
804 | else |
||
805 | luaL_addchar(b, *s); |
||
806 | s++; |
||
807 | } |
||
808 | luaL_addchar(b, '"'); |
||
809 | } |
||
810 | |||
811 | static const char *scanformat (lua_State *L, const char *strfrmt, char *form) { |
||
812 | const char *p = strfrmt; |
||
813 | while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */ |
||
814 | if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char)) |
||
815 | luaL_error(L, "invalid format (repeated flags)"); |
||
816 | if (isdigit(uchar(*p))) p++; /* skip width */ |
||
817 | if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ |
||
818 | if (*p == '.') { |
||
819 | p++; |
||
820 | if (isdigit(uchar(*p))) p++; /* skip precision */ |
||
821 | if (isdigit(uchar(*p))) p++; /* (2 digits at most) */ |
||
822 | } |
||
823 | if (isdigit(uchar(*p))) |
||
824 | luaL_error(L, "invalid format (width or precision too long)"); |
||
825 | *(form++) = '%'; |
||
826 | memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char)); |
||
827 | form += p - strfrmt + 1; |
||
828 | *form = '\0'; |
||
829 | return p; |
||
830 | } |
||
831 | |||
832 | |||
/*
** add length modifier into formats: inserts 'lenmod' just before the
** last character of 'form' (the conversion character), in place.
** 'form' must be non-empty and have room for strlen(lenmod) more
** characters.
*/
static void addlenmod (char *form, const char *lenmod) {
  size_t l = strlen(form);
  size_t lm = strlen(lenmod);
  char spec = form[l - 1];  /* conversion character, re-appended below */
  strcpy(form + l - 1, lenmod);
  form[l + lm - 1] = spec;
  form[l + lm] = '\0';
}


846 | static int str_format (lua_State *L) { |
||
847 | int top = lua_gettop(L); |
||
848 | int arg = 1; |
||
849 | size_t sfl; |
||
850 | const char *strfrmt = luaL_checklstring(L, arg, &sfl); |
||
851 | const char *strfrmt_end = strfrmt+sfl; |
||
852 | luaL_Buffer b; |
||
853 | luaL_buffinit(L, &b); |
||
854 | while (strfrmt < strfrmt_end) { |
||
855 | if (*strfrmt != L_ESC) |
||
856 | luaL_addchar(&b, *strfrmt++); |
||
857 | else if (*++strfrmt == L_ESC) |
||
858 | luaL_addchar(&b, *strfrmt++); /* %% */ |
||
859 | else { /* format item */ |
||
860 | char form[MAX_FORMAT]; /* to store the format (`%...') */ |
||
861 | char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */ |
||
862 | int nb = 0; /* number of bytes in added item */ |
||
863 | if (++arg > top) |
||
864 | luaL_argerror(L, arg, "no value"); |
||
865 | strfrmt = scanformat(L, strfrmt, form); |
||
866 | switch (*strfrmt++) { |
||
867 | case 'c': { |
||
868 | nb = sprintf(buff, form, luaL_checkint(L, arg)); |
||
869 | break; |
||
870 | } |
||
871 | case 'd': case 'i': { |
||
872 | lua_Number n = luaL_checknumber(L, arg); |
||
873 | luaL_argcheck(L, (MIN_INTFRM - 1) < n && n < (MAX_INTFRM + 1), arg, |
||
874 | "not a number in proper range"); |
||
875 | addlenmod(form, LUA_INTFRMLEN); |
||
876 | nb = sprintf(buff, form, (LUA_INTFRM_T)n); |
||
877 | break; |
||
878 | } |
||
879 | case 'o': case 'u': case 'x': case 'X': { |
||
880 | lua_Number n = luaL_checknumber(L, arg); |
||
881 | luaL_argcheck(L, 0 <= n && n < (MAX_UINTFRM + 1), arg, |
||
882 | "not a non-negative number in proper range"); |
||
883 | addlenmod(form, LUA_INTFRMLEN); |
||
884 | nb = sprintf(buff, form, (unsigned LUA_INTFRM_T)n); |
||
885 | break; |
||
886 | } |
||
887 | case 'e': case 'E': case 'f': |
||
888 | #if defined(LUA_USE_AFORMAT) |
||
889 | case 'a': case 'A': |
||
890 | #endif |
||
891 | case 'g': case 'G': { |
||
892 | addlenmod(form, LUA_FLTFRMLEN); |
||
893 | nb = sprintf(buff, form, (LUA_FLTFRM_T)luaL_checknumber(L, arg)); |
||
894 | break; |
||
895 | } |
||
896 | case 'q': { |
||
897 | addquoted(L, &b, arg); |
||
898 | break; |
||
899 | } |
||
900 | case 's': { |
||
901 | size_t l; |
||
902 | const char *s = luaL_tolstring(L, arg, &l); |
||
903 | if (!strchr(form, '.') && l >= 100) { |
||
904 | /* no precision and string is too long to be formatted; |
||
905 | keep original string */ |
||
906 | luaL_addvalue(&b); |
||
907 | break; |
||
908 | } |
||
909 | else { |
||
910 | nb = sprintf(buff, form, s); |
||
911 | lua_pop(L, 1); /* remove result from 'luaL_tolstring' */ |
||
912 | break; |
||
913 | } |
||
914 | } |
||
915 | default: { /* also treat cases `pnLlh' */ |
||
916 | return luaL_error(L, "invalid option " LUA_QL("%%%c") " to " |
||
917 | LUA_QL("format"), *(strfrmt - 1)); |
||
918 | } |
||
919 | } |
||
920 | luaL_addsize(&b, nb); |
||
921 | } |
||
922 | } |
||
923 | luaL_pushresult(&b); |
||
924 | return 1; |
||
925 | } |
||
926 | |||
927 | /* }====================================================== */ |
||
928 | |||
929 | |||
930 | static const luaL_Reg strlib[] = { |
||
931 | {"byte", str_byte}, |
||
932 | {"char", str_char}, |
||
933 | {"dump", str_dump}, |
||
934 | {"find", str_find}, |
||
935 | {"format", str_format}, |
||
936 | {"gmatch", gmatch}, |
||
937 | {"gsub", str_gsub}, |
||
938 | {"len", str_len}, |
||
939 | {"lower", str_lower}, |
||
940 | {"match", str_match}, |
||
941 | {"rep", str_rep}, |
||
942 | {"reverse", str_reverse}, |
||
943 | {"sub", str_sub}, |
||
944 | {"upper", str_upper}, |
||
945 | {NULL, NULL} |
||
946 | }; |
||
947 | |||
948 | |||
949 | static void createmetatable (lua_State *L) { |
||
950 | lua_createtable(L, 0, 1); /* table to be metatable for strings */ |
||
951 | lua_pushliteral(L, ""); /* dummy string */ |
||
952 | lua_pushvalue(L, -2); /* copy table */ |
||
953 | lua_setmetatable(L, -2); /* set table as metatable for strings */ |
||
954 | lua_pop(L, 1); /* pop dummy string */ |
||
955 | lua_pushvalue(L, -2); /* get string library */ |
||
956 | lua_setfield(L, -2, "__index"); /* metatable.__index = string */ |
||
957 | lua_pop(L, 1); /* pop metatable */ |
||
958 | } |
||
959 | |||
960 | |||
961 | /* |
||
962 | ** Open string library |
||
963 | */ |
||
964 | LUAMOD_API int luaopen_string (lua_State *L) { |
||
965 | luaL_newlib(L, strlib); |
||
966 | createmetatable(L); |
||
967 | return 1; |
||
968 | }>=>>>>>>>=>>>=>>>>=>=>>>>>=n;>=>>>=>>=>> |
||
969 |