Rev 5676 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 5676 | Rev 6887 | ||
---|---|---|---|
1 | #ifndef INCLUDE_LEXER_H |
1 | #ifndef INCLUDE_LEXER_H |
2 | #define INCLUDE_LEXER_H |
2 | #define INCLUDE_LEXER_H |
3 | #print "[include <lexer.h>]\n" |
- | |
4 | 3 | ||
5 | #ifndef INCLUDE_STRING_H |
4 | #ifndef INCLUDE_STRING_H |
6 | #include "../lib/strings.h" |
5 | #include "../lib/strings.h" |
7 | #endif |
6 | #endif |
8 | 7 | ||
9 | #ifndef INCLUDE_MEM_H |
8 | #ifndef INCLUDE_MEM_H |
10 | #include "../lib/mem.h" |
9 | #include "../lib/mem.h" |
11 | #endif |
10 | #endif |
12 | /** Splits text into tokens |
11 | /** Splits text into tokens |
13 | * Author :Pavel Yakovlev |
12 | * Author :Pavel Yakovlev |
14 | * Homepage:https://vk.com/pavelyakov39 |
13 | * Homepage:https://vk.com/pavelyakov39 |
15 | * Ver. : 1.51 |
14 | * Ver. : 1.51 |
16 | */ |
15 | */ |
17 | 16 | ||
18 | /** Example: |
17 | /** Example: |
19 | * lexer lex; |
18 | * lexer lex; |
20 | * lex.load("var a=123;"); |
19 | * lex.load("var a=123;"); |
21 | * lex.next(); |
20 | * lex.next(); |
22 | * lex.token; //TOKEN == 'var' |
21 | * lex.token; //TOKEN == 'var' |
23 | * lex.type ; //TYPE == LEX_VAR |
22 | * lex.type ; //TYPE == LEX_VAR |
24 | * |
23 | * |
25 | * lex.next(); |
24 | * lex.next(); |
26 | * lex.token; //TOKEN == 'a' |
25 | * lex.token; //TOKEN == 'a' |
27 | * lex.type ; //TYPE == LEX_VAR |
26 | * lex.type ; //TYPE == LEX_VAR |
28 | * |
27 | * |
29 | * lex.next(); |
28 | * lex.next(); |
30 | * lex.token; //TOKEN == '=' |
29 | * lex.token; //TOKEN == '=' |
31 | * lex.type ; //TYPE == LEX_IND |
30 | * lex.type ; //TYPE == LEX_IND |
32 | * |
31 | * |
33 | * lex.next(); |
32 | * lex.next(); |
34 | * lex.token; //TOKEN == '123' |
33 | * lex.token; //TOKEN == '123' |
35 | * lex.type ; //TYPE == LEX_DEC |
34 | * lex.type ; //TYPE == LEX_DEC |
36 | * |
35 | * |
37 | * lex.next(); |
36 | * lex.next(); |
38 | * lex.token; //TOKEN == ';' |
37 | * lex.token; //TOKEN == ';' |
39 | * lex.type ; //TYPE == LEX_IND |
38 | * lex.type ; //TYPE == LEX_IND |
40 | * |
39 | * |
41 | * lex.next(); |
40 | * lex.next(); |
42 | * lex.token; //TOKEN == '' |
41 | * lex.token; //TOKEN == '' |
43 | * lex.type ; //TYPE == LEX_END |
42 | * lex.type ; //TYPE == LEX_END |
44 | */ |
43 | */ |
45 | 44 | ||
46 | #define LEX_END 1 |
45 | #define LEX_END 1 |
47 | #define LEX_STR 2 |
46 | #define LEX_STR 2 |
48 | #define LEX_DEC 3 |
47 | #define LEX_DEC 3 |
49 | #define LEX_VAR 4 |
48 | #define LEX_VAR 4 |
50 | #define LEX_FNC 5 |
49 | #define LEX_FNC 5 |
51 | #define LEX_IND 6 |
50 | #define LEX_IND 6 |
52 | #define LEX_NUL 0 |
51 | #define LEX_NUL 0 |
53 | 52 | ||
54 | :char const_token_lexer[1024]; |
53 | :char const_token_lexer[1024]; |
55 | :struct lexer |
54 | :struct lexer |
56 | { |
55 | { |
57 | byte cmd; |
56 | byte cmd; |
58 | dword token,text,mem_list,count,buffer_loading; |
57 | dword token,text,mem_list,count,buffer_loading; |
59 | dword str_buffer; |
58 | dword str_buffer; |
60 | byte type; |
59 | byte type; |
61 | char quote; |
60 | char quote; |
62 | signed length; |
61 | signed length; |
63 | dword next(void); |
62 | dword next(void); |
64 | dword back(void); |
63 | dword back(void); |
65 | dword list(void); |
64 | dword list(void); |
66 | void free(void); |
65 | void free(void); |
67 | dword position(dword __); |
66 | dword position(dword __); |
68 | void load(dword _text); |
67 | void load(dword _text); |
69 | void expected(dword _text); |
68 | void expected(dword _text); |
70 | }; |
69 | }; |
71 | :dword back(void) |
70 | :dword back(void) |
72 | { |
71 | { |
73 | 72 | ||
74 | } |
73 | } |
75 | :dword lexer::list(void) |
74 | :dword lexer::list(void) |
76 | { |
75 | { |
77 | dword count_mem,buf_loop,pos; |
76 | dword count_mem,buf_loop,pos; |
78 | count_mem = 0; |
77 | count_mem = 0; |
79 | buf_loop = 5000; // на тыс элементов. |
78 | buf_loop = 5000; // на тыс элементов. |
80 | count = 0; |
79 | count = 0; |
81 | buffer_loading = malloc(buf_loop); |
80 | buffer_loading = malloc(buf_loop); |
82 | pos = buffer_loading; |
81 | pos = buffer_loading; |
83 | while(type!=LEX_END) |
82 | while(type!=LEX_END) |
84 | { |
83 | { |
85 | pos+=count_mem; |
84 | pos+=count_mem; |
86 | next(); |
85 | next(); |
87 | DSDWORD[pos] = strndup(token,length); |
86 | DSDWORD[pos] = strndup(token,length); |
88 | pos+=4; |
87 | pos+=4; |
89 | DSBYTE [pos] = type; |
88 | DSBYTE [pos] = type; |
90 | pos++; |
89 | pos++; |
91 | count++; |
90 | count++; |
92 | if(pos-buffer_loading>buf_loop) |
91 | if(pos-buffer_loading>buf_loop) |
93 | { |
92 | { |
94 | buf_loop*=2; |
93 | buf_loop*=2; |
95 | buffer_loading = realloc(buffer_loading,buf_loop); |
94 | buffer_loading = realloc(buffer_loading,buf_loop); |
96 | } |
95 | } |
97 | } |
96 | } |
98 | return buffer_loading; |
97 | return buffer_loading; |
99 | } |
98 | } |
100 | :void lexer::free(void) |
99 | :void lexer::free(void) |
101 | { |
100 | { |
102 | dword z; |
101 | dword z; |
103 | z = count; |
102 | z = count; |
104 | while(z) |
103 | while(z) |
105 | { |
104 | { |
106 | z--; |
105 | z--; |
107 | position(z); |
106 | position(z); |
108 | ::free(token); |
107 | ::free(token); |
109 | } |
108 | } |
110 | count = 0; |
109 | count = 0; |
111 | ::free(buffer_loading); |
110 | ::free(buffer_loading); |
112 | } |
111 | } |
113 | :dword lexer::position(dword __) |
112 | :dword lexer::position(dword __) |
114 | { |
113 | { |
115 | dword pos1; |
114 | dword pos1; |
116 | if(!count)list(); |
115 | if(!count)list(); |
117 | if(__>=count)__=count-1; |
116 | if(__>=count)__=count-1; |
118 | else if(__<0)__=0; |
117 | else if(__<0)__=0; |
119 | pos1 = __*5; |
118 | pos1 = __*5; |
120 | pos1 += buffer_loading; |
119 | pos1 += buffer_loading; |
121 | token = DSDWORD[pos1]; |
120 | token = DSDWORD[pos1]; |
122 | pos1++; |
121 | pos1++; |
123 | type = DSBYTE[pos1]; |
122 | type = DSBYTE[pos1]; |
124 | return token; |
123 | return token; |
125 | } |
124 | } |
126 | :void lexer::expected(dword _text) |
125 | :void lexer::expected(dword _text) |
127 | { |
126 | { |
128 | notify(_text); |
127 | notify(_text); |
129 | ExitProcess(); |
128 | ExitProcess(); |
130 | } |
129 | } |
131 | 130 | ||
132 | :void lexer::load(dword _text) |
131 | :void lexer::load(dword _text) |
133 | { |
132 | { |
134 | text = _text; |
133 | text = _text; |
135 | count = 0; |
134 | count = 0; |
136 | str_buffer = 0; |
135 | str_buffer = 0; |
137 | } |
136 | } |
138 | 137 | ||
139 | :dword lexer::next(void) |
138 | :dword lexer::next(void) |
140 | { |
139 | { |
141 | char s; |
140 | char s; |
142 | dword len_str_buf,tmp; |
141 | dword len_str_buf,tmp; |
143 | dword pos,in; |
142 | dword pos,in; |
144 | pos = #const_token_lexer; |
143 | pos = #const_token_lexer; |
145 | in = text; |
144 | in = text; |
146 | //len_str_buf = 1024; |
145 | //len_str_buf = 1024; |
147 | if(str_buffer)::free(str_buffer); |
146 | if(str_buffer)::free(str_buffer); |
148 | NEXT_TOKEN: |
147 | NEXT_TOKEN: |
149 | length = 0; |
148 | length = 0; |
150 | loop() |
149 | loop() |
151 | { |
150 | { |
152 | s = DSBYTE[in]; |
151 | s = DSBYTE[in]; |
153 | if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break; |
152 | if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break; |
154 | in++; |
153 | in++; |
155 | text++; |
154 | text++; |
156 | } |
155 | } |
157 | 156 | ||
158 | if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;} |
157 | if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;} |
159 | 158 | ||
160 | if(s=='/') |
159 | if(s=='/') |
161 | { |
160 | { |
162 | in++; |
161 | in++; |
163 | s = DSBYTE[in]; |
162 | s = DSBYTE[in]; |
164 | 163 | ||
165 | // Line comments |
164 | // Line comments |
166 | if(s=='/') |
165 | if(s=='/') |
167 | { |
166 | { |
168 | loop() |
167 | loop() |
169 | { |
168 | { |
170 | in++; |
169 | in++; |
171 | s = DSBYTE[in]; |
170 | s = DSBYTE[in]; |
172 | if(s==10)||(s==13)||(s==0)goto NEXT_TOKEN; |
171 | if(s==10)||(s==13)||(s==0)goto NEXT_TOKEN; |
173 | /* Add comments*/ |
172 | /* Add comments*/ |
174 | } |
173 | } |
175 | } |
174 | } |
176 | if(s=='*') |
175 | if(s=='*') |
177 | { |
176 | { |
178 | loop() |
177 | loop() |
179 | { |
178 | { |
180 | in++; |
179 | in++; |
181 | s = DSBYTE[in]; |
180 | s = DSBYTE[in]; |
182 | if(s=='*')if(DSBYTE[in+1]=='/') |
181 | if(s=='*')if(DSBYTE[in+1]=='/') |
183 | { |
182 | { |
184 | in+=2; |
183 | in+=2; |
185 | goto NEXT_TOKEN; |
184 | goto NEXT_TOKEN; |
186 | } |
185 | } |
187 | } |
186 | } |
188 | } |
187 | } |
189 | } |
188 | } |
190 | 189 | ||
191 | if (strchr("=<>!~&|#",s)) |
190 | if (strchr("=<>!~&|#",s)) |
192 | { |
191 | { |
193 | loop() |
192 | loop() |
194 | { |
193 | { |
195 | if (!strchr("=<>!~&|#",s)) break; |
194 | if (!strchr("=<>!~&|#",s)) break; |
196 | 195 | ||
197 | DSBYTE[pos] = s; |
196 | DSBYTE[pos] = s; |
198 | pos++; |
197 | pos++; |
199 | 198 | ||
200 | in++; |
199 | in++; |
201 | s = DSBYTE[in]; |
200 | s = DSBYTE[in]; |
202 | } |
201 | } |
203 | type = LEX_IND; |
202 | type = LEX_IND; |
204 | } |
203 | } |
205 | else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s)) |
204 | else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s)) |
206 | { |
205 | { |
207 | DSBYTE[pos] = s; |
206 | DSBYTE[pos] = s; |
208 | pos++; |
207 | pos++; |
209 | type = LEX_IND; |
208 | type = LEX_IND; |
210 | in++; |
209 | in++; |
211 | } |
210 | } |
212 | else if(s>='0')&&(s<='9') |
211 | else if(s>='0')&&(s<='9') |
213 | { |
212 | { |
214 | loop() |
213 | loop() |
215 | { |
214 | { |
216 | if(s<'0')||(s>'9')if(s!='.')break; |
215 | if(s<'0')||(s>'9')if(s!='.')break; |
217 | 216 | ||
218 | DSBYTE[pos] = s; |
217 | DSBYTE[pos] = s; |
219 | pos++; |
218 | pos++; |
220 | 219 | ||
221 | in++; |
220 | in++; |
222 | s = DSBYTE[in]; |
221 | s = DSBYTE[in]; |
223 | } |
222 | } |
224 | type = LEX_DEC; |
223 | type = LEX_DEC; |
225 | } |
224 | } |
226 | else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s)) |
225 | else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s)) |
227 | { |
226 | { |
228 | loop() |
227 | loop() |
229 | { |
228 | { |
230 | if(s<'A')||(s>'z')if(s<'0')||(s>'9')break; |
229 | if(s<'A')||(s>'z')if(s<'0')||(s>'9')break; |
231 | if(strchr("[]\\^`",s))break; |
230 | if(strchr("[]\\^`",s))break; |
232 | 231 | ||
233 | DSBYTE[pos] = s; |
232 | DSBYTE[pos] = s; |
234 | pos++; |
233 | pos++; |
235 | 234 | ||
236 | in++; |
235 | in++; |
237 | s = DSBYTE[in]; |
236 | s = DSBYTE[in]; |
238 | } |
237 | } |
239 | 238 | ||
240 | loop() |
239 | loop() |
241 | { |
240 | { |
242 | s = DSBYTE[in]; |
241 | s = DSBYTE[in]; |
243 | if(s!=9)if(s!=10)if(s!=13)if(s!=32)break; |
242 | if(s!=9)if(s!=10)if(s!=13)if(s!=32)break; |
244 | in++; |
243 | in++; |
245 | text++; |
244 | text++; |
246 | } |
245 | } |
247 | type = LEX_VAR; |
246 | type = LEX_VAR; |
248 | if(s=='(')type = LEX_FNC; |
247 | if(s=='(')type = LEX_FNC; |
249 | } |
248 | } |
250 | else if(s=='"')||(s=='\'') |
249 | else if(s=='"')||(s=='\'') |
251 | { |
250 | { |
252 | quote = s; |
251 | quote = s; |
253 | in++; |
252 | in++; |
254 | tmp = in; |
253 | tmp = in; |
255 | s = DSBYTE[in]; |
254 | s = DSBYTE[in]; |
256 | loop() |
255 | loop() |
257 | { |
256 | { |
258 | if(s=='\\') |
257 | if(s=='\\') |
259 | { |
258 | { |
260 | in++; |
259 | in++; |
261 | s = DSBYTE[in]; |
260 | s = DSBYTE[in]; |
262 | if(!s){type = LEX_STR;goto GOTO_LEX_END;} |
261 | if(!s){type = LEX_STR;goto GOTO_LEX_END;} |
263 | if(!cmd)switch(s) |
262 | if(!cmd)switch(s) |
264 | { |
263 | { |
265 | case 'n':s='\n';break; |
264 | case 'n':s='\n';break; |
266 | case 'r':s='\r';break; |
265 | case 'r':s='\r';break; |
267 | case 't':s='\t';break; |
266 | case 't':s='\t';break; |
268 | } |
267 | } |
269 | else { |
268 | else { |
270 | DSBYTE[pos] = '\\'; |
269 | DSBYTE[pos] = '\\'; |
271 | pos++; |
270 | pos++; |
272 | } |
271 | } |
273 | goto LEX_STEP_1; |
272 | goto LEX_STEP_1; |
274 | } |
273 | } |
275 | if(!s){type = LEX_STR;goto GOTO_LEX_END;} |
274 | if(!s){type = LEX_STR;goto GOTO_LEX_END;} |
276 | else if(s==quote)break; |
275 | else if(s==quote)break; |
277 | LEX_STEP_1: |
276 | LEX_STEP_1: |
278 | DSBYTE[pos] = s; |
277 | DSBYTE[pos] = s; |
279 | pos++; |
278 | pos++; |
280 | in++; |
279 | in++; |
281 | /*if(in-tmp>len_str_buf) |
280 | /*if(in-tmp>len_str_buf) |
282 | { |
281 | { |
283 | if(str_buffer) |
282 | if(str_buffer) |
284 | { |
283 | { |
285 | tmp = len_str_buf; |
284 | tmp = len_str_buf; |
286 | len_str_buf+=1024; |
285 | len_str_buf+=1024; |
287 | str_buffer = realloc(str_buffer,len_str_buf+1); |
286 | str_buffer = realloc(str_buffer,len_str_buf+1); |
288 | strlcpy(str_buffer+tmp,#const_token_lexer,1024); |
287 | strlcpy(str_buffer+tmp,#const_token_lexer,1024); |
289 | pos = #const_token_lexer; |
288 | pos = #const_token_lexer; |
290 | } |
289 | } |
291 | else { |
290 | else { |
292 | len_str_buf+=1024; |
291 | len_str_buf+=1024; |
293 | str_buffer = malloc(len_str_buf+1); |
292 | str_buffer = malloc(len_str_buf+1); |
294 | strlcpy(str_buffer,#const_token_lexer,1024); |
293 | strlcpy(str_buffer,#const_token_lexer,1024); |
295 | pos = #const_token_lexer; |
294 | pos = #const_token_lexer; |
296 | } |
295 | } |
297 | }*/ |
296 | }*/ |
298 | s = DSBYTE[in]; |
297 | s = DSBYTE[in]; |
299 | } |
298 | } |
300 | in++; |
299 | in++; |
301 | /*tmp = pos-in; |
300 | /*tmp = pos-in; |
302 | if(str_buffer) |
301 | if(str_buffer) |
303 | { |
302 | { |
304 | if(tmp) |
303 | if(tmp) |
305 | { |
304 | { |
306 | str_buffer = realloc(str_buffer,tmp+1); |
305 | str_buffer = realloc(str_buffer,tmp+1); |
307 | strlcpy(str_buffer+len_str_buf,#const_token_lexer,tmp); |
306 | strlcpy(str_buffer+len_str_buf,#const_token_lexer,tmp); |
308 | } |
307 | } |
309 | type = LEX_STR; |
308 | type = LEX_STR; |
310 | length = len_str_buf+tmp; |
309 | length = len_str_buf+tmp; |
311 | text = in; |
310 | text = in; |
312 | tmp = str_buffer+length; |
311 | tmp = str_buffer+length; |
313 | DSBYTE[tmp] = 0; |
312 | DSBYTE[tmp] = 0; |
314 | token = str_buffer; |
313 | token = str_buffer; |
315 | return token; |
314 | return token; |
316 | }*/ |
315 | }*/ |
317 | type = LEX_STR; |
316 | type = LEX_STR; |
318 | } |
317 | } |
319 | else { |
318 | else { |
320 | in++; |
319 | in++; |
321 | type = LEX_NUL; |
320 | type = LEX_NUL; |
322 | DSBYTE[pos] = s; |
321 | DSBYTE[pos] = s; |
323 | pos++; |
322 | pos++; |
324 | } |
323 | } |
325 | GOTO_LEX_END: |
324 | GOTO_LEX_END: |
326 | length = in-text; |
325 | length = in-text; |
327 | text = in; |
326 | text = in; |
328 | DSBYTE[pos] = 0; |
327 | DSBYTE[pos] = 0; |
329 | token = #const_token_lexer; |
328 | token = #const_token_lexer; |
330 | return token; |
329 | return token; |
331 | } |
330 | } |
332 | 331 | ||
333 | #endif |
332 | #endif |