Rev 5676 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5598 | pavelyakov | 1 | #ifndef INCLUDE_LEXER_H |
2 | #define INCLUDE_LEXER_H |
||
3 | |||
4 | #ifndef INCLUDE_STRING_H |
||
5 | #include "../lib/strings.h" |
||
6 | #endif |
||
5631 | pavelyakov | 7 | |
8 | #ifndef INCLUDE_MEM_H |
||
9 | #include "../lib/mem.h" |
||
10 | #endif |
||
5598 | pavelyakov | 11 | /** Splits text into tokens |
12 | * Author :Pavel Yakovlev |
||
13 | * Homepage:https://vk.com/pavelyakov39 |
||
5631 | pavelyakov | 14 | * Ver. : 1.51 |
5598 | pavelyakov | 15 | */ |
16 | |||
17 | /** Example: |
||
18 | * lexer lex; |
||
19 | * lex.load("var a=123;"); |
||
20 | * lex.next(); |
||
21 | * lex.token; //TOKEN == 'var' |
||
22 | * lex.type ; //TYPE == LEX_VAR |
||
23 | * |
||
24 | * lex.next(); |
||
25 | * lex.token; //TOKEN == 'a' |
||
26 | * lex.type ; //TYPE == LEX_VAR |
||
27 | * |
||
28 | * lex.next(); |
||
29 | * lex.token; //TOKEN == '=' |
||
30 | * lex.type ; //TYPE == LEX_IND |
||
31 | * |
||
32 | * lex.next(); |
||
33 | * lex.token; //TOKEN == '123' |
||
34 | * lex.type ; //TYPE == LEX_DEC |
||
35 | * |
||
36 | * lex.next(); |
||
37 | * lex.token; //TOKEN == ';' |
||
38 | * lex.type ; //TYPE == LEX_IND |
||
39 | * |
||
40 | * lex.next(); |
||
41 | * lex.token; //TOKEN == '' |
||
42 | * lex.type ; //TYPE == LEX_END |
||
43 | */ |
||
44 | |||
// Token type codes stored in lexer.type, listed by numeric value.
#define LEX_NUL 0 // character that matched no other token class
#define LEX_END 1 // end of the input text
#define LEX_STR 2 // quoted string literal ('...' or "...")
#define LEX_DEC 3 // run of digits (may contain dots)
#define LEX_VAR 4 // identifier
#define LEX_FNC 5 // identifier directly followed by '('
#define LEX_IND 6 // operator or punctuation
||
52 | |||
// Shared scratch buffer: lexer::next() writes the text of the current token here.
:char const_token_lexer[1024];

// Tokenizer state and operations. See the usage example above.
:struct lexer
{
	byte cmd;             // non-zero: string escapes are kept verbatim (backslash copied) instead of translated
	dword token;          // pointer to the text of the current token
	dword text;           // current read position inside the source text
	dword mem_list;       // NOTE(review): not referenced by any code visible in this file
	dword count;          // number of entries stored by list()
	dword buffer_loading; // entry table allocated by list(): 5 bytes per token (pointer + type)
	dword str_buffer;     // optional heap buffer; released by next(), cleared by load()
	byte type;            // LEX_* code of the current token
	char quote;           // quote character that opened the last string literal
	signed length;        // number of source characters consumed by the current token

	dword next(void);
	dword back(void);
	dword list(void);
	void free(void);
	dword position(dword __);
	void load(dword _text);
	void expected(dword _text);
};
||
// Placeholder for stepping back one token; not implemented yet.
// Returns 0 so that callers get a defined value instead of whatever was left
// in the return register.
// NOTE(review): the struct declares back() as a lexer method, but this
// definition lacks the lexer:: qualifier - confirm which one is intended.
:dword back(void)
{
	return 0;
}
||
// Tokenizes the rest of the loaded text into a table stored in buffer_loading.
// Each entry takes 5 bytes: a dword pointer to a strndup() copy of the token
// followed by one byte holding its LEX_* type. The terminating LEX_END token
// is stored as the last entry. count receives the number of entries.
// Returns the address of the table.
:dword lexer::list(void)
{
	dword buf_size,offset,entry;
	buf_size = 5000; // initial room for one thousand 5-byte entries
	offset = 0;
	count = 0;
	buffer_loading = malloc(buf_size);
	// A stale LEX_END left by an earlier scan must not suppress this one.
	type = LEX_NUL;
	while(type!=LEX_END)
	{
		next();
		// Grow BEFORE writing so the whole entry always fits in the buffer.
		if(offset+5>buf_size)
		{
			buf_size*=2;
			buffer_loading = realloc(buffer_loading,buf_size);
		}
		// Recompute the address from the (possibly moved) buffer every time.
		entry = buffer_loading+offset;
		DSDWORD[entry] = strndup(token,length);
		DSBYTE[entry+4] = type;
		offset+=5;
		count++;
	}
	return buffer_loading;
}
||
// Releases everything allocated by list(): every token copy and then the
// entry table itself. Safe to call repeatedly - the pointers are cleared
// after release so a second call does not free them again.
:void lexer::free(void)
{
	dword z;
	z = count;
	while(z)
	{
		z--;
		position(z); // loads the stored copy's pointer into token
		::free(token);
	}
	count = 0;
	token = 0; // the last token copy has just been released
	if(buffer_loading)
	{
		::free(buffer_loading);
		buffer_loading = 0;
	}
}
||
// Selects entry number __ of the table built by list() (building it first if
// it does not exist yet) and loads its text pointer into token and its LEX_*
// code into type. An index past the end is clamped to the last entry.
// Returns the token pointer, or 0 if the table is empty.
:dword lexer::position(dword __)
{
	dword entry;
	if(!count)list();
	if(!count)return 0; // nothing to select; avoids indexing with count-1 underflow
	if(__>=count)__=count-1;
	// Entries are 5 bytes each: dword pointer at +0, type byte at +4.
	entry = __*5;
	entry += buffer_loading;
	token = DSDWORD[entry];
	type = DSBYTE[entry+4];
	return token;
}
||
// Reports a fatal parse error: shows the message at _text through notify()
// and then terminates the program with ExitProcess().
:void lexer::expected(dword _text)
{
	notify(_text);
	ExitProcess();
}
||
130 | |||
// Attaches the lexer to a new source text.
// _text - address of the text to tokenize; scanning starts at its first byte.
// The token table counter and the string buffer pointer are reset.
:void lexer::load(dword _text)
{
	count = 0;
	str_buffer = 0;
	text = _text;
}
137 | |||
// Reads the next token from the loaded text.
// Skips whitespace, // line comments and /* block */ comments, then classifies
// the token and stores:
//   token  - address of the token text (const_token_lexer, or "" at the end)
//   type   - one of the LEX_* codes
//   length - number of source characters the token occupies
//   text   - advanced past the token
// Token text longer than the 1023 characters that fit in const_token_lexer is
// truncated; the input is still consumed to the end of the token.
// Returns token.
:dword lexer::next(void)
{
	char s;
	dword pos,limit,in;
	pos = #const_token_lexer;
	limit = pos+1023; // last byte is reserved for the terminating zero
	in = text;
	if(str_buffer)
	{
		::free(str_buffer);
		str_buffer = 0; // never free the same buffer twice
	}
	NEXT_TOKEN:
	length = 0;
	loop()
	{
		s = DSBYTE[in];
		if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break;
		in++;
	}
	// The token starts here; skipped whitespace and comments are not part of it.
	text = in;

	if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;}

	if(s=='/')
	{
		// Peek at the next byte without consuming the '/', so that a plain
		// division sign is still emitted as a token further below.

		// Line comments
		if(DSBYTE[in+1]=='/')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				if(s==10)||(s==13)||(s==0)goto NEXT_TOKEN;
			}
		}
		// Block comments
		if(DSBYTE[in+1]=='*')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				// Unterminated comment: stop at the end of the text.
				if(!s)goto NEXT_TOKEN;
				if(s=='*')if(DSBYTE[in+1]=='/')
				{
					in+=2;
					goto NEXT_TOKEN;
				}
			}
		}
	}

	if (strchr("=<>!~&|#",s))
	{
		// Run of operator characters, e.g. "==", "!=", "&&".
		loop()
		{
			// strchr may report a match for the terminator itself, so test it first.
			if (!s) break;
			if (!strchr("=<>!~&|#",s)) break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_IND;
	}
	else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s))
	{
		// Single-character punctuation.
		DSBYTE[pos] = s;
		pos++;
		type = LEX_IND;
		in++;
	}
	else if(s>='0')&&(s<='9')
	{
		// Digits, with dots allowed inside.
		loop()
		{
			if(s<'0')||(s>'9')if(s!='.')break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_DEC;
	}
	else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s))
	{
		// Identifier: letters, '_' and digits.
		loop()
		{
			if(s<'A')||(s>'z')if(s<'0')||(s>'9')break;
			if(strchr("[]\\^`",s))break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}

		// Skip whitespace after the name to see whether '(' follows.
		// text moves together with in so that length stays the name's length.
		loop()
		{
			s = DSBYTE[in];
			if(s!=9)if(s!=10)if(s!=13)if(s!=32)break;
			in++;
			text++;
		}
		type = LEX_VAR;
		if(s=='(')type = LEX_FNC;
	}
	else if(s=='"')||(s=='\'')
	{
		// String literal; the token text excludes the quotes.
		quote = s;
		in++;
		s = DSBYTE[in];
		loop()
		{
			if(s=='\\')
			{
				in++;
				s = DSBYTE[in];
				if(!s){type = LEX_STR;goto GOTO_LEX_END;}
				// cmd == 0: translate \n \r \t, other escaped characters stand for themselves.
				// cmd != 0: keep the escape verbatim by copying the backslash too.
				if(!cmd)switch(s)
				{
					case 'n':s='\n';break;
					case 'r':s='\r';break;
					case 't':s='\t';break;
				}
				else if(pos<limit)
				{
					DSBYTE[pos] = '\\';
					pos++;
				}
				goto LEX_STEP_1;
			}
			if(!s){type = LEX_STR;goto GOTO_LEX_END;}
			else if(s==quote)break;
			LEX_STEP_1:
			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}
			in++;
			s = DSBYTE[in];
		}
		in++; // step over the closing quote
		// TODO: strings longer than const_token_lexer are truncated; a growable
		// str_buffer was planned for them but is not implemented.
		type = LEX_STR;
	}
	else {
		// Anything else becomes a one-character LEX_NUL token.
		in++;
		type = LEX_NUL;
		DSBYTE[pos] = s;
		pos++;
	}
	GOTO_LEX_END:
	length = in-text;
	text = in;
	DSBYTE[pos] = 0;
	token = #const_token_lexer;
	return token;
}
||
331 | |||
332 | #endif'0')||(s>'A')||(s>='z')&&(!strchr("[]\\^`",s)) |