Rev 5631 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5598 | pavelyakov | 1 | #ifndef INCLUDE_LEXER_H |
2 | #define INCLUDE_LEXER_H |
||
5676 | pavelyakov | 3 | #print "[include |
5598 | pavelyakov | 4 | |
5 | #ifndef INCLUDE_STRING_H |
||
6 | #include "../lib/strings.h" |
||
7 | #endif |
||
5631 | pavelyakov | 8 | |
9 | #ifndef INCLUDE_MEM_H |
||
10 | #include "../lib/mem.h" |
||
11 | #endif |
||
5598 | pavelyakov | 12 | /** Splits text into tokens |
13 | * Author :Pavel Yakovlev |
||
14 | * Homepage:https://vk.com/pavelyakov39 |
||
5631 | pavelyakov | 15 | * Ver. : 1.51 |
5598 | pavelyakov | 16 | */ |
17 | |||
18 | /** Example: |
||
19 | * lexer lex; |
||
20 | * lex.load("var a=123;"); |
||
21 | * lex.next(); |
||
22 | * lex.token; //TOKEN == 'var' |
||
23 | * lex.type ; //TYPE == LEX_VAR |
||
24 | * |
||
25 | * lex.next(); |
||
26 | * lex.token; //TOKEN == 'a' |
||
27 | * lex.type ; //TYPE == LEX_VAR |
||
28 | * |
||
29 | * lex.next(); |
||
30 | * lex.token; //TOKEN == '=' |
||
31 | * lex.type ; //TYPE == LEX_IND |
||
32 | * |
||
33 | * lex.next(); |
||
34 | * lex.token; //TOKEN == '123' |
||
35 | * lex.type ; //TYPE == LEX_DEC |
||
36 | * |
||
37 | * lex.next(); |
||
38 | * lex.token; //TOKEN == ';' |
||
39 | * lex.type ; //TYPE == LEX_IND |
||
40 | * |
||
41 | * lex.next(); |
||
42 | * lex.token; //TOKEN == '' |
||
43 | * lex.type ; //TYPE == LEX_END |
||
44 | */ |
||
45 | |||
/* Token categories reported in lexer.type. */
#define LEX_NUL 0 // character that matches no other category
#define LEX_END 1 // end of the text was reached
#define LEX_STR 2 // quoted string ("..." or '...')
#define LEX_DEC 3 // run of digits, dots allowed
#define LEX_VAR 4 // identifier
#define LEX_FNC 5 // identifier directly followed by '('
#define LEX_IND 6 // operator or punctuation

/* Shared scratch buffer that lexer::next() fills with the current token text. */
:char const_token_lexer[1024];
||
/** Lexer state plus the operations declared on it. */
:struct lexer
{
	byte cmd;             // non-zero: next() keeps backslash escapes of strings untranslated
	dword token;          // address of the current token text
	dword text;           // current read position inside the loaded text
	dword mem_list;       // not referenced by the code in this file
	dword count;          // number of entries in the table built by list()
	dword buffer_loading; // address of the table built by list()
	dword str_buffer;     // heap buffer released at the start of next(), zeroed by load()
	byte type;            // LEX_* category of the current token
	char quote;           // quote character that opened the last string token
	signed length;        // source characters consumed by the current token
	dword next(void);
	dword back(void);
	dword list(void);
	void free(void);
	dword position(dword __);
	void load(dword _text);
	void expected(dword _text);
};
||
/**
 * Empty placeholder: the body contains no statements and returns no value.
 * NOTE(review): the lexer struct declares a back() method, but this definition
 * is a free function named back, not lexer::back - confirm which was intended.
 */
:dword back(void)
{
}
||
/**
 * Tokenizes the remaining text into a table and returns the table address.
 *
 * Every entry takes 5 bytes: a dword holding a strndup() copy of the token
 * text, followed by one byte holding the token type. Entries are appended
 * until next() reports LEX_END; the LEX_END entry itself is stored too.
 * On return `count` holds the number of entries and `buffer_loading` the
 * table address (release both with lexer::free()).
 */
:dword lexer::list(void)
{
	dword capacity,used,need,entry;
	capacity = 5000; // room for one thousand 5-byte entries
	count = 0;
	// A LEX_END left over from an earlier scan must not end the loop before
	// the first token has been read.
	type = LEX_NUL;
	buffer_loading = malloc(capacity);
	while(type!=LEX_END)
	{
		used = count*5;
		need = used+5;
		// Grow before writing so the entry always fits, and derive the entry
		// address from the (possibly moved) buffer afterwards.
		if(need>capacity)
		{
			capacity*=2;
			buffer_loading = realloc(buffer_loading,capacity);
		}
		entry = buffer_loading+used;
		next();
		DSDWORD[entry] = strndup(token,length);
		entry+=4;
		DSBYTE [entry] = type;
		count++;
	}
	return buffer_loading;
}
||
/**
 * Releases the token table built by list(): every stored token copy is freed,
 * walking from the last entry to the first, then the table itself.
 * `count` is reset to 0. position() is used to fetch each entry, so `token`
 * and `type` are overwritten along the way.
 */
:void lexer::free(void)
{
	dword remaining;
	remaining = count;
	while(remaining>0)
	{
		remaining--;
		position(remaining); // loads the stored copy's address into token
		::free(token);
	}
	::free(buffer_loading);
	count = 0;
}
||
/**
 * Selects entry number `__` of the token table and returns its text address.
 *
 * The table is built on demand with list() when `count` is 0. An index past
 * the end is clamped to the last entry. Sets `token` and `type` from the
 * entry; each entry is a dword text address followed by a type byte, which
 * is the layout list() writes.
 * Returns 0 without touching `token`/`type` when the table is still empty.
 */
:dword lexer::position(dword __)
{
	dword entry;
	if(!count)list();
	// Nothing to select; also keeps count-1 below from wrapping around.
	if(!count)return 0;
	if(__>=count)__=count-1;
	entry = __*5;
	entry += buffer_loading;
	token = DSDWORD[entry];
	// The type byte sits after the 4-byte text address.
	entry += 4;
	type = DSBYTE[entry];
	return token;
}
||
/**
 * Fatal-error helper: passes `_text` to notify() and then calls ExitProcess().
 */
:void lexer::expected(dword _text)
{
	notify(_text);
	ExitProcess();
}
||
131 | |||
/**
 * Points the lexer at a new text.
 * `_text` is stored as the read position; the token-table counter and the
 * string buffer pointer are reset to 0.
 */
:void lexer::load(dword _text)
{
	str_buffer = 0;
	count = 0;
	text = _text;
}
138 | |||
/**
 * Reads the next token starting at `text` and returns the address of its text.
 *
 * Whitespace (tab, LF, CR, space) and comments (// to end of line, and
 * block comments) are skipped. The token text is written, zero-terminated,
 * into const_token_lexer and `type` is set to:
 *   LEX_END - end of text (token is an empty string);
 *   LEX_IND - a run of the characters =<>!~&|# or one punctuation character;
 *   LEX_DEC - a run of digits and dots that starts with a digit;
 *   LEX_VAR - an identifier; LEX_FNC when its next non-blank character is '(';
 *   LEX_STR - a string in single or double quotes, stored without the quotes;
 *   LEX_NUL - any other single character.
 * In strings, when `cmd` is 0 the escapes \n, \r and \t are translated and
 * any other escaped character is taken literally; when `cmd` is non-zero the
 * backslash is kept in the token. `quote` receives the opening quote.
 * `length` is the number of source characters the token occupied and `text`
 * is advanced past it.
 * Token text longer than the buffer is truncated instead of overflowing it;
 * the heap-backed buffer sketched in earlier revisions was never enabled.
 */
:dword lexer::next(void)
{
	char s;
	dword pos,limit,in;
	pos = #const_token_lexer;
	limit = pos+1023; // last writable byte is reserved for the terminator
	in = text;
	if(str_buffer)
	{
		::free(str_buffer);
		str_buffer = 0; // never free the same buffer twice
	}
	NEXT_TOKEN:
	length = 0;
	loop()
	{
		s = DSBYTE[in];
		if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break;
		in++;
		text++;
	}

	if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;}

	if(s=='/')
	{
		in++;
		s = DSBYTE[in];

		// Line comment: skip up to the end of the line or of the text.
		if(s=='/')
		{
			loop()
			{
				in++;
				s = DSBYTE[in];
				if(s==10)||(s==13)||(s==0)
				{
					text = in; // keep length free of the skipped comment
					goto NEXT_TOKEN;
				}
			}
		}
		// Block comment: skip up to the closing marker.
		if(s=='*')
		{
			loop()
			{
				in++;
				s = DSBYTE[in];
				// Unterminated comment: stop at the end of the text.
				if(s==0)
				{
					text = in;
					goto NEXT_TOKEN;
				}
				if(s=='*')if(DSBYTE[in+1]=='/')
				{
					in+=2;
					text = in;
					goto NEXT_TOKEN;
				}
			}
		}
		// Not a comment: step back so the '/' itself becomes the token.
		in--;
		s = '/';
	}

	if (strchr("=<>!~&|#",s))
	{
		loop()
		{
			// Explicit end-of-text test; how strchr treats a zero needle is
			// not visible from this file.
			if (!s) break;
			if (!strchr("=<>!~&|#",s)) break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_IND;
	}
	else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s))
	{
		DSBYTE[pos] = s;
		pos++;
		type = LEX_IND;
		in++;
	}
	else if(s>='0')&&(s<='9')
	{
		loop()
		{
			if(s<'0')||(s>'9')if(s!='.')break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_DEC;
	}
	else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s))
	{
		loop()
		{
			if(s<'A')||(s>'z')if(s<'0')||(s>'9')break;
			if(strchr("[]\\^`",s))break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}

		// Look past blanks to see whether a '(' follows; text moves along
		// with in so the blanks are not counted in length.
		loop()
		{
			s = DSBYTE[in];
			if(s!=9)if(s!=10)if(s!=13)if(s!=32)break;
			in++;
			text++;
		}
		type = LEX_VAR;
		if(s=='(')type = LEX_FNC;
	}
	else if(s=='"')||(s=='\'')
	{
		quote = s;
		in++;
		s = DSBYTE[in];
		loop()
		{
			if(s=='\\')
			{
				in++;
				s = DSBYTE[in];
				if(!s){type = LEX_STR;goto GOTO_LEX_END;}
				if(!cmd)switch(s)
				{
					case 'n':s='\n';break;
					case 'r':s='\r';break;
					case 't':s='\t';break;
				}
				else {
					if(pos<limit)
					{
						DSBYTE[pos] = '\\';
						pos++;
					}
				}
				goto LEX_STEP_1;
			}
			if(!s){type = LEX_STR;goto GOTO_LEX_END;}
			else if(s==quote)break;
			LEX_STEP_1:
			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}
			in++;
			s = DSBYTE[in];
		}
		in++; // step over the closing quote
		type = LEX_STR;
	}
	else {
		in++;
		type = LEX_NUL;
		DSBYTE[pos] = s;
		pos++;
	}
	GOTO_LEX_END:
	length = in-text;
	text = in;
	DSBYTE[pos] = 0;
	token = #const_token_lexer;
	return token;
}
||
332 | |||
333 | #endif |