Subversion Repositories Kolibri OS

Rev

Rev 5676 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5598 pavelyakov 1
#ifndef INCLUDE_LEXER_H
2
#define INCLUDE_LEXER_H
3
 
4
#ifndef INCLUDE_STRING_H
5
#include "../lib/strings.h"
6
#endif
5631 pavelyakov 7
 
8
#ifndef INCLUDE_MEM_H
9
#include "../lib/mem.h"
10
#endif
5598 pavelyakov 11
/** Splits text into tokens
12
 *  Author  :Pavel Yakovlev
13
 *  Homepage:https://vk.com/pavelyakov39
5631 pavelyakov 14
 *  Ver.    : 1.51
5598 pavelyakov 15
 */
16
 
17
/** Example:
18
 *  lexer lex;
19
 *  lex.load("var a=123;");
20
 *  lex.next();
21
 *  lex.token; //TOKEN == 'var'
22
 *  lex.type ; //TYPE  == LEX_VAR
23
 *
24
 *  lex.next();
25
 *  lex.token; //TOKEN == 'a'
26
 *  lex.type ; //TYPE  == LEX_VAR
27
 *
28
 *  lex.next();
29
 *  lex.token; //TOKEN == '='
30
 *  lex.type ; //TYPE  == LEX_IND
31
 *
32
 *  lex.next();
33
 *  lex.token; //TOKEN == '123'
34
 *  lex.type ; //TYPE  == LEX_DEC
35
 *
36
 *  lex.next();
37
 *  lex.token; //TOKEN == ';'
38
 *  lex.type ; //TYPE  == LEX_IND
39
 *
40
 *  lex.next();
41
 *  lex.token; //TOKEN == ''
42
 *  lex.type ; //TYPE  == LEX_END
43
 */
44
 
45
/* Token classes stored in lexer.type by lexer::next() */
#define LEX_NUL 0 // any character that matches no other class
#define LEX_END 1 // terminating NUL of the text was reached
#define LEX_STR 2 // string in double or single quotes (quotes are not copied)
#define LEX_DEC 3 // run of digits and dots
#define LEX_VAR 4 // identifier
#define LEX_FNC 5 // identifier followed (after optional whitespace) by '('
#define LEX_IND 6 // operator or punctuation

/* Output buffer that lexer::next() copies the current token text into */
:char const_token_lexer[1024];
54
/** Tokenizer state together with its entry points.
 *  Typical use: load() a text, then call next() repeatedly, or call
 *  list()/position() to get the tokens as an indexed table.
 */
:struct lexer
{
	byte cmd;             // non-zero: next() keeps backslash escapes inside strings verbatim
	dword token;          // address of the current token text (NUL-terminated)
	dword text;           // read position inside the loaded text
	dword mem_list;       // not referenced by the code shown in this file
	dword count;          // number of entries stored in buffer_loading by list()
	dword buffer_loading; // table built by list(): 5 bytes per token (dword text pointer + byte type)
	dword str_buffer;     // released by next() when non-zero; load() resets it to 0
	byte type;            // class of the current token, one of LEX_*
	char quote;           // quote character that opened the last string token
	signed length;        // number of source bytes consumed for the current token
	dword next(void);
	dword back(void);
	dword list(void);
	void free(void);
	dword position(dword __);
	void load(dword _text);
	void expected(dword _text);
};
5631 pavelyakov 70
/** Placeholder: the body is empty and no value is returned.
 *  NOTE(review): the struct declares a method back(), but this definition has
 *  no lexer:: prefix, so it defines a free function named back -- confirm
 *  whether lexer::back was intended.
 */
:dword back(void)
{
}
74
/** Tokenizes the rest of the loaded text into a table.
 *  Each entry takes 5 bytes: a dword pointer to a private copy of the token
 *  text (made with strndup) followed by a byte holding the token type.
 *  The terminating LEX_END token is stored as the last entry.
 *  Sets   : buffer_loading (the table), count (number of entries).
 *  Returns: buffer_loading.
 *  The table and the copied strings stay allocated until free() is called.
 */
:dword lexer::list(void)
{
	dword buf_loop,pos,used;
	buf_loop = 5000; // room for one thousand 5-byte entries
	count = 0;
	buffer_loading = malloc(buf_loop);
	pos = buffer_loading;
	// At least one token is always read, so a stale type==LEX_END left over
	// from an earlier scan cannot produce an empty table.
	loop()
	{
		// Grow BEFORE writing, so the entry never lands past the end of the block.
		used = pos-buffer_loading;
		used += 5;
		if(used>buf_loop)
		{
			buf_loop*=2;
			buffer_loading = realloc(buffer_loading,buf_loop);
			// realloc may move the block: rebuild the write position from the entry count.
			pos = count*5;
			pos += buffer_loading;
		}
		next();
		DSDWORD[pos] = strndup(token,length);
		pos+=4;
		DSBYTE [pos] = type;
		pos++;
		count++;
		if(type==LEX_END)break;
	}
	return buffer_loading;
}
99
/** Releases the token table built by list().
 *  Frees the copied text of every entry (the dword at the start of each
 *  5-byte entry) and then the table itself, and resets count and
 *  buffer_loading so that calling free() again is harmless.
 *  token and type are left untouched; if token was obtained from position()
 *  it refers to released memory after this call.
 */
:void lexer::free(void)
{
	dword z,entry,ptr;
	z = count;
	while(z)
	{
		z--;
		// Read the entry directly instead of through position(), which would
		// overwrite token/type with data that is about to be released.
		entry = z*5;
		entry += buffer_loading;
		ptr = DSDWORD[entry];
		::free(ptr);
	}
	count = 0;
	if(buffer_loading)
	{
		::free(buffer_loading);
		buffer_loading = 0; // prevents a double free on a repeated call
	}
}
112
/** Selects an entry of the token table as the current token.
 *  Builds the table with list() first when it is empty.
 *  __     : zero-based entry index; values past the end select the last entry.
 *  Sets   : token, type.
 *  Returns: token - address of the entry's text.
 *  When the table holds no entries, an empty LEX_END token is reported.
 */
:dword lexer::position(dword __)
{
	dword pos1;
	if(!count)list();
	if(!count)
	{
		// Nothing to index: count-1 below would wrap around.
		type = LEX_END;
		token = "";
		return token;
	}
	// __ is an unsigned dword, so only the upper bound needs clamping.
	if(__>=count)__=count-1;
	pos1 = __*5; // 5 bytes per entry: dword text pointer + byte type
	pos1 += buffer_loading;
	token = DSDWORD[pos1];
	pos1 += 4; // the type byte follows the 4-byte pointer
	type = DSBYTE[pos1];
	return token;
}
125
/** Fatal error helper: passes _text to notify() and then calls ExitProcess().
 *  _text : address of the message.
 */
:void lexer::expected(dword _text)
{
	notify(_text);  // show the message first
	ExitProcess();  // then stop the program
}
130
 
131
/** Attaches a text to the lexer and rewinds it.
 *  _text : address of the NUL-terminated text that next() will read.
 *  Also clears count (so position() rebuilds the token table) and str_buffer.
 *  Nothing is released here.
 */
:void lexer::load(dword _text)
{
	str_buffer = 0;
	count = 0;
	text = _text;
}
137
 
138
/** Reads the next token from the loaded text.
 *  Skips whitespace (tab, LF, CR, space), // line comments and block comments,
 *  copies the token text into const_token_lexer and classifies it:
 *    LEX_END - terminating NUL reached (token is an empty string)
 *    LEX_IND - run of "=<>!~&|#" characters, or one punctuation character
 *    LEX_DEC - run of digits and dots
 *    LEX_VAR - identifier; LEX_FNC when the next non-blank character is '('
 *    LEX_STR - quoted string; the quotes are not copied. With cmd==0 the
 *              escapes \n \r \t are translated and the backslash of any other
 *              escape is dropped; with cmd!=0 escapes are copied verbatim.
 *    LEX_NUL - any other single character
 *  Sets   : token, type, length (source bytes consumed for the token),
 *           text (advanced past the token), quote (for strings).
 *  Returns: token.
 *  The token text lives in the shared const_token_lexer buffer, so it is
 *  overwritten by the following call. Tokens longer than 1023 bytes are
 *  consumed completely but only their first 1023 bytes are stored.
 */
:dword lexer::next(void)
{
	char s;
	dword pos,in,limit;
	pos = #const_token_lexer;
	limit = pos;
	limit += 1023; // last byte of the 1024-byte buffer is reserved for the terminator
	in = text;
	if(str_buffer)
	{
		::free(str_buffer);
		str_buffer = 0; // never release the same block twice
	}
	NEXT_TOKEN:
	length = 0;
	// Skip leading whitespace; text follows so that length excludes it.
	loop()
	{
		s = DSBYTE[in];
		if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break;
		in++;
		text++;
	}

	if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;}

	if(s=='/')
	{
		// Only look ahead here: a '/' that does not start a comment must
		// still reach the punctuation branch below as a token of its own.

		// Line comment: skip up to the line break or the end of the text.
		if(DSBYTE[in+1]=='/')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				if(s==10)||(s==13)||(s==0){text = in;goto NEXT_TOKEN;}
			}
		}
		// Block comment: skip up to the closing marker.
		if(DSBYTE[in+1]=='*')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				// Unterminated comment: stop at the end of the text.
				if(!s){text = in;goto NEXT_TOKEN;}
				if(s=='*')if(DSBYTE[in+1]=='/')
				{
					in+=2;
					text = in;
					goto NEXT_TOKEN;
				}
			}
		}
	}

	if (strchr("=<>!~&|#",s))
	{
		// Operator: a run of operator characters forms one token.
		loop()
		{
			// Explicit end-of-text test: do not depend on what strchr
			// returns when asked to find the NUL character.
			if (!s) break;
			if (!strchr("=<>!~&|#",s)) break;

			if (pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_IND;
	}
	else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s))
	{
		// Punctuation: always a single-character token.
		DSBYTE[pos] = s;
		pos++;
		type = LEX_IND;
		in++;
	}
	else if(s>='0')&&(s<='9')
	{
		// Number: digits and dots.
		loop()
		{
			if(s<'0')||(s>'9')if(s!='.')break;

			if (pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_DEC;
	}
	else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s))
	{
		// Identifier: characters in 'A'..'z' (minus the punctuation inside
		// that range) and digits.
		loop()
		{
			if(s<'A')||(s>'z')if(s<'0')||(s>'9')break;
			if(strchr("[]\\^`",s))break;

			if (pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}

		// Skip whitespace after the name to see whether '(' follows.
		loop()
		{
			s = DSBYTE[in];
			if(s!=9)if(s!=10)if(s!=13)if(s!=32)break;
			in++;
			text++;
		}
		type = LEX_VAR;
		if(s=='(')type = LEX_FNC;
	}
	else if(s=='"')||(s=='\'')
	{
		// String: remember which quote opened it and copy up to the matching one.
		quote = s;
		in++;
		s = DSBYTE[in];
		loop()
		{
			if(s=='\\')
			{
				in++;
				s = DSBYTE[in];
				if(!s){type = LEX_STR;goto GOTO_LEX_END;}
				if(!cmd)switch(s)
				{
					case 'n':s='\n';break;
					case 'r':s='\r';break;
					case 't':s='\t';break;
				}
				else {
					// cmd mode: keep the backslash in the token text.
					if (pos<limit)
					{
						DSBYTE[pos] = '\\';
						pos++;
					}
				}
				// The escaped character is stored even if it equals the quote.
				goto LEX_STEP_1;
			}
			if(!s){type = LEX_STR;goto GOTO_LEX_END;}
			else if(s==quote)break;
			LEX_STEP_1:
			if (pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}
			in++;
			s = DSBYTE[in];
		}
		in++; // step over the closing quote
		type = LEX_STR;
	}
	else {
		// Anything else is returned as a one-character token of unknown class.
		in++;
		type = LEX_NUL;
		DSBYTE[pos] = s;
		pos++;
	}
	GOTO_LEX_END:
	length = in-text;
	text = in;
	DSBYTE[pos] = 0;
	token = #const_token_lexer;
	return token;
}
331
 
332
#endif