Subversion Repositories Kolibri OS

Rev

Rev 5631 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5598 pavelyakov 1
#ifndef INCLUDE_LEXER_H
2
#define INCLUDE_LEXER_H
5676 pavelyakov 3
#print "[include ]\n"
5598 pavelyakov 4
 
5
#ifndef INCLUDE_STRING_H
6
#include "../lib/strings.h"
7
#endif
5631 pavelyakov 8
 
9
#ifndef INCLUDE_MEM_H
10
#include "../lib/mem.h"
11
#endif
5598 pavelyakov 12
/** Splits text into tokens
 *  Author  : Pavel Yakovlev
 *  Homepage: https://vk.com/pavelyakov39
 *  Ver.    : 1.51
 */
17
 
18
/** Example:
 *  lexer lex;
 *  lex.load("var a=123;");
 *  lex.next();
 *  lex.token; //TOKEN == 'var'
 *  lex.type ; //TYPE  == LEX_VAR
 *
 *  lex.next();
 *  lex.token; //TOKEN == 'a'
 *  lex.type ; //TYPE  == LEX_VAR
 *
 *  lex.next();
 *  lex.token; //TOKEN == '='
 *  lex.type ; //TYPE  == LEX_IND
 *
 *  lex.next();
 *  lex.token; //TOKEN == '123'
 *  lex.type ; //TYPE  == LEX_DEC
 *
 *  lex.next();
 *  lex.token; //TOKEN == ';'
 *  lex.type ; //TYPE  == LEX_IND
 *
 *  lex.next();
 *  lex.token; //TOKEN == ''
 *  lex.type ; //TYPE  == LEX_END
 */
45
 
46
/** Token classes written to lexer.type by lexer::next(). */

// Any single character that matches none of the other rules.
#define LEX_NUL 0
// End of the input text (terminating zero byte reached).
#define LEX_END 1
// Quoted string literal ("..." or '...').
#define LEX_STR 2
// Run of decimal digits and dots.
#define LEX_DEC 3
// Identifier.
#define LEX_VAR 4
// Identifier followed (after optional whitespace) by '('.
#define LEX_FNC 5
// Operator or punctuation.
#define LEX_IND 6

// Scratch buffer that lexer::next() fills with the text of the current token;
// lexer.token points here after every successful next().
:char const_token_lexer[1024];
55
/** Tokenizer state plus its operations.
 *  Member order is unchanged, only the declarations are split one per line.
 */
:struct lexer
{
	// Non-zero: string escapes are kept verbatim (backslash copied into the token).
	// Zero: \n, \r and \t are translated by next().
	byte cmd;
	// Address of the current token text.
	dword token;
	// Address of the not yet consumed input.
	dword text;
	// Not referenced by any function visible in this file.
	dword mem_list;
	// Number of records stored by list().
	dword count;
	// Record buffer built by list(): 5 bytes per token (dword text pointer + type byte).
	dword buffer_loading;
	// Released by next() when non-zero; load() sets it to 0.
	dword str_buffer;
	// LEX_* class of the current token.
	byte type;
	// Quote character that opened the most recent string literal.
	char quote;
	// Set by next() to the distance the input pointer advanced for the token.
	signed length;

	dword next(void);
	dword back(void);
	dword list(void);
	void free(void);
	dword position(dword __);
	void load(dword _text);
	void expected(dword _text);
};
5631 pavelyakov 71
/** Placeholder with no behaviour of its own.
 *  Returns 0 so that the declared dword result is well defined instead of
 *  whatever happened to be left in the return register.
 *
 *  NOTE(review): the struct declares lexer::back(), but this definition has
 *  no `lexer::` qualifier and therefore defines a free function. The name is
 *  left untouched here; confirm whether the method was meant and what it
 *  should do.
 */
:dword back(void)
{
	return 0;
}
75
/** Tokenizes the remaining input and stores every token in buffer_loading.
 *
 *  Each record occupies 5 bytes: a dword holding a heap copy of the token
 *  text (made with strndup) followed by one byte holding the token type.
 *  The final record is always the LEX_END token. `count` receives the
 *  number of records.
 *
 *  Returns the address of the record buffer (also kept in buffer_loading).
 *  Release everything with lexer::free().
 */
:dword lexer::list(void)
{
	dword capacity,used,pos;
	capacity = 5000; // initial room for 1000 records
	used     = 0;    // bytes of the buffer already filled
	count    = 0;
	buffer_loading = malloc(capacity);
	loop()
	{
		// Fetch first and test afterwards: a stale `type` left over from
		// earlier input must not prevent tokenizing.
		next();

		// Grow BEFORE writing so the record never lands past the end.
		if(used+5>capacity)
		{
			capacity*=2;
			buffer_loading = realloc(buffer_loading,capacity);
		}

		// Recompute the write address from the current base: realloc may
		// have moved the buffer.
		pos = buffer_loading+used;
		DSDWORD[pos] = strndup(token,length);
		pos+=4;
		DSBYTE [pos] = type;
		used+=5;
		count++;

		if(type==LEX_END)break;
	}
	return buffer_loading;
}
100
/** Releases the token list built by list().
 *
 *  Frees every stored token string, then the record buffer, and resets
 *  `count` and `buffer_loading` so that calling free() again is harmless.
 *
 *  Side effect: position() is used to walk the records, so `token` and
 *  `type` are overwritten; `token` is left pointing at released memory
 *  until the next call to next() or position().
 */
:void lexer::free(void)
{
	dword z;
	z = count;
	while(z)
	{
		z--;
		position(z);
		::free(token);
	}
	count = 0;
	if(buffer_loading)
	{
		::free(buffer_loading);
		// Forget the released buffer to rule out a double free.
		buffer_loading = 0;
	}
}
113
/** Selects record number `__` of the token list as the current token.
 *
 *  Builds the list with list() first when it is empty. An index past the
 *  end is clamped to the last record. Loads `token` and `type` from the
 *  record and returns `token`.
 *
 *  If the list is still empty after list(), `type` is set to LEX_END and an
 *  empty string is returned.
 */
:dword lexer::position(dword __)
{
	dword rec;
	if(!count)list();
	if(!count)
	{
		// Nothing to index: avoid count-1 wrapping around.
		type = LEX_END;
		token = "";
		return token;
	}
	// `__` is unsigned, so only the upper bound needs clamping.
	if(__>=count)__=count-1;
	rec = __*5;
	rec += buffer_loading;
	token = DSDWORD[rec];
	// The type byte follows the 4-byte text pointer (see list()).
	rec += 4;
	type = DSBYTE[rec];
	return token;
}
126
/** Reports a fatal error and terminates the program.
 *  _text - passed unchanged to notify() before ExitProcess() is called.
 */
:void lexer::expected(dword _text)
{
	notify(_text);   // hand the message to notify()
	ExitProcess();   // then terminate
}
131
 
132
/** Attaches the lexer to a new zero-terminated input text.
 *  _text - address of the text to tokenize.
 *
 *  Resets the list counter, the string buffer pointer and the token type.
 *  The type reset matters because a LEX_END left over from previous input
 *  would otherwise make the new text look already finished.
 *
 *  A token list built earlier is NOT released here; call free() before
 *  loading new text if list()/position() were used.
 */
:void lexer::load(dword _text)
{
	text = _text;
	count = 0;
	str_buffer = 0;
	type = LEX_NUL;
}
138
 
139
/** Reads the next token from the input.
 *
 *  Skips whitespace (tab, LF, CR, space), line comments and block comments,
 *  then classifies the token and copies its text into const_token_lexer:
 *    LEX_IND - run of "=<>!~&|#" characters, or one punctuation character
 *    LEX_DEC - run of digits and dots
 *    LEX_VAR - identifier; LEX_FNC when the next non-blank character is '('
 *    LEX_STR - quoted string; \n \r \t are translated unless `cmd` is set,
 *              in which case the backslash is kept in the token
 *    LEX_NUL - any other single character
 *    LEX_END - end of input (token is an empty string)
 *
 *  Updates `text`, `length`, `type`, `token` (and `quote` for strings).
 *  Returns `token`.
 *
 *  Token text longer than 1023 bytes is truncated to fit const_token_lexer;
 *  the input is still consumed up to the real end of the token.
 *  TODO: growable storage for long strings (str_buffer) is not implemented.
 */
:dword lexer::next(void)
{
	char s;
	dword pos,in,limit;
	pos = #const_token_lexer;
	// Last writable position; the byte at `limit` is reserved for the final 0.
	limit = pos+1023;
	in = text;
	if(str_buffer)
	{
		::free(str_buffer);
		str_buffer = 0; // do not release the same block again on the next call
	}
	NEXT_TOKEN:
	length = 0;
	loop()
	{
		s = DSBYTE[in];
		if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break;
		in++;
		text++;
	}

	if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;}

	if(s=='/')
	{
		// Look ahead without consuming, so a plain '/' stays available as a token.
		s = DSBYTE[in+1];

		// Line comment: skip to end of line or end of input.
		if(s=='/')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				if(s==10)||(s==13)||(s==0)goto NEXT_TOKEN;
			}
		}
		// Block comment: skip to the closing "*/".
		if(s=='*')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				// Unterminated comment: stop at the end of the input.
				if(!s)goto NEXT_TOKEN;
				if(s=='*')if(DSBYTE[in+1]=='/')
				{
					in+=2;
					goto NEXT_TOKEN;
				}
			}
		}
		// Not a comment: continue with the '/' itself.
		s = '/';
	}

	if (strchr("=<>!~&|#",s))
	{
		loop()
		{
			// The terminating zero may itself be matched by strchr; never step over it.
			if (!s) break;
			if (!strchr("=<>!~&|#",s)) break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_IND;
	}
	else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s))
	{
		DSBYTE[pos] = s;
		pos++;
		type = LEX_IND;
		in++;
	}
	else if(s>='0')&&(s<='9')
	{
		loop()
		{
			if(s<'0')||(s>'9')if(s!='.')break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}
		type = LEX_DEC;
	}
	else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s))
	{
		loop()
		{
			if(s<'A')||(s>'z')if(s<'0')||(s>'9')break;
			if(strchr("[]\\^`",s))break;

			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}

			in++;
			s = DSBYTE[in];
		}

		// Skip blanks after the identifier to see whether '(' follows.
		loop()
		{
			s = DSBYTE[in];
			if(s!=9)if(s!=10)if(s!=13)if(s!=32)break;
			in++;
			text++;
		}
		type = LEX_VAR;
		if(s=='(')type = LEX_FNC;
	}
	else if(s=='"')||(s=='\'')
	{
		quote = s;
		in++;
		s = DSBYTE[in];
		loop()
		{
			if(s=='\\')
			{
				in++;
				s = DSBYTE[in];
				if(!s){type = LEX_STR;goto GOTO_LEX_END;}
				if(!cmd)switch(s)
				{
					case 'n':s='\n';break;
					case 'r':s='\r';break;
					case 't':s='\t';break;
				}
				else {
					if(pos<limit)
					{
						DSBYTE[pos] = '\\';
						pos++;
					}
				}
				// The escaped character is always taken literally, even a quote.
				goto LEX_STEP_1;
			}
			if(!s){type = LEX_STR;goto GOTO_LEX_END;}
			else if(s==quote)break;
			LEX_STEP_1:
			if(pos<limit)
			{
				DSBYTE[pos] = s;
				pos++;
			}
			in++;
			s = DSBYTE[in];
		}
		in++; // step over the closing quote
		type = LEX_STR;
	}
	else {
		in++;
		type = LEX_NUL;
		DSBYTE[pos] = s;
		pos++;
	}
	GOTO_LEX_END:
	length = in-text;
	text = in;
	DSBYTE[pos] = 0;
	token = #const_token_lexer;
	return token;
}
332
 
333
#endif