Subversion Repositories Kolibri OS

Rev

Rev 5676 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5676 Rev 6887
1
#ifndef INCLUDE_LEXER_H
1
#ifndef INCLUDE_LEXER_H
2
#define INCLUDE_LEXER_H
2
#define INCLUDE_LEXER_H
3
#print "[include ]\n"
-
 
4
 
3
 
5
#ifndef INCLUDE_STRING_H
4
#ifndef INCLUDE_STRING_H
6
#include "../lib/strings.h"
5
#include "../lib/strings.h"
7
#endif
6
#endif
8
 
7
 
9
#ifndef INCLUDE_MEM_H
8
#ifndef INCLUDE_MEM_H
10
#include "../lib/mem.h"
9
#include "../lib/mem.h"
11
#endif
10
#endif
12
/** Splits text into tokens
 *  Author  :Pavel Yakovlev
 *  Homepage:https://vk.com/pavelyakov39
 *  Ver.    : 1.51
 */
17
 
16
 
18
/** Example:
 *  lexer lex;
 *  lex.load("var a=123;");
 *  lex.next();
 *  lex.token; //TOKEN == 'var'
 *  lex.type ; //TYPE  == LEX_VAR
 *
 *  lex.next();
 *  lex.token; //TOKEN == 'a'
 *  lex.type ; //TYPE  == LEX_VAR
 *
 *  lex.next();
 *  lex.token; //TOKEN == '='
 *  lex.type ; //TYPE  == LEX_IND
 *
 *  lex.next();
 *  lex.token; //TOKEN == '123'
 *  lex.type ; //TYPE  == LEX_DEC
 *
 *  lex.next();
 *  lex.token; //TOKEN == ';'
 *  lex.type ; //TYPE  == LEX_IND
 *
 *  lex.next();
 *  lex.token; //TOKEN == ''
 *  lex.type ; //TYPE  == LEX_END
 */
45
 
44
 
46
// Token classes stored in lexer.type by lexer::next().
#define LEX_NUL 0 // any character that matches no other class
#define LEX_END 1 // end of the input text
#define LEX_STR 2 // quoted string ("..." or '...')
#define LEX_DEC 3 // run of digits and dots
#define LEX_VAR 4 // identifier
#define LEX_FNC 5 // identifier whose next non-blank character is '('
#define LEX_IND 6 // operator or punctuation
53
 
52
 
54
// Scratch buffer in which lexer::next() assembles the current token;
// lexer.token points here after every call that returns a real token.
:char const_token_lexer[1024];

/** Tokenizer state plus the operations working on it. */
:struct lexer
{
	byte cmd;             // zero: next() translates \n, \r, \t inside strings; non-zero: the backslash is kept
	dword token;          // address of the current token text
	dword text;           // address of the not yet consumed part of the input
	dword mem_list;       // not referenced by the code in this file
	dword count;          // number of entries stored by list()
	dword buffer_loading; // table built by list(): 5 bytes per entry (dword text address + byte type)
	dword str_buffer;     // freed by next() when non-zero, cleared by load()
	byte type;            // LEX_* class of the current token
	char quote;           // quote character that opened the last string token
	signed length;        // number of input characters covered by the current token

	dword next(void);
	dword back(void);
	dword list(void);
	void free(void);
	dword position(dword __);
	void load(dword _text);
	void expected(dword _text);
};
71
/** Unimplemented placeholder; performs no work.
 *  NOTE(review): the lexer struct declares a back() method, while this
 *  definition is a global function - presumably it was meant to be
 *  lexer::back; confirm before renaming.
 *  Returns 0 so callers never receive an undefined value.
 */
:dword back(void)
{
	return 0;
}
75
/** Tokenizes the rest of the input into a table and returns its address.
 *  Each entry takes 5 bytes: a dword holding the address of a strndup()
 *  copy of the token, followed by a byte holding the token type.
 *  The final entry is always the LEX_END token. `count` receives the
 *  number of entries and `buffer_loading` the table address; release
 *  both with lexer::free().
 */
:dword lexer::list(void)
{
	dword buf_loop,pos,used;
	buf_loop  = 5000; // initial room for 1000 five-byte entries
	count = 0;
	// A LEX_END left over from an earlier run must not suppress the scan.
	type = LEX_NUL;
	buffer_loading = malloc(buf_loop);
	pos = buffer_loading;
	while(type!=LEX_END)
	{
		// Grow before writing so an entry never crosses the end of the table.
		used = pos-buffer_loading;
		if(used+5>buf_loop)
		{
			buf_loop*=2;
			buffer_loading = realloc(buffer_loading,buf_loop);
			// realloc may move the table: rebase the write cursor.
			pos = buffer_loading+used;
		}
		next();
		DSDWORD[pos] = strndup(token,length);
		pos+=4;
		DSBYTE [pos] = type;
		pos++;
		count++;
	}
	return buffer_loading;
}
100
/** Releases everything allocated by lexer::list(): every stored token
 *  copy and then the table itself. `count` and `buffer_loading` are
 *  reset, so calling free() again is harmless.
 *  `token` and `type` are left untouched; a `token` obtained earlier
 *  through position() must not be used afterwards.
 */
:void lexer::free(void)
{
	dword z,item;
	z = count;
	while(z)
	{
		z--;
		// Entry z starts at z*5; its first dword is the token copy.
		item = z*5;
		item += buffer_loading;
		item = DSDWORD[item];
		::free(item);
	}
	count = 0;
	if(buffer_loading)
	{
		::free(buffer_loading);
		buffer_loading = 0; // prevents a second release of the same table
	}
}
113
/** Selects entry `__` of the token table as the current token.
 *  The table is built with list() on first use. An index past the end
 *  is clamped to the last entry. Sets `token` and `type` from the entry
 *  and returns `token`; returns 0 without touching them when the table
 *  holds no entries.
 */
:dword lexer::position(dword __)
{
	dword pos1;
	if(!count)list();
	if(!count)return 0; // nothing to index; count-1 would wrap around
	if(__>=count)__=count-1;
	pos1 = __*5;
	pos1 += buffer_loading;
	token = DSDWORD[pos1];
	pos1+=4; // the type byte follows the 4-byte token address
	type = DSBYTE[pos1];
	return token;
}
126
/** Passes the message `_text` to notify() and then calls ExitProcess(). */
:void lexer::expected(dword _text)
{
	notify(_text);
	ExitProcess();
}
131
 
130
 
132
/** Points the lexer at a new zero-terminated input `_text`.
 *  Resets the token-table counter and the string buffer pointer.
 *  Also clears `type`, so a LEX_END left over from the previous text
 *  does not make the new text look already finished.
 *  A table built by list() is not released here; call free() first.
 */
:void lexer::load(dword _text)
{
	text = _text;
	count = 0;
	str_buffer = 0;
	type = LEX_NUL;
}
138
 
137
 
139
/** Reads the next token from `text`.
 *  Blanks (tab, LF, CR, space), line comments and block comments are
 *  skipped. The token text is assembled in const_token_lexer and
 *  zero-terminated; text beyond 1023 characters is consumed but not
 *  stored. On return:
 *    token  - address of the token text ("" at the end of input)
 *    type   - one of the LEX_* classes
 *    length - number of input characters the token occupied
 *    text   - advanced past the token
 *  String tokens are stored without their quotes. With `cmd` zero the
 *  escapes \n, \r and \t are translated and any other escaped character
 *  is stored as is; with `cmd` non-zero the backslash is stored too.
 *  Returns `token`.
 */
:dword lexer::next(void)
{
	char s;
	dword pos,in,buf_end;
	pos = #const_token_lexer;
	// Last position a character may be stored at; the byte after it
	// is reserved for the terminating zero.
	buf_end = pos + 1023;
	in = text;
	if(str_buffer)
	{
		::free(str_buffer);
		str_buffer = 0; // never release the same buffer twice
	}
	NEXT_TOKEN:
	// Re-anchor the token start so skipped comments do not count in `length`.
	text = in;
	length = 0;
	loop()
	{
		s = DSBYTE[in];
		if(s!=9)&&(s!=10)&&(s!=13)&&(s!=32)break;
		in++;
		text++;
	}
	
	if(s==0){type=LEX_END;DSBYTE[pos]=0;token="";return token;}
	
	if(s=='/')
	{
		// Look ahead without consuming: a lone '/' is an ordinary operator.
		s = DSBYTE[in+1];
		
		// Line comment
		if(s=='/')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				if(s==10)||(s==13)||(s==0)goto NEXT_TOKEN;
			}
		}
		// Block comment
		if(s=='*')
		{
			in++;
			loop()
			{
				in++;
				s = DSBYTE[in];
				// Unterminated comment: stop at the end of the input.
				if(!s)goto NEXT_TOKEN;
				if(s=='*')if(DSBYTE[in+1]=='/')
				{
					in+=2;
					goto NEXT_TOKEN;
				}
			}
		}
		s = '/';
	}
	
	if (strchr("=<>!~&|#",s))
	{
		loop()
		{
			// Stop at the end of input however strchr treats a zero byte.
			if(!s)break;
			if (!strchr("=<>!~&|#",s)) break;
			
			if(pos<buf_end){DSBYTE[pos] = s;pos++;}
			
			in++;
			s = DSBYTE[in];
		}
		type = LEX_IND;
	}
	else if (strchr(";(,)}{[]+-.*/:^%?$@№`",s))
	{
		DSBYTE[pos] = s;
		pos++;
		type = LEX_IND;
		in++;
	}
	else if(s>='0')&&(s<='9')
	{
		loop()
		{
			if(s<'0')||(s>'9')if(s!='.')break;
			
			if(pos<buf_end){DSBYTE[pos] = s;pos++;}
			
			in++;
			s = DSBYTE[in];
		}
		type = LEX_DEC;
	}
	else if(s>='A')&&(s<='z')&&(!strchr("[]\\^`",s))
	{
		loop()
		{
			if(s<'A')||(s>'z')if(s<'0')||(s>'9')break;
			if(strchr("[]\\^`",s))break;
			
			if(pos<buf_end){DSBYTE[pos] = s;pos++;}
			
			in++;
			s = DSBYTE[in];
		}
		
		// Skip blanks after the name to see whether '(' follows.
		// `text` moves along so that `length` covers the name only.
		loop()
		{
			s = DSBYTE[in];
			if(s!=9)if(s!=10)if(s!=13)if(s!=32)break;
			in++;
			text++;
		}
		type = LEX_VAR;
		if(s=='(')type = LEX_FNC;
	}
	else if(s=='"')||(s=='\'')
	{
		quote = s;
		in++;
		s = DSBYTE[in];
		loop()
		{
			if(s=='\\')
			{
				in++;
				s = DSBYTE[in];
				if(!s){type = LEX_STR;goto GOTO_LEX_END;}
				if(!cmd)switch(s)
				{
					case 'n':s='\n';break;
					case 'r':s='\r';break;
					case 't':s='\t';break;
				}
				else {
					if(pos<buf_end){DSBYTE[pos] = '\\';pos++;}
				}
				goto LEX_STEP_1;
			}
			if(!s){type = LEX_STR;goto GOTO_LEX_END;}
			else if(s==quote)break;
			LEX_STEP_1:
			if(pos<buf_end){DSBYTE[pos] = s;pos++;}
			in++;
			s = DSBYTE[in];
		}
		in++; // step over the closing quote
		type = LEX_STR;
	}
	else {
		in++;
		type = LEX_NUL;
		DSBYTE[pos] = s;
		pos++;
	}
	GOTO_LEX_END:
	length = in-text;
	text = in;
	DSBYTE[pos] = 0;
	token = #const_token_lexer;
	return token;
}
332
 
331
 
333
#endif
332
#endif
334
>
333
>
335
>
334
>
336
>
335
>