/*
 * Retrieved from the Kolibri OS Subversion repository web view,
 * revision 4680 (author: right-hear).
 */
#include "fitz.h"
2
#include "mupdf.h"
3
 
4
/*
 * Character-class macros for use as switch labels.
 *
 * Each one expands to a list of character constants glued together with
 * "case", omitting the leading "case" and the trailing ':' so that it is
 * written as "case IS_WHITE:" inside a switch statement.
 */

/* Bytes that can start a number: a sign, a decimal point or a digit. */
#define IS_NUMBER \
	'+':case'-':case'.':case'0':case'1':case'2':case'3':\
	case'4':case'5':case'6':case'7':case'8':case'9'
/* White space: NUL, TAB, LF, FF, CR and SPACE. */
#define IS_WHITE \
	'\000':case'\011':case'\012':case'\014':case'\015':case'\040'
/* Hexadecimal digits, upper and lower case. */
#define IS_HEX \
	'0':case'1':case'2':case'3':case'4':case'5':case'6':\
	case'7':case'8':case'9':case'A':case'B':case'C':\
	case'D':case'E':case'F':case'a':case'b':case'c':\
	case'd':case'e':case'f'
/* Delimiter bytes; together with white space they end a name or keyword. */
#define IS_DELIM \
	'(':case')':case'<':case'>':case'[':case']':case'{':\
	case'}':case'/':case'%'

/* Contiguous character ranges, spelled out one label at a time. */
#define RANGE_0_9 \
	'0':case'1':case'2':case'3':case'4':case'5':\
	case'6':case'7':case'8':case'9'
#define RANGE_a_f \
	'a':case'b':case'c':case'd':case'e':case'f'
#define RANGE_A_F \
	'A':case'B':case'C':case'D':case'E':case'F'
25
 
26
/*
 * Return 1 if ch is one of the six white-space bytes this lexer recognises
 * (NUL, TAB, LF, FF, CR, SPACE) and 0 otherwise. Vertical tab (013) is
 * deliberately not in the set, and neither is EOF.
 */
static inline int iswhite(int ch)
{
	return
		ch == '\000' ||
		ch == '\011' ||
		ch == '\012' ||
		ch == '\014' ||
		ch == '\015' ||
		ch == '\040';
}
36
 
37
/*
 * Convert one hexadecimal digit character to its value (0..15).
 * Both letter cases are accepted. Any other input yields 0, so callers that
 * need to tell '0' from garbage must validate the character first.
 */
static inline int unhex(int ch)
{
	if (ch >= '0' && ch <= '9') return ch - '0';
	if (ch >= 'A' && ch <= 'F') return ch - 'A' + 0xA;
	if (ch >= 'a' && ch <= 'f') return ch - 'a' + 0xA;
	return 0;
}
44
 
45
static void
46
lex_white(fz_stream *f)
47
{
48
	int c;
49
	do {
50
		c = fz_read_byte(f);
51
	} while ((c <= 32) && (iswhite(c)));
52
	if (c != EOF)
53
		fz_unread_byte(f);
54
}
55
 
56
static void
57
lex_comment(fz_stream *f)
58
{
59
	int c;
60
	do {
61
		c = fz_read_byte(f);
62
	} while ((c != '\012') && (c != '\015') && (c != EOF));
63
}
64
 
65
static int
66
lex_number(fz_stream *f, char *s, int n, int *tok)
67
{
68
	char *buf = s;
69
	*tok = PDF_TOK_INT;
70
 
71
	/* Initially we might have +, -, . or a digit */
72
	if (n > 1)
73
	{
74
		int c = fz_read_byte(f);
75
		switch (c)
76
		{
77
		case '.':
78
			*tok = PDF_TOK_REAL;
79
			*s++ = c;
80
			n--;
81
			goto loop_after_dot;
82
		case '+':
83
		case '-':
84
		case RANGE_0_9:
85
			*s++ = c;
86
			n--;
87
			goto loop_after_sign;
88
		default:
89
			fz_unread_byte(f);
90
			goto end;
91
		case EOF:
92
			goto end;
93
		}
94
	}
95
 
96
	/* We can't accept a sign from here on in, just . or a digit */
97
loop_after_sign:
98
	while (n > 1)
99
	{
100
		int c = fz_read_byte(f);
101
		switch (c)
102
		{
103
		case '.':
104
			*tok = PDF_TOK_REAL;
105
			*s++ = c;
106
			n--;
107
			goto loop_after_dot;
108
		case RANGE_0_9:
109
			*s++ = c;
110
			break;
111
		default:
112
			fz_unread_byte(f);
113
			goto end;
114
		case EOF:
115
			goto end;
116
		}
117
		n--;
118
	}
119
 
120
	/* In here, we've seen a dot, so can accept just digits */
121
loop_after_dot:
122
	while (n > 1)
123
	{
124
		int c = fz_read_byte(f);
125
		switch (c)
126
		{
127
		case RANGE_0_9:
128
			*s++ = c;
129
			break;
130
		default:
131
			fz_unread_byte(f);
132
			goto end;
133
		case EOF:
134
			goto end;
135
		}
136
		n--;
137
	}
138
 
139
end:
140
	*s = '\0';
141
	return s-buf;
142
}
143
 
144
static void
145
lex_name(fz_stream *f, char *s, int n)
146
{
147
	while (n > 1)
148
	{
149
		int c = fz_read_byte(f);
150
		switch (c)
151
		{
152
		case IS_WHITE:
153
		case IS_DELIM:
154
			fz_unread_byte(f);
155
			goto end;
156
		case EOF:
157
			goto end;
158
		case '#':
159
		{
160
			int d;
161
			c = fz_read_byte(f);
162
			switch (c)
163
			{
164
			case RANGE_0_9:
165
				d = (c - '0') << 4;
166
				break;
167
			case RANGE_a_f:
168
				d = (c - 'a' + 10) << 4;
169
				break;
170
			case RANGE_A_F:
171
				d = (c - 'A' + 10) << 4;
172
				break;
173
			default:
174
				fz_unread_byte(f);
175
				/* fallthrough */
176
			case EOF:
177
				goto end;
178
			}
179
			c = fz_read_byte(f);
180
			switch (c)
181
			{
182
			case RANGE_0_9:
183
				c -= '0';
184
				break;
185
			case RANGE_a_f:
186
				c -= 'a' - 10;
187
				break;
188
			case RANGE_A_F:
189
				c -= 'A' - 10;
190
				break;
191
			default:
192
				fz_unread_byte(f);
193
				/* fallthrough */
194
			case EOF:
195
				*s++ = d;
196
				n--;
197
				goto end;
198
			}
199
			*s++ = d + c;
200
			n--;
201
			break;
202
		}
203
		default:
204
			*s++ = c;
205
			n--;
206
			break;
207
		}
208
	}
209
end:
210
	*s = '\0';
211
}
212
 
213
static int
214
lex_string(fz_stream *f, char *buf, int n)
215
{
216
	char *s = buf;
217
	char *e = buf + n;
218
	int bal = 1;
219
	int oct;
220
	int c;
221
 
222
	while (s < e)
223
	{
224
		c = fz_read_byte(f);
225
		switch (c)
226
		{
227
		case EOF:
228
			goto end;
229
		case '(':
230
			bal++;
231
			*s++ = c;
232
			break;
233
		case ')':
234
			bal --;
235
			if (bal == 0)
236
				goto end;
237
			*s++ = c;
238
			break;
239
		case '\\':
240
			c = fz_read_byte(f);
241
			switch (c)
242
			{
243
			case EOF:
244
				goto end;
245
			case 'n':
246
				*s++ = '\n';
247
				break;
248
			case 'r':
249
				*s++ = '\r';
250
				break;
251
			case 't':
252
				*s++ = '\t';
253
				break;
254
			case 'b':
255
				*s++ = '\b';
256
				break;
257
			case 'f':
258
				*s++ = '\f';
259
				break;
260
			case '(':
261
				*s++ = '(';
262
				break;
263
			case ')':
264
				*s++ = ')';
265
				break;
266
			case '\\':
267
				*s++ = '\\';
268
				break;
269
			case RANGE_0_9:
270
				oct = c - '0';
271
				c = fz_read_byte(f);
272
				if (c >= '0' && c <= '9')
273
				{
274
					oct = oct * 8 + (c - '0');
275
					c = fz_read_byte(f);
276
					if (c >= '0' && c <= '9')
277
						oct = oct * 8 + (c - '0');
278
					else if (c != EOF)
279
						fz_unread_byte(f);
280
				}
281
				else if (c != EOF)
282
					fz_unread_byte(f);
283
				*s++ = oct;
284
				break;
285
			case '\n':
286
				break;
287
			case '\r':
288
				c = fz_read_byte(f);
289
				if ((c != '\n') && (c != EOF))
290
					fz_unread_byte(f);
291
				break;
292
			default:
293
				*s++ = c;
294
			}
295
			break;
296
		default:
297
			*s++ = c;
298
			break;
299
		}
300
	}
301
end:
302
	return s - buf;
303
}
304
 
305
static int
306
lex_hex_string(fz_stream *f, char *buf, int n)
307
{
308
	char *s = buf;
309
	char *e = buf + n;
310
	int a = 0, x = 0;
311
	int c;
312
 
313
	while (s < e)
314
	{
315
		c = fz_read_byte(f);
316
		switch (c)
317
		{
318
		case IS_WHITE:
319
			break;
320
		case IS_HEX:
321
			if (x)
322
			{
323
				*s++ = a * 16 + unhex(c);
324
				x = !x;
325
			}
326
			else
327
			{
328
				a = unhex(c);
329
				x = !x;
330
			}
331
			break;
332
		case '>':
333
		case EOF:
334
			goto end;
335
		default:
336
			fz_warn("ignoring invalid character in hex string: '%c'", c);
337
		}
338
	}
339
end:
340
	return s - buf;
341
}
342
 
343
static int
344
pdf_token_from_keyword(char *key)
345
{
346
	switch (*key)
347
	{
348
	case 'R':
349
		if (!strcmp(key, "R")) return PDF_TOK_R;
350
		break;
351
	case 't':
352
		if (!strcmp(key, "true")) return PDF_TOK_TRUE;
353
		if (!strcmp(key, "trailer")) return PDF_TOK_TRAILER;
354
		break;
355
	case 'f':
356
		if (!strcmp(key, "false")) return PDF_TOK_FALSE;
357
		break;
358
	case 'n':
359
		if (!strcmp(key, "null")) return PDF_TOK_NULL;
360
		break;
361
	case 'o':
362
		if (!strcmp(key, "obj")) return PDF_TOK_OBJ;
363
		break;
364
	case 'e':
365
		if (!strcmp(key, "endobj")) return PDF_TOK_ENDOBJ;
366
		if (!strcmp(key, "endstream")) return PDF_TOK_ENDSTREAM;
367
		break;
368
	case 's':
369
		if (!strcmp(key, "stream")) return PDF_TOK_STREAM;
370
		if (!strcmp(key, "startxref")) return PDF_TOK_STARTXREF;
371
		break;
372
	case 'x':
373
		if (!strcmp(key, "xref")) return PDF_TOK_XREF;
374
		break;
375
	default:
376
		break;
377
	}
378
 
379
	return PDF_TOK_KEYWORD;
380
}
381
 
382
fz_error
383
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
384
{
385
	while (1)
386
	{
387
		int c = fz_read_byte(f);
388
		switch (c)
389
		{
390
		case EOF:
391
			*tok = PDF_TOK_EOF;
392
			return fz_okay;
393
		case IS_WHITE:
394
			lex_white(f);
395
			break;
396
		case '%':
397
			lex_comment(f);
398
			break;
399
		case '/':
400
			lex_name(f, buf, n);
401
			*sl = strlen(buf);
402
			*tok = PDF_TOK_NAME;
403
			return fz_okay;
404
		case '(':
405
			*sl = lex_string(f, buf, n);
406
			*tok = PDF_TOK_STRING;
407
			return fz_okay;
408
		case ')':
409
			*tok = PDF_TOK_ERROR;
410
			goto cleanuperror;
411
		case '<':
412
			c = fz_read_byte(f);
413
			if (c == '<')
414
			{
415
				*tok = PDF_TOK_OPEN_DICT;
416
			}
417
			else
418
			{
419
				fz_unread_byte(f);
420
				*sl = lex_hex_string(f, buf, n);
421
				*tok = PDF_TOK_STRING;
422
			}
423
			return fz_okay;
424
		case '>':
425
			c = fz_read_byte(f);
426
			if (c == '>')
427
			{
428
				*tok = PDF_TOK_CLOSE_DICT;
429
				return fz_okay;
430
			}
431
			*tok = PDF_TOK_ERROR;
432
			goto cleanuperror;
433
		case '[':
434
			*tok = PDF_TOK_OPEN_ARRAY;
435
			return fz_okay;
436
		case ']':
437
			*tok = PDF_TOK_CLOSE_ARRAY;
438
			return fz_okay;
439
		case '{':
440
			*tok = PDF_TOK_OPEN_BRACE;
441
			return fz_okay;
442
		case '}':
443
			*tok = PDF_TOK_CLOSE_BRACE;
444
			return fz_okay;
445
		case IS_NUMBER:
446
			fz_unread_byte(f);
447
			*sl = lex_number(f, buf, n, tok);
448
			return fz_okay;
449
		default: /* isregular: !isdelim && !iswhite && c != EOF */
450
			fz_unread_byte(f);
451
			lex_name(f, buf, n);
452
			*sl = strlen(buf);
453
			*tok = pdf_token_from_keyword(buf);
454
			return fz_okay;
455
		}
456
	}
457
 
458
cleanuperror:
459
	*tok = PDF_TOK_ERROR;
460
	return fz_throw("lexical error");
461
}