Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
#include "fitz.h"
2
#include "mupdf.h"
3
 
4
/*
 * CMap parser
 */
7
 
8
enum
9
{
10
	TOK_USECMAP = PDF_NUM_TOKENS,
11
	TOK_BEGIN_CODESPACE_RANGE,
12
	TOK_END_CODESPACE_RANGE,
13
	TOK_BEGIN_BF_CHAR,
14
	TOK_END_BF_CHAR,
15
	TOK_BEGIN_BF_RANGE,
16
	TOK_END_BF_RANGE,
17
	TOK_BEGIN_CID_CHAR,
18
	TOK_END_CID_CHAR,
19
	TOK_BEGIN_CID_RANGE,
20
	TOK_END_CID_RANGE,
21
	TOK_END_CMAP
22
};
23
 
24
static int
25
pdf_cmap_token_from_keyword(char *key)
26
{
27
	if (!strcmp(key, "usecmap")) return TOK_USECMAP;
28
	if (!strcmp(key, "begincodespacerange")) return TOK_BEGIN_CODESPACE_RANGE;
29
	if (!strcmp(key, "endcodespacerange")) return TOK_END_CODESPACE_RANGE;
30
	if (!strcmp(key, "beginbfchar")) return TOK_BEGIN_BF_CHAR;
31
	if (!strcmp(key, "endbfchar")) return TOK_END_BF_CHAR;
32
	if (!strcmp(key, "beginbfrange")) return TOK_BEGIN_BF_RANGE;
33
	if (!strcmp(key, "endbfrange")) return TOK_END_BF_RANGE;
34
	if (!strcmp(key, "begincidchar")) return TOK_BEGIN_CID_CHAR;
35
	if (!strcmp(key, "endcidchar")) return TOK_END_CID_CHAR;
36
	if (!strcmp(key, "begincidrange")) return TOK_BEGIN_CID_RANGE;
37
	if (!strcmp(key, "endcidrange")) return TOK_END_CID_RANGE;
38
	if (!strcmp(key, "endcmap")) return TOK_END_CMAP;
39
	return PDF_TOK_KEYWORD;
40
}
41
 
42
/*
 * Interpret the first 'len' bytes of 'buf' as a big-endian number and
 * return it as an int.
 *
 * The value is accumulated in an unsigned int so that a set high bit or
 * a string longer than the width of int cannot trigger the undefined
 * behaviour of left-shifting a signed value; excess leading bytes are
 * simply shifted out. A zero or negative 'len' yields 0 instead of
 * reading past the buffer.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int a = 0;

	while (len-- > 0)
		a = (a << 8) | *p++;

	return (int)a;
}
50
 
51
static fz_error
52
pdf_lex_cmap(int *tok, fz_stream *file, char *buf, int n, int *sl)
53
{
54
	fz_error error;
55
 
56
	error = pdf_lex(tok, file, buf, n, sl);
57
	if (error)
58
		return fz_rethrow(error, "cannot parse cmap token");
59
 
60
	if (*tok == PDF_TOK_KEYWORD)
61
		*tok = pdf_cmap_token_from_keyword(buf);
62
 
63
	return fz_okay;
64
}
65
 
66
static fz_error
67
pdf_parse_cmap_name(pdf_cmap *cmap, fz_stream *file)
68
{
69
	fz_error error;
70
	char buf[256];
71
	int tok;
72
	int len;
73
 
74
	error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
75
	if (error)
76
		return fz_rethrow(error, "syntaxerror in cmap");
77
 
78
	if (tok == PDF_TOK_NAME)
79
		fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
80
	else
81
		fz_warn("expected name after CMapName in cmap");
82
 
83
	return fz_okay;
84
}
85
 
86
static fz_error
87
pdf_parse_wmode(pdf_cmap *cmap, fz_stream *file)
88
{
89
	fz_error error;
90
	char buf[256];
91
	int tok;
92
	int len;
93
 
94
	error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
95
	if (error)
96
		return fz_rethrow(error, "syntaxerror in cmap");
97
 
98
	if (tok == PDF_TOK_INT)
99
		pdf_set_wmode(cmap, atoi(buf));
100
	else
101
		fz_warn("expected integer after WMode in cmap");
102
 
103
	return fz_okay;
104
}
105
 
106
static fz_error
107
pdf_parse_codespace_range(pdf_cmap *cmap, fz_stream *file)
108
{
109
	fz_error error;
110
	char buf[256];
111
	int tok;
112
	int len;
113
	int lo, hi;
114
 
115
	while (1)
116
	{
117
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
118
		if (error)
119
			return fz_rethrow(error, "syntaxerror in cmap");
120
 
121
		if (tok == TOK_END_CODESPACE_RANGE)
122
			return fz_okay;
123
 
124
		else if (tok == PDF_TOK_STRING)
125
		{
126
			lo = pdf_code_from_string(buf, len);
127
			error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
128
			if (error)
129
				return fz_rethrow(error, "syntaxerror in cmap");
130
			if (tok == PDF_TOK_STRING)
131
			{
132
				hi = pdf_code_from_string(buf, len);
133
				pdf_add_codespace(cmap, lo, hi, len);
134
			}
135
			else break;
136
		}
137
 
138
		else break;
139
	}
140
 
141
	return fz_throw("expected string or endcodespacerange");
142
}
143
 
144
static fz_error
145
pdf_parse_cid_range(pdf_cmap *cmap, fz_stream *file)
146
{
147
	fz_error error;
148
	char buf[256];
149
	int tok;
150
	int len;
151
	int lo, hi, dst;
152
 
153
	while (1)
154
	{
155
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
156
		if (error)
157
			return fz_rethrow(error, "syntaxerror in cmap");
158
 
159
		if (tok == TOK_END_CID_RANGE)
160
			return fz_okay;
161
 
162
		else if (tok != PDF_TOK_STRING)
163
			return fz_throw("expected string or endcidrange");
164
 
165
		lo = pdf_code_from_string(buf, len);
166
 
167
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
168
		if (error)
169
			return fz_rethrow(error, "syntaxerror in cmap");
170
		if (tok != PDF_TOK_STRING)
171
			return fz_throw("expected string");
172
 
173
		hi = pdf_code_from_string(buf, len);
174
 
175
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
176
		if (error)
177
			return fz_rethrow(error, "syntaxerror in cmap");
178
		if (tok != PDF_TOK_INT)
179
			return fz_throw("expected integer");
180
 
181
		dst = atoi(buf);
182
 
183
		pdf_map_range_to_range(cmap, lo, hi, dst);
184
	}
185
}
186
 
187
static fz_error
188
pdf_parse_cid_char(pdf_cmap *cmap, fz_stream *file)
189
{
190
	fz_error error;
191
	char buf[256];
192
	int tok;
193
	int len;
194
	int src, dst;
195
 
196
	while (1)
197
	{
198
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
199
		if (error)
200
			return fz_rethrow(error, "syntaxerror in cmap");
201
 
202
		if (tok == TOK_END_CID_CHAR)
203
			return fz_okay;
204
 
205
		else if (tok != PDF_TOK_STRING)
206
			return fz_throw("expected string or endcidchar");
207
 
208
		src = pdf_code_from_string(buf, len);
209
 
210
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
211
		if (error)
212
			return fz_rethrow(error, "syntaxerror in cmap");
213
		if (tok != PDF_TOK_INT)
214
			return fz_throw("expected integer");
215
 
216
		dst = atoi(buf);
217
 
218
		pdf_map_range_to_range(cmap, src, src, dst);
219
	}
220
}
221
 
222
static fz_error
223
pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
224
{
225
	fz_error error;
226
	char buf[256];
227
	int tok;
228
	int len;
229
	int dst[256];
230
	int i;
231
 
232
	while (1)
233
	{
234
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
235
		if (error)
236
			return fz_rethrow(error, "syntaxerror in cmap");
237
 
238
		if (tok == PDF_TOK_CLOSE_ARRAY)
239
			return fz_okay;
240
 
241
		/* Note: does not handle [ /Name /Name ... ] */
242
		else if (tok != PDF_TOK_STRING)
243
			return fz_throw("expected string or ]");
244
 
245
		if (len / 2)
246
		{
247
			for (i = 0; i < len / 2; i++)
248
				dst[i] = pdf_code_from_string(buf + i * 2, 2);
249
 
250
			pdf_map_one_to_many(cmap, lo, dst, len / 2);
251
		}
252
 
253
		lo ++;
254
	}
255
}
256
 
257
static fz_error
258
pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
259
{
260
	fz_error error;
261
	char buf[256];
262
	int tok;
263
	int len;
264
	int lo, hi, dst;
265
 
266
	while (1)
267
	{
268
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
269
		if (error)
270
			return fz_rethrow(error, "syntaxerror in cmap");
271
 
272
		if (tok == TOK_END_BF_RANGE)
273
			return fz_okay;
274
 
275
		else if (tok != PDF_TOK_STRING)
276
			return fz_throw("expected string or endbfrange");
277
 
278
		lo = pdf_code_from_string(buf, len);
279
 
280
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
281
		if (error)
282
			return fz_rethrow(error, "syntaxerror in cmap");
283
		if (tok != PDF_TOK_STRING)
284
			return fz_throw("expected string");
285
 
286
		hi = pdf_code_from_string(buf, len);
287
 
288
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
289
		if (error)
290
			return fz_rethrow(error, "syntaxerror in cmap");
291
 
292
		if (tok == PDF_TOK_STRING)
293
		{
294
			if (len == 2)
295
			{
296
				dst = pdf_code_from_string(buf, len);
297
				pdf_map_range_to_range(cmap, lo, hi, dst);
298
			}
299
			else
300
			{
301
				int dststr[256];
302
				int i;
303
 
304
				if (len / 2)
305
				{
306
					for (i = 0; i < len / 2; i++)
307
						dststr[i] = pdf_code_from_string(buf + i * 2, 2);
308
 
309
					while (lo <= hi)
310
					{
311
						dststr[i-1] ++;
312
						pdf_map_one_to_many(cmap, lo, dststr, i);
313
						lo ++;
314
					}
315
				}
316
			}
317
		}
318
 
319
		else if (tok == PDF_TOK_OPEN_ARRAY)
320
		{
321
			error = pdf_parse_bf_range_array(cmap, file, lo, hi);
322
			if (error)
323
				return fz_rethrow(error, "cannot map bfrange");
324
		}
325
 
326
		else
327
		{
328
			return fz_throw("expected string or array or endbfrange");
329
		}
330
	}
331
}
332
 
333
static fz_error
334
pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file)
335
{
336
	fz_error error;
337
	char buf[256];
338
	int tok;
339
	int len;
340
	int dst[256];
341
	int src;
342
	int i;
343
 
344
	while (1)
345
	{
346
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
347
		if (error)
348
			return fz_rethrow(error, "syntaxerror in cmap");
349
 
350
		if (tok == TOK_END_BF_CHAR)
351
			return fz_okay;
352
 
353
		else if (tok != PDF_TOK_STRING)
354
			return fz_throw("expected string or endbfchar");
355
 
356
		src = pdf_code_from_string(buf, len);
357
 
358
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
359
		if (error)
360
			return fz_rethrow(error, "syntaxerror in cmap");
361
		/* Note: does not handle /dstName */
362
		if (tok != PDF_TOK_STRING)
363
			return fz_throw("expected string");
364
 
365
		if (len / 2)
366
		{
367
			for (i = 0; i < len / 2; i++)
368
				dst[i] = pdf_code_from_string(buf + i * 2, 2);
369
			pdf_map_one_to_many(cmap, src, dst, i);
370
		}
371
	}
372
}
373
 
374
fz_error
375
pdf_parse_cmap(pdf_cmap **cmapp, fz_stream *file)
376
{
377
	fz_error error;
378
	pdf_cmap *cmap;
379
	char key[64];
380
	char buf[256];
381
	int tok;
382
	int len;
383
 
384
	cmap = pdf_new_cmap();
385
 
386
	strcpy(key, ".notdef");
387
 
388
	while (1)
389
	{
390
		error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
391
		if (error)
392
		{
393
			error = fz_rethrow(error, "syntaxerror in cmap");
394
			goto cleanup;
395
		}
396
 
397
		if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
398
			break;
399
 
400
		else if (tok == PDF_TOK_NAME)
401
		{
402
			if (!strcmp(buf, "CMapName"))
403
			{
404
				error = pdf_parse_cmap_name(cmap, file);
405
				if (error)
406
				{
407
					error = fz_rethrow(error, "syntaxerror in cmap after CMapName");
408
					goto cleanup;
409
				}
410
			}
411
			else if (!strcmp(buf, "WMode"))
412
			{
413
				error = pdf_parse_wmode(cmap, file);
414
				if (error)
415
				{
416
					error = fz_rethrow(error, "syntaxerror in cmap after WMode");
417
					goto cleanup;
418
				}
419
			}
420
			else
421
				fz_strlcpy(key, buf, sizeof key);
422
		}
423
 
424
		else if (tok == TOK_USECMAP)
425
		{
426
			fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
427
		}
428
 
429
		else if (tok == TOK_BEGIN_CODESPACE_RANGE)
430
		{
431
			error = pdf_parse_codespace_range(cmap, file);
432
			if (error)
433
			{
434
				error = fz_rethrow(error, "syntaxerror in cmap codespacerange");
435
				goto cleanup;
436
			}
437
		}
438
 
439
		else if (tok == TOK_BEGIN_BF_CHAR)
440
		{
441
			error = pdf_parse_bf_char(cmap, file);
442
			if (error)
443
			{
444
				error = fz_rethrow(error, "syntaxerror in cmap bfchar");
445
				goto cleanup;
446
			}
447
		}
448
 
449
		else if (tok == TOK_BEGIN_CID_CHAR)
450
		{
451
			error = pdf_parse_cid_char(cmap, file);
452
			if (error)
453
			{
454
				error = fz_rethrow(error, "syntaxerror in cmap cidchar");
455
				goto cleanup;
456
			}
457
		}
458
 
459
		else if (tok == TOK_BEGIN_BF_RANGE)
460
		{
461
			error = pdf_parse_bf_range(cmap, file);
462
			if (error)
463
			{
464
				error = fz_rethrow(error, "syntaxerror in cmap bfrange");
465
				goto cleanup;
466
			}
467
		}
468
 
469
		else if (tok == TOK_BEGIN_CID_RANGE)
470
		{
471
			error = pdf_parse_cid_range(cmap, file);
472
			if (error)
473
			{
474
				error = fz_rethrow(error, "syntaxerror in cmap cidrange");
475
				goto cleanup;
476
			}
477
		}
478
 
479
		/* ignore everything else */
480
	}
481
 
482
	pdf_sort_cmap(cmap);
483
 
484
	*cmapp = cmap;
485
	return fz_okay;
486
 
487
cleanup:
488
	pdf_drop_cmap(cmap);
489
	return error; /* already rethrown */
490
}