Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
#include "fitz.h"
2
#include "mupdf.h"
3
 
4
fz_rect
5
pdf_to_rect(fz_obj *array)
6
{
7
	fz_rect r;
8
	float a = fz_to_real(fz_array_get(array, 0));
9
	float b = fz_to_real(fz_array_get(array, 1));
10
	float c = fz_to_real(fz_array_get(array, 2));
11
	float d = fz_to_real(fz_array_get(array, 3));
12
	r.x0 = MIN(a, c);
13
	r.y0 = MIN(b, d);
14
	r.x1 = MAX(a, c);
15
	r.y1 = MAX(b, d);
16
	return r;
17
}
18
 
19
fz_matrix
20
pdf_to_matrix(fz_obj *array)
21
{
22
	fz_matrix m;
23
	m.a = fz_to_real(fz_array_get(array, 0));
24
	m.b = fz_to_real(fz_array_get(array, 1));
25
	m.c = fz_to_real(fz_array_get(array, 2));
26
	m.d = fz_to_real(fz_array_get(array, 3));
27
	m.e = fz_to_real(fz_array_get(array, 4));
28
	m.f = fz_to_real(fz_array_get(array, 5));
29
	return m;
30
}
31
 
32
/* Convert Unicode/PdfDocEncoding string into utf-8 */
33
char *
34
pdf_to_utf8(fz_obj *src)
35
{
36
	unsigned char *srcptr = (unsigned char *) fz_to_str_buf(src);
37
	char *dstptr, *dst;
38
	int srclen = fz_to_str_len(src);
39
	int dstlen = 0;
40
	int ucs;
41
	int i;
42
 
43
	if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255)
44
	{
45
		for (i = 2; i < srclen; i += 2)
46
		{
47
			ucs = (srcptr[i] << 8) | srcptr[i+1];
48
			dstlen += runelen(ucs);
49
		}
50
 
51
		dstptr = dst = fz_malloc(dstlen + 1);
52
 
53
		for (i = 2; i < srclen; i += 2)
54
		{
55
			ucs = (srcptr[i] << 8) | srcptr[i+1];
56
			dstptr += runetochar(dstptr, &ucs);
57
		}
58
	}
59
 
60
	else
61
	{
62
		for (i = 0; i < srclen; i++)
63
			dstlen += runelen(pdf_doc_encoding[srcptr[i]]);
64
 
65
		dstptr = dst = fz_malloc(dstlen + 1);
66
 
67
		for (i = 0; i < srclen; i++)
68
		{
69
			ucs = pdf_doc_encoding[srcptr[i]];
70
			dstptr += runetochar(dstptr, &ucs);
71
		}
72
	}
73
 
74
	*dstptr = '\0';
75
	return dst;
76
}
77
 
78
/* Convert Unicode/PdfDocEncoding string into ucs-2 */
79
unsigned short *
80
pdf_to_ucs2(fz_obj *src)
81
{
82
	unsigned char *srcptr = (unsigned char *) fz_to_str_buf(src);
83
	unsigned short *dstptr, *dst;
84
	int srclen = fz_to_str_len(src);
85
	int i;
86
 
87
	if (srclen > 2 && srcptr[0] == 254 && srcptr[1] == 255)
88
	{
89
		dstptr = dst = fz_calloc((srclen - 2) / 2 + 1, sizeof(short));
90
		for (i = 2; i < srclen; i += 2)
91
			*dstptr++ = (srcptr[i] << 8) | srcptr[i+1];
92
	}
93
 
94
	else
95
	{
96
		dstptr = dst = fz_calloc(srclen + 1, sizeof(short));
97
		for (i = 0; i < srclen; i++)
98
			*dstptr++ = pdf_doc_encoding[srcptr[i]];
99
	}
100
 
101
	*dstptr = '\0';
102
	return dst;
103
}
104
 
105
/* Convert UCS-2 string into PdfDocEncoding for authentication */
106
char *
107
pdf_from_ucs2(unsigned short *src)
108
{
109
	int i, j, len;
110
	char *docstr;
111
 
112
	len = 0;
113
	while (src[len])
114
		len++;
115
 
116
	docstr = fz_malloc(len + 1);
117
 
118
	for (i = 0; i < len; i++)
119
	{
120
		/* shortcut: check if the character has the same code point in both encodings */
121
		if (0 < src[i] && src[i] < 256 && pdf_doc_encoding[src[i]] == src[i]) {
122
			docstr[i] = src[i];
123
			continue;
124
		}
125
 
126
		/* search through pdf_docencoding for the character's code point */
127
		for (j = 0; j < 256; j++)
128
			if (pdf_doc_encoding[j] == src[i])
129
				break;
130
		docstr[i] = j;
131
 
132
		/* fail, if a character can't be encoded */
133
		if (!docstr[i])
134
		{
135
			fz_free(docstr);
136
			return NULL;
137
		}
138
	}
139
	docstr[len] = '\0';
140
 
141
	return docstr;
142
}
143
 
144
/*
 * Create a name object whose text is the UTF-8 conversion of the string
 * object 'src'. The intermediate UTF-8 buffer is freed before returning.
 */
fz_obj *
pdf_to_utf8_name(fz_obj *src)
{
	fz_obj *name;
	char *utf8;

	utf8 = pdf_to_utf8(src);
	name = fz_new_name(utf8);
	fz_free(utf8);

	return name;
}
152
 
153
fz_error
154
pdf_parse_array(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
155
{
156
	fz_error error = fz_okay;
157
	fz_obj *ary = NULL;
158
	fz_obj *obj = NULL;
159
	int a = 0, b = 0, n = 0;
160
	int tok;
161
	int len;
162
 
163
	ary = fz_new_array(4);
164
 
165
	while (1)
166
	{
167
		error = pdf_lex(&tok, file, buf, cap, &len);
168
		if (error)
169
		{
170
			fz_drop_obj(ary);
171
			return fz_rethrow(error, "cannot parse array");
172
		}
173
 
174
		if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
175
		{
176
			if (n > 0)
177
			{
178
				obj = fz_new_int(a);
179
				fz_array_push(ary, obj);
180
				fz_drop_obj(obj);
181
			}
182
			if (n > 1)
183
			{
184
				obj = fz_new_int(b);
185
				fz_array_push(ary, obj);
186
				fz_drop_obj(obj);
187
			}
188
			n = 0;
189
		}
190
 
191
		if (tok == PDF_TOK_INT && n == 2)
192
		{
193
			obj = fz_new_int(a);
194
			fz_array_push(ary, obj);
195
			fz_drop_obj(obj);
196
			a = b;
197
			n --;
198
		}
199
 
200
		switch (tok)
201
		{
202
		case PDF_TOK_CLOSE_ARRAY:
203
			*op = ary;
204
			return fz_okay;
205
 
206
		case PDF_TOK_INT:
207
			if (n == 0)
208
				a = atoi(buf);
209
			if (n == 1)
210
				b = atoi(buf);
211
			n ++;
212
			break;
213
 
214
		case PDF_TOK_R:
215
			if (n != 2)
216
			{
217
				fz_drop_obj(ary);
218
				return fz_throw("cannot parse indirect reference in array");
219
			}
220
			obj = fz_new_indirect(a, b, xref);
221
			fz_array_push(ary, obj);
222
			fz_drop_obj(obj);
223
			n = 0;
224
			break;
225
 
226
		case PDF_TOK_OPEN_ARRAY:
227
			error = pdf_parse_array(&obj, xref, file, buf, cap);
228
			if (error)
229
			{
230
				fz_drop_obj(ary);
231
				return fz_rethrow(error, "cannot parse array");
232
			}
233
			fz_array_push(ary, obj);
234
			fz_drop_obj(obj);
235
			break;
236
 
237
		case PDF_TOK_OPEN_DICT:
238
			error = pdf_parse_dict(&obj, xref, file, buf, cap);
239
			if (error)
240
			{
241
				fz_drop_obj(ary);
242
				return fz_rethrow(error, "cannot parse array");
243
			}
244
			fz_array_push(ary, obj);
245
			fz_drop_obj(obj);
246
			break;
247
 
248
		case PDF_TOK_NAME:
249
			obj = fz_new_name(buf);
250
			fz_array_push(ary, obj);
251
			fz_drop_obj(obj);
252
			break;
253
		case PDF_TOK_REAL:
254
			obj = fz_new_real(fz_atof(buf));
255
			fz_array_push(ary, obj);
256
			fz_drop_obj(obj);
257
			break;
258
		case PDF_TOK_STRING:
259
			obj = fz_new_string(buf, len);
260
			fz_array_push(ary, obj);
261
			fz_drop_obj(obj);
262
			break;
263
		case PDF_TOK_TRUE:
264
			obj = fz_new_bool(1);
265
			fz_array_push(ary, obj);
266
			fz_drop_obj(obj);
267
			break;
268
		case PDF_TOK_FALSE:
269
			obj = fz_new_bool(0);
270
			fz_array_push(ary, obj);
271
			fz_drop_obj(obj);
272
			break;
273
		case PDF_TOK_NULL:
274
			obj = fz_new_null();
275
			fz_array_push(ary, obj);
276
			fz_drop_obj(obj);
277
			break;
278
 
279
		default:
280
			fz_drop_obj(ary);
281
			return fz_throw("cannot parse token in array");
282
		}
283
	}
284
}
285
 
286
fz_error
287
pdf_parse_dict(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
288
{
289
	fz_error error = fz_okay;
290
	fz_obj *dict = NULL;
291
	fz_obj *key = NULL;
292
	fz_obj *val = NULL;
293
	int tok;
294
	int len;
295
	int a, b;
296
 
297
	dict = fz_new_dict(8);
298
 
299
	while (1)
300
	{
301
		error = pdf_lex(&tok, file, buf, cap, &len);
302
		if (error)
303
		{
304
			fz_drop_obj(dict);
305
			return fz_rethrow(error, "cannot parse dict");
306
		}
307
 
308
skip:
309
		if (tok == PDF_TOK_CLOSE_DICT)
310
		{
311
			*op = dict;
312
			return fz_okay;
313
		}
314
 
315
		/* for BI .. ID .. EI in content streams */
316
		if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
317
		{
318
			*op = dict;
319
			return fz_okay;
320
		}
321
 
322
		if (tok != PDF_TOK_NAME)
323
		{
324
			fz_drop_obj(dict);
325
			return fz_throw("invalid key in dict");
326
		}
327
 
328
		key = fz_new_name(buf);
329
 
330
		error = pdf_lex(&tok, file, buf, cap, &len);
331
		if (error)
332
		{
333
			fz_drop_obj(key);
334
			fz_drop_obj(dict);
335
			return fz_rethrow(error, "cannot parse dict");
336
		}
337
 
338
		switch (tok)
339
		{
340
		case PDF_TOK_OPEN_ARRAY:
341
			error = pdf_parse_array(&val, xref, file, buf, cap);
342
			if (error)
343
			{
344
				fz_drop_obj(key);
345
				fz_drop_obj(dict);
346
				return fz_rethrow(error, "cannot parse dict");
347
			}
348
			break;
349
 
350
		case PDF_TOK_OPEN_DICT:
351
			error = pdf_parse_dict(&val, xref, file, buf, cap);
352
			if (error)
353
			{
354
				fz_drop_obj(key);
355
				fz_drop_obj(dict);
356
				return fz_rethrow(error, "cannot parse dict");
357
			}
358
			break;
359
 
360
		case PDF_TOK_NAME: val = fz_new_name(buf); break;
361
		case PDF_TOK_REAL: val = fz_new_real(fz_atof(buf)); break;
362
		case PDF_TOK_STRING: val = fz_new_string(buf, len); break;
363
		case PDF_TOK_TRUE: val = fz_new_bool(1); break;
364
		case PDF_TOK_FALSE: val = fz_new_bool(0); break;
365
		case PDF_TOK_NULL: val = fz_new_null(); break;
366
 
367
		case PDF_TOK_INT:
368
			/* 64-bit to allow for numbers > INT_MAX and overflow */
369
			a = (int) strtoll(buf, 0, 10);
370
			error = pdf_lex(&tok, file, buf, cap, &len);
371
			if (error)
372
			{
373
				fz_drop_obj(key);
374
				fz_drop_obj(dict);
375
				return fz_rethrow(error, "cannot parse dict");
376
			}
377
			if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
378
				(tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
379
			{
380
				val = fz_new_int(a);
381
				fz_dict_put(dict, key, val);
382
				fz_drop_obj(val);
383
				fz_drop_obj(key);
384
				goto skip;
385
			}
386
			if (tok == PDF_TOK_INT)
387
			{
388
				b = atoi(buf);
389
				error = pdf_lex(&tok, file, buf, cap, &len);
390
				if (error)
391
				{
392
					fz_drop_obj(key);
393
					fz_drop_obj(dict);
394
					return fz_rethrow(error, "cannot parse dict");
395
				}
396
				if (tok == PDF_TOK_R)
397
				{
398
					val = fz_new_indirect(a, b, xref);
399
					break;
400
				}
401
			}
402
			fz_drop_obj(key);
403
			fz_drop_obj(dict);
404
			return fz_throw("invalid indirect reference in dict");
405
 
406
		default:
407
			fz_drop_obj(key);
408
			fz_drop_obj(dict);
409
			return fz_throw("unknown token in dict");
410
		}
411
 
412
		fz_dict_put(dict, key, val);
413
		fz_drop_obj(val);
414
		fz_drop_obj(key);
415
	}
416
}
417
 
418
fz_error
419
pdf_parse_stm_obj(fz_obj **op, pdf_xref *xref, fz_stream *file, char *buf, int cap)
420
{
421
	fz_error error;
422
	int tok;
423
	int len;
424
 
425
	error = pdf_lex(&tok, file, buf, cap, &len);
426
	if (error)
427
		return fz_rethrow(error, "cannot parse token in object stream");
428
 
429
	switch (tok)
430
	{
431
	case PDF_TOK_OPEN_ARRAY:
432
		error = pdf_parse_array(op, xref, file, buf, cap);
433
		if (error)
434
			return fz_rethrow(error, "cannot parse object stream");
435
		break;
436
	case PDF_TOK_OPEN_DICT:
437
		error = pdf_parse_dict(op, xref, file, buf, cap);
438
		if (error)
439
			return fz_rethrow(error, "cannot parse object stream");
440
		break;
441
	case PDF_TOK_NAME: *op = fz_new_name(buf); break;
442
	case PDF_TOK_REAL: *op = fz_new_real(fz_atof(buf)); break;
443
	case PDF_TOK_STRING: *op = fz_new_string(buf, len); break;
444
	case PDF_TOK_TRUE: *op = fz_new_bool(1); break;
445
	case PDF_TOK_FALSE: *op = fz_new_bool(0); break;
446
	case PDF_TOK_NULL: *op = fz_new_null(); break;
447
	case PDF_TOK_INT: *op = fz_new_int(atoi(buf)); break;
448
	default: return fz_throw("unknown token in object stream");
449
	}
450
 
451
	return fz_okay;
452
}
453
 
454
fz_error
455
pdf_parse_ind_obj(fz_obj **op, pdf_xref *xref,
456
	fz_stream *file, char *buf, int cap,
457
	int *onum, int *ogen, int *ostmofs)
458
{
459
	fz_error error = fz_okay;
460
	fz_obj *obj = NULL;
461
	int num = 0, gen = 0, stm_ofs;
462
	int tok;
463
	int len;
464
	int a, b;
465
 
466
	error = pdf_lex(&tok, file, buf, cap, &len);
467
	if (error)
468
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
469
	if (tok != PDF_TOK_INT)
470
		return fz_throw("expected object number (%d %d R)", num, gen);
471
	num = atoi(buf);
472
 
473
	error = pdf_lex(&tok, file, buf, cap, &len);
474
	if (error)
475
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
476
	if (tok != PDF_TOK_INT)
477
		return fz_throw("expected generation number (%d %d R)", num, gen);
478
	gen = atoi(buf);
479
 
480
	error = pdf_lex(&tok, file, buf, cap, &len);
481
	if (error)
482
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
483
	if (tok != PDF_TOK_OBJ)
484
		return fz_throw("expected 'obj' keyword (%d %d R)", num, gen);
485
 
486
	error = pdf_lex(&tok, file, buf, cap, &len);
487
	if (error)
488
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
489
 
490
	switch (tok)
491
	{
492
	case PDF_TOK_OPEN_ARRAY:
493
		error = pdf_parse_array(&obj, xref, file, buf, cap);
494
		if (error)
495
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
496
		break;
497
 
498
	case PDF_TOK_OPEN_DICT:
499
		error = pdf_parse_dict(&obj, xref, file, buf, cap);
500
		if (error)
501
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
502
		break;
503
 
504
	case PDF_TOK_NAME: obj = fz_new_name(buf); break;
505
	case PDF_TOK_REAL: obj = fz_new_real(fz_atof(buf)); break;
506
	case PDF_TOK_STRING: obj = fz_new_string(buf, len); break;
507
	case PDF_TOK_TRUE: obj = fz_new_bool(1); break;
508
	case PDF_TOK_FALSE: obj = fz_new_bool(0); break;
509
	case PDF_TOK_NULL: obj = fz_new_null(); break;
510
 
511
	case PDF_TOK_INT:
512
		a = atoi(buf);
513
		error = pdf_lex(&tok, file, buf, cap, &len);
514
		if (error)
515
			return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
516
		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
517
		{
518
			obj = fz_new_int(a);
519
			goto skip;
520
		}
521
		if (tok == PDF_TOK_INT)
522
		{
523
			b = atoi(buf);
524
			error = pdf_lex(&tok, file, buf, cap, &len);
525
			if (error)
526
				return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
527
			if (tok == PDF_TOK_R)
528
			{
529
				obj = fz_new_indirect(a, b, xref);
530
				break;
531
			}
532
		}
533
		return fz_throw("expected 'R' keyword (%d %d R)", num, gen);
534
 
535
	case PDF_TOK_ENDOBJ:
536
		obj = fz_new_null();
537
		goto skip;
538
 
539
	default:
540
		return fz_throw("syntax error in object (%d %d R)", num, gen);
541
	}
542
 
543
	error = pdf_lex(&tok, file, buf, cap, &len);
544
	if (error)
545
	{
546
		fz_drop_obj(obj);
547
		return fz_rethrow(error, "cannot parse indirect object (%d %d R)", num, gen);
548
	}
549
 
550
skip:
551
	if (tok == PDF_TOK_STREAM)
552
	{
553
		int c = fz_read_byte(file);
554
		while (c == ' ')
555
			c = fz_read_byte(file);
556
		if (c == '\r')
557
		{
558
			c = fz_peek_byte(file);
559
			if (c != '\n')
560
				fz_warn("line feed missing after stream begin marker (%d %d R)", num, gen);
561
			else
562
				fz_read_byte(file);
563
		}
564
		stm_ofs = fz_tell(file);
565
	}
566
	else if (tok == PDF_TOK_ENDOBJ)
567
	{
568
		stm_ofs = 0;
569
	}
570
	else
571
	{
572
		fz_warn("expected 'endobj' or 'stream' keyword (%d %d R)", num, gen);
573
		stm_ofs = 0;
574
	}
575
 
576
	if (onum) *onum = num;
577
	if (ogen) *ogen = gen;
578
	if (ostmofs) *ostmofs = stm_ofs;
579
	*op = obj;
580
	return fz_okay;
581
}