Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
#include "fitz.h"
2
#include "mupdf.h"
3
 
4
/*
 * Return 1 when ch is one of the six byte values this parser skips as
 * white space (NUL, tab, line feed, form feed, carriage return, space),
 * and 0 for every other value.
 */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
10
 
11
/*
12
 * magic version tag and startxref
13
 */
14
 
15
static fz_error
16
pdf_load_version(pdf_xref *xref)
17
{
18
	char buf[20];
19
 
20
	fz_seek(xref->file, 0, 0);
21
	fz_read_line(xref->file, buf, sizeof buf);
22
	if (memcmp(buf, "%PDF-", 5) != 0)
23
		return fz_throw("cannot recognize version marker");
24
 
25
	xref->version = atoi(buf + 5) * 10 + atoi(buf + 7);
26
 
27
	return fz_okay;
28
}
29
 
30
static fz_error
31
pdf_read_start_xref(pdf_xref *xref)
32
{
33
	unsigned char buf[1024];
34
	int t, n;
35
	int i;
36
 
37
	fz_seek(xref->file, 0, 2);
38
 
39
	xref->file_size = fz_tell(xref->file);
40
 
41
	t = MAX(0, xref->file_size - (int)sizeof buf);
42
	fz_seek(xref->file, t, 0);
43
 
44
	n = fz_read(xref->file, buf, sizeof buf);
45
	if (n < 0)
46
		return fz_rethrow(n, "cannot read from file");
47
 
48
	for (i = n - 9; i >= 0; i--)
49
	{
50
		if (memcmp(buf + i, "startxref", 9) == 0)
51
		{
52
			i += 9;
53
			while (iswhite(buf[i]) && i < n)
54
				i ++;
55
			xref->startxref = atoi((char*)(buf + i));
56
			return fz_okay;
57
		}
58
	}
59
 
60
	return fz_throw("cannot find startxref");
61
}
62
 
63
/*
64
 * trailer dictionary
65
 */
66
 
67
static fz_error
68
pdf_read_old_trailer(pdf_xref *xref, char *buf, int cap)
69
{
70
	fz_error error;
71
	int len;
72
	char *s;
73
	int n;
74
	int t;
75
	int tok;
76
	int c;
77
 
78
	fz_read_line(xref->file, buf, cap);
79
	if (strncmp(buf, "xref", 4) != 0)
80
		return fz_throw("cannot find xref marker");
81
 
82
	while (1)
83
	{
84
		c = fz_peek_byte(xref->file);
85
		if (!(c >= '0' && c <= '9'))
86
			break;
87
 
88
		fz_read_line(xref->file, buf, cap);
89
		s = buf;
90
		fz_strsep(&s, " "); /* ignore ofs */
91
		if (!s)
92
			return fz_throw("invalid range marker in xref");
93
		len = atoi(fz_strsep(&s, " "));
94
 
95
		/* broken pdfs where the section is not on a separate line */
96
		if (s && *s != '\0')
97
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
98
 
99
		t = fz_tell(xref->file);
100
		if (t < 0)
101
			return fz_throw("cannot tell in file");
102
 
103
		fz_seek(xref->file, t + 20 * len, 0);
104
	}
105
 
106
	error = pdf_lex(&tok, xref->file, buf, cap, &n);
107
	if (error)
108
		return fz_rethrow(error, "cannot parse trailer");
109
	if (tok != PDF_TOK_TRAILER)
110
		return fz_throw("expected trailer marker");
111
 
112
	error = pdf_lex(&tok, xref->file, buf, cap, &n);
113
	if (error)
114
		return fz_rethrow(error, "cannot parse trailer");
115
	if (tok != PDF_TOK_OPEN_DICT)
116
		return fz_throw("expected trailer dictionary");
117
 
118
	error = pdf_parse_dict(&xref->trailer, xref, xref->file, buf, cap);
119
	if (error)
120
		return fz_rethrow(error, "cannot parse trailer");
121
	return fz_okay;
122
}
123
 
124
static fz_error
125
pdf_read_new_trailer(pdf_xref *xref, char *buf, int cap)
126
{
127
	fz_error error;
128
	error = pdf_parse_ind_obj(&xref->trailer, xref, xref->file, buf, cap, NULL, NULL, NULL);
129
	if (error)
130
		return fz_rethrow(error, "cannot parse trailer (compressed)");
131
	return fz_okay;
132
}
133
 
134
static fz_error
135
pdf_read_trailer(pdf_xref *xref, char *buf, int cap)
136
{
137
	fz_error error;
138
	int c;
139
 
140
	fz_seek(xref->file, xref->startxref, 0);
141
 
142
	while (iswhite(fz_peek_byte(xref->file)))
143
		fz_read_byte(xref->file);
144
 
145
	c = fz_peek_byte(xref->file);
146
	if (c == 'x')
147
	{
148
		error = pdf_read_old_trailer(xref, buf, cap);
149
		if (error)
150
			return fz_rethrow(error, "cannot read trailer");
151
	}
152
	else if (c >= '0' && c <= '9')
153
	{
154
		error = pdf_read_new_trailer(xref, buf, cap);
155
		if (error)
156
			return fz_rethrow(error, "cannot read trailer");
157
	}
158
	else
159
	{
160
		return fz_throw("cannot recognize xref format: '%c'", c);
161
	}
162
 
163
	return fz_okay;
164
}
165
 
166
/*
167
 * xref tables
168
 */
169
 
170
void
171
pdf_resize_xref(pdf_xref *xref, int newlen)
172
{
173
	int i;
174
 
175
	xref->table = fz_realloc(xref->table, newlen, sizeof(pdf_xref_entry));
176
	for (i = xref->len; i < newlen; i++)
177
	{
178
		xref->table[i].type = 0;
179
		xref->table[i].ofs = 0;
180
		xref->table[i].gen = 0;
181
		xref->table[i].stm_ofs = 0;
182
		xref->table[i].obj = NULL;
183
	}
184
	xref->len = newlen;
185
}
186
 
187
static fz_error
188
pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
189
{
190
	fz_error error;
191
	int ofs, len;
192
	char *s;
193
	int n;
194
	int tok;
195
	int i;
196
	int c;
197
 
198
	fz_read_line(xref->file, buf, cap);
199
	if (strncmp(buf, "xref", 4) != 0)
200
		return fz_throw("cannot find xref marker");
201
 
202
	while (1)
203
	{
204
		c = fz_peek_byte(xref->file);
205
		if (!(c >= '0' && c <= '9'))
206
			break;
207
 
208
		fz_read_line(xref->file, buf, cap);
209
		s = buf;
210
		ofs = atoi(fz_strsep(&s, " "));
211
		len = atoi(fz_strsep(&s, " "));
212
 
213
		/* broken pdfs where the section is not on a separate line */
214
		if (s && *s != '\0')
215
		{
216
			fz_warn("broken xref section. proceeding anyway.");
217
			fz_seek(xref->file, -(2 + (int)strlen(s)), 1);
218
		}
219
 
220
		/* broken pdfs where size in trailer undershoots entries in xref sections */
221
		if (ofs + len > xref->len)
222
		{
223
			fz_warn("broken xref section, proceeding anyway.");
224
			pdf_resize_xref(xref, ofs + len);
225
		}
226
 
227
		for (i = ofs; i < ofs + len; i++)
228
		{
229
			n = fz_read(xref->file, (unsigned char *) buf, 20);
230
			if (n < 0)
231
				return fz_rethrow(n, "cannot read xref table");
232
			if (!xref->table[i].type)
233
			{
234
				s = buf;
235
 
236
				/* broken pdfs where line start with white space */
237
				while (*s != '\0' && iswhite(*s))
238
					s++;
239
 
240
				xref->table[i].ofs = atoi(s);
241
				xref->table[i].gen = atoi(s + 11);
242
				xref->table[i].type = s[17];
243
				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
244
					return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen);
245
			}
246
		}
247
	}
248
 
249
	error = pdf_lex(&tok, xref->file, buf, cap, &n);
250
	if (error)
251
		return fz_rethrow(error, "cannot parse trailer");
252
	if (tok != PDF_TOK_TRAILER)
253
		return fz_throw("expected trailer marker");
254
 
255
	error = pdf_lex(&tok, xref->file, buf, cap, &n);
256
	if (error)
257
		return fz_rethrow(error, "cannot parse trailer");
258
	if (tok != PDF_TOK_OPEN_DICT)
259
		return fz_throw("expected trailer dictionary");
260
 
261
	error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap);
262
	if (error)
263
		return fz_rethrow(error, "cannot parse trailer");
264
	return fz_okay;
265
}
266
 
267
static fz_error
268
pdf_read_new_xref_section(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2)
269
{
270
	int i, n;
271
 
272
	if (i0 < 0 || i0 + i1 > xref->len)
273
		return fz_throw("xref stream has too many entries");
274
 
275
	for (i = i0; i < i0 + i1; i++)
276
	{
277
		int a = 0;
278
		int b = 0;
279
		int c = 0;
280
 
281
		if (fz_is_eof(stm))
282
			return fz_throw("truncated xref stream");
283
 
284
		for (n = 0; n < w0; n++)
285
			a = (a << 8) + fz_read_byte(stm);
286
		for (n = 0; n < w1; n++)
287
			b = (b << 8) + fz_read_byte(stm);
288
		for (n = 0; n < w2; n++)
289
			c = (c << 8) + fz_read_byte(stm);
290
 
291
		if (!xref->table[i].type)
292
		{
293
			int t = w0 ? a : 1;
294
			xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0;
295
			xref->table[i].ofs = w1 ? b : 0;
296
			xref->table[i].gen = w2 ? c : 0;
297
		}
298
	}
299
 
300
	return fz_okay;
301
}
302
 
303
static fz_error
304
pdf_read_new_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap)
305
{
306
	fz_error error;
307
	fz_stream *stm;
308
	fz_obj *trailer;
309
	fz_obj *index;
310
	fz_obj *obj;
311
	int num, gen, stm_ofs;
312
	int size, w0, w1, w2;
313
	int t;
314
 
315
	error = pdf_parse_ind_obj(&trailer, xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
316
	if (error)
317
		return fz_rethrow(error, "cannot parse compressed xref stream object");
318
 
319
	obj = fz_dict_gets(trailer, "Size");
320
	if (!obj)
321
	{
322
		fz_drop_obj(trailer);
323
		return fz_throw("xref stream missing Size entry (%d %d R)", num, gen);
324
	}
325
	size = fz_to_int(obj);
326
 
327
	if (size > xref->len)
328
	{
329
		pdf_resize_xref(xref, size);
330
	}
331
 
332
	if (num < 0 || num >= xref->len)
333
	{
334
		fz_drop_obj(trailer);
335
		return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1);
336
	}
337
 
338
	obj = fz_dict_gets(trailer, "W");
339
	if (!obj) {
340
		fz_drop_obj(trailer);
341
		return fz_throw("xref stream missing W entry (%d %d R)", num, gen);
342
	}
343
	w0 = fz_to_int(fz_array_get(obj, 0));
344
	w1 = fz_to_int(fz_array_get(obj, 1));
345
	w2 = fz_to_int(fz_array_get(obj, 2));
346
 
347
	index = fz_dict_gets(trailer, "Index");
348
 
349
	error = pdf_open_stream_at(&stm, xref, num, gen, trailer, stm_ofs);
350
	if (error)
351
	{
352
		fz_drop_obj(trailer);
353
		return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen);
354
	}
355
 
356
	if (!index)
357
	{
358
		error = pdf_read_new_xref_section(xref, stm, 0, size, w0, w1, w2);
359
		if (error)
360
		{
361
			fz_close(stm);
362
			fz_drop_obj(trailer);
363
			return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen);
364
		}
365
	}
366
	else
367
	{
368
		for (t = 0; t < fz_array_len(index); t += 2)
369
		{
370
			int i0 = fz_to_int(fz_array_get(index, t + 0));
371
			int i1 = fz_to_int(fz_array_get(index, t + 1));
372
			error = pdf_read_new_xref_section(xref, stm, i0, i1, w0, w1, w2);
373
			if (error)
374
			{
375
				fz_close(stm);
376
				fz_drop_obj(trailer);
377
				return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen);
378
			}
379
		}
380
	}
381
 
382
	fz_close(stm);
383
 
384
	*trailerp = trailer;
385
 
386
	return fz_okay;
387
}
388
 
389
static fz_error
390
pdf_read_xref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap)
391
{
392
	fz_error error;
393
	int c;
394
 
395
	fz_seek(xref->file, ofs, 0);
396
 
397
	while (iswhite(fz_peek_byte(xref->file)))
398
		fz_read_byte(xref->file);
399
 
400
	c = fz_peek_byte(xref->file);
401
	if (c == 'x')
402
	{
403
		error = pdf_read_old_xref(trailerp, xref, buf, cap);
404
		if (error)
405
			return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
406
	}
407
	else if (c >= '0' && c <= '9')
408
	{
409
		error = pdf_read_new_xref(trailerp, xref, buf, cap);
410
		if (error)
411
			return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs);
412
	}
413
	else
414
	{
415
		return fz_throw("cannot recognize xref format");
416
	}
417
 
418
	return fz_okay;
419
}
420
 
421
static fz_error
422
pdf_read_xref_sections(pdf_xref *xref, int ofs, char *buf, int cap)
423
{
424
	fz_error error;
425
	fz_obj *trailer;
426
	fz_obj *prev;
427
	fz_obj *xrefstm;
428
 
429
	error = pdf_read_xref(&trailer, xref, ofs, buf, cap);
430
	if (error)
431
		return fz_rethrow(error, "cannot read xref section");
432
 
433
	/* FIXME: do we overwrite free entries properly? */
434
	xrefstm = fz_dict_gets(trailer, "XRefStm");
435
	if (xrefstm)
436
	{
437
		error = pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf, cap);
438
		if (error)
439
		{
440
			fz_drop_obj(trailer);
441
			return fz_rethrow(error, "cannot read /XRefStm xref section");
442
		}
443
	}
444
 
445
	prev = fz_dict_gets(trailer, "Prev");
446
	if (prev)
447
	{
448
		error = pdf_read_xref_sections(xref, fz_to_int(prev), buf, cap);
449
		if (error)
450
		{
451
			fz_drop_obj(trailer);
452
			return fz_rethrow(error, "cannot read /Prev xref section");
453
		}
454
	}
455
 
456
	fz_drop_obj(trailer);
457
	return fz_okay;
458
}
459
 
460
/*
461
 * load xref tables from pdf
462
 */
463
 
464
static fz_error
465
pdf_load_xref(pdf_xref *xref, char *buf, int bufsize)
466
{
467
	fz_error error;
468
	fz_obj *size;
469
	int i;
470
 
471
	error = pdf_load_version(xref);
472
	if (error)
473
		return fz_rethrow(error, "cannot read version marker");
474
 
475
	error = pdf_read_start_xref(xref);
476
	if (error)
477
		return fz_rethrow(error, "cannot read startxref");
478
 
479
	error = pdf_read_trailer(xref, buf, bufsize);
480
	if (error)
481
		return fz_rethrow(error, "cannot read trailer");
482
 
483
	size = fz_dict_gets(xref->trailer, "Size");
484
	if (!size)
485
		return fz_throw("trailer missing Size entry");
486
 
487
	pdf_resize_xref(xref, fz_to_int(size));
488
 
489
	error = pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
490
	if (error)
491
		return fz_rethrow(error, "cannot read xref");
492
 
493
	/* broken pdfs where first object is not free */
494
	if (xref->table[0].type != 'f')
495
		return fz_throw("first object in xref is not free");
496
 
497
	/* broken pdfs where object offsets are out of range */
498
	for (i = 0; i < xref->len; i++)
499
	{
500
		if (xref->table[i].type == 'n')
501
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size)
502
				return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i);
503
		if (xref->table[i].type == 'o')
504
			if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n')
505
				return fz_throw("invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i);
506
	}
507
 
508
	return fz_okay;
509
}
510
 
511
/*
512
 * Initialize and load xref tables.
513
 * If password is not null, try to decrypt.
514
 */
515
 
516
fz_error
517
pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password)
518
{
519
	pdf_xref *xref;
520
	fz_error error;
521
	fz_obj *encrypt, *id;
522
	fz_obj *dict, *obj;
523
	int i, repaired = 0;
524
 
525
	/* install pdf specific callback */
526
	fz_resolve_indirect = pdf_resolve_indirect;
527
 
528
	xref = fz_malloc(sizeof(pdf_xref));
529
 
530
	memset(xref, 0, sizeof(pdf_xref));
531
 
532
	xref->file = fz_keep_stream(file);
533
 
534
	error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
535
	if (error)
536
	{
537
		fz_catch(error, "trying to repair");
538
		if (xref->table)
539
		{
540
			fz_free(xref->table);
541
			xref->table = NULL;
542
			xref->len = 0;
543
		}
544
		if (xref->trailer)
545
		{
546
			fz_drop_obj(xref->trailer);
547
			xref->trailer = NULL;
548
		}
549
		error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
550
		if (error)
551
		{
552
			pdf_free_xref(xref);
553
			return fz_rethrow(error, "cannot repair document");
554
		}
555
		repaired = 1;
556
	}
557
 
558
	encrypt = fz_dict_gets(xref->trailer, "Encrypt");
559
	id = fz_dict_gets(xref->trailer, "ID");
560
	if (fz_is_dict(encrypt))
561
	{
562
		error = pdf_new_crypt(&xref->crypt, encrypt, id);
563
		if (error)
564
		{
565
			pdf_free_xref(xref);
566
			return fz_rethrow(error, "cannot decrypt document");
567
		}
568
	}
569
 
570
	if (pdf_needs_password(xref))
571
	{
572
		/* Only care if we have a password */
573
		if (password)
574
		{
575
			int okay = pdf_authenticate_password(xref, password);
576
			if (!okay)
577
			{
578
				pdf_free_xref(xref);
579
				return fz_throw("invalid password");
580
			}
581
		}
582
	}
583
 
584
	if (repaired)
585
	{
586
		int hasroot, hasinfo;
587
 
588
		error = pdf_repair_obj_stms(xref);
589
		if (error)
590
		{
591
			pdf_free_xref(xref);
592
			return fz_rethrow(error, "cannot repair document");
593
		}
594
 
595
		hasroot = fz_dict_gets(xref->trailer, "Root") != NULL;
596
		hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL;
597
 
598
		for (i = 1; i < xref->len; i++)
599
		{
600
			if (xref->table[i].type == 0 || xref->table[i].type == 'f')
601
				continue;
602
 
603
			error = pdf_load_object(&dict, xref, i, 0);
604
			if (error)
605
			{
606
				fz_catch(error, "ignoring broken object (%d 0 R)", i);
607
				continue;
608
			}
609
 
610
			if (!hasroot)
611
			{
612
				obj = fz_dict_gets(dict, "Type");
613
				if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog"))
614
				{
615
					obj = fz_new_indirect(i, 0, xref);
616
					fz_dict_puts(xref->trailer, "Root", obj);
617
					fz_drop_obj(obj);
618
				}
619
			}
620
 
621
			if (!hasinfo)
622
			{
623
				if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer"))
624
				{
625
					obj = fz_new_indirect(i, 0, xref);
626
					fz_dict_puts(xref->trailer, "Info", obj);
627
					fz_drop_obj(obj);
628
				}
629
			}
630
 
631
			fz_drop_obj(dict);
632
		}
633
	}
634
 
635
	*xrefp = xref;
636
	return fz_okay;
637
}
638
 
639
void
640
pdf_free_xref(pdf_xref *xref)
641
{
642
	int i;
643
 
644
	if (xref->store)
645
		pdf_free_store(xref->store);
646
 
647
	if (xref->table)
648
	{
649
		for (i = 0; i < xref->len; i++)
650
		{
651
			if (xref->table[i].obj)
652
			{
653
				fz_drop_obj(xref->table[i].obj);
654
				xref->table[i].obj = NULL;
655
			}
656
		}
657
		fz_free(xref->table);
658
	}
659
 
660
	if (xref->page_objs)
661
	{
662
		for (i = 0; i < xref->page_len; i++)
663
			fz_drop_obj(xref->page_objs[i]);
664
		fz_free(xref->page_objs);
665
	}
666
 
667
	if (xref->page_refs)
668
	{
669
		for (i = 0; i < xref->page_len; i++)
670
			fz_drop_obj(xref->page_refs[i]);
671
		fz_free(xref->page_refs);
672
	}
673
 
674
	if (xref->file)
675
		fz_close(xref->file);
676
	if (xref->trailer)
677
		fz_drop_obj(xref->trailer);
678
	if (xref->crypt)
679
		pdf_free_crypt(xref->crypt);
680
 
681
	fz_free(xref);
682
}
683
 
684
void
685
pdf_debug_xref(pdf_xref *xref)
686
{
687
	int i;
688
	printf("xref\n0 %d\n", xref->len);
689
	for (i = 0; i < xref->len; i++)
690
	{
691
		printf("%05d: %010d %05d %c (stm_ofs=%d)\n", i,
692
			xref->table[i].ofs,
693
			xref->table[i].gen,
694
			xref->table[i].type ? xref->table[i].type : '-',
695
			xref->table[i].stm_ofs);
696
	}
697
}
698
 
699
/*
700
 * compressed object streams
701
 */
702
 
703
static fz_error
704
pdf_load_obj_stm(pdf_xref *xref, int num, int gen, char *buf, int cap)
705
{
706
	fz_error error;
707
	fz_stream *stm;
708
	fz_obj *objstm;
709
	int *numbuf;
710
	int *ofsbuf;
711
 
712
	fz_obj *obj;
713
	int first;
714
	int count;
715
	int i, n;
716
	int tok;
717
 
718
	error = pdf_load_object(&objstm, xref, num, gen);
719
	if (error)
720
		return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen);
721
 
722
	count = fz_to_int(fz_dict_gets(objstm, "N"));
723
	first = fz_to_int(fz_dict_gets(objstm, "First"));
724
 
725
	numbuf = fz_calloc(count, sizeof(int));
726
	ofsbuf = fz_calloc(count, sizeof(int));
727
 
728
	error = pdf_open_stream(&stm, xref, num, gen);
729
	if (error)
730
	{
731
		error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen);
732
		goto cleanupbuf;
733
	}
734
 
735
	for (i = 0; i < count; i++)
736
	{
737
		error = pdf_lex(&tok, stm, buf, cap, &n);
738
		if (error || tok != PDF_TOK_INT)
739
		{
740
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
741
			goto cleanupstm;
742
		}
743
		numbuf[i] = atoi(buf);
744
 
745
		error = pdf_lex(&tok, stm, buf, cap, &n);
746
		if (error || tok != PDF_TOK_INT)
747
		{
748
			error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen);
749
			goto cleanupstm;
750
		}
751
		ofsbuf[i] = atoi(buf);
752
	}
753
 
754
	fz_seek(stm, first, 0);
755
 
756
	for (i = 0; i < count; i++)
757
	{
758
		fz_seek(stm, first + ofsbuf[i], 0);
759
 
760
		error = pdf_parse_stm_obj(&obj, xref, stm, buf, cap);
761
		if (error)
762
		{
763
			error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen);
764
			goto cleanupstm;
765
		}
766
 
767
		if (numbuf[i] < 1 || numbuf[i] >= xref->len)
768
		{
769
			fz_drop_obj(obj);
770
			error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1);
771
			goto cleanupstm;
772
		}
773
 
774
		if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num)
775
		{
776
			if (xref->table[numbuf[i]].obj)
777
				fz_drop_obj(xref->table[numbuf[i]].obj);
778
			xref->table[numbuf[i]].obj = obj;
779
		}
780
		else
781
		{
782
			fz_drop_obj(obj);
783
		}
784
	}
785
 
786
	fz_close(stm);
787
	fz_free(ofsbuf);
788
	fz_free(numbuf);
789
	fz_drop_obj(objstm);
790
	return fz_okay;
791
 
792
cleanupstm:
793
	fz_close(stm);
794
cleanupbuf:
795
	fz_free(ofsbuf);
796
	fz_free(numbuf);
797
	fz_drop_obj(objstm);
798
	return error; /* already rethrown */
799
}
800
 
801
/*
802
 * object loading
803
 */
804
 
805
fz_error
806
pdf_cache_object(pdf_xref *xref, int num, int gen)
807
{
808
	fz_error error;
809
	pdf_xref_entry *x;
810
	int rnum, rgen;
811
 
812
	if (num < 0 || num >= xref->len)
813
		return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len);
814
 
815
	x = &xref->table[num];
816
 
817
	if (x->obj)
818
		return fz_okay;
819
 
820
	if (x->type == 'f')
821
	{
822
		x->obj = fz_new_null();
823
		return fz_okay;
824
	}
825
	else if (x->type == 'n')
826
	{
827
		fz_seek(xref->file, x->ofs, 0);
828
 
829
		error = pdf_parse_ind_obj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch,
830
			&rnum, &rgen, &x->stm_ofs);
831
		if (error)
832
			return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen);
833
 
834
		if (rnum != num)
835
			return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen);
836
 
837
		if (xref->crypt)
838
			pdf_crypt_obj(xref->crypt, x->obj, num, gen);
839
	}
840
	else if (x->type == 'o')
841
	{
842
		if (!x->obj)
843
		{
844
			error = pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
845
			if (error)
846
				return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen);
847
			if (!x->obj)
848
				return fz_throw("object (%d %d R) was not found in its object stream", num, gen);
849
		}
850
	}
851
	else
852
	{
853
		return fz_throw("assert: corrupt xref struct");
854
	}
855
 
856
	return fz_okay;
857
}
858
 
859
fz_error
860
pdf_load_object(fz_obj **objp, pdf_xref *xref, int num, int gen)
861
{
862
	fz_error error;
863
 
864
	error = pdf_cache_object(xref, num, gen);
865
	if (error)
866
		return fz_rethrow(error, "cannot load object (%d %d R) into cache", num, gen);
867
 
868
	assert(xref->table[num].obj);
869
 
870
	*objp = fz_keep_obj(xref->table[num].obj);
871
 
872
	return fz_okay;
873
}
874
 
875
/*
 * Resolve an indirect reference to the cached object it names.
 * Returns ref itself when it is not an indirect reference, when it carries
 * no xref, when the object cannot be cached (the error is caught and
 * reported), or when the table slot is empty. No new reference is taken on
 * the returned object.
 */
fz_obj *
pdf_resolve_indirect(fz_obj *ref)
{
	pdf_xref *xref;
	fz_error error;
	int num, gen;

	if (!fz_is_indirect(ref))
		return ref;

	xref = fz_get_indirect_xref(ref);
	num = fz_to_num(ref);
	gen = fz_to_gen(ref);
	if (!xref)
		return ref;

	error = pdf_cache_object(xref, num, gen);
	if (error)
	{
		fz_catch(error, "cannot load object (%d %d R) into cache", num, gen);
		return ref;
	}

	return xref->table[num].obj ? xref->table[num].obj : ref;
}
897
 
898
/* Replace numbered object -- for use by pdfclean and similar tools */
899
void
900
pdf_update_object(pdf_xref *xref, int num, int gen, fz_obj *newobj)
901
{
902
	pdf_xref_entry *x;
903
 
904
	if (num < 0 || num >= xref->len)
905
	{
906
		fz_warn("object out of range (%d %d R); xref size %d", num, gen, xref->len);
907
		return;
908
	}
909
 
910
	x = &xref->table[num];
911
 
912
	if (x->obj)
913
		fz_drop_obj(x->obj);
914
 
915
	x->obj = fz_keep_obj(newobj);
916
	x->type = 'n';
917
	x->ofs = 0;
918
}
919
 
920
/*
921
 * Convenience function to open a file then call pdf_open_xref_with_stream.
922
 */
923
 
924
fz_error
925
pdf_open_xref(pdf_xref **xrefp, const char *filename, char *password)
926
{
927
	fz_error error;
928
	fz_stream *file;
929
 
930
	file = fz_open_file(filename);
931
	if (!file)
932
		return fz_throw("cannot open file '%s': %s", filename, strerror(errno));
933
 
934
	error = pdf_open_xref_with_stream(xrefp, file, password);
935
	if (error)
936
		return fz_rethrow(error, "cannot load document '%s'", filename);
937
 
938
	fz_close(file);
939
	return fz_okay;
940
}