Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
#include <limits.h>

#include "fitz.h"
#include "mupdf.h"
||
3 | |||
/*
 * Return 1 when ch is one of the six byte values this file treats as
 * white space (NUL, tab, line feed, form feed, carriage return, space),
 * and 0 for anything else.
 */
static inline int iswhite(int ch)
{
	switch (ch)
	{
	case '\000':
	case '\011':
	case '\012':
	case '\014':
	case '\015':
	case '\040':
		return 1;
	default:
		return 0;
	}
}
||
10 | |||
11 | /* |
||
12 | * magic version tag and startxref |
||
13 | */ |
||
14 | |||
15 | static fz_error |
||
16 | pdf_load_version(pdf_xref *xref) |
||
17 | { |
||
18 | char buf[20]; |
||
19 | |||
20 | fz_seek(xref->file, 0, 0); |
||
21 | fz_read_line(xref->file, buf, sizeof buf); |
||
22 | if (memcmp(buf, "%PDF-", 5) != 0) |
||
23 | return fz_throw("cannot recognize version marker"); |
||
24 | |||
25 | xref->version = atoi(buf + 5) * 10 + atoi(buf + 7); |
||
26 | |||
27 | return fz_okay; |
||
28 | } |
||
29 | |||
30 | static fz_error |
||
31 | pdf_read_start_xref(pdf_xref *xref) |
||
32 | { |
||
33 | unsigned char buf[1024]; |
||
34 | int t, n; |
||
35 | int i; |
||
36 | |||
37 | fz_seek(xref->file, 0, 2); |
||
38 | |||
39 | xref->file_size = fz_tell(xref->file); |
||
40 | |||
41 | t = MAX(0, xref->file_size - (int)sizeof buf); |
||
42 | fz_seek(xref->file, t, 0); |
||
43 | |||
44 | n = fz_read(xref->file, buf, sizeof buf); |
||
45 | if (n < 0) |
||
46 | return fz_rethrow(n, "cannot read from file"); |
||
47 | |||
48 | for (i = n - 9; i >= 0; i--) |
||
49 | { |
||
50 | if (memcmp(buf + i, "startxref", 9) == 0) |
||
51 | { |
||
52 | i += 9; |
||
53 | while (iswhite(buf[i]) && i < n) |
||
54 | i ++; |
||
55 | xref->startxref = atoi((char*)(buf + i)); |
||
56 | return fz_okay; |
||
57 | } |
||
58 | } |
||
59 | |||
60 | return fz_throw("cannot find startxref"); |
||
61 | } |
||
62 | |||
63 | /* |
||
64 | * trailer dictionary |
||
65 | */ |
||
66 | |||
67 | static fz_error |
||
68 | pdf_read_old_trailer(pdf_xref *xref, char *buf, int cap) |
||
69 | { |
||
70 | fz_error error; |
||
71 | int len; |
||
72 | char *s; |
||
73 | int n; |
||
74 | int t; |
||
75 | int tok; |
||
76 | int c; |
||
77 | |||
78 | fz_read_line(xref->file, buf, cap); |
||
79 | if (strncmp(buf, "xref", 4) != 0) |
||
80 | return fz_throw("cannot find xref marker"); |
||
81 | |||
82 | while (1) |
||
83 | { |
||
84 | c = fz_peek_byte(xref->file); |
||
85 | if (!(c >= '0' && c <= '9')) |
||
86 | break; |
||
87 | |||
88 | fz_read_line(xref->file, buf, cap); |
||
89 | s = buf; |
||
90 | fz_strsep(&s, " "); /* ignore ofs */ |
||
91 | if (!s) |
||
92 | return fz_throw("invalid range marker in xref"); |
||
93 | len = atoi(fz_strsep(&s, " ")); |
||
94 | |||
95 | /* broken pdfs where the section is not on a separate line */ |
||
96 | if (s && *s != '\0') |
||
97 | fz_seek(xref->file, -(2 + (int)strlen(s)), 1); |
||
98 | |||
99 | t = fz_tell(xref->file); |
||
100 | if (t < 0) |
||
101 | return fz_throw("cannot tell in file"); |
||
102 | |||
103 | fz_seek(xref->file, t + 20 * len, 0); |
||
104 | } |
||
105 | |||
106 | error = pdf_lex(&tok, xref->file, buf, cap, &n); |
||
107 | if (error) |
||
108 | return fz_rethrow(error, "cannot parse trailer"); |
||
109 | if (tok != PDF_TOK_TRAILER) |
||
110 | return fz_throw("expected trailer marker"); |
||
111 | |||
112 | error = pdf_lex(&tok, xref->file, buf, cap, &n); |
||
113 | if (error) |
||
114 | return fz_rethrow(error, "cannot parse trailer"); |
||
115 | if (tok != PDF_TOK_OPEN_DICT) |
||
116 | return fz_throw("expected trailer dictionary"); |
||
117 | |||
118 | error = pdf_parse_dict(&xref->trailer, xref, xref->file, buf, cap); |
||
119 | if (error) |
||
120 | return fz_rethrow(error, "cannot parse trailer"); |
||
121 | return fz_okay; |
||
122 | } |
||
123 | |||
124 | static fz_error |
||
125 | pdf_read_new_trailer(pdf_xref *xref, char *buf, int cap) |
||
126 | { |
||
127 | fz_error error; |
||
128 | error = pdf_parse_ind_obj(&xref->trailer, xref, xref->file, buf, cap, NULL, NULL, NULL); |
||
129 | if (error) |
||
130 | return fz_rethrow(error, "cannot parse trailer (compressed)"); |
||
131 | return fz_okay; |
||
132 | } |
||
133 | |||
134 | static fz_error |
||
135 | pdf_read_trailer(pdf_xref *xref, char *buf, int cap) |
||
136 | { |
||
137 | fz_error error; |
||
138 | int c; |
||
139 | |||
140 | fz_seek(xref->file, xref->startxref, 0); |
||
141 | |||
142 | while (iswhite(fz_peek_byte(xref->file))) |
||
143 | fz_read_byte(xref->file); |
||
144 | |||
145 | c = fz_peek_byte(xref->file); |
||
146 | if (c == 'x') |
||
147 | { |
||
148 | error = pdf_read_old_trailer(xref, buf, cap); |
||
149 | if (error) |
||
150 | return fz_rethrow(error, "cannot read trailer"); |
||
151 | } |
||
152 | else if (c >= '0' && c <= '9') |
||
153 | { |
||
154 | error = pdf_read_new_trailer(xref, buf, cap); |
||
155 | if (error) |
||
156 | return fz_rethrow(error, "cannot read trailer"); |
||
157 | } |
||
158 | else |
||
159 | { |
||
160 | return fz_throw("cannot recognize xref format: '%c'", c); |
||
161 | } |
||
162 | |||
163 | return fz_okay; |
||
164 | } |
||
165 | |||
166 | /* |
||
167 | * xref tables |
||
168 | */ |
||
169 | |||
170 | void |
||
171 | pdf_resize_xref(pdf_xref *xref, int newlen) |
||
172 | { |
||
173 | int i; |
||
174 | |||
175 | xref->table = fz_realloc(xref->table, newlen, sizeof(pdf_xref_entry)); |
||
176 | for (i = xref->len; i < newlen; i++) |
||
177 | { |
||
178 | xref->table[i].type = 0; |
||
179 | xref->table[i].ofs = 0; |
||
180 | xref->table[i].gen = 0; |
||
181 | xref->table[i].stm_ofs = 0; |
||
182 | xref->table[i].obj = NULL; |
||
183 | } |
||
184 | xref->len = newlen; |
||
185 | } |
||
186 | |||
187 | static fz_error |
||
188 | pdf_read_old_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) |
||
189 | { |
||
190 | fz_error error; |
||
191 | int ofs, len; |
||
192 | char *s; |
||
193 | int n; |
||
194 | int tok; |
||
195 | int i; |
||
196 | int c; |
||
197 | |||
198 | fz_read_line(xref->file, buf, cap); |
||
199 | if (strncmp(buf, "xref", 4) != 0) |
||
200 | return fz_throw("cannot find xref marker"); |
||
201 | |||
202 | while (1) |
||
203 | { |
||
204 | c = fz_peek_byte(xref->file); |
||
205 | if (!(c >= '0' && c <= '9')) |
||
206 | break; |
||
207 | |||
208 | fz_read_line(xref->file, buf, cap); |
||
209 | s = buf; |
||
210 | ofs = atoi(fz_strsep(&s, " ")); |
||
211 | len = atoi(fz_strsep(&s, " ")); |
||
212 | |||
213 | /* broken pdfs where the section is not on a separate line */ |
||
214 | if (s && *s != '\0') |
||
215 | { |
||
216 | fz_warn("broken xref section. proceeding anyway."); |
||
217 | fz_seek(xref->file, -(2 + (int)strlen(s)), 1); |
||
218 | } |
||
219 | |||
220 | /* broken pdfs where size in trailer undershoots entries in xref sections */ |
||
221 | if (ofs + len > xref->len) |
||
222 | { |
||
223 | fz_warn("broken xref section, proceeding anyway."); |
||
224 | pdf_resize_xref(xref, ofs + len); |
||
225 | } |
||
226 | |||
227 | for (i = ofs; i < ofs + len; i++) |
||
228 | { |
||
229 | n = fz_read(xref->file, (unsigned char *) buf, 20); |
||
230 | if (n < 0) |
||
231 | return fz_rethrow(n, "cannot read xref table"); |
||
232 | if (!xref->table[i].type) |
||
233 | { |
||
234 | s = buf; |
||
235 | |||
236 | /* broken pdfs where line start with white space */ |
||
237 | while (*s != '\0' && iswhite(*s)) |
||
238 | s++; |
||
239 | |||
240 | xref->table[i].ofs = atoi(s); |
||
241 | xref->table[i].gen = atoi(s + 11); |
||
242 | xref->table[i].type = s[17]; |
||
243 | if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') |
||
244 | return fz_throw("unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen); |
||
245 | } |
||
246 | } |
||
247 | } |
||
248 | |||
249 | error = pdf_lex(&tok, xref->file, buf, cap, &n); |
||
250 | if (error) |
||
251 | return fz_rethrow(error, "cannot parse trailer"); |
||
252 | if (tok != PDF_TOK_TRAILER) |
||
253 | return fz_throw("expected trailer marker"); |
||
254 | |||
255 | error = pdf_lex(&tok, xref->file, buf, cap, &n); |
||
256 | if (error) |
||
257 | return fz_rethrow(error, "cannot parse trailer"); |
||
258 | if (tok != PDF_TOK_OPEN_DICT) |
||
259 | return fz_throw("expected trailer dictionary"); |
||
260 | |||
261 | error = pdf_parse_dict(trailerp, xref, xref->file, buf, cap); |
||
262 | if (error) |
||
263 | return fz_rethrow(error, "cannot parse trailer"); |
||
264 | return fz_okay; |
||
265 | } |
||
266 | |||
267 | static fz_error |
||
268 | pdf_read_new_xref_section(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) |
||
269 | { |
||
270 | int i, n; |
||
271 | |||
272 | if (i0 < 0 || i0 + i1 > xref->len) |
||
273 | return fz_throw("xref stream has too many entries"); |
||
274 | |||
275 | for (i = i0; i < i0 + i1; i++) |
||
276 | { |
||
277 | int a = 0; |
||
278 | int b = 0; |
||
279 | int c = 0; |
||
280 | |||
281 | if (fz_is_eof(stm)) |
||
282 | return fz_throw("truncated xref stream"); |
||
283 | |||
284 | for (n = 0; n < w0; n++) |
||
285 | a = (a << 8) + fz_read_byte(stm); |
||
286 | for (n = 0; n < w1; n++) |
||
287 | b = (b << 8) + fz_read_byte(stm); |
||
288 | for (n = 0; n < w2; n++) |
||
289 | c = (c << 8) + fz_read_byte(stm); |
||
290 | |||
291 | if (!xref->table[i].type) |
||
292 | { |
||
293 | int t = w0 ? a : 1; |
||
294 | xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; |
||
295 | xref->table[i].ofs = w1 ? b : 0; |
||
296 | xref->table[i].gen = w2 ? c : 0; |
||
297 | } |
||
298 | } |
||
299 | |||
300 | return fz_okay; |
||
301 | } |
||
302 | |||
303 | static fz_error |
||
304 | pdf_read_new_xref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) |
||
305 | { |
||
306 | fz_error error; |
||
307 | fz_stream *stm; |
||
308 | fz_obj *trailer; |
||
309 | fz_obj *index; |
||
310 | fz_obj *obj; |
||
311 | int num, gen, stm_ofs; |
||
312 | int size, w0, w1, w2; |
||
313 | int t; |
||
314 | |||
315 | error = pdf_parse_ind_obj(&trailer, xref, xref->file, buf, cap, &num, &gen, &stm_ofs); |
||
316 | if (error) |
||
317 | return fz_rethrow(error, "cannot parse compressed xref stream object"); |
||
318 | |||
319 | obj = fz_dict_gets(trailer, "Size"); |
||
320 | if (!obj) |
||
321 | { |
||
322 | fz_drop_obj(trailer); |
||
323 | return fz_throw("xref stream missing Size entry (%d %d R)", num, gen); |
||
324 | } |
||
325 | size = fz_to_int(obj); |
||
326 | |||
327 | if (size > xref->len) |
||
328 | { |
||
329 | pdf_resize_xref(xref, size); |
||
330 | } |
||
331 | |||
332 | if (num < 0 || num >= xref->len) |
||
333 | { |
||
334 | fz_drop_obj(trailer); |
||
335 | return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); |
||
336 | } |
||
337 | |||
338 | obj = fz_dict_gets(trailer, "W"); |
||
339 | if (!obj) { |
||
340 | fz_drop_obj(trailer); |
||
341 | return fz_throw("xref stream missing W entry (%d %d R)", num, gen); |
||
342 | } |
||
343 | w0 = fz_to_int(fz_array_get(obj, 0)); |
||
344 | w1 = fz_to_int(fz_array_get(obj, 1)); |
||
345 | w2 = fz_to_int(fz_array_get(obj, 2)); |
||
346 | |||
347 | index = fz_dict_gets(trailer, "Index"); |
||
348 | |||
349 | error = pdf_open_stream_at(&stm, xref, num, gen, trailer, stm_ofs); |
||
350 | if (error) |
||
351 | { |
||
352 | fz_drop_obj(trailer); |
||
353 | return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen); |
||
354 | } |
||
355 | |||
356 | if (!index) |
||
357 | { |
||
358 | error = pdf_read_new_xref_section(xref, stm, 0, size, w0, w1, w2); |
||
359 | if (error) |
||
360 | { |
||
361 | fz_close(stm); |
||
362 | fz_drop_obj(trailer); |
||
363 | return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen); |
||
364 | } |
||
365 | } |
||
366 | else |
||
367 | { |
||
368 | for (t = 0; t < fz_array_len(index); t += 2) |
||
369 | { |
||
370 | int i0 = fz_to_int(fz_array_get(index, t + 0)); |
||
371 | int i1 = fz_to_int(fz_array_get(index, t + 1)); |
||
372 | error = pdf_read_new_xref_section(xref, stm, i0, i1, w0, w1, w2); |
||
373 | if (error) |
||
374 | { |
||
375 | fz_close(stm); |
||
376 | fz_drop_obj(trailer); |
||
377 | return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen); |
||
378 | } |
||
379 | } |
||
380 | } |
||
381 | |||
382 | fz_close(stm); |
||
383 | |||
384 | *trailerp = trailer; |
||
385 | |||
386 | return fz_okay; |
||
387 | } |
||
388 | |||
389 | static fz_error |
||
390 | pdf_read_xref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap) |
||
391 | { |
||
392 | fz_error error; |
||
393 | int c; |
||
394 | |||
395 | fz_seek(xref->file, ofs, 0); |
||
396 | |||
397 | while (iswhite(fz_peek_byte(xref->file))) |
||
398 | fz_read_byte(xref->file); |
||
399 | |||
400 | c = fz_peek_byte(xref->file); |
||
401 | if (c == 'x') |
||
402 | { |
||
403 | error = pdf_read_old_xref(trailerp, xref, buf, cap); |
||
404 | if (error) |
||
405 | return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); |
||
406 | } |
||
407 | else if (c >= '0' && c <= '9') |
||
408 | { |
||
409 | error = pdf_read_new_xref(trailerp, xref, buf, cap); |
||
410 | if (error) |
||
411 | return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); |
||
412 | } |
||
413 | else |
||
414 | { |
||
415 | return fz_throw("cannot recognize xref format"); |
||
416 | } |
||
417 | |||
418 | return fz_okay; |
||
419 | } |
||
420 | |||
421 | static fz_error |
||
422 | pdf_read_xref_sections(pdf_xref *xref, int ofs, char *buf, int cap) |
||
423 | { |
||
424 | fz_error error; |
||
425 | fz_obj *trailer; |
||
426 | fz_obj *prev; |
||
427 | fz_obj *xrefstm; |
||
428 | |||
429 | error = pdf_read_xref(&trailer, xref, ofs, buf, cap); |
||
430 | if (error) |
||
431 | return fz_rethrow(error, "cannot read xref section"); |
||
432 | |||
433 | /* FIXME: do we overwrite free entries properly? */ |
||
434 | xrefstm = fz_dict_gets(trailer, "XRefStm"); |
||
435 | if (xrefstm) |
||
436 | { |
||
437 | error = pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf, cap); |
||
438 | if (error) |
||
439 | { |
||
440 | fz_drop_obj(trailer); |
||
441 | return fz_rethrow(error, "cannot read /XRefStm xref section"); |
||
442 | } |
||
443 | } |
||
444 | |||
445 | prev = fz_dict_gets(trailer, "Prev"); |
||
446 | if (prev) |
||
447 | { |
||
448 | error = pdf_read_xref_sections(xref, fz_to_int(prev), buf, cap); |
||
449 | if (error) |
||
450 | { |
||
451 | fz_drop_obj(trailer); |
||
452 | return fz_rethrow(error, "cannot read /Prev xref section"); |
||
453 | } |
||
454 | } |
||
455 | |||
456 | fz_drop_obj(trailer); |
||
457 | return fz_okay; |
||
458 | } |
||
459 | |||
460 | /* |
||
461 | * load xref tables from pdf |
||
462 | */ |
||
463 | |||
464 | static fz_error |
||
465 | pdf_load_xref(pdf_xref *xref, char *buf, int bufsize) |
||
466 | { |
||
467 | fz_error error; |
||
468 | fz_obj *size; |
||
469 | int i; |
||
470 | |||
471 | error = pdf_load_version(xref); |
||
472 | if (error) |
||
473 | return fz_rethrow(error, "cannot read version marker"); |
||
474 | |||
475 | error = pdf_read_start_xref(xref); |
||
476 | if (error) |
||
477 | return fz_rethrow(error, "cannot read startxref"); |
||
478 | |||
479 | error = pdf_read_trailer(xref, buf, bufsize); |
||
480 | if (error) |
||
481 | return fz_rethrow(error, "cannot read trailer"); |
||
482 | |||
483 | size = fz_dict_gets(xref->trailer, "Size"); |
||
484 | if (!size) |
||
485 | return fz_throw("trailer missing Size entry"); |
||
486 | |||
487 | pdf_resize_xref(xref, fz_to_int(size)); |
||
488 | |||
489 | error = pdf_read_xref_sections(xref, xref->startxref, buf, bufsize); |
||
490 | if (error) |
||
491 | return fz_rethrow(error, "cannot read xref"); |
||
492 | |||
493 | /* broken pdfs where first object is not free */ |
||
494 | if (xref->table[0].type != 'f') |
||
495 | return fz_throw("first object in xref is not free"); |
||
496 | |||
497 | /* broken pdfs where object offsets are out of range */ |
||
498 | for (i = 0; i < xref->len; i++) |
||
499 | { |
||
500 | if (xref->table[i].type == 'n') |
||
501 | if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size) |
||
502 | return fz_throw("object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i); |
||
503 | if (xref->table[i].type == 'o') |
||
504 | if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n') |
||
505 | return fz_throw("invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i); |
||
506 | } |
||
507 | |||
508 | return fz_okay; |
||
509 | } |
||
510 | |||
511 | /* |
||
512 | * Initialize and load xref tables. |
||
513 | * If password is not null, try to decrypt. |
||
514 | */ |
||
515 | |||
516 | fz_error |
||
517 | pdf_open_xref_with_stream(pdf_xref **xrefp, fz_stream *file, char *password) |
||
518 | { |
||
519 | pdf_xref *xref; |
||
520 | fz_error error; |
||
521 | fz_obj *encrypt, *id; |
||
522 | fz_obj *dict, *obj; |
||
523 | int i, repaired = 0; |
||
524 | |||
525 | /* install pdf specific callback */ |
||
526 | fz_resolve_indirect = pdf_resolve_indirect; |
||
527 | |||
528 | xref = fz_malloc(sizeof(pdf_xref)); |
||
529 | |||
530 | memset(xref, 0, sizeof(pdf_xref)); |
||
531 | |||
532 | xref->file = fz_keep_stream(file); |
||
533 | |||
534 | error = pdf_load_xref(xref, xref->scratch, sizeof xref->scratch); |
||
535 | if (error) |
||
536 | { |
||
537 | fz_catch(error, "trying to repair"); |
||
538 | if (xref->table) |
||
539 | { |
||
540 | fz_free(xref->table); |
||
541 | xref->table = NULL; |
||
542 | xref->len = 0; |
||
543 | } |
||
544 | if (xref->trailer) |
||
545 | { |
||
546 | fz_drop_obj(xref->trailer); |
||
547 | xref->trailer = NULL; |
||
548 | } |
||
549 | error = pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch); |
||
550 | if (error) |
||
551 | { |
||
552 | pdf_free_xref(xref); |
||
553 | return fz_rethrow(error, "cannot repair document"); |
||
554 | } |
||
555 | repaired = 1; |
||
556 | } |
||
557 | |||
558 | encrypt = fz_dict_gets(xref->trailer, "Encrypt"); |
||
559 | id = fz_dict_gets(xref->trailer, "ID"); |
||
560 | if (fz_is_dict(encrypt)) |
||
561 | { |
||
562 | error = pdf_new_crypt(&xref->crypt, encrypt, id); |
||
563 | if (error) |
||
564 | { |
||
565 | pdf_free_xref(xref); |
||
566 | return fz_rethrow(error, "cannot decrypt document"); |
||
567 | } |
||
568 | } |
||
569 | |||
570 | if (pdf_needs_password(xref)) |
||
571 | { |
||
572 | /* Only care if we have a password */ |
||
573 | if (password) |
||
574 | { |
||
575 | int okay = pdf_authenticate_password(xref, password); |
||
576 | if (!okay) |
||
577 | { |
||
578 | pdf_free_xref(xref); |
||
579 | return fz_throw("invalid password"); |
||
580 | } |
||
581 | } |
||
582 | } |
||
583 | |||
584 | if (repaired) |
||
585 | { |
||
586 | int hasroot, hasinfo; |
||
587 | |||
588 | error = pdf_repair_obj_stms(xref); |
||
589 | if (error) |
||
590 | { |
||
591 | pdf_free_xref(xref); |
||
592 | return fz_rethrow(error, "cannot repair document"); |
||
593 | } |
||
594 | |||
595 | hasroot = fz_dict_gets(xref->trailer, "Root") != NULL; |
||
596 | hasinfo = fz_dict_gets(xref->trailer, "Info") != NULL; |
||
597 | |||
598 | for (i = 1; i < xref->len; i++) |
||
599 | { |
||
600 | if (xref->table[i].type == 0 || xref->table[i].type == 'f') |
||
601 | continue; |
||
602 | |||
603 | error = pdf_load_object(&dict, xref, i, 0); |
||
604 | if (error) |
||
605 | { |
||
606 | fz_catch(error, "ignoring broken object (%d 0 R)", i); |
||
607 | continue; |
||
608 | } |
||
609 | |||
610 | if (!hasroot) |
||
611 | { |
||
612 | obj = fz_dict_gets(dict, "Type"); |
||
613 | if (fz_is_name(obj) && !strcmp(fz_to_name(obj), "Catalog")) |
||
614 | { |
||
615 | obj = fz_new_indirect(i, 0, xref); |
||
616 | fz_dict_puts(xref->trailer, "Root", obj); |
||
617 | fz_drop_obj(obj); |
||
618 | } |
||
619 | } |
||
620 | |||
621 | if (!hasinfo) |
||
622 | { |
||
623 | if (fz_dict_gets(dict, "Creator") || fz_dict_gets(dict, "Producer")) |
||
624 | { |
||
625 | obj = fz_new_indirect(i, 0, xref); |
||
626 | fz_dict_puts(xref->trailer, "Info", obj); |
||
627 | fz_drop_obj(obj); |
||
628 | } |
||
629 | } |
||
630 | |||
631 | fz_drop_obj(dict); |
||
632 | } |
||
633 | } |
||
634 | |||
635 | *xrefp = xref; |
||
636 | return fz_okay; |
||
637 | } |
||
638 | |||
639 | void |
||
640 | pdf_free_xref(pdf_xref *xref) |
||
641 | { |
||
642 | int i; |
||
643 | |||
644 | if (xref->store) |
||
645 | pdf_free_store(xref->store); |
||
646 | |||
647 | if (xref->table) |
||
648 | { |
||
649 | for (i = 0; i < xref->len; i++) |
||
650 | { |
||
651 | if (xref->table[i].obj) |
||
652 | { |
||
653 | fz_drop_obj(xref->table[i].obj); |
||
654 | xref->table[i].obj = NULL; |
||
655 | } |
||
656 | } |
||
657 | fz_free(xref->table); |
||
658 | } |
||
659 | |||
660 | if (xref->page_objs) |
||
661 | { |
||
662 | for (i = 0; i < xref->page_len; i++) |
||
663 | fz_drop_obj(xref->page_objs[i]); |
||
664 | fz_free(xref->page_objs); |
||
665 | } |
||
666 | |||
667 | if (xref->page_refs) |
||
668 | { |
||
669 | for (i = 0; i < xref->page_len; i++) |
||
670 | fz_drop_obj(xref->page_refs[i]); |
||
671 | fz_free(xref->page_refs); |
||
672 | } |
||
673 | |||
674 | if (xref->file) |
||
675 | fz_close(xref->file); |
||
676 | if (xref->trailer) |
||
677 | fz_drop_obj(xref->trailer); |
||
678 | if (xref->crypt) |
||
679 | pdf_free_crypt(xref->crypt); |
||
680 | |||
681 | fz_free(xref); |
||
682 | } |
||
683 | |||
684 | void |
||
685 | pdf_debug_xref(pdf_xref *xref) |
||
686 | { |
||
687 | int i; |
||
688 | printf("xref\n0 %d\n", xref->len); |
||
689 | for (i = 0; i < xref->len; i++) |
||
690 | { |
||
691 | printf("%05d: %010d %05d %c (stm_ofs=%d)\n", i, |
||
692 | xref->table[i].ofs, |
||
693 | xref->table[i].gen, |
||
694 | xref->table[i].type ? xref->table[i].type : '-', |
||
695 | xref->table[i].stm_ofs); |
||
696 | } |
||
697 | } |
||
698 | |||
699 | /* |
||
700 | * compressed object streams |
||
701 | */ |
||
702 | |||
703 | static fz_error |
||
704 | pdf_load_obj_stm(pdf_xref *xref, int num, int gen, char *buf, int cap) |
||
705 | { |
||
706 | fz_error error; |
||
707 | fz_stream *stm; |
||
708 | fz_obj *objstm; |
||
709 | int *numbuf; |
||
710 | int *ofsbuf; |
||
711 | |||
712 | fz_obj *obj; |
||
713 | int first; |
||
714 | int count; |
||
715 | int i, n; |
||
716 | int tok; |
||
717 | |||
718 | error = pdf_load_object(&objstm, xref, num, gen); |
||
719 | if (error) |
||
720 | return fz_rethrow(error, "cannot load object stream object (%d %d R)", num, gen); |
||
721 | |||
722 | count = fz_to_int(fz_dict_gets(objstm, "N")); |
||
723 | first = fz_to_int(fz_dict_gets(objstm, "First")); |
||
724 | |||
725 | numbuf = fz_calloc(count, sizeof(int)); |
||
726 | ofsbuf = fz_calloc(count, sizeof(int)); |
||
727 | |||
728 | error = pdf_open_stream(&stm, xref, num, gen); |
||
729 | if (error) |
||
730 | { |
||
731 | error = fz_rethrow(error, "cannot open object stream (%d %d R)", num, gen); |
||
732 | goto cleanupbuf; |
||
733 | } |
||
734 | |||
735 | for (i = 0; i < count; i++) |
||
736 | { |
||
737 | error = pdf_lex(&tok, stm, buf, cap, &n); |
||
738 | if (error || tok != PDF_TOK_INT) |
||
739 | { |
||
740 | error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); |
||
741 | goto cleanupstm; |
||
742 | } |
||
743 | numbuf[i] = atoi(buf); |
||
744 | |||
745 | error = pdf_lex(&tok, stm, buf, cap, &n); |
||
746 | if (error || tok != PDF_TOK_INT) |
||
747 | { |
||
748 | error = fz_rethrow(error, "corrupt object stream (%d %d R)", num, gen); |
||
749 | goto cleanupstm; |
||
750 | } |
||
751 | ofsbuf[i] = atoi(buf); |
||
752 | } |
||
753 | |||
754 | fz_seek(stm, first, 0); |
||
755 | |||
756 | for (i = 0; i < count; i++) |
||
757 | { |
||
758 | fz_seek(stm, first + ofsbuf[i], 0); |
||
759 | |||
760 | error = pdf_parse_stm_obj(&obj, xref, stm, buf, cap); |
||
761 | if (error) |
||
762 | { |
||
763 | error = fz_rethrow(error, "cannot parse object %d in stream (%d %d R)", i, num, gen); |
||
764 | goto cleanupstm; |
||
765 | } |
||
766 | |||
767 | if (numbuf[i] < 1 || numbuf[i] >= xref->len) |
||
768 | { |
||
769 | fz_drop_obj(obj); |
||
770 | error = fz_throw("object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); |
||
771 | goto cleanupstm; |
||
772 | } |
||
773 | |||
774 | if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) |
||
775 | { |
||
776 | if (xref->table[numbuf[i]].obj) |
||
777 | fz_drop_obj(xref->table[numbuf[i]].obj); |
||
778 | xref->table[numbuf[i]].obj = obj; |
||
779 | } |
||
780 | else |
||
781 | { |
||
782 | fz_drop_obj(obj); |
||
783 | } |
||
784 | } |
||
785 | |||
786 | fz_close(stm); |
||
787 | fz_free(ofsbuf); |
||
788 | fz_free(numbuf); |
||
789 | fz_drop_obj(objstm); |
||
790 | return fz_okay; |
||
791 | |||
792 | cleanupstm: |
||
793 | fz_close(stm); |
||
794 | cleanupbuf: |
||
795 | fz_free(ofsbuf); |
||
796 | fz_free(numbuf); |
||
797 | fz_drop_obj(objstm); |
||
798 | return error; /* already rethrown */ |
||
799 | } |
||
800 | |||
801 | /* |
||
802 | * object loading |
||
803 | */ |
||
804 | |||
805 | fz_error |
||
806 | pdf_cache_object(pdf_xref *xref, int num, int gen) |
||
807 | { |
||
808 | fz_error error; |
||
809 | pdf_xref_entry *x; |
||
810 | int rnum, rgen; |
||
811 | |||
812 | if (num < 0 || num >= xref->len) |
||
813 | return fz_throw("object out of range (%d %d R); xref size %d", num, gen, xref->len); |
||
814 | |||
815 | x = &xref->table[num]; |
||
816 | |||
817 | if (x->obj) |
||
818 | return fz_okay; |
||
819 | |||
820 | if (x->type == 'f') |
||
821 | { |
||
822 | x->obj = fz_new_null(); |
||
823 | return fz_okay; |
||
824 | } |
||
825 | else if (x->type == 'n') |
||
826 | { |
||
827 | fz_seek(xref->file, x->ofs, 0); |
||
828 | |||
829 | error = pdf_parse_ind_obj(&x->obj, xref, xref->file, xref->scratch, sizeof xref->scratch, |
||
830 | &rnum, &rgen, &x->stm_ofs); |
||
831 | if (error) |
||
832 | return fz_rethrow(error, "cannot parse object (%d %d R)", num, gen); |
||
833 | |||
834 | if (rnum != num) |
||
835 | return fz_throw("found object (%d %d R) instead of (%d %d R)", rnum, rgen, num, gen); |
||
836 | |||
837 | if (xref->crypt) |
||
838 | pdf_crypt_obj(xref->crypt, x->obj, num, gen); |
||
839 | } |
||
840 | else if (x->type == 'o') |
||
841 | { |
||
842 | if (!x->obj) |
||
843 | { |
||
844 | error = pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch); |
||
845 | if (error) |
||
846 | return fz_rethrow(error, "cannot load object stream containing object (%d %d R)", num, gen); |
||
847 | if (!x->obj) |
||
848 | return fz_throw("object (%d %d R) was not found in its object stream", num, gen); |
||
849 | } |
||
850 | } |
||
851 | else |
||
852 | { |
||
853 | return fz_throw("assert: corrupt xref struct"); |
||
854 | } |
||
855 | |||
856 | return fz_okay; |
||
857 | } |
||
858 | |||
859 | fz_error |
||
860 | pdf_load_object(fz_obj **objp, pdf_xref *xref, int num, int gen) |
||
861 | { |
||
862 | fz_error error; |
||
863 | |||
864 | error = pdf_cache_object(xref, num, gen); |
||
865 | if (error) |
||
866 | return fz_rethrow(error, "cannot load object (%d %d R) into cache", num, gen); |
||
867 | |||
868 | assert(xref->table[num].obj); |
||
869 | |||
870 | *objp = fz_keep_obj(xref->table[num].obj); |
||
871 | |||
872 | return fz_okay; |
||
873 | } |
||
874 | |||
875 | fz_obj * |
||
876 | pdf_resolve_indirect(fz_obj *ref) |
||
877 | { |
||
878 | if (fz_is_indirect(ref)) |
||
879 | { |
||
880 | pdf_xref *xref = fz_get_indirect_xref(ref); |
||
881 | int num = fz_to_num(ref); |
||
882 | int gen = fz_to_gen(ref); |
||
883 | if (xref) |
||
884 | { |
||
885 | fz_error error = pdf_cache_object(xref, num, gen); |
||
886 | if (error) |
||
887 | { |
||
888 | fz_catch(error, "cannot load object (%d %d R) into cache", num, gen); |
||
889 | return ref; |
||
890 | } |
||
891 | if (xref->table[num].obj) |
||
892 | return xref->table[num].obj; |
||
893 | } |
||
894 | } |
||
895 | return ref; |
||
896 | } |
||
897 | |||
898 | /* Replace numbered object -- for use by pdfclean and similar tools */ |
||
899 | void |
||
900 | pdf_update_object(pdf_xref *xref, int num, int gen, fz_obj *newobj) |
||
901 | { |
||
902 | pdf_xref_entry *x; |
||
903 | |||
904 | if (num < 0 || num >= xref->len) |
||
905 | { |
||
906 | fz_warn("object out of range (%d %d R); xref size %d", num, gen, xref->len); |
||
907 | return; |
||
908 | } |
||
909 | |||
910 | x = &xref->table[num]; |
||
911 | |||
912 | if (x->obj) |
||
913 | fz_drop_obj(x->obj); |
||
914 | |||
915 | x->obj = fz_keep_obj(newobj); |
||
916 | x->type = 'n'; |
||
917 | x->ofs = 0; |
||
918 | } |
||
919 | |||
920 | /* |
||
921 | * Convenience function to open a file then call pdf_open_xref_with_stream. |
||
922 | */ |
||
923 | |||
924 | fz_error |
||
925 | pdf_open_xref(pdf_xref **xrefp, const char *filename, char *password) |
||
926 | { |
||
927 | fz_error error; |
||
928 | fz_stream *file; |
||
929 | |||
930 | file = fz_open_file(filename); |
||
931 | if (!file) |
||
932 | return fz_throw("cannot open file '%s': %s", filename, strerror(errno)); |
||
933 | |||
934 | error = pdf_open_xref_with_stream(xrefp, file, password); |
||
935 | if (error) |
||
936 | return fz_rethrow(error, "cannot load document '%s'", filename); |
||
937 | |||
938 | fz_close(file); |
||
939 | return fz_okay; |
||
940 | }>>>>>>>>>>=>=>>=>>>><>>><>>><>>>>>>=>>=>>=>>> |