Rev 4680 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4680 | right-hear | 1 | #include "fitz.h" |
2 | #include "mupdf.h" |
||
3 | |||
4 | struct info |
||
5 | { |
||
6 | fz_obj *resources; |
||
7 | fz_obj *mediabox; |
||
8 | fz_obj *cropbox; |
||
9 | fz_obj *rotate; |
||
10 | }; |
||
11 | |||
12 | int |
||
13 | pdf_count_pages(pdf_xref *xref) |
||
14 | { |
||
15 | return xref->page_len; |
||
16 | } |
||
17 | |||
18 | int |
||
19 | pdf_find_page_number(pdf_xref *xref, fz_obj *page) |
||
20 | { |
||
21 | int i, num = fz_to_num(page); |
||
22 | for (i = 0; i < xref->page_len; i++) |
||
23 | if (num == fz_to_num(xref->page_refs[i])) |
||
24 | return i; |
||
25 | return -1; |
||
26 | } |
||
27 | |||
28 | static void |
||
29 | pdf_load_page_tree_node(pdf_xref *xref, fz_obj *node, struct info info) |
||
30 | { |
||
31 | fz_obj *dict, *kids, *count; |
||
32 | fz_obj *obj, *tmp; |
||
33 | int i, n; |
||
34 | |||
35 | /* prevent infinite recursion */ |
||
36 | if (fz_dict_gets(node, ".seen")) |
||
37 | return; |
||
38 | |||
39 | kids = fz_dict_gets(node, "Kids"); |
||
40 | count = fz_dict_gets(node, "Count"); |
||
41 | |||
42 | if (fz_is_array(kids) && fz_is_int(count)) |
||
43 | { |
||
44 | obj = fz_dict_gets(node, "Resources"); |
||
45 | if (obj) |
||
46 | info.resources = obj; |
||
47 | obj = fz_dict_gets(node, "MediaBox"); |
||
48 | if (obj) |
||
49 | info.mediabox = obj; |
||
50 | obj = fz_dict_gets(node, "CropBox"); |
||
51 | if (obj) |
||
52 | info.cropbox = obj; |
||
53 | obj = fz_dict_gets(node, "Rotate"); |
||
54 | if (obj) |
||
55 | info.rotate = obj; |
||
56 | |||
57 | tmp = fz_new_null(); |
||
58 | fz_dict_puts(node, ".seen", tmp); |
||
59 | fz_drop_obj(tmp); |
||
60 | |||
61 | n = fz_array_len(kids); |
||
62 | for (i = 0; i < n; i++) |
||
63 | { |
||
64 | obj = fz_array_get(kids, i); |
||
65 | pdf_load_page_tree_node(xref, obj, info); |
||
66 | } |
||
67 | |||
68 | fz_dict_dels(node, ".seen"); |
||
69 | } |
||
70 | else |
||
71 | { |
||
72 | dict = fz_resolve_indirect(node); |
||
73 | |||
74 | if (info.resources && !fz_dict_gets(dict, "Resources")) |
||
75 | fz_dict_puts(dict, "Resources", info.resources); |
||
76 | if (info.mediabox && !fz_dict_gets(dict, "MediaBox")) |
||
77 | fz_dict_puts(dict, "MediaBox", info.mediabox); |
||
78 | if (info.cropbox && !fz_dict_gets(dict, "CropBox")) |
||
79 | fz_dict_puts(dict, "CropBox", info.cropbox); |
||
80 | if (info.rotate && !fz_dict_gets(dict, "Rotate")) |
||
81 | fz_dict_puts(dict, "Rotate", info.rotate); |
||
82 | |||
83 | if (xref->page_len == xref->page_cap) |
||
84 | { |
||
85 | fz_warn("found more pages than expected"); |
||
86 | xref->page_cap ++; |
||
87 | xref->page_refs = fz_realloc(xref->page_refs, xref->page_cap, sizeof(fz_obj*)); |
||
88 | xref->page_objs = fz_realloc(xref->page_objs, xref->page_cap, sizeof(fz_obj*)); |
||
89 | } |
||
90 | |||
91 | xref->page_refs[xref->page_len] = fz_keep_obj(node); |
||
92 | xref->page_objs[xref->page_len] = fz_keep_obj(dict); |
||
93 | xref->page_len ++; |
||
94 | } |
||
95 | } |
||
96 | |||
97 | fz_error |
||
98 | pdf_load_page_tree(pdf_xref *xref) |
||
99 | { |
||
100 | struct info info; |
||
101 | fz_obj *catalog = fz_dict_gets(xref->trailer, "Root"); |
||
102 | fz_obj *pages = fz_dict_gets(catalog, "Pages"); |
||
103 | fz_obj *count = fz_dict_gets(pages, "Count"); |
||
104 | |||
105 | if (!fz_is_dict(pages)) |
||
106 | return fz_throw("missing page tree"); |
||
107 | if (!fz_is_int(count)) |
||
108 | return fz_throw("missing page count"); |
||
109 | |||
110 | xref->page_cap = fz_to_int(count); |
||
111 | xref->page_len = 0; |
||
112 | xref->page_refs = fz_calloc(xref->page_cap, sizeof(fz_obj*)); |
||
113 | xref->page_objs = fz_calloc(xref->page_cap, sizeof(fz_obj*)); |
||
114 | |||
115 | info.resources = NULL; |
||
116 | info.mediabox = NULL; |
||
117 | info.cropbox = NULL; |
||
118 | info.rotate = NULL; |
||
119 | |||
120 | pdf_load_page_tree_node(xref, pages, info); |
||
121 | |||
122 | return fz_okay; |
||
123 | } |
||
124 | |||
125 | /* We need to know whether to install a page-level transparency group */ |
||
126 | |||
127 | static int pdf_resources_use_blending(fz_obj *rdb); |
||
128 | |||
129 | static int |
||
130 | pdf_extgstate_uses_blending(fz_obj *dict) |
||
131 | { |
||
132 | fz_obj *obj = fz_dict_gets(dict, "BM"); |
||
133 | if (fz_is_name(obj) && strcmp(fz_to_name(obj), "Normal")) |
||
134 | return 1; |
||
135 | return 0; |
||
136 | } |
||
137 | |||
138 | static int |
||
139 | pdf_pattern_uses_blending(fz_obj *dict) |
||
140 | { |
||
141 | fz_obj *obj; |
||
142 | obj = fz_dict_gets(dict, "Resources"); |
||
143 | if (pdf_resources_use_blending(obj)) |
||
144 | return 1; |
||
145 | obj = fz_dict_gets(dict, "ExtGState"); |
||
146 | if (pdf_extgstate_uses_blending(obj)) |
||
147 | return 1; |
||
148 | return 0; |
||
149 | } |
||
150 | |||
151 | static int |
||
152 | pdf_xobject_uses_blending(fz_obj *dict) |
||
153 | { |
||
154 | fz_obj *obj = fz_dict_gets(dict, "Resources"); |
||
155 | if (pdf_resources_use_blending(obj)) |
||
156 | return 1; |
||
157 | return 0; |
||
158 | } |
||
159 | |||
160 | static int |
||
161 | pdf_resources_use_blending(fz_obj *rdb) |
||
162 | { |
||
163 | fz_obj *dict; |
||
164 | fz_obj *tmp; |
||
165 | int i; |
||
166 | |||
167 | if (!rdb) |
||
168 | return 0; |
||
169 | |||
170 | /* stop on cyclic resource dependencies */ |
||
171 | if (fz_dict_gets(rdb, ".useBM")) |
||
172 | return fz_to_bool(fz_dict_gets(rdb, ".useBM")); |
||
173 | |||
174 | tmp = fz_new_bool(0); |
||
175 | fz_dict_puts(rdb, ".useBM", tmp); |
||
176 | fz_drop_obj(tmp); |
||
177 | |||
178 | dict = fz_dict_gets(rdb, "ExtGState"); |
||
179 | for (i = 0; i < fz_dict_len(dict); i++) |
||
180 | if (pdf_extgstate_uses_blending(fz_dict_get_val(dict, i))) |
||
181 | goto found; |
||
182 | |||
183 | dict = fz_dict_gets(rdb, "Pattern"); |
||
184 | for (i = 0; i < fz_dict_len(dict); i++) |
||
185 | if (pdf_pattern_uses_blending(fz_dict_get_val(dict, i))) |
||
186 | goto found; |
||
187 | |||
188 | dict = fz_dict_gets(rdb, "XObject"); |
||
189 | for (i = 0; i < fz_dict_len(dict); i++) |
||
190 | if (pdf_xobject_uses_blending(fz_dict_get_val(dict, i))) |
||
191 | goto found; |
||
192 | |||
193 | return 0; |
||
194 | |||
195 | found: |
||
196 | tmp = fz_new_bool(1); |
||
197 | fz_dict_puts(rdb, ".useBM", tmp); |
||
198 | fz_drop_obj(tmp); |
||
199 | return 1; |
||
200 | } |
||
201 | |||
202 | /* we need to combine all sub-streams into one for the content stream interpreter */ |
||
203 | |||
204 | static fz_error |
||
205 | pdf_load_page_contents_array(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list) |
||
206 | { |
||
207 | fz_error error; |
||
208 | fz_buffer *big; |
||
209 | fz_buffer *one; |
||
210 | int i, n; |
||
211 | |||
212 | big = fz_new_buffer(32 * 1024); |
||
213 | |||
214 | n = fz_array_len(list); |
||
215 | for (i = 0; i < n; i++) |
||
216 | { |
||
217 | fz_obj *stm = fz_array_get(list, i); |
||
218 | error = pdf_load_stream(&one, xref, fz_to_num(stm), fz_to_gen(stm)); |
||
219 | if (error) |
||
220 | { |
||
221 | fz_catch(error, "cannot load content stream part %d/%d", i + 1, n); |
||
222 | continue; |
||
223 | } |
||
224 | |||
225 | if (big->len + one->len + 1 > big->cap) |
||
226 | fz_resize_buffer(big, big->len + one->len + 1); |
||
227 | memcpy(big->data + big->len, one->data, one->len); |
||
228 | big->data[big->len + one->len] = ' '; |
||
229 | big->len += one->len + 1; |
||
230 | |||
231 | fz_drop_buffer(one); |
||
232 | } |
||
233 | |||
234 | if (n > 0 && big->len == 0) |
||
235 | { |
||
236 | fz_drop_buffer(big); |
||
237 | return fz_throw("cannot load content stream"); |
||
238 | } |
||
239 | |||
240 | *bigbufp = big; |
||
241 | return fz_okay; |
||
242 | } |
||
243 | |||
244 | static fz_error |
||
245 | pdf_load_page_contents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj) |
||
246 | { |
||
247 | fz_error error; |
||
248 | |||
249 | if (fz_is_array(obj)) |
||
250 | { |
||
251 | error = pdf_load_page_contents_array(bufp, xref, obj); |
||
252 | if (error) |
||
253 | return fz_rethrow(error, "cannot load content stream array"); |
||
254 | } |
||
255 | else if (pdf_is_stream(xref, fz_to_num(obj), fz_to_gen(obj))) |
||
256 | { |
||
257 | error = pdf_load_stream(bufp, xref, fz_to_num(obj), fz_to_gen(obj)); |
||
258 | if (error) |
||
259 | return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_to_num(obj)); |
||
260 | } |
||
261 | else |
||
262 | { |
||
263 | fz_warn("page contents missing, leaving page blank"); |
||
264 | *bufp = fz_new_buffer(0); |
||
265 | } |
||
266 | |||
267 | return fz_okay; |
||
268 | } |
||
269 | |||
270 | fz_error |
||
271 | pdf_load_page(pdf_page **pagep, pdf_xref *xref, int number) |
||
272 | { |
||
273 | fz_error error; |
||
274 | pdf_page *page; |
||
275 | pdf_annot *annot; |
||
276 | fz_obj *pageobj, *pageref; |
||
277 | fz_obj *obj; |
||
278 | fz_bbox bbox; |
||
279 | |||
280 | if (number < 0 || number >= xref->page_len) |
||
281 | return fz_throw("cannot find page %d", number + 1); |
||
282 | |||
283 | /* Ensure that we have a store for resource objects */ |
||
284 | if (!xref->store) |
||
285 | xref->store = pdf_new_store(); |
||
286 | |||
287 | pageobj = xref->page_objs[number]; |
||
288 | pageref = xref->page_refs[number]; |
||
289 | |||
290 | page = fz_malloc(sizeof(pdf_page)); |
||
291 | page->resources = NULL; |
||
292 | page->contents = NULL; |
||
293 | page->transparency = 0; |
||
294 | page->links = NULL; |
||
295 | page->annots = NULL; |
||
296 | |||
297 | obj = fz_dict_gets(pageobj, "MediaBox"); |
||
298 | bbox = fz_round_rect(pdf_to_rect(obj)); |
||
299 | if (fz_is_empty_rect(pdf_to_rect(obj))) |
||
300 | { |
||
301 | fz_warn("cannot find page size for page %d", number + 1); |
||
302 | bbox.x0 = 0; |
||
303 | bbox.y0 = 0; |
||
304 | bbox.x1 = 612; |
||
305 | bbox.y1 = 792; |
||
306 | } |
||
307 | |||
308 | obj = fz_dict_gets(pageobj, "CropBox"); |
||
309 | if (fz_is_array(obj)) |
||
310 | { |
||
311 | fz_bbox cropbox = fz_round_rect(pdf_to_rect(obj)); |
||
312 | bbox = fz_intersect_bbox(bbox, cropbox); |
||
313 | } |
||
314 | |||
315 | page->mediabox.x0 = MIN(bbox.x0, bbox.x1); |
||
316 | page->mediabox.y0 = MIN(bbox.y0, bbox.y1); |
||
317 | page->mediabox.x1 = MAX(bbox.x0, bbox.x1); |
||
318 | page->mediabox.y1 = MAX(bbox.y0, bbox.y1); |
||
319 | |||
320 | if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) |
||
321 | { |
||
322 | fz_warn("invalid page size in page %d", number + 1); |
||
323 | page->mediabox = fz_unit_rect; |
||
324 | } |
||
325 | |||
326 | page->rotate = fz_to_int(fz_dict_gets(pageobj, "Rotate")); |
||
327 | |||
328 | obj = fz_dict_gets(pageobj, "Annots"); |
||
329 | if (obj) |
||
330 | { |
||
331 | pdf_load_links(&page->links, xref, obj); |
||
332 | pdf_load_annots(&page->annots, xref, obj); |
||
333 | } |
||
334 | |||
335 | page->resources = fz_dict_gets(pageobj, "Resources"); |
||
336 | if (page->resources) |
||
337 | fz_keep_obj(page->resources); |
||
338 | |||
339 | obj = fz_dict_gets(pageobj, "Contents"); |
||
340 | error = pdf_load_page_contents(&page->contents, xref, obj); |
||
341 | if (error) |
||
342 | { |
||
343 | pdf_free_page(page); |
||
344 | return fz_rethrow(error, "cannot load page %d contents (%d 0 R)", number + 1, fz_to_num(pageref)); |
||
345 | } |
||
346 | |||
347 | if (pdf_resources_use_blending(page->resources)) |
||
348 | page->transparency = 1; |
||
349 | |||
350 | for (annot = page->annots; annot && !page->transparency; annot = annot->next) |
||
351 | if (pdf_resources_use_blending(annot->ap->resources)) |
||
352 | page->transparency = 1; |
||
353 | |||
354 | *pagep = page; |
||
355 | return fz_okay; |
||
356 | } |
||
357 | |||
358 | void |
||
359 | pdf_free_page(pdf_page *page) |
||
360 | { |
||
361 | if (page->resources) |
||
362 | fz_drop_obj(page->resources); |
||
363 | if (page->contents) |
||
364 | fz_drop_buffer(page->contents); |
||
365 | if (page->links) |
||
366 | pdf_free_link(page->links); |
||
367 | if (page->annots) |
||
368 | pdf_free_annot(page->annots); |
||
369 | fz_free(page); |
||
370 | }>>>>>>>>> |