0,0 → 1,370 |
#include "fitz.h" |
#include "mupdf.h" |
|
struct info |
{ |
fz_obj *resources; |
fz_obj *mediabox; |
fz_obj *cropbox; |
fz_obj *rotate; |
}; |
|
int |
pdf_count_pages(pdf_xref *xref) |
{ |
return xref->page_len; |
} |
|
int |
pdf_find_page_number(pdf_xref *xref, fz_obj *page) |
{ |
int i, num = fz_to_num(page); |
for (i = 0; i < xref->page_len; i++) |
if (num == fz_to_num(xref->page_refs[i])) |
return i; |
return -1; |
} |
|
static void |
pdf_load_page_tree_node(pdf_xref *xref, fz_obj *node, struct info info) |
{ |
fz_obj *dict, *kids, *count; |
fz_obj *obj, *tmp; |
int i, n; |
|
/* prevent infinite recursion */ |
if (fz_dict_gets(node, ".seen")) |
return; |
|
kids = fz_dict_gets(node, "Kids"); |
count = fz_dict_gets(node, "Count"); |
|
if (fz_is_array(kids) && fz_is_int(count)) |
{ |
obj = fz_dict_gets(node, "Resources"); |
if (obj) |
info.resources = obj; |
obj = fz_dict_gets(node, "MediaBox"); |
if (obj) |
info.mediabox = obj; |
obj = fz_dict_gets(node, "CropBox"); |
if (obj) |
info.cropbox = obj; |
obj = fz_dict_gets(node, "Rotate"); |
if (obj) |
info.rotate = obj; |
|
tmp = fz_new_null(); |
fz_dict_puts(node, ".seen", tmp); |
fz_drop_obj(tmp); |
|
n = fz_array_len(kids); |
for (i = 0; i < n; i++) |
{ |
obj = fz_array_get(kids, i); |
pdf_load_page_tree_node(xref, obj, info); |
} |
|
fz_dict_dels(node, ".seen"); |
} |
else |
{ |
dict = fz_resolve_indirect(node); |
|
if (info.resources && !fz_dict_gets(dict, "Resources")) |
fz_dict_puts(dict, "Resources", info.resources); |
if (info.mediabox && !fz_dict_gets(dict, "MediaBox")) |
fz_dict_puts(dict, "MediaBox", info.mediabox); |
if (info.cropbox && !fz_dict_gets(dict, "CropBox")) |
fz_dict_puts(dict, "CropBox", info.cropbox); |
if (info.rotate && !fz_dict_gets(dict, "Rotate")) |
fz_dict_puts(dict, "Rotate", info.rotate); |
|
if (xref->page_len == xref->page_cap) |
{ |
fz_warn("found more pages than expected"); |
xref->page_cap ++; |
xref->page_refs = fz_realloc(xref->page_refs, xref->page_cap, sizeof(fz_obj*)); |
xref->page_objs = fz_realloc(xref->page_objs, xref->page_cap, sizeof(fz_obj*)); |
} |
|
xref->page_refs[xref->page_len] = fz_keep_obj(node); |
xref->page_objs[xref->page_len] = fz_keep_obj(dict); |
xref->page_len ++; |
} |
} |
|
fz_error |
pdf_load_page_tree(pdf_xref *xref) |
{ |
struct info info; |
fz_obj *catalog = fz_dict_gets(xref->trailer, "Root"); |
fz_obj *pages = fz_dict_gets(catalog, "Pages"); |
fz_obj *count = fz_dict_gets(pages, "Count"); |
|
if (!fz_is_dict(pages)) |
return fz_throw("missing page tree"); |
if (!fz_is_int(count)) |
return fz_throw("missing page count"); |
|
xref->page_cap = fz_to_int(count); |
xref->page_len = 0; |
xref->page_refs = fz_calloc(xref->page_cap, sizeof(fz_obj*)); |
xref->page_objs = fz_calloc(xref->page_cap, sizeof(fz_obj*)); |
|
info.resources = NULL; |
info.mediabox = NULL; |
info.cropbox = NULL; |
info.rotate = NULL; |
|
pdf_load_page_tree_node(xref, pages, info); |
|
return fz_okay; |
} |
|
/* We need to know whether to install a page-level transparency group */ |
|
static int pdf_resources_use_blending(fz_obj *rdb); |
|
static int |
pdf_extgstate_uses_blending(fz_obj *dict) |
{ |
fz_obj *obj = fz_dict_gets(dict, "BM"); |
if (fz_is_name(obj) && strcmp(fz_to_name(obj), "Normal")) |
return 1; |
return 0; |
} |
|
static int |
pdf_pattern_uses_blending(fz_obj *dict) |
{ |
fz_obj *obj; |
obj = fz_dict_gets(dict, "Resources"); |
if (pdf_resources_use_blending(obj)) |
return 1; |
obj = fz_dict_gets(dict, "ExtGState"); |
if (pdf_extgstate_uses_blending(obj)) |
return 1; |
return 0; |
} |
|
static int |
pdf_xobject_uses_blending(fz_obj *dict) |
{ |
fz_obj *obj = fz_dict_gets(dict, "Resources"); |
if (pdf_resources_use_blending(obj)) |
return 1; |
return 0; |
} |
|
static int |
pdf_resources_use_blending(fz_obj *rdb) |
{ |
fz_obj *dict; |
fz_obj *tmp; |
int i; |
|
if (!rdb) |
return 0; |
|
/* stop on cyclic resource dependencies */ |
if (fz_dict_gets(rdb, ".useBM")) |
return fz_to_bool(fz_dict_gets(rdb, ".useBM")); |
|
tmp = fz_new_bool(0); |
fz_dict_puts(rdb, ".useBM", tmp); |
fz_drop_obj(tmp); |
|
dict = fz_dict_gets(rdb, "ExtGState"); |
for (i = 0; i < fz_dict_len(dict); i++) |
if (pdf_extgstate_uses_blending(fz_dict_get_val(dict, i))) |
goto found; |
|
dict = fz_dict_gets(rdb, "Pattern"); |
for (i = 0; i < fz_dict_len(dict); i++) |
if (pdf_pattern_uses_blending(fz_dict_get_val(dict, i))) |
goto found; |
|
dict = fz_dict_gets(rdb, "XObject"); |
for (i = 0; i < fz_dict_len(dict); i++) |
if (pdf_xobject_uses_blending(fz_dict_get_val(dict, i))) |
goto found; |
|
return 0; |
|
found: |
tmp = fz_new_bool(1); |
fz_dict_puts(rdb, ".useBM", tmp); |
fz_drop_obj(tmp); |
return 1; |
} |
|
/* we need to combine all sub-streams into one for the content stream interpreter */ |
|
static fz_error |
pdf_load_page_contents_array(fz_buffer **bigbufp, pdf_xref *xref, fz_obj *list) |
{ |
fz_error error; |
fz_buffer *big; |
fz_buffer *one; |
int i, n; |
|
big = fz_new_buffer(32 * 1024); |
|
n = fz_array_len(list); |
for (i = 0; i < n; i++) |
{ |
fz_obj *stm = fz_array_get(list, i); |
error = pdf_load_stream(&one, xref, fz_to_num(stm), fz_to_gen(stm)); |
if (error) |
{ |
fz_catch(error, "cannot load content stream part %d/%d", i + 1, n); |
continue; |
} |
|
if (big->len + one->len + 1 > big->cap) |
fz_resize_buffer(big, big->len + one->len + 1); |
memcpy(big->data + big->len, one->data, one->len); |
big->data[big->len + one->len] = ' '; |
big->len += one->len + 1; |
|
fz_drop_buffer(one); |
} |
|
if (n > 0 && big->len == 0) |
{ |
fz_drop_buffer(big); |
return fz_throw("cannot load content stream"); |
} |
|
*bigbufp = big; |
return fz_okay; |
} |
|
static fz_error |
pdf_load_page_contents(fz_buffer **bufp, pdf_xref *xref, fz_obj *obj) |
{ |
fz_error error; |
|
if (fz_is_array(obj)) |
{ |
error = pdf_load_page_contents_array(bufp, xref, obj); |
if (error) |
return fz_rethrow(error, "cannot load content stream array"); |
} |
else if (pdf_is_stream(xref, fz_to_num(obj), fz_to_gen(obj))) |
{ |
error = pdf_load_stream(bufp, xref, fz_to_num(obj), fz_to_gen(obj)); |
if (error) |
return fz_rethrow(error, "cannot load content stream (%d 0 R)", fz_to_num(obj)); |
} |
else |
{ |
fz_warn("page contents missing, leaving page blank"); |
*bufp = fz_new_buffer(0); |
} |
|
return fz_okay; |
} |
|
fz_error |
pdf_load_page(pdf_page **pagep, pdf_xref *xref, int number) |
{ |
fz_error error; |
pdf_page *page; |
pdf_annot *annot; |
fz_obj *pageobj, *pageref; |
fz_obj *obj; |
fz_bbox bbox; |
|
if (number < 0 || number >= xref->page_len) |
return fz_throw("cannot find page %d", number + 1); |
|
/* Ensure that we have a store for resource objects */ |
if (!xref->store) |
xref->store = pdf_new_store(); |
|
pageobj = xref->page_objs[number]; |
pageref = xref->page_refs[number]; |
|
page = fz_malloc(sizeof(pdf_page)); |
page->resources = NULL; |
page->contents = NULL; |
page->transparency = 0; |
page->links = NULL; |
page->annots = NULL; |
|
obj = fz_dict_gets(pageobj, "MediaBox"); |
bbox = fz_round_rect(pdf_to_rect(obj)); |
if (fz_is_empty_rect(pdf_to_rect(obj))) |
{ |
fz_warn("cannot find page size for page %d", number + 1); |
bbox.x0 = 0; |
bbox.y0 = 0; |
bbox.x1 = 612; |
bbox.y1 = 792; |
} |
|
obj = fz_dict_gets(pageobj, "CropBox"); |
if (fz_is_array(obj)) |
{ |
fz_bbox cropbox = fz_round_rect(pdf_to_rect(obj)); |
bbox = fz_intersect_bbox(bbox, cropbox); |
} |
|
page->mediabox.x0 = MIN(bbox.x0, bbox.x1); |
page->mediabox.y0 = MIN(bbox.y0, bbox.y1); |
page->mediabox.x1 = MAX(bbox.x0, bbox.x1); |
page->mediabox.y1 = MAX(bbox.y0, bbox.y1); |
|
if (page->mediabox.x1 - page->mediabox.x0 < 1 || page->mediabox.y1 - page->mediabox.y0 < 1) |
{ |
fz_warn("invalid page size in page %d", number + 1); |
page->mediabox = fz_unit_rect; |
} |
|
page->rotate = fz_to_int(fz_dict_gets(pageobj, "Rotate")); |
|
obj = fz_dict_gets(pageobj, "Annots"); |
if (obj) |
{ |
pdf_load_links(&page->links, xref, obj); |
pdf_load_annots(&page->annots, xref, obj); |
} |
|
page->resources = fz_dict_gets(pageobj, "Resources"); |
if (page->resources) |
fz_keep_obj(page->resources); |
|
obj = fz_dict_gets(pageobj, "Contents"); |
error = pdf_load_page_contents(&page->contents, xref, obj); |
if (error) |
{ |
pdf_free_page(page); |
return fz_rethrow(error, "cannot load page %d contents (%d 0 R)", number + 1, fz_to_num(pageref)); |
} |
|
if (pdf_resources_use_blending(page->resources)) |
page->transparency = 1; |
|
for (annot = page->annots; annot && !page->transparency; annot = annot->next) |
if (pdf_resources_use_blending(annot->ap->resources)) |
page->transparency = 1; |
|
*pagep = page; |
return fz_okay; |
} |
|
void |
pdf_free_page(pdf_page *page) |
{ |
if (page->resources) |
fz_drop_obj(page->resources); |
if (page->contents) |
fz_drop_buffer(page->contents); |
if (page->links) |
pdf_free_link(page->links); |
if (page->annots) |
pdf_free_annot(page->annots); |
fz_free(page); |
} |