Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
/*
2
 * pdfextract -- the ultimate way to extract images and fonts from pdfs
3
 */
4
 
5
#include "fitz.h"
6
#include "mupdf.h"
7
 
8
/* The open PDF; NULL until main() fills it in via pdf_open_xref(). die() frees it when non-NULL. */
static pdf_xref *xref = NULL;
9
/* Incremented for each -r option; when non-zero, saveimage() converts pixmaps to fz_device_rgb. */
static int dorgb = 0;
10
 
11
/*
 * Report a fatal error and terminate the program.
 *
 * The error is handed to fz_catch with the message "aborting", the global
 * xref is freed if one has been opened, and the process exits with status 1.
 * This function does not return.
 */
void die(fz_error error)
{
	fz_catch(error, "aborting");

	if (xref != NULL)
	{
		pdf_free_xref(xref);
	}

	exit(1);
}
18
 
19
/* Print the command-line synopsis to stderr, then exit with status 1. */
static void usage(void)
{
	static const char *const help[] = {
		"usage: pdfextract [options] file.pdf [object numbers]\n",
		"\t-p\tpassword\n",
		"\t-r\tconvert images to rgb\n"
	};
	size_t i;

	for (i = 0; i < sizeof help / sizeof help[0]; i++)
		fputs(help[i], stderr);

	exit(1);
}
26
 
27
/* Return 1 when obj's "Subtype" entry is the name "Image", otherwise 0. */
static int isimage(fz_obj *obj)
{
	fz_obj *subtype = fz_dict_gets(obj, "Subtype");

	if (!fz_is_name(subtype))
		return 0;

	return strcmp(fz_to_name(subtype), "Image") == 0;
}
32
 
33
/* Return 1 when obj's "Type" entry is the name "FontDescriptor", otherwise 0. */
static int isfontdesc(fz_obj *obj)
{
	fz_obj *kind = fz_dict_gets(obj, "Type");

	if (!fz_is_name(kind))
		return 0;

	return strcmp(fz_to_name(kind), "FontDescriptor") == 0;
}
38
 
39
static void saveimage(int num)
40
{
41
	fz_error error;
42
	fz_pixmap *img;
43
	fz_obj *ref;
44
	char name[1024];
45
 
46
	ref = fz_new_indirect(num, 0, xref);
47
 
48
	/* TODO: detect DCTD and save as jpeg */
49
 
50
	error = pdf_load_image(&img, xref, ref);
51
	if (error)
52
		die(error);
53
 
54
	if (dorgb && img->colorspace && img->colorspace != fz_device_rgb)
55
	{
56
		fz_pixmap *temp;
57
		temp = fz_new_pixmap_with_rect(fz_device_rgb, fz_bound_pixmap(img));
58
		fz_convert_pixmap(img, temp);
59
		fz_drop_pixmap(img);
60
		img = temp;
61
	}
62
 
63
	if (img->n <= 4)
64
	{
65
		sprintf(name, "img-%04d.png", num);
66
		printf("extracting image %s\n", name);
67
		fz_write_png(img, name, 0);
68
	}
69
	else
70
	{
71
		sprintf(name, "img-%04d.pam", num);
72
		printf("extracting image %s\n", name);
73
		fz_write_pam(img, name, 0);
74
	}
75
 
76
	fz_drop_pixmap(img);
77
	fz_drop_obj(ref);
78
}
79
 
80
/*
 * Write the font program referenced by font descriptor `dict` to a file
 * named <FontName>-NNNN.<ext>, where NNNN is `num`.
 *
 * The stream is taken from FontFile (.pfa), FontFile2 (.ttf) or FontFile3
 * (.cff for Subtype Type1C, .cid for Subtype CIDFontType0C); when several
 * are present the last one checked wins. If none is present a warning is
 * issued and nothing is written. Any other failure is fatal (die).
 *
 * The font name comes from the PDF, so it is treated as untrusted: it is
 * copied into a bounded buffer and path separators are replaced with '_'
 * so the output file cannot land outside the working directory.
 */
static void savefont(fz_obj *dict, int num)
{
	fz_error error;
	char name[1024];
	char safename[256];
	char *subtype;
	fz_buffer *buf = NULL;
	fz_obj *stream = NULL;
	fz_obj *obj;
	char *ext = "";
	FILE *f;
	char *fontname = "font";
	size_t i;

	obj = fz_dict_gets(dict, "FontName");
	if (obj)
		fontname = fz_to_name(obj);

	obj = fz_dict_gets(dict, "FontFile");
	if (obj)
	{
		stream = obj;
		ext = "pfa";
	}

	obj = fz_dict_gets(dict, "FontFile2");
	if (obj)
	{
		stream = obj;
		ext = "ttf";
	}

	obj = fz_dict_gets(dict, "FontFile3");
	if (obj)
	{
		stream = obj;

		/* die() does not return, so a missing Subtype is rejected here as well */
		obj = fz_dict_gets(obj, "Subtype");
		if (!fz_is_name(obj))
			die(fz_throw("Invalid font descriptor subtype"));

		subtype = fz_to_name(obj);
		if (!strcmp(subtype, "Type1C"))
			ext = "cff";
		else if (!strcmp(subtype, "CIDFontType0C"))
			ext = "cid";
		else
			die(fz_throw("Unhandled font type '%s'", subtype));
	}

	if (!stream)
	{
		fz_warn("Unhandled font type");
		return;
	}

	/*
	 * pdf_load_stream hands the loaded data back through buf, so nothing is
	 * allocated beforehand: a buffer created here would be overwritten and
	 * never dropped.
	 */
	error = pdf_load_stream(&buf, xref, fz_to_num(stream), fz_to_gen(stream));
	if (error)
		die(error);

	/* Bounded, separator-free copy of the PDF-supplied font name. */
	for (i = 0; i < sizeof safename - 1 && fontname[i] != '\0'; i++)
	{
		char ch = fontname[i];
		safename[i] = (ch == '/' || ch == '\\') ? '_' : ch;
	}
	safename[i] = '\0';

	snprintf(name, sizeof name, "%s-%04d.%s", safename, num, ext);
	printf("extracting font %s\n", name);

	f = fopen(name, "wb");
	if (f == NULL)
		die(fz_throw("Error creating font file"));

	/* fwrite returns size_t; compare in that type to catch short writes */
	if (fwrite(buf->data, 1, buf->len, f) != (size_t)buf->len)
		die(fz_throw("Error writing font file"));

	/* fclose reports failure with EOF, i.e. any non-zero result */
	if (fclose(f) != 0)
		die(fz_throw("Error closing font file"));

	fz_drop_buffer(buf);
}
157
 
158
static void showobject(int num)
159
{
160
	fz_error error;
161
	fz_obj *obj;
162
 
163
	if (!xref)
164
		die(fz_throw("no file specified"));
165
 
166
	error = pdf_load_object(&obj, xref, num, 0);
167
	if (error)
168
		die(error);
169
 
170
	if (isimage(obj))
171
		saveimage(num);
172
	else if (isfontdesc(obj))
173
		savefont(obj, num);
174
 
175
	fz_drop_obj(obj);
176
}
177
 
178
int main(int argc, char **argv)
179
{
180
	fz_error error;
181
	char *infile;
182
	char *password = "";
183
	int c, o;
184
 
185
	while ((c = fz_getopt(argc, argv, "p:r")) != -1)
186
	{
187
		switch (c)
188
		{
189
		case 'p': password = fz_optarg; break;
190
		case 'r': dorgb++; break;
191
		default: usage(); break;
192
		}
193
	}
194
 
195
	if (fz_optind == argc)
196
		usage();
197
 
198
	infile = argv[fz_optind++];
199
	error = pdf_open_xref(&xref, infile, password);
200
	if (error)
201
		die(fz_rethrow(error, "cannot open input file '%s'", infile));
202
 
203
	if (fz_optind == argc)
204
	{
205
		for (o = 0; o < xref->len; o++)
206
			showobject(o);
207
	}
208
	else
209
	{
210
		while (fz_optind < argc)
211
		{
212
			showobject(atoi(argv[fz_optind]));
213
			fz_optind++;
214
		}
215
	}
216
 
217
	pdf_free_xref(xref);
218
 
219
	fz_flush_warnings();
220
 
221
	return 0;
222
}