Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4680 | right-hear | 1 | #include "fitz.h" |
2 | #include "mupdf.h" |
||
3 | |||
4 | /* |
||
5 | * CMap parser |
||
6 | */ |
||
7 | |||
8 | enum |
||
9 | { |
||
10 | TOK_USECMAP = PDF_NUM_TOKENS, |
||
11 | TOK_BEGIN_CODESPACE_RANGE, |
||
12 | TOK_END_CODESPACE_RANGE, |
||
13 | TOK_BEGIN_BF_CHAR, |
||
14 | TOK_END_BF_CHAR, |
||
15 | TOK_BEGIN_BF_RANGE, |
||
16 | TOK_END_BF_RANGE, |
||
17 | TOK_BEGIN_CID_CHAR, |
||
18 | TOK_END_CID_CHAR, |
||
19 | TOK_BEGIN_CID_RANGE, |
||
20 | TOK_END_CID_RANGE, |
||
21 | TOK_END_CMAP |
||
22 | }; |
||
23 | |||
24 | static int |
||
25 | pdf_cmap_token_from_keyword(char *key) |
||
26 | { |
||
27 | if (!strcmp(key, "usecmap")) return TOK_USECMAP; |
||
28 | if (!strcmp(key, "begincodespacerange")) return TOK_BEGIN_CODESPACE_RANGE; |
||
29 | if (!strcmp(key, "endcodespacerange")) return TOK_END_CODESPACE_RANGE; |
||
30 | if (!strcmp(key, "beginbfchar")) return TOK_BEGIN_BF_CHAR; |
||
31 | if (!strcmp(key, "endbfchar")) return TOK_END_BF_CHAR; |
||
32 | if (!strcmp(key, "beginbfrange")) return TOK_BEGIN_BF_RANGE; |
||
33 | if (!strcmp(key, "endbfrange")) return TOK_END_BF_RANGE; |
||
34 | if (!strcmp(key, "begincidchar")) return TOK_BEGIN_CID_CHAR; |
||
35 | if (!strcmp(key, "endcidchar")) return TOK_END_CID_CHAR; |
||
36 | if (!strcmp(key, "begincidrange")) return TOK_BEGIN_CID_RANGE; |
||
37 | if (!strcmp(key, "endcidrange")) return TOK_END_CID_RANGE; |
||
38 | if (!strcmp(key, "endcmap")) return TOK_END_CMAP; |
||
39 | return PDF_TOK_KEYWORD; |
||
40 | } |
||
41 | |||
/*
 * Interpret the first len bytes of buf as a big-endian unsigned number.
 *
 * buf: bytes to decode, most significant byte first.
 * len: number of bytes to read; zero or a negative count yields 0.
 * Returns the decoded value. When more bytes are supplied than fit in an
 * unsigned int, only the low-order bytes survive.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int code = 0;

	/*
	 * Accumulate in an unsigned type: left-shifting a signed int into or
	 * past its sign bit is undefined behaviour, unsigned shifts just wrap.
	 * The "> 0" test keeps a negative length from walking off the buffer.
	 */
	while (len-- > 0)
		code = (code << 8) | *p++;

	return (int)code;
}
||
50 | |||
51 | static fz_error |
||
52 | pdf_lex_cmap(int *tok, fz_stream *file, char *buf, int n, int *sl) |
||
53 | { |
||
54 | fz_error error; |
||
55 | |||
56 | error = pdf_lex(tok, file, buf, n, sl); |
||
57 | if (error) |
||
58 | return fz_rethrow(error, "cannot parse cmap token"); |
||
59 | |||
60 | if (*tok == PDF_TOK_KEYWORD) |
||
61 | *tok = pdf_cmap_token_from_keyword(buf); |
||
62 | |||
63 | return fz_okay; |
||
64 | } |
||
65 | |||
66 | static fz_error |
||
67 | pdf_parse_cmap_name(pdf_cmap *cmap, fz_stream *file) |
||
68 | { |
||
69 | fz_error error; |
||
70 | char buf[256]; |
||
71 | int tok; |
||
72 | int len; |
||
73 | |||
74 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
75 | if (error) |
||
76 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
77 | |||
78 | if (tok == PDF_TOK_NAME) |
||
79 | fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name)); |
||
80 | else |
||
81 | fz_warn("expected name after CMapName in cmap"); |
||
82 | |||
83 | return fz_okay; |
||
84 | } |
||
85 | |||
86 | static fz_error |
||
87 | pdf_parse_wmode(pdf_cmap *cmap, fz_stream *file) |
||
88 | { |
||
89 | fz_error error; |
||
90 | char buf[256]; |
||
91 | int tok; |
||
92 | int len; |
||
93 | |||
94 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
95 | if (error) |
||
96 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
97 | |||
98 | if (tok == PDF_TOK_INT) |
||
99 | pdf_set_wmode(cmap, atoi(buf)); |
||
100 | else |
||
101 | fz_warn("expected integer after WMode in cmap"); |
||
102 | |||
103 | return fz_okay; |
||
104 | } |
||
105 | |||
106 | static fz_error |
||
107 | pdf_parse_codespace_range(pdf_cmap *cmap, fz_stream *file) |
||
108 | { |
||
109 | fz_error error; |
||
110 | char buf[256]; |
||
111 | int tok; |
||
112 | int len; |
||
113 | int lo, hi; |
||
114 | |||
115 | while (1) |
||
116 | { |
||
117 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
118 | if (error) |
||
119 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
120 | |||
121 | if (tok == TOK_END_CODESPACE_RANGE) |
||
122 | return fz_okay; |
||
123 | |||
124 | else if (tok == PDF_TOK_STRING) |
||
125 | { |
||
126 | lo = pdf_code_from_string(buf, len); |
||
127 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
128 | if (error) |
||
129 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
130 | if (tok == PDF_TOK_STRING) |
||
131 | { |
||
132 | hi = pdf_code_from_string(buf, len); |
||
133 | pdf_add_codespace(cmap, lo, hi, len); |
||
134 | } |
||
135 | else break; |
||
136 | } |
||
137 | |||
138 | else break; |
||
139 | } |
||
140 | |||
141 | return fz_throw("expected string or endcodespacerange"); |
||
142 | } |
||
143 | |||
144 | static fz_error |
||
145 | pdf_parse_cid_range(pdf_cmap *cmap, fz_stream *file) |
||
146 | { |
||
147 | fz_error error; |
||
148 | char buf[256]; |
||
149 | int tok; |
||
150 | int len; |
||
151 | int lo, hi, dst; |
||
152 | |||
153 | while (1) |
||
154 | { |
||
155 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
156 | if (error) |
||
157 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
158 | |||
159 | if (tok == TOK_END_CID_RANGE) |
||
160 | return fz_okay; |
||
161 | |||
162 | else if (tok != PDF_TOK_STRING) |
||
163 | return fz_throw("expected string or endcidrange"); |
||
164 | |||
165 | lo = pdf_code_from_string(buf, len); |
||
166 | |||
167 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
168 | if (error) |
||
169 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
170 | if (tok != PDF_TOK_STRING) |
||
171 | return fz_throw("expected string"); |
||
172 | |||
173 | hi = pdf_code_from_string(buf, len); |
||
174 | |||
175 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
176 | if (error) |
||
177 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
178 | if (tok != PDF_TOK_INT) |
||
179 | return fz_throw("expected integer"); |
||
180 | |||
181 | dst = atoi(buf); |
||
182 | |||
183 | pdf_map_range_to_range(cmap, lo, hi, dst); |
||
184 | } |
||
185 | } |
||
186 | |||
187 | static fz_error |
||
188 | pdf_parse_cid_char(pdf_cmap *cmap, fz_stream *file) |
||
189 | { |
||
190 | fz_error error; |
||
191 | char buf[256]; |
||
192 | int tok; |
||
193 | int len; |
||
194 | int src, dst; |
||
195 | |||
196 | while (1) |
||
197 | { |
||
198 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
199 | if (error) |
||
200 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
201 | |||
202 | if (tok == TOK_END_CID_CHAR) |
||
203 | return fz_okay; |
||
204 | |||
205 | else if (tok != PDF_TOK_STRING) |
||
206 | return fz_throw("expected string or endcidchar"); |
||
207 | |||
208 | src = pdf_code_from_string(buf, len); |
||
209 | |||
210 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
211 | if (error) |
||
212 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
213 | if (tok != PDF_TOK_INT) |
||
214 | return fz_throw("expected integer"); |
||
215 | |||
216 | dst = atoi(buf); |
||
217 | |||
218 | pdf_map_range_to_range(cmap, src, src, dst); |
||
219 | } |
||
220 | } |
||
221 | |||
222 | static fz_error |
||
223 | pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi) |
||
224 | { |
||
225 | fz_error error; |
||
226 | char buf[256]; |
||
227 | int tok; |
||
228 | int len; |
||
229 | int dst[256]; |
||
230 | int i; |
||
231 | |||
232 | while (1) |
||
233 | { |
||
234 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
235 | if (error) |
||
236 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
237 | |||
238 | if (tok == PDF_TOK_CLOSE_ARRAY) |
||
239 | return fz_okay; |
||
240 | |||
241 | /* Note: does not handle [ /Name /Name ... ] */ |
||
242 | else if (tok != PDF_TOK_STRING) |
||
243 | return fz_throw("expected string or ]"); |
||
244 | |||
245 | if (len / 2) |
||
246 | { |
||
247 | for (i = 0; i < len / 2; i++) |
||
248 | dst[i] = pdf_code_from_string(buf + i * 2, 2); |
||
249 | |||
250 | pdf_map_one_to_many(cmap, lo, dst, len / 2); |
||
251 | } |
||
252 | |||
253 | lo ++; |
||
254 | } |
||
255 | } |
||
256 | |||
257 | static fz_error |
||
258 | pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file) |
||
259 | { |
||
260 | fz_error error; |
||
261 | char buf[256]; |
||
262 | int tok; |
||
263 | int len; |
||
264 | int lo, hi, dst; |
||
265 | |||
266 | while (1) |
||
267 | { |
||
268 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
269 | if (error) |
||
270 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
271 | |||
272 | if (tok == TOK_END_BF_RANGE) |
||
273 | return fz_okay; |
||
274 | |||
275 | else if (tok != PDF_TOK_STRING) |
||
276 | return fz_throw("expected string or endbfrange"); |
||
277 | |||
278 | lo = pdf_code_from_string(buf, len); |
||
279 | |||
280 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
281 | if (error) |
||
282 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
283 | if (tok != PDF_TOK_STRING) |
||
284 | return fz_throw("expected string"); |
||
285 | |||
286 | hi = pdf_code_from_string(buf, len); |
||
287 | |||
288 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
289 | if (error) |
||
290 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
291 | |||
292 | if (tok == PDF_TOK_STRING) |
||
293 | { |
||
294 | if (len == 2) |
||
295 | { |
||
296 | dst = pdf_code_from_string(buf, len); |
||
297 | pdf_map_range_to_range(cmap, lo, hi, dst); |
||
298 | } |
||
299 | else |
||
300 | { |
||
301 | int dststr[256]; |
||
302 | int i; |
||
303 | |||
304 | if (len / 2) |
||
305 | { |
||
306 | for (i = 0; i < len / 2; i++) |
||
307 | dststr[i] = pdf_code_from_string(buf + i * 2, 2); |
||
308 | |||
309 | while (lo <= hi) |
||
310 | { |
||
311 | dststr[i-1] ++; |
||
312 | pdf_map_one_to_many(cmap, lo, dststr, i); |
||
313 | lo ++; |
||
314 | } |
||
315 | } |
||
316 | } |
||
317 | } |
||
318 | |||
319 | else if (tok == PDF_TOK_OPEN_ARRAY) |
||
320 | { |
||
321 | error = pdf_parse_bf_range_array(cmap, file, lo, hi); |
||
322 | if (error) |
||
323 | return fz_rethrow(error, "cannot map bfrange"); |
||
324 | } |
||
325 | |||
326 | else |
||
327 | { |
||
328 | return fz_throw("expected string or array or endbfrange"); |
||
329 | } |
||
330 | } |
||
331 | } |
||
332 | |||
333 | static fz_error |
||
334 | pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file) |
||
335 | { |
||
336 | fz_error error; |
||
337 | char buf[256]; |
||
338 | int tok; |
||
339 | int len; |
||
340 | int dst[256]; |
||
341 | int src; |
||
342 | int i; |
||
343 | |||
344 | while (1) |
||
345 | { |
||
346 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
347 | if (error) |
||
348 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
349 | |||
350 | if (tok == TOK_END_BF_CHAR) |
||
351 | return fz_okay; |
||
352 | |||
353 | else if (tok != PDF_TOK_STRING) |
||
354 | return fz_throw("expected string or endbfchar"); |
||
355 | |||
356 | src = pdf_code_from_string(buf, len); |
||
357 | |||
358 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
359 | if (error) |
||
360 | return fz_rethrow(error, "syntaxerror in cmap"); |
||
361 | /* Note: does not handle /dstName */ |
||
362 | if (tok != PDF_TOK_STRING) |
||
363 | return fz_throw("expected string"); |
||
364 | |||
365 | if (len / 2) |
||
366 | { |
||
367 | for (i = 0; i < len / 2; i++) |
||
368 | dst[i] = pdf_code_from_string(buf + i * 2, 2); |
||
369 | pdf_map_one_to_many(cmap, src, dst, i); |
||
370 | } |
||
371 | } |
||
372 | } |
||
373 | |||
374 | fz_error |
||
375 | pdf_parse_cmap(pdf_cmap **cmapp, fz_stream *file) |
||
376 | { |
||
377 | fz_error error; |
||
378 | pdf_cmap *cmap; |
||
379 | char key[64]; |
||
380 | char buf[256]; |
||
381 | int tok; |
||
382 | int len; |
||
383 | |||
384 | cmap = pdf_new_cmap(); |
||
385 | |||
386 | strcpy(key, ".notdef"); |
||
387 | |||
388 | while (1) |
||
389 | { |
||
390 | error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len); |
||
391 | if (error) |
||
392 | { |
||
393 | error = fz_rethrow(error, "syntaxerror in cmap"); |
||
394 | goto cleanup; |
||
395 | } |
||
396 | |||
397 | if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP) |
||
398 | break; |
||
399 | |||
400 | else if (tok == PDF_TOK_NAME) |
||
401 | { |
||
402 | if (!strcmp(buf, "CMapName")) |
||
403 | { |
||
404 | error = pdf_parse_cmap_name(cmap, file); |
||
405 | if (error) |
||
406 | { |
||
407 | error = fz_rethrow(error, "syntaxerror in cmap after CMapName"); |
||
408 | goto cleanup; |
||
409 | } |
||
410 | } |
||
411 | else if (!strcmp(buf, "WMode")) |
||
412 | { |
||
413 | error = pdf_parse_wmode(cmap, file); |
||
414 | if (error) |
||
415 | { |
||
416 | error = fz_rethrow(error, "syntaxerror in cmap after WMode"); |
||
417 | goto cleanup; |
||
418 | } |
||
419 | } |
||
420 | else |
||
421 | fz_strlcpy(key, buf, sizeof key); |
||
422 | } |
||
423 | |||
424 | else if (tok == TOK_USECMAP) |
||
425 | { |
||
426 | fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name)); |
||
427 | } |
||
428 | |||
429 | else if (tok == TOK_BEGIN_CODESPACE_RANGE) |
||
430 | { |
||
431 | error = pdf_parse_codespace_range(cmap, file); |
||
432 | if (error) |
||
433 | { |
||
434 | error = fz_rethrow(error, "syntaxerror in cmap codespacerange"); |
||
435 | goto cleanup; |
||
436 | } |
||
437 | } |
||
438 | |||
439 | else if (tok == TOK_BEGIN_BF_CHAR) |
||
440 | { |
||
441 | error = pdf_parse_bf_char(cmap, file); |
||
442 | if (error) |
||
443 | { |
||
444 | error = fz_rethrow(error, "syntaxerror in cmap bfchar"); |
||
445 | goto cleanup; |
||
446 | } |
||
447 | } |
||
448 | |||
449 | else if (tok == TOK_BEGIN_CID_CHAR) |
||
450 | { |
||
451 | error = pdf_parse_cid_char(cmap, file); |
||
452 | if (error) |
||
453 | { |
||
454 | error = fz_rethrow(error, "syntaxerror in cmap cidchar"); |
||
455 | goto cleanup; |
||
456 | } |
||
457 | } |
||
458 | |||
459 | else if (tok == TOK_BEGIN_BF_RANGE) |
||
460 | { |
||
461 | error = pdf_parse_bf_range(cmap, file); |
||
462 | if (error) |
||
463 | { |
||
464 | error = fz_rethrow(error, "syntaxerror in cmap bfrange"); |
||
465 | goto cleanup; |
||
466 | } |
||
467 | } |
||
468 | |||
469 | else if (tok == TOK_BEGIN_CID_RANGE) |
||
470 | { |
||
471 | error = pdf_parse_cid_range(cmap, file); |
||
472 | if (error) |
||
473 | { |
||
474 | error = fz_rethrow(error, "syntaxerror in cmap cidrange"); |
||
475 | goto cleanup; |
||
476 | } |
||
477 | } |
||
478 | |||
479 | /* ignore everything else */ |
||
480 | } |
||
481 | |||
482 | pdf_sort_cmap(cmap); |
||
483 | |||
484 | *cmapp = cmap; |
||
485 | return fz_okay; |
||
486 | |||
487 | cleanup: |
||
488 | pdf_drop_cmap(cmap); |
||
489 | return error; /* already rethrown */ |
||
490 | }>=>>>><> |