Subversion Repositories Kolibri OS

Rev

Rev 8429 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
#include "fitz.h"
2
#include "mupdf.h"
3
 
4
#include <ft2build.h>
5
#include FT_FREETYPE_H
6
#include FT_XFREE86_H
7
 
8
static fz_error pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_xref *xref, fz_obj *dict, char *collection, char *basefont);
9
 
10
/*
 * Alias table used by clean_font_name(). Each of the 14 rows starts with
 * the name that clean_font_name() returns, followed by the alternative
 * names that map to it. Rows are NULL-terminated (at most six names plus
 * the terminator).
 */
static char *base_font_names[14][7] =
{
	/* Courier family */
	{ "Courier",
		"CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold",
		"CourierNew,Bold", "Courier,Bold", "CourierNewPS-BoldMT",
		"CourierNew-Bold", NULL },
	{ "Courier-Oblique",
		"CourierNew,Italic", "Courier,Italic", "CourierNewPS-ItalicMT",
		"CourierNew-Italic", NULL },
	{ "Courier-BoldOblique",
		"CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },

	/* Helvetica family */
	{ "Helvetica",
		"ArialMT", "Arial", NULL },
	{ "Helvetica-Bold",
		"Arial-BoldMT", "Arial,Bold", "Arial-Bold", "Helvetica,Bold", NULL },
	{ "Helvetica-Oblique",
		"Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique",
		"Arial-BoldItalicMT", "Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },

	/* Times family */
	{ "Times-Roman",
		"TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", NULL },
	{ "Times-Bold",
		"TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic",
		"TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic",
		"TimesNewRomanPS-BoldItalicMT", "TimesNewRoman,BoldItalic",
		"TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", NULL },

	/* Fonts without aliases */
	{ "Symbol", NULL },
	{ "ZapfDingbats", NULL }
};
39
 
40
/*
 * Return 1 when the font name matches one of the naming patterns tested
 * below (the caller uses this to detect DynaLab fonts), 0 otherwise.
 */
static int is_dynalab(char *name)
{
	/* these substrings match anywhere in the name */
	if (strstr(name, "HuaTian") != NULL || strstr(name, "MingLi") != NULL)
		return 1;

	/* "DF" and "DLC" match as a prefix, or directly after a '+' */
	if (strncmp(name, "DF", 2) == 0 || strstr(name, "+DF") != NULL)
		return 1;
	if (strncmp(name, "DLC", 3) == 0 || strstr(name, "+DLC") != NULL)
		return 1;

	return 0;
}
52
 
53
/*
 * Compare two strings while skipping every ' ' character in both.
 * Returns 0 when the remaining characters are identical, 1 otherwise
 * (there is no ordering information, unlike strcmp).
 */
static int strcmp_ignore_space(char *a, char *b)
{
	for (;; a++, b++)
	{
		/* step over any run of spaces on either side */
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;

		if (*a != *b)
			return 1;
		/* both sides are equal here, so one terminator means both ended */
		if (*a == '\0')
			return 0;
	}
}
71
 
72
static char *clean_font_name(char *fontname)
73
{
74
	int i, k;
75
	for (i = 0; i < 14; i++)
76
		for (k = 0; base_font_names[i][k]; k++)
77
			if (!strcmp_ignore_space(base_font_names[i][k], fontname))
78
				return base_font_names[i][0];
79
	return fontname;
80
}
81
 
82
/*
83
 * FreeType and Rendering glue
84
 */
85
 
86
/* Font technology classes returned by ft_kind(). */
enum { UNKNOWN, TYPE1, TRUETYPE };
87
 
88
/*
 * Classify a FreeType face as TRUETYPE, TYPE1 or UNKNOWN based on the
 * format string reported by FreeType. "Type 1", "CFF" and "CID Type 1"
 * are all treated as TYPE1.
 */
static int ft_kind(FT_Face face)
{
	const char *kind = FT_Get_X11_Font_Format(face);

	/* FreeType returns NULL on error; strcmp on NULL is undefined */
	if (!kind)
		return UNKNOWN;

	if (!strcmp(kind, "TrueType"))
		return TRUETYPE;
	if (!strcmp(kind, "Type 1"))
		return TYPE1;
	if (!strcmp(kind, "CFF"))
		return TYPE1;
	if (!strcmp(kind, "CID Type 1"))
		return TYPE1;
	return UNKNOWN;
}
101
 
102
/* Non-zero when the face's style flags have FT_STYLE_FLAG_BOLD set. */
static int ft_is_bold(FT_Face face)
{
	FT_Long flags = face->style_flags;

	return flags & FT_STYLE_FLAG_BOLD;
}
106
 
107
/* Non-zero when the face's style flags have FT_STYLE_FLAG_ITALIC set. */
static int ft_is_italic(FT_Face face)
{
	FT_Long flags = face->style_flags;

	return flags & FT_STYLE_FLAG_ITALIC;
}
111
 
112
/*
 * Look up the glyph index for a character code in the face's current
 * charmap. When the direct lookup yields 0, the code is retried at
 * 0xf000 + cid, and finally 0x22ef falls back to 0x2026.
 */
static int ft_char_index(FT_Face face, int cid)
{
	int glyph = FT_Get_Char_Index(face, cid);

	if (glyph != 0)
		return glyph;

	glyph = FT_Get_Char_Index(face, 0xf000 + cid);

	/* some chinese fonts only ship the similarly looking 0x2026 */
	if (glyph == 0 && cid == 0x22ef)
		glyph = FT_Get_Char_Index(face, 0x2026);

	return glyph;
}
124
 
125
/*
 * Translate a CID into a glyph index for a FreeType-backed font.
 *
 * Order of preference:
 *  1. to_ttf_cmap: map the CID through that cmap, then through the face's
 *     charmap via ft_char_index().
 *  2. cid_to_gid: direct table lookup, only for CIDs inside the table.
 *  3. otherwise the CID is used as the glyph index unchanged.
 */
static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
{
	if (fontdesc->to_ttf_cmap)
	{
		cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);
		return ft_char_index(fontdesc->font->ft_face, cid);
	}

	/*
	 * The CID comes from document data, so it must be checked against
	 * cid_to_gid_len before indexing; out-of-range CIDs fall through to
	 * the identity mapping instead of reading past the table.
	 */
	if (fontdesc->cid_to_gid && cid >= 0 && cid < fontdesc->cid_to_gid_len)
		return fontdesc->cid_to_gid[cid];

	return cid;
}
138
 
139
int
140
pdf_font_cid_to_gid(pdf_font_desc *fontdesc, int cid)
141
{
142
	if (fontdesc->font->ft_face)
143
		return ft_cid_to_gid(fontdesc, cid);
144
	return cid;
145
}
146
 
147
/*
 * Return the horizontal advance FreeType reports for the glyph that the
 * given CID maps to, or 0 (after a warning) when the glyph cannot be
 * loaded. The glyph is loaded unhinted, without bitmaps and ignoring any
 * transform set on the face.
 */
static int ft_width(pdf_font_desc *fontdesc, int cid)
{
	const int load_flags =
		FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
	int gid = ft_cid_to_gid(fontdesc, cid);
	int fterr = FT_Load_Glyph(fontdesc->font->ft_face, gid, load_flags);

	if (!fterr)
		return ((FT_Face)fontdesc->font->ft_face)->glyph->advance.x;

	fz_warn("freetype load glyph (gid %d): %s", gid, ft_error_string(fterr));
	return 0;
}
159
 
160
static int lookup_mre_code(char *name)
161
{
162
	int i;
163
	for (i = 0; i < 256; i++)
164
		if (pdf_mac_roman[i] && !strcmp(name, pdf_mac_roman[i]))
165
			return i;
166
	return -1;
167
}
168
 
169
/*
170
 * Load font files.
171
 */
172
 
173
static fz_error
174
pdf_load_builtin_font(pdf_font_desc *fontdesc, char *fontname)
175
{
176
	fz_error error;
177
	unsigned char *data;
178
	unsigned int len;
179
 
180
	data = pdf_find_builtin_font(fontname, &len);
181
	if (!data)
182
		return fz_throw("cannot find builtin font: '%s'", fontname);
183
 
184
	error = fz_new_font_from_memory(&fontdesc->font, data, len, 0);
185
	if (error)
186
		return fz_rethrow(error, "cannot load freetype font from memory");
187
 
188
	if (!strcmp(fontname, "Symbol") || !strcmp(fontname, "ZapfDingbats"))
189
		fontdesc->flags |= PDF_FD_SYMBOLIC;
190
 
191
	return fz_okay;
192
}
193
 
194
static fz_error
195
pdf_load_substitute_font(pdf_font_desc *fontdesc, int mono, int serif, int bold, int italic)
196
{
197
	fz_error error;
198
	unsigned char *data;
199
	unsigned int len;
200
 
201
	data = pdf_find_substitute_font(mono, serif, bold, italic, &len);
202
	if (!data)
203
		return fz_throw("cannot find substitute font");
204
 
205
	error = fz_new_font_from_memory(&fontdesc->font, data, len, 0);
206
	if (error)
207
		return fz_rethrow(error, "cannot load freetype font from memory");
208
 
209
	fontdesc->font->ft_substitute = 1;
210
	fontdesc->font->ft_bold = bold && !ft_is_bold(fontdesc->font->ft_face);
211
	fontdesc->font->ft_italic = italic && !ft_is_italic(fontdesc->font->ft_face);
212
	return fz_okay;
213
}
214
 
215
static fz_error
216
pdf_load_substitute_cjk_font(pdf_font_desc *fontdesc, int ros, int serif)
217
{
218
	fz_error error;
219
	unsigned char *data;
220
	unsigned int len;
221
 
222
	data = pdf_find_substitute_cjk_font(ros, serif, &len);
223
	if (!data)
224
		return fz_throw("cannot find builtin CJK font");
225
 
226
	error = fz_new_font_from_memory(&fontdesc->font, data, len, 0);
227
	if (error)
228
		return fz_rethrow(error, "cannot load builtin CJK font");
229
 
230
	fontdesc->font->ft_substitute = 1;
231
	return fz_okay;
232
}
233
 
234
static fz_error
235
pdf_load_system_font(pdf_font_desc *fontdesc, char *fontname, char *collection)
236
{
237
	fz_error error;
238
	int bold = 0;
239
	int italic = 0;
240
	int serif = 0;
241
	int mono = 0;
242
 
243
	if (strstr(fontname, "Bold"))
244
		bold = 1;
245
	if (strstr(fontname, "Italic"))
246
		italic = 1;
247
	if (strstr(fontname, "Oblique"))
248
		italic = 1;
249
 
250
	if (fontdesc->flags & PDF_FD_FIXED_PITCH)
251
		mono = 1;
252
	if (fontdesc->flags & PDF_FD_SERIF)
253
		serif = 1;
254
	if (fontdesc->flags & PDF_FD_ITALIC)
255
		italic = 1;
256
	if (fontdesc->flags & PDF_FD_FORCE_BOLD)
257
		bold = 1;
258
 
259
	if (collection)
260
	{
261
		if (!strcmp(collection, "Adobe-CNS1"))
262
			return pdf_load_substitute_cjk_font(fontdesc, PDF_ROS_CNS, serif);
263
		else if (!strcmp(collection, "Adobe-GB1"))
264
			return pdf_load_substitute_cjk_font(fontdesc, PDF_ROS_GB, serif);
265
		else if (!strcmp(collection, "Adobe-Japan1"))
266
			return pdf_load_substitute_cjk_font(fontdesc, PDF_ROS_JAPAN, serif);
267
		else if (!strcmp(collection, "Adobe-Korea1"))
268
			return pdf_load_substitute_cjk_font(fontdesc, PDF_ROS_KOREA, serif);
269
		return fz_throw("unknown cid collection: %s", collection);
270
	}
271
 
272
	error = pdf_load_substitute_font(fontdesc, mono, serif, bold, italic);
273
	if (error)
274
		return fz_rethrow(error, "cannot load substitute font");
275
 
276
	return fz_okay;
277
}
278
 
279
static fz_error
280
pdf_load_embedded_font(pdf_font_desc *fontdesc, pdf_xref *xref, fz_obj *stmref)
281
{
282
	fz_error error;
283
	fz_buffer *buf;
284
 
285
	error = pdf_load_stream(&buf, xref, fz_to_num(stmref), fz_to_gen(stmref));
286
	if (error)
287
		return fz_rethrow(error, "cannot load font stream (%d %d R)", fz_to_num(stmref), fz_to_gen(stmref));
288
 
289
	error = fz_new_font_from_memory(&fontdesc->font, buf->data, buf->len, 0);
290
	if (error)
291
	{
292
		fz_drop_buffer(buf);
293
		return fz_rethrow(error, "cannot load embedded font (%d %d R)", fz_to_num(stmref), fz_to_gen(stmref));
294
	}
295
 
296
	/* save the buffer so we can free it later */
297
	fontdesc->font->ft_data = buf->data;
298
	fontdesc->font->ft_size = buf->len;
299
	fz_free(buf); /* only free the fz_buffer struct, not the contained data */
300
 
301
	fontdesc->is_embedded = 1;
302
 
303
	return fz_okay;
304
}
305
 
306
/*
307
 * Create and destroy
308
 */
309
 
310
/* Take an additional reference on fontdesc and return it. */
pdf_font_desc *
pdf_keep_font(pdf_font_desc *fontdesc)
{
	fontdesc->refs += 1;
	return fontdesc;
}
316
 
317
void
318
pdf_drop_font(pdf_font_desc *fontdesc)
319
{
320
	if (fontdesc && --fontdesc->refs == 0)
321
	{
322
		if (fontdesc->font)
323
			fz_drop_font(fontdesc->font);
324
		if (fontdesc->encoding)
325
			pdf_drop_cmap(fontdesc->encoding);
326
		if (fontdesc->to_ttf_cmap)
327
			pdf_drop_cmap(fontdesc->to_ttf_cmap);
328
		if (fontdesc->to_unicode)
329
			pdf_drop_cmap(fontdesc->to_unicode);
330
		fz_free(fontdesc->cid_to_gid);
331
		fz_free(fontdesc->cid_to_ucs);
332
		fz_free(fontdesc->hmtx);
333
		fz_free(fontdesc->vmtx);
334
		fz_free(fontdesc);
335
	}
336
}
337
 
338
pdf_font_desc *
339
pdf_new_font_desc(void)
340
{
341
	pdf_font_desc *fontdesc;
342
 
343
	fontdesc = fz_malloc(sizeof(pdf_font_desc));
344
	fontdesc->refs = 1;
345
 
346
	fontdesc->font = NULL;
347
 
348
	fontdesc->flags = 0;
349
	fontdesc->italic_angle = 0;
350
	fontdesc->ascent = 0;
351
	fontdesc->descent = 0;
352
	fontdesc->cap_height = 0;
353
	fontdesc->x_height = 0;
354
	fontdesc->missing_width = 0;
355
 
356
	fontdesc->encoding = NULL;
357
	fontdesc->to_ttf_cmap = NULL;
358
	fontdesc->cid_to_gid_len = 0;
359
	fontdesc->cid_to_gid = NULL;
360
 
361
	fontdesc->to_unicode = NULL;
362
	fontdesc->cid_to_ucs_len = 0;
363
	fontdesc->cid_to_ucs = NULL;
364
 
365
	fontdesc->wmode = 0;
366
 
367
	fontdesc->hmtx_cap = 0;
368
	fontdesc->vmtx_cap = 0;
369
	fontdesc->hmtx_len = 0;
370
	fontdesc->vmtx_len = 0;
371
	fontdesc->hmtx = NULL;
372
	fontdesc->vmtx = NULL;
373
 
374
	fontdesc->dhmtx.lo = 0x0000;
375
	fontdesc->dhmtx.hi = 0xFFFF;
376
	fontdesc->dhmtx.w = 1000;
377
 
378
	fontdesc->dvmtx.lo = 0x0000;
379
	fontdesc->dvmtx.hi = 0xFFFF;
380
	fontdesc->dvmtx.x = 0;
381
	fontdesc->dvmtx.y = 880;
382
	fontdesc->dvmtx.w = -1000;
383
 
384
	fontdesc->is_embedded = 0;
385
 
386
	return fontdesc;
387
}
388
 
389
/*
390
 * Simple fonts (Type1 and TrueType)
391
 */
392
 
393
static fz_error
394
pdf_load_simple_font(pdf_font_desc **fontdescp, pdf_xref *xref, fz_obj *dict)
395
{
396
	fz_error error;
397
	fz_obj *descriptor;
398
	fz_obj *encoding;
399
	fz_obj *widths;
400
	unsigned short *etable = NULL;
401
	pdf_font_desc *fontdesc;
402
	FT_Face face;
403
	FT_CharMap cmap;
404
	int symbolic;
405
	int kind;
406
 
407
	char *basefont;
408
	char *fontname;
409
	char *estrings[256];
410
	char ebuffer[256][32];
411
	int i, k, n;
412
	int fterr;
413
 
414
	basefont = fz_to_name(fz_dict_gets(dict, "BaseFont"));
415
	fontname = clean_font_name(basefont);
416
 
417
	/* Load font file */
418
 
419
	fontdesc = pdf_new_font_desc();
420
 
421
	descriptor = fz_dict_gets(dict, "FontDescriptor");
422
	if (descriptor)
423
		error = pdf_load_font_descriptor(fontdesc, xref, descriptor, NULL, basefont);
424
	else
425
		error = pdf_load_builtin_font(fontdesc, fontname);
426
	if (error)
427
		goto cleanup;
428
 
429
	/* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
430
	if (!*fontdesc->font->name &&
431
		!fz_dict_gets(dict, "ToUnicode") &&
432
		!strcmp(fz_to_name(fz_dict_gets(dict, "Encoding")), "WinAnsiEncoding") &&
433
		fz_to_int(fz_dict_gets(descriptor, "Flags")) == 4)
434
	{
435
		/* note: without the comma, pdf_load_font_descriptor would prefer /FontName over /BaseFont */
436
		char *cp936fonts[] = {
437
			"\xCB\xCE\xCC\xE5", "SimSun,Regular",
438
			"\xBA\xDA\xCC\xE5", "SimHei,Regular",
439
			"\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
440
			"\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
441
			"\xC1\xA5\xCA\xE9", "SimLi,Regular",
442
			NULL
443
		};
444
		for (i = 0; cp936fonts[i]; i += 2)
445
			if (!strcmp(basefont, cp936fonts[i]))
446
				break;
447
		if (cp936fonts[i])
448
		{
449
			fz_warn("workaround for S22PDF lying about chinese font encodings");
450
			pdf_drop_font(fontdesc);
451
			fontdesc = pdf_new_font_desc();
452
			error = pdf_load_font_descriptor(fontdesc, xref, descriptor, "Adobe-GB1", cp936fonts[i+1]);
453
			error |= pdf_load_system_cmap(&fontdesc->encoding, "GBK-EUC-H");
454
			error |= pdf_load_system_cmap(&fontdesc->to_unicode, "Adobe-GB1-UCS2");
455
			error |= pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-GB1-UCS2");
456
			if (error)
457
				return fz_rethrow(error, "cannot load font");
458
 
459
			face = fontdesc->font->ft_face;
460
			kind = ft_kind(face);
461
			goto skip_encoding;
462
		}
463
	}
464
 
465
	face = fontdesc->font->ft_face;
466
	kind = ft_kind(face);
467
 
468
	/* Encoding */
469
 
470
	symbolic = fontdesc->flags & 4;
471
 
472
	if (face->num_charmaps > 0)
473
		cmap = face->charmaps[0];
474
	else
475
		cmap = NULL;
476
 
477
	for (i = 0; i < face->num_charmaps; i++)
478
	{
479
		FT_CharMap test = face->charmaps[i];
480
 
481
		if (kind == TYPE1)
482
		{
483
			if (test->platform_id == 7)
484
				cmap = test;
485
		}
486
 
487
		if (kind == TRUETYPE)
488
		{
489
			if (test->platform_id == 1 && test->encoding_id == 0)
490
				cmap = test;
491
			if (test->platform_id == 3 && test->encoding_id == 1)
492
				cmap = test;
493
		}
494
	}
495
 
496
	if (cmap)
497
	{
498
		fterr = FT_Set_Charmap(face, cmap);
499
		if (fterr)
500
			fz_warn("freetype could not set cmap: %s", ft_error_string(fterr));
501
	}
502
	else
503
		fz_warn("freetype could not find any cmaps");
504
 
505
	etable = fz_calloc(256, sizeof(unsigned short));
506
	for (i = 0; i < 256; i++)
507
	{
508
		estrings[i] = NULL;
509
		etable[i] = 0;
510
	}
511
 
512
	encoding = fz_dict_gets(dict, "Encoding");
513
	if (encoding)
514
	{
515
		if (fz_is_name(encoding))
516
			pdf_load_encoding(estrings, fz_to_name(encoding));
517
 
518
		if (fz_is_dict(encoding))
519
		{
520
			fz_obj *base, *diff, *item;
521
 
522
			base = fz_dict_gets(encoding, "BaseEncoding");
523
			if (fz_is_name(base))
524
				pdf_load_encoding(estrings, fz_to_name(base));
525
			else if (!fontdesc->is_embedded && !symbolic)
526
				pdf_load_encoding(estrings, "StandardEncoding");
527
 
528
			diff = fz_dict_gets(encoding, "Differences");
529
			if (fz_is_array(diff))
530
			{
531
				n = fz_array_len(diff);
532
				k = 0;
533
				for (i = 0; i < n; i++)
534
				{
535
					item = fz_array_get(diff, i);
536
					if (fz_is_int(item))
537
						k = fz_to_int(item);
538
					if (fz_is_name(item))
539
						estrings[k++] = fz_to_name(item);
540
					if (k < 0) k = 0;
541
					if (k > 255) k = 255;
542
				}
543
			}
544
		}
545
	}
546
 
547
	/* start with the builtin encoding */
548
	for (i = 0; i < 256; i++)
549
		etable[i] = ft_char_index(face, i);
550
 
551
	/* encode by glyph name where we can */
552
	if (kind == TYPE1)
553
	{
554
		for (i = 0; i < 256; i++)
555
		{
556
			if (estrings[i])
557
			{
558
				etable[i] = FT_Get_Name_Index(face, estrings[i]);
559
				if (etable[i] == 0)
560
				{
561
					int aglcode = pdf_lookup_agl(estrings[i]);
562
					const char **dupnames = pdf_lookup_agl_duplicates(aglcode);
563
					while (*dupnames)
564
					{
565
						etable[i] = FT_Get_Name_Index(face, (char*)*dupnames);
566
						if (etable[i])
567
							break;
568
						dupnames++;
569
					}
570
				}
571
			}
572
		}
573
	}
574
 
575
	/* encode by glyph name where we can */
576
	if (kind == TRUETYPE)
577
	{
578
		/* Unicode cmap */
579
		if (!symbolic && face->charmap && face->charmap->platform_id == 3)
580
		{
581
			for (i = 0; i < 256; i++)
582
			{
583
				if (estrings[i])
584
				{
585
					int aglcode = pdf_lookup_agl(estrings[i]);
586
					if (!aglcode)
587
						etable[i] = FT_Get_Name_Index(face, estrings[i]);
588
					else
589
						etable[i] = ft_char_index(face, aglcode);
590
				}
591
			}
592
		}
593
 
594
		/* MacRoman cmap */
595
		else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
596
		{
597
			for (i = 0; i < 256; i++)
598
			{
599
				if (estrings[i])
600
				{
601
					k = lookup_mre_code(estrings[i]);
602
					if (k <= 0)
603
						etable[i] = FT_Get_Name_Index(face, estrings[i]);
604
					else
605
						etable[i] = ft_char_index(face, k);
606
				}
607
			}
608
		}
609
 
610
		/* Symbolic cmap */
611
		else
612
		{
613
			for (i = 0; i < 256; i++)
614
			{
615
				if (estrings[i])
616
				{
617
					etable[i] = FT_Get_Name_Index(face, estrings[i]);
618
					if (etable[i] == 0)
619
						etable[i] = ft_char_index(face, i);
620
				}
621
			}
622
		}
623
	}
624
 
625
	/* try to reverse the glyph names from the builtin encoding */
626
	for (i = 0; i < 256; i++)
627
	{
628
		if (etable[i] && !estrings[i])
629
		{
630
			if (FT_HAS_GLYPH_NAMES(face))
631
			{
632
				fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
633
				if (fterr)
634
					fz_warn("freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
635
				if (ebuffer[i][0])
636
					estrings[i] = ebuffer[i];
637
			}
638
			else
639
			{
640
				estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */
641
			}
642
		}
643
	}
644
 
645
	fontdesc->encoding = pdf_new_identity_cmap(0, 1);
646
	fontdesc->cid_to_gid_len = 256;
647
	fontdesc->cid_to_gid = etable;
648
 
649
	error = pdf_load_to_unicode(fontdesc, xref, estrings, NULL, fz_dict_gets(dict, "ToUnicode"));
650
	if (error)
651
		fz_catch(error, "cannot load to_unicode");
652
 
653
skip_encoding:
654
 
655
	/* Widths */
656
 
657
	pdf_set_default_hmtx(fontdesc, fontdesc->missing_width);
658
 
659
	widths = fz_dict_gets(dict, "Widths");
660
	if (widths)
661
	{
662
		int first, last;
663
 
664
		first = fz_to_int(fz_dict_gets(dict, "FirstChar"));
665
		last = fz_to_int(fz_dict_gets(dict, "LastChar"));
666
 
667
		if (first < 0 || last > 255 || first > last)
668
			first = last = 0;
669
 
670
		for (i = 0; i < last - first + 1; i++)
671
		{
672
			int wid = fz_to_int(fz_array_get(widths, i));
673
			pdf_add_hmtx(fontdesc, i + first, i + first, wid);
674
		}
675
	}
676
	else
677
	{
678
		fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72);
679
		if (fterr)
680
			fz_warn("freetype set character size: %s", ft_error_string(fterr));
681
		for (i = 0; i < 256; i++)
682
		{
683
			pdf_add_hmtx(fontdesc, i, i, ft_width(fontdesc, i));
684
		}
685
	}
686
 
687
	pdf_end_hmtx(fontdesc);
688
 
689
	*fontdescp = fontdesc;
690
	return fz_okay;
691
 
692
cleanup:
693
	if (etable != fontdesc->cid_to_gid)
694
		fz_free(etable);
695
	pdf_drop_font(fontdesc);
696
	return fz_rethrow(error, "cannot load simple font (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
697
}
698
 
699
/*
700
 * CID Fonts
701
 */
702
 
703
static fz_error
704
load_cid_font(pdf_font_desc **fontdescp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding, fz_obj *to_unicode)
705
{
706
	fz_error error;
707
	fz_obj *widths;
708
	fz_obj *descriptor;
709
	pdf_font_desc *fontdesc;
710
	FT_Face face;
711
	int kind;
712
	char collection[256];
713
	char *basefont;
714
	int i, k, fterr;
715
	fz_obj *obj;
716
	int dw;
717
 
718
	/* Get font name and CID collection */
719
 
720
	basefont = fz_to_name(fz_dict_gets(dict, "BaseFont"));
721
 
722
	{
723
		fz_obj *cidinfo;
724
		char tmpstr[64];
725
		int tmplen;
726
 
727
		cidinfo = fz_dict_gets(dict, "CIDSystemInfo");
728
		if (!cidinfo)
729
			return fz_throw("cid font is missing info");
730
 
731
		obj = fz_dict_gets(cidinfo, "Registry");
732
		tmplen = MIN(sizeof tmpstr - 1, fz_to_str_len(obj));
733
		memcpy(tmpstr, fz_to_str_buf(obj), tmplen);
734
		tmpstr[tmplen] = '\0';
735
		fz_strlcpy(collection, tmpstr, sizeof collection);
736
 
737
		fz_strlcat(collection, "-", sizeof collection);
738
 
739
		obj = fz_dict_gets(cidinfo, "Ordering");
740
		tmplen = MIN(sizeof tmpstr - 1, fz_to_str_len(obj));
741
		memcpy(tmpstr, fz_to_str_buf(obj), tmplen);
742
		tmpstr[tmplen] = '\0';
743
		fz_strlcat(collection, tmpstr, sizeof collection);
744
	}
745
 
746
	/* Load font file */
747
 
748
	fontdesc = pdf_new_font_desc();
749
 
750
	descriptor = fz_dict_gets(dict, "FontDescriptor");
751
	if (descriptor)
752
		error = pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont);
753
	else
754
		error = fz_throw("syntaxerror: missing font descriptor");
755
	if (error)
756
		goto cleanup;
757
 
758
	face = fontdesc->font->ft_face;
759
	kind = ft_kind(face);
760
 
761
	/* Encoding */
762
 
763
	error = fz_okay;
764
	if (fz_is_name(encoding))
765
	{
766
		if (!strcmp(fz_to_name(encoding), "Identity-H"))
767
			fontdesc->encoding = pdf_new_identity_cmap(0, 2);
768
		else if (!strcmp(fz_to_name(encoding), "Identity-V"))
769
			fontdesc->encoding = pdf_new_identity_cmap(1, 2);
770
		else
771
			error = pdf_load_system_cmap(&fontdesc->encoding, fz_to_name(encoding));
772
	}
773
	else if (fz_is_indirect(encoding))
774
	{
775
		error = pdf_load_embedded_cmap(&fontdesc->encoding, xref, encoding);
776
	}
777
	else
778
	{
779
		error = fz_throw("syntaxerror: font missing encoding");
780
	}
781
	if (error)
782
		goto cleanup;
783
 
784
	pdf_set_font_wmode(fontdesc, pdf_get_wmode(fontdesc->encoding));
785
 
786
	if (kind == TRUETYPE)
787
	{
788
		fz_obj *cidtogidmap;
789
 
790
		cidtogidmap = fz_dict_gets(dict, "CIDToGIDMap");
791
		if (fz_is_indirect(cidtogidmap))
792
		{
793
			fz_buffer *buf;
794
 
795
			error = pdf_load_stream(&buf, xref, fz_to_num(cidtogidmap), fz_to_gen(cidtogidmap));
796
			if (error)
797
				goto cleanup;
798
 
799
			fontdesc->cid_to_gid_len = (buf->len) / 2;
800
			fontdesc->cid_to_gid = fz_calloc(fontdesc->cid_to_gid_len, sizeof(unsigned short));
801
			for (i = 0; i < fontdesc->cid_to_gid_len; i++)
802
				fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];
803
 
804
			fz_drop_buffer(buf);
805
		}
806
 
807
		/* if truetype font is external, cidtogidmap should not be identity */
808
		/* so we map from cid to unicode and then map that through the (3 1) */
809
		/* unicode cmap to get a glyph id */
810
		else if (fontdesc->font->ft_substitute)
811
		{
812
			fterr = FT_Select_Charmap(face, ft_encoding_unicode);
813
			if (fterr)
814
			{
815
				error = fz_throw("fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
816
				goto cleanup;
817
			}
818
 
819
			if (!strcmp(collection, "Adobe-CNS1"))
820
				error = pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-CNS1-UCS2");
821
			else if (!strcmp(collection, "Adobe-GB1"))
822
				error = pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-GB1-UCS2");
823
			else if (!strcmp(collection, "Adobe-Japan1"))
824
				error = pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-Japan1-UCS2");
825
			else if (!strcmp(collection, "Adobe-Japan2"))
826
				error = pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-Japan2-UCS2");
827
			else if (!strcmp(collection, "Adobe-Korea1"))
828
				error = pdf_load_system_cmap(&fontdesc->to_ttf_cmap, "Adobe-Korea1-UCS2");
829
			else
830
				error = fz_okay;
831
 
832
			if (error)
833
			{
834
				error = fz_rethrow(error, "cannot load system cmap %s", collection);
835
				goto cleanup;
836
			}
837
		}
838
	}
839
 
840
	error = pdf_load_to_unicode(fontdesc, xref, NULL, collection, to_unicode);
841
	if (error)
842
		fz_catch(error, "cannot load to_unicode");
843
 
844
	/* Horizontal */
845
 
846
	dw = 1000;
847
	obj = fz_dict_gets(dict, "DW");
848
	if (obj)
849
		dw = fz_to_int(obj);
850
	pdf_set_default_hmtx(fontdesc, dw);
851
 
852
	widths = fz_dict_gets(dict, "W");
853
	if (widths)
854
	{
855
		int c0, c1, w;
856
 
857
		for (i = 0; i < fz_array_len(widths); )
858
		{
859
			c0 = fz_to_int(fz_array_get(widths, i));
860
			obj = fz_array_get(widths, i + 1);
861
			if (fz_is_array(obj))
862
			{
863
				for (k = 0; k < fz_array_len(obj); k++)
864
				{
865
					w = fz_to_int(fz_array_get(obj, k));
866
					pdf_add_hmtx(fontdesc, c0 + k, c0 + k, w);
867
				}
868
				i += 2;
869
			}
870
			else
871
			{
872
				c1 = fz_to_int(obj);
873
				w = fz_to_int(fz_array_get(widths, i + 2));
874
				pdf_add_hmtx(fontdesc, c0, c1, w);
875
				i += 3;
876
			}
877
		}
878
	}
879
 
880
	pdf_end_hmtx(fontdesc);
881
 
882
	/* Vertical */
883
 
884
	if (pdf_get_wmode(fontdesc->encoding) == 1)
885
	{
886
		int dw2y = 880;
887
		int dw2w = -1000;
888
 
889
		obj = fz_dict_gets(dict, "DW2");
890
		if (obj)
891
		{
892
			dw2y = fz_to_int(fz_array_get(obj, 0));
893
			dw2w = fz_to_int(fz_array_get(obj, 1));
894
		}
895
 
896
		pdf_set_default_vmtx(fontdesc, dw2y, dw2w);
897
 
898
		widths = fz_dict_gets(dict, "W2");
899
		if (widths)
900
		{
901
			int c0, c1, w, x, y;
902
 
903
			for (i = 0; i < fz_array_len(widths); )
904
			{
905
				c0 = fz_to_int(fz_array_get(widths, i));
906
				obj = fz_array_get(widths, i + 1);
907
				if (fz_is_array(obj))
908
				{
909
					for (k = 0; k * 3 < fz_array_len(obj); k ++)
910
					{
911
						w = fz_to_int(fz_array_get(obj, k * 3 + 0));
912
						x = fz_to_int(fz_array_get(obj, k * 3 + 1));
913
						y = fz_to_int(fz_array_get(obj, k * 3 + 2));
914
						pdf_add_vmtx(fontdesc, c0 + k, c0 + k, x, y, w);
915
					}
916
					i += 2;
917
				}
918
				else
919
				{
920
					c1 = fz_to_int(obj);
921
					w = fz_to_int(fz_array_get(widths, i + 2));
922
					x = fz_to_int(fz_array_get(widths, i + 3));
923
					y = fz_to_int(fz_array_get(widths, i + 4));
924
					pdf_add_vmtx(fontdesc, c0, c1, x, y, w);
925
					i += 5;
926
				}
927
			}
928
		}
929
 
930
		pdf_end_vmtx(fontdesc);
931
	}
932
 
933
	*fontdescp = fontdesc;
934
	return fz_okay;
935
 
936
cleanup:
937
	pdf_drop_font(fontdesc);
938
	return fz_rethrow(error, "cannot load cid font (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
939
}
940
 
941
static fz_error
942
pdf_load_type0_font(pdf_font_desc **fontdescp, pdf_xref *xref, fz_obj *dict)
943
{
944
	fz_error error;
945
	fz_obj *dfonts;
946
	fz_obj *dfont;
947
	fz_obj *subtype;
948
	fz_obj *encoding;
949
	fz_obj *to_unicode;
950
 
951
	dfonts = fz_dict_gets(dict, "DescendantFonts");
952
	if (!dfonts)
953
		return fz_throw("cid font is missing descendant fonts");
954
 
955
	dfont = fz_array_get(dfonts, 0);
956
 
957
	subtype = fz_dict_gets(dfont, "Subtype");
958
	encoding = fz_dict_gets(dict, "Encoding");
959
	to_unicode = fz_dict_gets(dict, "ToUnicode");
960
 
961
	if (fz_is_name(subtype) && !strcmp(fz_to_name(subtype), "CIDFontType0"))
962
		error = load_cid_font(fontdescp, xref, dfont, encoding, to_unicode);
963
	else if (fz_is_name(subtype) && !strcmp(fz_to_name(subtype), "CIDFontType2"))
964
		error = load_cid_font(fontdescp, xref, dfont, encoding, to_unicode);
965
	else
966
		error = fz_throw("syntaxerror: unknown cid font type");
967
	if (error)
968
		return fz_rethrow(error, "cannot load descendant font (%d %d R)", fz_to_num(dfont), fz_to_gen(dfont));
969
 
970
	return fz_okay;
971
}
972
 
973
/*
974
 * FontDescriptor
975
 */
976
 
977
static fz_error
978
pdf_load_font_descriptor(pdf_font_desc *fontdesc, pdf_xref *xref, fz_obj *dict, char *collection, char *basefont)
979
{
980
	fz_error error;
981
	fz_obj *obj1, *obj2, *obj3, *obj;
982
	char *fontname;
983
	char *origname;
984
	FT_Face face;
985
 
986
	if (!strchr(basefont, ',') || strchr(basefont, '+'))
987
		origname = fz_to_name(fz_dict_gets(dict, "FontName"));
988
	else
989
		origname = basefont;
990
	fontname = clean_font_name(origname);
991
 
992
	fontdesc->flags = fz_to_int(fz_dict_gets(dict, "Flags"));
993
	fontdesc->italic_angle = fz_to_real(fz_dict_gets(dict, "ItalicAngle"));
994
	fontdesc->ascent = fz_to_real(fz_dict_gets(dict, "Ascent"));
995
	fontdesc->descent = fz_to_real(fz_dict_gets(dict, "Descent"));
996
	fontdesc->cap_height = fz_to_real(fz_dict_gets(dict, "CapHeight"));
997
	fontdesc->x_height = fz_to_real(fz_dict_gets(dict, "XHeight"));
998
	fontdesc->missing_width = fz_to_real(fz_dict_gets(dict, "MissingWidth"));
999
 
1000
	obj1 = fz_dict_gets(dict, "FontFile");
1001
	obj2 = fz_dict_gets(dict, "FontFile2");
1002
	obj3 = fz_dict_gets(dict, "FontFile3");
1003
	obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1004
 
1005
	if (fz_is_indirect(obj))
1006
	{
1007
		error = pdf_load_embedded_font(fontdesc, xref, obj);
1008
		if (error)
1009
		{
1010
			fz_catch(error, "ignored error when loading embedded font, attempting to load system font");
1011
			if (origname != fontname)
1012
				error = pdf_load_builtin_font(fontdesc, fontname);
1013
			else
1014
				error = pdf_load_system_font(fontdesc, fontname, collection);
1015
			if (error)
1016
				return fz_rethrow(error, "cannot load font descriptor (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
1017
		}
1018
	}
1019
	else
1020
	{
1021
		if (origname != fontname)
1022
			error = pdf_load_builtin_font(fontdesc, fontname);
1023
		else
1024
			error = pdf_load_system_font(fontdesc, fontname, collection);
1025
		if (error)
1026
			return fz_rethrow(error, "cannot load font descriptor (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
1027
	}
1028
 
1029
	fz_strlcpy(fontdesc->font->name, fontname, sizeof fontdesc->font->name);
1030
 
1031
	/* Check for DynaLab fonts that must use hinting */
1032
	face = fontdesc->font->ft_face;
1033
	if (ft_kind(face) == TRUETYPE)
1034
	{
1035
		if (FT_IS_TRICKY(face) || is_dynalab(fontdesc->font->name))
1036
			fontdesc->font->ft_hint = 1;
1037
	}
1038
 
1039
	return fz_okay;
1040
 
1041
}
1042
 
1043
static void
1044
pdf_make_width_table(pdf_font_desc *fontdesc)
1045
{
1046
	fz_font *font = fontdesc->font;
1047
	int i, k, cid, gid;
1048
 
1049
	font->width_count = 0;
1050
	for (i = 0; i < fontdesc->hmtx_len; i++)
1051
	{
1052
		for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1053
		{
1054
			cid = pdf_lookup_cmap(fontdesc->encoding, k);
1055
			gid = pdf_font_cid_to_gid(fontdesc, cid);
1056
			if (gid > font->width_count)
1057
				font->width_count = gid;
1058
		}
1059
	}
1060
	font->width_count ++;
1061
 
1062
	font->width_table = fz_calloc(font->width_count, sizeof(int));
1063
	memset(font->width_table, 0, sizeof(int) * font->width_count);
1064
 
1065
	for (i = 0; i < fontdesc->hmtx_len; i++)
1066
	{
1067
		for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1068
		{
1069
			cid = pdf_lookup_cmap(fontdesc->encoding, k);
1070
			gid = pdf_font_cid_to_gid(fontdesc, cid);
1071
			if (gid >= 0 && gid < font->width_count)
1072
				font->width_table[gid] = fontdesc->hmtx[i].w;
1073
		}
1074
	}
1075
}
1076
 
1077
fz_error
1078
pdf_load_font(pdf_font_desc **fontdescp, pdf_xref *xref, fz_obj *rdb, fz_obj *dict)
1079
{
1080
	fz_error error;
1081
	char *subtype;
1082
	fz_obj *dfonts;
1083
	fz_obj *charprocs;
1084
 
1085
	if ((*fontdescp = pdf_find_item(xref->store, pdf_drop_font, dict)))
1086
	{
1087
		pdf_keep_font(*fontdescp);
1088
		return fz_okay;
1089
	}
1090
 
1091
	subtype = fz_to_name(fz_dict_gets(dict, "Subtype"));
1092
	dfonts = fz_dict_gets(dict, "DescendantFonts");
1093
	charprocs = fz_dict_gets(dict, "CharProcs");
1094
 
1095
	if (subtype && !strcmp(subtype, "Type0"))
1096
		error = pdf_load_type0_font(fontdescp, xref, dict);
1097
	else if (subtype && !strcmp(subtype, "Type1"))
1098
		error = pdf_load_simple_font(fontdescp, xref, dict);
1099
	else if (subtype && !strcmp(subtype, "MMType1"))
1100
		error = pdf_load_simple_font(fontdescp, xref, dict);
1101
	else if (subtype && !strcmp(subtype, "TrueType"))
1102
		error = pdf_load_simple_font(fontdescp, xref, dict);
1103
	else if (subtype && !strcmp(subtype, "Type3"))
1104
		error = pdf_load_type3_font(fontdescp, xref, rdb, dict);
1105
	else if (charprocs)
1106
	{
1107
		fz_warn("unknown font format, guessing type3.");
1108
		error = pdf_load_type3_font(fontdescp, xref, rdb, dict);
1109
	}
1110
	else if (dfonts)
1111
	{
1112
		fz_warn("unknown font format, guessing type0.");
1113
		error = pdf_load_type0_font(fontdescp, xref, dict);
1114
	}
1115
	else
1116
	{
1117
		fz_warn("unknown font format, guessing type1 or truetype.");
1118
		error = pdf_load_simple_font(fontdescp, xref, dict);
1119
	}
1120
	if (error)
1121
		return fz_rethrow(error, "cannot load font (%d %d R)", fz_to_num(dict), fz_to_gen(dict));
1122
 
1123
	/* Save the widths to stretch non-CJK substitute fonts */
1124
	if ((*fontdescp)->font->ft_substitute && !(*fontdescp)->to_ttf_cmap)
1125
		pdf_make_width_table(*fontdescp);
1126
 
1127
	pdf_store_item(xref->store, pdf_keep_font, pdf_drop_font, dict, *fontdescp);
1128
 
1129
	return fz_okay;
1130
}
1131
 
1132
void
1133
pdf_debug_font(pdf_font_desc *fontdesc)
1134
{
1135
	int i;
1136
 
1137
	printf("fontdesc {\n");
1138
 
1139
	if (fontdesc->font->ft_face)
1140
		printf("\tfreetype font\n");
1141
	if (fontdesc->font->t3procs)
1142
		printf("\ttype3 font\n");
1143
 
1144
	printf("\twmode %d\n", fontdesc->wmode);
1145
	printf("\tDW %d\n", fontdesc->dhmtx.w);
1146
 
1147
	printf("\tW {\n");
1148
	for (i = 0; i < fontdesc->hmtx_len; i++)
1149
		printf("\t\t<%04x> <%04x> %d\n",
1150
			fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1151
	printf("\t}\n");
1152
 
1153
	if (fontdesc->wmode)
1154
	{
1155
		printf("\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1156
		printf("\tW2 {\n");
1157
		for (i = 0; i < fontdesc->vmtx_len; i++)
1158
			printf("\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1159
				fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1160
		printf("\t}\n");
1161
	}
1162
}