Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
/*
2
 * The CMap data structure here is constructed on the fly by
3
 * adding simple range-to-range mappings. Then the data structure
4
 * is optimized to contain both range-to-range and range-to-table
5
 * lookups.
6
 *
7
 * Any one-to-many mappings are inserted as one-to-table
8
 * lookups in the beginning, and are not affected by the optimization
9
 * stage.
10
 *
11
 * There is a special function to add a 256-length range-to-table mapping.
12
 * The ranges do not have to be added in order.
13
 *
14
 * This code can be a lot simpler if we don't care about wasting memory,
15
 * or can trust the parser to give us optimal mappings.
16
 */
17
 
18
#include "fitz.h"
19
#include "mupdf.h"
20
 
21
/* Macros for accessing the combined extent_flags field */
22
#define pdf_range_high(r) ((r)->low + ((r)->extent_flags >> 2))
23
#define pdf_range_flags(r) ((r)->extent_flags & 3)
24
#define pdf_range_set_high(r, h) \
25
	((r)->extent_flags = (((r)->extent_flags & 3) | ((h - (r)->low) << 2)))
26
#define pdf_range_set_flags(r, f) \
27
	((r)->extent_flags = (((r)->extent_flags & ~3) | f))
28
 
29
/*
30
 * Allocate, destroy and simple parameters.
31
 */
32
 
33
pdf_cmap *
34
pdf_new_cmap(void)
35
{
36
	pdf_cmap *cmap;
37
 
38
	cmap = fz_malloc(sizeof(pdf_cmap));
39
	cmap->refs = 1;
40
 
41
	strcpy(cmap->cmap_name, "");
42
	strcpy(cmap->usecmap_name, "");
43
	cmap->usecmap = NULL;
44
	cmap->wmode = 0;
45
	cmap->codespace_len = 0;
46
 
47
	cmap->rlen = 0;
48
	cmap->rcap = 0;
49
	cmap->ranges = NULL;
50
 
51
	cmap->tlen = 0;
52
	cmap->tcap = 0;
53
	cmap->table = NULL;
54
 
55
	return cmap;
56
}
57
 
58
pdf_cmap *
59
pdf_keep_cmap(pdf_cmap *cmap)
60
{
61
	if (cmap->refs >= 0)
62
		cmap->refs ++;
63
	return cmap;
64
}
65
 
66
void
67
pdf_drop_cmap(pdf_cmap *cmap)
68
{
69
	if (cmap->refs >= 0)
70
	{
71
		if (--cmap->refs == 0)
72
		{
73
			if (cmap->usecmap)
74
				pdf_drop_cmap(cmap->usecmap);
75
			fz_free(cmap->ranges);
76
			fz_free(cmap->table);
77
			fz_free(cmap);
78
		}
79
	}
80
}
81
 
82
void
83
pdf_set_usecmap(pdf_cmap *cmap, pdf_cmap *usecmap)
84
{
85
	int i;
86
 
87
	if (cmap->usecmap)
88
		pdf_drop_cmap(cmap->usecmap);
89
	cmap->usecmap = pdf_keep_cmap(usecmap);
90
 
91
	if (cmap->codespace_len == 0)
92
	{
93
		cmap->codespace_len = usecmap->codespace_len;
94
		for (i = 0; i < usecmap->codespace_len; i++)
95
			cmap->codespace[i] = usecmap->codespace[i];
96
	}
97
}
98
 
99
int
100
pdf_get_wmode(pdf_cmap *cmap)
101
{
102
	return cmap->wmode;
103
}
104
 
105
void
106
pdf_set_wmode(pdf_cmap *cmap, int wmode)
107
{
108
	cmap->wmode = wmode;
109
}
110
 
111
void
112
pdf_debug_cmap(pdf_cmap *cmap)
113
{
114
	int i, k, n;
115
 
116
	printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmap_name);
117
 
118
	if (cmap->usecmap_name[0])
119
		printf("\tusecmap /%s\n", cmap->usecmap_name);
120
	if (cmap->usecmap)
121
		printf("\tusecmap $%p\n", (void *) cmap->usecmap);
122
 
123
	printf("\twmode %d\n", cmap->wmode);
124
 
125
	printf("\tcodespaces {\n");
126
	for (i = 0; i < cmap->codespace_len; i++)
127
	{
128
		printf("\t\t<%x> <%x>\n", cmap->codespace[i].low, cmap->codespace[i].high);
129
	}
130
	printf("\t}\n");
131
 
132
	printf("\tranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
133
	for (i = 0; i < cmap->rlen; i++)
134
	{
135
		pdf_range *r = &cmap->ranges[i];
136
		printf("\t\t<%04x> <%04x> ", r->low, pdf_range_high(r));
137
		if (pdf_range_flags(r) == PDF_CMAP_TABLE)
138
		{
139
			printf("[ ");
140
			for (k = 0; k < pdf_range_high(r) - r->low + 1; k++)
141
				printf("%d ", cmap->table[r->offset + k]);
142
			printf("]\n");
143
		}
144
		else if (pdf_range_flags(r) == PDF_CMAP_MULTI)
145
		{
146
			printf("< ");
147
			n = cmap->table[r->offset];
148
			for (k = 0; k < n; k++)
149
				printf("%04x ", cmap->table[r->offset + 1 + k]);
150
			printf(">\n");
151
		}
152
		else
153
			printf("%d\n", r->offset);
154
	}
155
	printf("\t}\n}\n");
156
}
157
 
158
/*
159
 * Add a codespacerange section.
160
 * These ranges are used by pdf_decode_cmap to decode
161
 * multi-byte encoded strings.
162
 */
163
void
164
pdf_add_codespace(pdf_cmap *cmap, int low, int high, int n)
165
{
166
	if (cmap->codespace_len + 1 == nelem(cmap->codespace))
167
	{
168
		fz_warn("assert: too many code space ranges");
169
		return;
170
	}
171
 
172
	cmap->codespace[cmap->codespace_len].n = n;
173
	cmap->codespace[cmap->codespace_len].low = low;
174
	cmap->codespace[cmap->codespace_len].high = high;
175
	cmap->codespace_len ++;
176
}
177
 
178
/*
179
 * Add an integer to the table.
180
 */
181
static void
182
add_table(pdf_cmap *cmap, int value)
183
{
184
	if (cmap->tlen == USHRT_MAX)
185
	{
186
		fz_warn("cmap table is full; ignoring additional entries");
187
		return;
188
	}
189
	if (cmap->tlen + 1 > cmap->tcap)
190
	{
191
		cmap->tcap = cmap->tcap > 1 ? (cmap->tcap * 3) / 2 : 256;
192
		cmap->table = fz_realloc(cmap->table, cmap->tcap, sizeof(unsigned short));
193
	}
194
	cmap->table[cmap->tlen++] = value;
195
}
196
 
197
/*
198
 * Add a range.
199
 */
200
static void
201
add_range(pdf_cmap *cmap, int low, int high, int flag, int offset)
202
{
203
	/* If the range is too large to be represented, split it */
204
	if (high - low > 0x3fff)
205
	{
206
		add_range(cmap, low, low+0x3fff, flag, offset);
207
		add_range(cmap, low+0x3fff, high, flag, offset+0x3fff);
208
		return;
209
	}
210
	if (cmap->rlen + 1 > cmap->rcap)
211
	{
212
		cmap->rcap = cmap->rcap > 1 ? (cmap->rcap * 3) / 2 : 256;
213
		cmap->ranges = fz_realloc(cmap->ranges, cmap->rcap, sizeof(pdf_range));
214
	}
215
	cmap->ranges[cmap->rlen].low = low;
216
	pdf_range_set_high(&cmap->ranges[cmap->rlen], high);
217
	pdf_range_set_flags(&cmap->ranges[cmap->rlen], flag);
218
	cmap->ranges[cmap->rlen].offset = offset;
219
	cmap->rlen ++;
220
}
221
 
222
/*
223
 * Add a range-to-table mapping.
224
 */
225
void
226
pdf_map_range_to_table(pdf_cmap *cmap, int low, int *table, int len)
227
{
228
	int i;
229
	int high = low + len;
230
	int offset = cmap->tlen;
231
	if (cmap->tlen + len >= USHRT_MAX)
232
		fz_warn("cannot map range to table; table is full");
233
	else
234
	{
235
		for (i = 0; i < len; i++)
236
			add_table(cmap, table[i]);
237
		add_range(cmap, low, high, PDF_CMAP_TABLE, offset);
238
	}
239
}
240
 
241
/*
242
 * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25)
243
 */
244
void
245
pdf_map_range_to_range(pdf_cmap *cmap, int low, int high, int offset)
246
{
247
	add_range(cmap, low, high, high - low == 0 ? PDF_CMAP_SINGLE : PDF_CMAP_RANGE, offset);
248
}
249
 
250
/*
251
 * Add a single one-to-many mapping.
252
 */
253
void
254
pdf_map_one_to_many(pdf_cmap *cmap, int low, int *values, int len)
255
{
256
	int offset, i;
257
 
258
	if (len == 1)
259
	{
260
		add_range(cmap, low, low, PDF_CMAP_SINGLE, values[0]);
261
		return;
262
	}
263
 
264
	if (len > 8)
265
	{
266
		fz_warn("one to many mapping is too large (%d); truncating", len);
267
		len = 8;
268
	}
269
 
270
	if (len == 2 &&
271
		values[0] >= 0xD800 && values[0] <= 0xDBFF &&
272
		values[1] >= 0xDC00 && values[1] <= 0xDFFF)
273
	{
274
		fz_warn("ignoring surrogate pair mapping in cmap");
275
		return;
276
	}
277
 
278
	if (cmap->tlen + len + 1 >= USHRT_MAX)
279
		fz_warn("cannot map one to many; table is full");
280
	else
281
	{
282
		offset = cmap->tlen;
283
		add_table(cmap, len);
284
		for (i = 0; i < len; i++)
285
			add_table(cmap, values[i]);
286
		add_range(cmap, low, low, PDF_CMAP_MULTI, offset);
287
	}
288
}
289
 
290
/*
291
 * Sort the input ranges.
292
 * Merge contiguous input ranges to range-to-range if the output is contiguous.
293
 * Merge contiguous input ranges to range-to-table if the output is random.
294
 */
295
 
296
static int cmprange(const void *va, const void *vb)
297
{
298
	return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low;
299
}
300
 
301
void
302
pdf_sort_cmap(pdf_cmap *cmap)
303
{
304
	pdf_range *a;			/* last written range on output */
305
	pdf_range *b;			/* current range examined on input */
306
 
307
	if (cmap->rlen == 0)
308
		return;
309
 
310
	qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
311
 
312
	if (cmap->tlen == USHRT_MAX)
313
	{
314
		fz_warn("cmap table is full; will not combine ranges");
315
		return;
316
	}
317
 
318
	a = cmap->ranges;
319
	b = cmap->ranges + 1;
320
 
321
	while (b < cmap->ranges + cmap->rlen)
322
	{
323
		/* ignore one-to-many mappings */
324
		if (pdf_range_flags(b) == PDF_CMAP_MULTI)
325
		{
326
			*(++a) = *b;
327
		}
328
 
329
		/* input contiguous */
330
		else if (pdf_range_high(a) + 1 == b->low)
331
		{
332
			/* output contiguous */
333
			if (pdf_range_high(a) - a->low + a->offset + 1 == b->offset)
334
			{
335
				/* SR -> R and SS -> R and RR -> R and RS -> R */
336
				if ((pdf_range_flags(a) == PDF_CMAP_SINGLE || pdf_range_flags(a) == PDF_CMAP_RANGE) && (pdf_range_high(b) - a->low <= 0x3fff))
337
				{
338
					pdf_range_set_flags(a, PDF_CMAP_RANGE);
339
					pdf_range_set_high(a, pdf_range_high(b));
340
				}
341
 
342
				/* LS -> L */
343
				else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff))
344
				{
345
					pdf_range_set_high(a, pdf_range_high(b));
346
					add_table(cmap, b->offset);
347
				}
348
 
349
				/* LR -> LR */
350
				else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_RANGE)
351
				{
352
					*(++a) = *b;
353
				}
354
 
355
				/* XX -> XX */
356
				else
357
				{
358
					*(++a) = *b;
359
				}
360
			}
361
 
362
			/* output separated */
363
			else
364
			{
365
				/* SS -> L */
366
				if (pdf_range_flags(a) == PDF_CMAP_SINGLE && pdf_range_flags(b) == PDF_CMAP_SINGLE)
367
				{
368
					pdf_range_set_flags(a, PDF_CMAP_TABLE);
369
					pdf_range_set_high(a, pdf_range_high(b));
370
					add_table(cmap, a->offset);
371
					add_table(cmap, b->offset);
372
					a->offset = cmap->tlen - 2;
373
				}
374
 
375
				/* LS -> L */
376
				else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff))
377
				{
378
					pdf_range_set_high(a, pdf_range_high(b));
379
					add_table(cmap, b->offset);
380
				}
381
 
382
				/* XX -> XX */
383
				else
384
				{
385
					*(++a) = *b;
386
				}
387
			}
388
		}
389
 
390
		/* input separated: XX -> XX */
391
		else
392
		{
393
			*(++a) = *b;
394
		}
395
 
396
		b ++;
397
	}
398
 
399
	cmap->rlen = a - cmap->ranges + 1;
400
 
401
	fz_flush_warnings();
402
}
403
 
404
/*
405
 * Lookup the mapping of a codepoint.
406
 */
407
int
408
pdf_lookup_cmap(pdf_cmap *cmap, int cpt)
409
{
410
	int l = 0;
411
	int r = cmap->rlen - 1;
412
	int m;
413
 
414
	while (l <= r)
415
	{
416
		m = (l + r) >> 1;
417
		if (cpt < cmap->ranges[m].low)
418
			r = m - 1;
419
		else if (cpt > pdf_range_high(&cmap->ranges[m]))
420
			l = m + 1;
421
		else
422
		{
423
			int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
424
			if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE)
425
				return cmap->table[i];
426
			if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI)
427
				return -1; /* should use lookup_cmap_full */
428
			return i;
429
		}
430
	}
431
 
432
	if (cmap->usecmap)
433
		return pdf_lookup_cmap(cmap->usecmap, cpt);
434
 
435
	return -1;
436
}
437
 
438
int
439
pdf_lookup_cmap_full(pdf_cmap *cmap, int cpt, int *out)
440
{
441
	int i, k, n;
442
	int l = 0;
443
	int r = cmap->rlen - 1;
444
	int m;
445
 
446
	while (l <= r)
447
	{
448
		m = (l + r) >> 1;
449
		if (cpt < cmap->ranges[m].low)
450
			r = m - 1;
451
		else if (cpt > pdf_range_high(&cmap->ranges[m]))
452
			l = m + 1;
453
		else
454
		{
455
			k = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
456
			if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE)
457
			{
458
				out[0] = cmap->table[k];
459
				return 1;
460
			}
461
			else if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI)
462
			{
463
				n = cmap->ranges[m].offset;
464
				for (i = 0; i < cmap->table[n]; i++)
465
					out[i] = cmap->table[n + i + 1];
466
				return cmap->table[n];
467
			}
468
			else
469
			{
470
				out[0] = k;
471
				return 1;
472
			}
473
		}
474
	}
475
 
476
	if (cmap->usecmap)
477
		return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
478
 
479
	return 0;
480
}
481
 
482
/*
483
 * Use the codespace ranges to extract a codepoint from a
484
 * multi-byte encoded string.
485
 */
486
unsigned char *
487
pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
488
{
489
	int k, n, c;
490
 
491
	c = 0;
492
	for (n = 0; n < 4; n++)
493
	{
494
		c = (c << 8) | buf[n];
495
		for (k = 0; k < cmap->codespace_len; k++)
496
		{
497
			if (cmap->codespace[k].n == n + 1)
498
			{
499
				if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
500
				{
501
					*cpt = c;
502
					return buf + n + 1;
503
				}
504
			}
505
		}
506
	}
507
 
508
	*cpt = 0;
509
	return buf + 1;
510
}