Subversion Repositories Kolibri OS

Rev

Rev 4680 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
/*
2
This code does smooth scaling of a pixmap.
3
 
4
This function returns a new pixmap representing the area starting at (0,0)
5
given by taking the source pixmap src, scaling it to width w, and height h,
6
and then positioning it at (frac(x),frac(y)).
7
*/
8
 
9
#include "fitz.h"
10
 
11
/* Do we special case handling of single pixel high/wide images? The
12
 * 'purest' handling is given by not special casing them, but certain
13
 * files that use such images 'stack' them to give full images. Not
14
 * special casing them results in then being fainter and giving noticable
15
 * rounding errors.
16
 */
17
#define SINGLE_PIXEL_SPECIALS
18
 
19
#ifdef DEBUG_SCALING
20
#ifdef WIN32
21
#include 
22
static void debug_print(const char *fmt, ...)
23
{
24
	va_list args;
25
	char text[256];
26
	va_start(args, fmt);
27
	vsprintf(text, fmt, args);
28
	va_end(args);
29
	OutputDebugStringA(text);
30
	printf(text);
31
}
32
#else
33
static void debug_print(const char *fmt, ...)
34
{
35
	va_list args;
36
	va_start(args, fmt);
37
	vfprintf(stderr, fmt, args);
38
	va_end(args);
39
}
40
#endif
41
#endif
42
#ifdef DEBUG_SCALING
43
#define DBUG(A) debug_print A
44
#else
45
#define DBUG(A) do {} while(0==1)
46
#endif
47
 
48
/*
49
Consider a row of source samples, src, of width src_w, positioned at x,
50
scaled to width dst_w.
51
 
52
src[i] is centred at: x + (i + 0.5)*dst_w/src_w
53
 
54
Therefore the distance between the centre of the jth output pixel and
55
the centre of the ith source sample is:
56
 
57
dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
58
 
59
When scaling up, therefore:
60
 
61
dst[j] = SUM(filter(dist[j,i]) * src[i])
62
	(for all ints i)
63
 
64
This can be simplified by noticing that filters are only non zero within
65
a given filter width (henceforth called W). So:
66
 
67
dst[j] = SUM(filter(dist[j,i]) * src[i])
68
	(for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
69
 
70
When scaling down, each filtered source sample is stretched to be wider
71
to avoid aliasing issues. This effectively reduces the distance between
72
centres.
73
 
74
dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
75
	(where F = dst_w/src_w)
76
	(for ints i, s.t. (j-W)/F < i < (j+W)/F)
77
 
78
*/
79
 
80
typedef struct fz_scale_filter_s fz_scale_filter;
81
 
82
struct fz_scale_filter_s
83
{
84
	int width;
85
	float (*fn)(fz_scale_filter *, float);
86
};
87
 
88
/* Image scale filters */
89
 
90
static float
91
triangle(fz_scale_filter *filter, float f)
92
{
93
	if (f >= 1)
94
		return 0;
95
	return 1-f;
96
}
97
 
98
static float
99
box(fz_scale_filter *filter, float f)
100
{
101
	if (f >= 0.5f)
102
		return 0;
103
	return 1;
104
}
105
 
106
static float
107
simple(fz_scale_filter *filter, float x)
108
{
109
	if (x >= 1)
110
		return 0;
111
	return 1 + (2*x - 3)*x*x;
112
}
113
 
114
static float
115
lanczos2(fz_scale_filter *filter, float x)
116
{
117
	if (x >= 2)
118
		return 0;
119
	return sinf(M_PI*x) * sinf(M_PI*x/2) / (M_PI*x) / (M_PI*x/2);
120
}
121
 
122
static float
123
lanczos3(fz_scale_filter *filter, float f)
124
{
125
	if (f >= 3)
126
		return 0;
127
	return sinf(M_PI*f) * sinf(M_PI*f/3) / (M_PI*f) / (M_PI*f/3);
128
}
129
 
130
/*
131
The Mitchell family of filters is defined:
132
 
133
	f(x) =	1 { (12-9B-6C)x^3 + (-18+12B+6C)x^2 + (6-2B)	for x < 1
134
		- {
135
		6 { (-B-6C)x^3+(6B+30C)x^2+(-12B-48C)x+(8B+24C)	for 1<=x<=2
136
 
137
The 'best' ones lie along the line B+2C = 1.
138
The literature suggests that B=1/3, C=1/3 is best.
139
 
140
	f(x) =	1 { (12-3-2)x^3 - (-18+4+2)x^2 + (16/3)	for x < 1
141
		- {
142
		6 { (-7/3)x^3 + 12x^2 - 20x + (32/3)	for 1<=x<=2
143
 
144
	f(x) =	1 { 21x^3 - 36x^2 + 16			for x < 1
145
		- {
146
		18{ -7x^3 + 36x^2 - 60x + 32		for 1<=x<=2
147
*/
148
 
149
static float
150
mitchell(fz_scale_filter *filter, float x)
151
{
152
	if (x >= 2)
153
		return 0;
154
	if (x >= 1)
155
		return (32 + x*(-60 + x*(36 - 7*x)))/18;
156
	return (16 + x*x*(-36 + 21*x))/18;
157
}
158
 
159
fz_scale_filter fz_scale_filter_box = { 1, box };
160
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
161
fz_scale_filter fz_scale_filter_simple = { 1, simple };
162
fz_scale_filter fz_scale_filter_lanczos2 = { 2, lanczos2 };
163
fz_scale_filter fz_scale_filter_lanczos3 = { 3, lanczos3 };
164
fz_scale_filter fz_scale_filter_mitchell = { 2, mitchell };
165
 
166
/*
167
We build ourselves a set of tables to contain the precalculated weights
168
for a given set of scale settings.
169
 
170
The first dst_w entries in index are the index into index of the
171
sets of weight for each destination pixel.
172
 
173
Each of the sets of weights is a set of values consisting of:
174
	the minimum source pixel index used for this destination pixel
175
	the number of weights used for this destination pixel
176
	the weights themselves
177
 
178
So to calculate dst[i] we do the following:
179
 
180
	weights = &index[index[i]];
181
	min = *weights++;
182
	len = *weights++;
183
	dst[i] = 0;
184
	while (--len > 0)
185
		dst[i] += src[min++] * *weights++
186
 
187
in addition, we guarantee that at the end of this process weights will now
188
point to the weights value for dst pixel i+1.
189
 
190
In the simplest version of this algorithm, we would scale the whole image
191
horizontally first into a temporary buffer, then scale that temporary
192
buffer again vertically to give us our result. Using such a simple
193
algorithm would mean that could use the same style of weights for both
194
horizontal and vertical scaling.
195
 
196
Unfortunately, this would also require a large temporary buffer,
197
particularly in the case where we are scaling up.
198
 
199
We therefore modify the algorithm as follows; we scale scanlines from the
200
source image horizontally into a temporary buffer, until we have all the
201
contributors for a given output scanline. We then produce that output
202
scanline from the temporary buffer. In this way we restrict the height
203
of the temporary buffer to a small fraction of the final size.
204
 
205
Unfortunately, this means that the pseudo code for recombining a
206
scanline of fully scaled pixels is as follows:
207
 
208
	weights = &index[index[y]];
209
	min = *weights++;
210
	len = *weights++;
211
	for (x=0 to dst_w)
212
		min2 = min
213
		len2 = len
214
		weights2 = weights
215
		dst[x] = 0;
216
		while (--len2 > 0)
217
			dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
218
 
219
i.e. it requires a % operation for every source pixel - this is typically
220
expensive.
221
 
222
To avoid this, we alter the order in which vertical weights are stored,
223
so that they are ordered in the same order as the temporary buffer lines
224
would appear. This simplifies the algorithm to:
225
 
226
	weights = &index[index[y]];
227
	min = *weights++;
228
	len = *weights++;
229
	for (x=0 to dst_w)
230
		min2 = 0
231
		len2 = len
232
		weights2 = weights
233
		dst[x] = 0;
234
		while (--len2 > 0)
235
			dst[x] += temp[i][min2++] * *weights2++
236
 
237
This means that len may be larger than it needs to be (due to the
238
possible inclusion of a zero weight row or two), but in practise this
239
is only an increase of 1 or 2 at worst.
240
 
241
We implement this by generating the weights as normal (but ensuring we
242
leave enough space) and then reordering afterwards.
243
 
244
*/
245
 
246
typedef struct fz_weights_s fz_weights;
247
 
248
struct fz_weights_s
249
{
250
	int count;
251
	int max_len;
252
	int n;
253
	int flip;
254
	int new_line;
255
	int index[1];
256
};
257
 
258
static fz_weights *
259
new_weights(fz_scale_filter *filter, int src_w, float dst_w, int dst_w_i, int n, int flip)
260
{
261
	int max_len;
262
	fz_weights *weights;
263
 
264
	if (src_w > dst_w)
265
	{
266
		/* Scaling down, so there will be a maximum of
267
		 * 2*filterwidth*src_w/dst_w src pixels
268
		 * contributing to each dst pixel. */
269
		max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
270
		if (max_len > src_w)
271
			max_len = src_w;
272
	}
273
	else
274
	{
275
		/* Scaling up, so there will be a maximum of
276
		 * 2*filterwidth src pixels contributing to each dst pixel.
277
		 */
278
		max_len = 2 * filter->width;
279
	}
280
	/* We need the size of the struct,
281
	 * plus dst_w*sizeof(int) for the index
282
	 * plus (2+max_len)*sizeof(int) for the weights
283
	 * plus room for an extra set of weights for reordering.
284
	 */
285
	weights = fz_malloc(sizeof(*weights)+(max_len+3)*(dst_w_i+1)*sizeof(int));
286
	if (weights == NULL)
287
		return NULL;
288
	weights->count = -1;
289
	weights->max_len = max_len;
290
	weights->index[0] = dst_w_i;
291
	weights->n = n;
292
	weights->flip = flip;
293
	return weights;
294
}
295
 
296
static void
297
init_weights(fz_weights *weights, int j)
298
{
299
	int index;
300
 
301
	assert(weights->count == j-1);
302
	weights->count++;
303
	weights->new_line = 1;
304
	if (j == 0)
305
		index = weights->index[0];
306
	else
307
	{
308
		index = weights->index[j-1];
309
		index += 2 + weights->index[index+1];
310
	}
311
	weights->index[j] = index; /* row pointer */
312
	weights->index[index] = 0; /* min */
313
	weights->index[index+1] = 0; /* len */
314
}
315
 
316
static void
317
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
318
	float x, float F, float G, int src_w, float dst_w)
319
{
320
	float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
321
	float f;
322
	int min, len, index, weight;
323
 
324
	dist *= G;
325
	if (dist < 0)
326
		dist = -dist;
327
	f = filter->fn(filter, dist)*F;
328
	weight = (int)(256*f+0.5f);
329
	if (weight == 0)
330
		return;
331
 
332
	/* wrap i back into range */
333
#ifdef MIRROR_WRAP
334
	do
335
	{
336
		if (i < 0)
337
			i = -1-i;
338
		else if (i >= src_w)
339
			i = 2*src_w-1-i;
340
		else
341
			break;
342
	}
343
	while (1);
344
#elif defined(WRAP)
345
	if (i < 0)
346
		i = 0;
347
	else if (i >= src_w)
348
		i = src_w-1;
349
#else
350
	if (i < 0)
351
	{
352
		i = 0;
353
		weight = 0;
354
	}
355
	else if (i >= src_w)
356
	{
357
		i = src_w-1;
358
		weight = 0;
359
	}
360
	if (weight == 0)
361
		return;
362
#endif
363
 
364
	DBUG(("add_weight[%d][%d] = %d(%g) dist=%g\n",j,i,weight,f,dist));
365
 
366
	if (weights->new_line)
367
	{
368
		/* New line */
369
		weights->new_line = 0;
370
		index = weights->index[j]; /* row pointer */
371
		weights->index[index] = i; /* min */
372
		weights->index[index+1] = 0; /* len */
373
	}
374
	index = weights->index[j];
375
	min = weights->index[index++];
376
	len = weights->index[index++];
377
	while (i < min)
378
	{
379
		/* This only happens in rare cases, but we need to insert
380
		 * one earlier. In exceedingly rare cases we may need to
381
		 * insert more than one earlier. */
382
		int k;
383
 
384
		for (k = len; k > 0; k--)
385
		{
386
			weights->index[index+k] = weights->index[index+k-1];
387
		}
388
		weights->index[index] = 0;
389
		min--;
390
		len++;
391
		weights->index[index-2] = min;
392
		weights->index[index-1] = len;
393
	}
394
	if (i-min >= len)
395
	{
396
		/* The usual case */
397
		while (i-min >= ++len)
398
		{
399
			weights->index[index+len-1] = 0;
400
		}
401
		assert(len-1 == i-min);
402
		weights->index[index+i-min] = weight;
403
		weights->index[index-1] = len;
404
		assert(len <= weights->max_len);
405
	}
406
	else
407
	{
408
		/* Infrequent case */
409
		weights->index[index+i-min] += weight;
410
	}
411
}
412
 
413
static void
414
reorder_weights(fz_weights *weights, int j, int src_w)
415
{
416
	int idx = weights->index[j];
417
	int min = weights->index[idx++];
418
	int len = weights->index[idx++];
419
	int max = weights->max_len;
420
	int tmp = idx+max;
421
	int i, off;
422
 
423
	/* Copy into the temporary area */
424
	memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);
425
 
426
	/* Pad out if required */
427
	assert(len <= max);
428
	assert(min+len <= src_w);
429
	off = 0;
430
	if (len < max)
431
	{
432
		memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
433
		len = max;
434
		if (min + len > src_w)
435
		{
436
			off = min + len - src_w;
437
			min = src_w - len;
438
			weights->index[idx-2] = min;
439
		}
440
		weights->index[idx-1] = len;
441
	}
442
 
443
	/* Copy back into the proper places */
444
	for (i = 0; i < len; i++)
445
	{
446
		weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
447
	}
448
}
449
 
450
/* Due to rounding and edge effects, the sums for the weights sometimes don't
451
 * add up to 256. This causes visible rendering effects. Therefore, we take
452
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
453
 * 256 for all pixels that are completely covered. See bug #691629. */
454
static void
455
check_weights(fz_weights *weights, int j, int w, float x, float wf)
456
{
457
	int idx, len;
458
	int sum = 0;
459
	int max = -256;
460
	int maxidx = 0;
461
	int i;
462
 
463
	idx = weights->index[j];
464
	idx++; /* min */
465
	len = weights->index[idx++];
466
 
467
	for(i=0; i < len; i++)
468
	{
469
		int v = weights->index[idx++];
470
		sum += v;
471
		if (v > max)
472
		{
473
			max = v;
474
			maxidx = idx;
475
		}
476
	}
477
	/* If we aren't the first or last pixel, OR if the sum is too big
478
	 * then adjust it. */
479
	if (((j != 0) && (j != w-1)) || (sum > 256))
480
		weights->index[maxidx-1] += 256-sum;
481
	/* Otherwise, if we are the first pixel, and it's fully covered, then
482
	 * adjust it. */
483
	else if ((j == 0) && (x < 0.0001F) && (sum != 256))
484
		weights->index[maxidx-1] += 256-sum;
485
	/* Finally, if we are the last pixel, and it's fully covered, then
486
	 * adjust it. */
487
	else if ((j == w-1) && ((float)w-wf < 0.0001F) && (sum != 256))
488
		weights->index[maxidx-1] += 256-sum;
489
	DBUG(("total weight %d = %d\n", j, sum));
490
}
491
 
492
static fz_weights *
493
make_weights(int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int n, int flip)
494
{
495
	fz_weights *weights;
496
	float F, G;
497
	float window;
498
	int j;
499
 
500
	if (dst_w < src_w)
501
	{
502
		/* Scaling down */
503
		F = dst_w / src_w;
504
		G = 1;
505
	}
506
	else
507
	{
508
		/* Scaling up */
509
		F = 1;
510
		G = src_w / dst_w;
511
	}
512
	window = filter->width / F;
513
	DBUG(("make_weights src_w=%d x=%g dst_w=%g dst_w_int=%d F=%g window=%g\n", src_w, x, dst_w, dst_w_int, F, window));
514
	weights	= new_weights(filter, src_w, dst_w, dst_w_int, n, flip);
515
	if (weights == NULL)
516
		return NULL;
517
	for (j = 0; j < dst_w_int; j++)
518
	{
519
		/* find the position of the centre of dst[j] in src space */
520
		float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
521
		int l, r;
522
		l = ceilf(centre - window);
523
		r = floorf(centre + window);
524
		DBUG(("%d: centre=%g l=%d r=%d\n", j, centre, l, r));
525
		init_weights(weights, j);
526
		for (; l <= r; l++)
527
		{
528
			add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
529
		}
530
		check_weights(weights, j, dst_w_int, x, dst_w);
531
		if (vertical)
532
		{
533
			reorder_weights(weights, j, src_w);
534
		}
535
	}
536
	weights->count++; /* weights->count = dst_w_int now */
537
	return weights;
538
}
539
 
540
static void
541
scale_row_to_temp(int *dst, unsigned char *src, fz_weights *weights)
542
{
543
	int *contrib = &weights->index[weights->index[0]];
544
	int len, i, j, n;
545
	unsigned char *min;
546
 
547
	n = weights->n;
548
	if (weights->flip)
549
	{
550
		dst += (weights->count-1)*n;
551
		for (i=weights->count; i > 0; i--)
552
		{
553
			min = &src[n * *contrib++];
554
			len = *contrib++;
555
			for (j = 0; j < n; j++)
556
				dst[j] = 0;
557
			while (len-- > 0)
558
			{
559
				for (j = n; j > 0; j--)
560
					*dst++ += *min++ * *contrib;
561
				dst -= n;
562
				contrib++;
563
			}
564
			dst -= n;
565
		}
566
	}
567
	else
568
	{
569
		for (i=weights->count; i > 0; i--)
570
		{
571
			min = &src[n * *contrib++];
572
			len = *contrib++;
573
			for (j = 0; j < n; j++)
574
				dst[j] = 0;
575
			while (len-- > 0)
576
			{
577
				for (j = n; j > 0; j--)
578
					*dst++ += *min++ * *contrib;
579
				dst -= n;
580
				contrib++;
581
			}
582
			dst += n;
583
		}
584
	}
585
}
586
 
587
static void
588
scale_row_to_temp1(int *dst, unsigned char *src, fz_weights *weights)
589
{
590
	int *contrib = &weights->index[weights->index[0]];
591
	int len, i;
592
	unsigned char *min;
593
 
594
	assert(weights->n == 1);
595
	if (weights->flip)
596
	{
597
		dst += weights->count;
598
		for (i=weights->count; i > 0; i--)
599
		{
600
			int val = 0;
601
			min = &src[*contrib++];
602
			len = *contrib++;
603
			while (len-- > 0)
604
			{
605
				val += *min++ * *contrib++;
606
			}
607
			*--dst = val;
608
		}
609
	}
610
	else
611
	{
612
		for (i=weights->count; i > 0; i--)
613
		{
614
			int val = 0;
615
			min = &src[*contrib++];
616
			len = *contrib++;
617
			while (len-- > 0)
618
			{
619
				val += *min++ * *contrib++;
620
			}
621
			*dst++ = val;
622
		}
623
	}
624
}
625
 
626
static void
627
scale_row_to_temp2(int *dst, unsigned char *src, fz_weights *weights)
628
{
629
	int *contrib = &weights->index[weights->index[0]];
630
	int len, i;
631
	unsigned char *min;
632
 
633
	assert(weights->n == 2);
634
	if (weights->flip)
635
	{
636
		dst += 2*weights->count;
637
		for (i=weights->count; i > 0; i--)
638
		{
639
			int c1 = 0;
640
			int c2 = 0;
641
			min = &src[2 * *contrib++];
642
			len = *contrib++;
643
			while (len-- > 0)
644
			{
645
				c1 += *min++ * *contrib;
646
				c2 += *min++ * *contrib++;
647
			}
648
			*--dst = c2;
649
			*--dst = c1;
650
		}
651
	}
652
	else
653
	{
654
		for (i=weights->count; i > 0; i--)
655
		{
656
			int c1 = 0;
657
			int c2 = 0;
658
			min = &src[2 * *contrib++];
659
			len = *contrib++;
660
			while (len-- > 0)
661
			{
662
				c1 += *min++ * *contrib;
663
				c2 += *min++ * *contrib++;
664
			}
665
			*dst++ = c1;
666
			*dst++ = c2;
667
		}
668
	}
669
}
670
 
671
static void
672
scale_row_to_temp4(int *dst, unsigned char *src, fz_weights *weights)
673
{
674
	int *contrib = &weights->index[weights->index[0]];
675
#ifndef ARCH_ARM
676
	int len, i;
677
	unsigned char *min;
678
#endif
679
 
680
	assert(weights->n == 4);
681
	if (weights->flip)
682
	{
683
		dst += 4*weights->count;
684
#ifdef ARCH_ARM
685
		asm volatile(
686
		"1:"
687
		"ldr	r4, [%2], #4		@ r4 = *contrib++	\n"
688
		"ldr	r9, [%2], #4		@ r9 = len = *contrib++	\n"
689
		"mov	r5, #0			@ r5 = r = 0		\n"
690
		"mov	r6, #0			@ r6 = g = 0		\n"
691
		"mov	r7, #0			@ r7 = b = 0		\n"
692
		"mov	r8, #0			@ r8 = a = 0		\n"
693
		"add	r4, %1, r4, LSL #2	@ r4 = min = &src[4*r4]	\n"
694
		"cmp	r9, #0			@ while (len-- > 0)	\n"
695
		"beq	3f			@ {			\n"
696
		"2:							\n"
697
		"ldr	r10,[%2], #4		@ r10 = *contrib++	\n"
698
		"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
699
		"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
700
		"ldrb	r14,[r4], #1		@ r14 = *min++		\n"
701
		"mla	r5, r10,r11,r5		@ r += r11 * r10	\n"
702
		"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
703
		"mla	r6, r10,r12,r6		@ g += r12 * r10	\n"
704
		"mla	r7, r10,r14,r7		@ b += r14 * r10	\n"
705
		"mla	r8, r10,r11,r8		@ a += r11 * r10	\n"
706
		"subs	r9, r9, #1		@ r9 = len--		\n"
707
		"bgt	2b			@ }			\n"
708
		"stmdb	%0!,{r5,r6,r7,r8}	@ *--dst=a;*--dst=b;	\n"
709
		"3:				@ *--dst=g;*--dst=r;	\n"
710
		"subs	%3, %3, #1		@ i--			\n"
711
		"bgt	1b			@ 			\n"
712
		:
713
		:
714
		"r" (dst),
715
		"r" (src),
716
		"r" (contrib),
717
		"r" (weights->count)
718
		:
719
		"r4","r5","r6","r7","r8","r9","r10","r11","r12","r14",
720
		"memory","cc"
721
		);
722
#else
723
		for (i=weights->count; i > 0; i--)
724
		{
725
			int r = 0;
726
			int g = 0;
727
			int b = 0;
728
			int a = 0;
729
			min = &src[4 * *contrib++];
730
			len = *contrib++;
731
			while (len-- > 0)
732
			{
733
				r += *min++ * *contrib;
734
				g += *min++ * *contrib;
735
				b += *min++ * *contrib;
736
				a += *min++ * *contrib++;
737
			}
738
			*--dst = a;
739
			*--dst = b;
740
			*--dst = g;
741
			*--dst = r;
742
		}
743
#endif
744
	}
745
	else
746
	{
747
#ifdef ARCH_ARM
748
		asm volatile(
749
		"1:"
750
		"ldr	r4, [%2], #4		@ r4 = *contrib++	\n"
751
		"ldr	r9, [%2], #4		@ r9 = len = *contrib++	\n"
752
		"mov	r5, #0			@ r5 = r = 0		\n"
753
		"mov	r6, #0			@ r6 = g = 0		\n"
754
		"mov	r7, #0			@ r7 = b = 0		\n"
755
		"mov	r8, #0			@ r8 = a = 0		\n"
756
		"add	r4, %1, r4, LSL #2	@ r4 = min = &src[4*r4]	\n"
757
		"cmp	r9, #0			@ while (len-- > 0)	\n"
758
		"beq	3f			@ {			\n"
759
		"2:							\n"
760
		"ldr	r10,[%2], #4		@ r10 = *contrib++	\n"
761
		"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
762
		"ldrb	r12,[r4], #1		@ r12 = *min++		\n"
763
		"ldrb	r14,[r4], #1		@ r14 = *min++		\n"
764
		"mla	r5, r10,r11,r5		@ r += r11 * r10	\n"
765
		"ldrb	r11,[r4], #1		@ r11 = *min++		\n"
766
		"mla	r6, r10,r12,r6		@ g += r12 * r10	\n"
767
		"mla	r7, r10,r14,r7		@ b += r14 * r10	\n"
768
		"mla	r8, r10,r11,r8		@ a += r11 * r10	\n"
769
		"subs	r9, r9, #1		@ r9 = len--		\n"
770
		"bgt	2b			@ }			\n"
771
		"stmia	%0!,{r5,r6,r7,r8}	@ *dst++=r;*dst++=g;	\n"
772
		"3:				@ *dst++=b;*dst++=a;	\n"
773
		"subs	%3, %3, #1		@ i--			\n"
774
		"bgt	1b			@ 			\n"
775
		:
776
		:
777
		"r" (dst),
778
		"r" (src),
779
		"r" (contrib),
780
		"r" (weights->count)
781
		:
782
		"r4","r5","r6","r7","r8","r9","r10","r11","r12","r14",
783
		"memory","cc"
784
		);
785
#else
786
		for (i=weights->count; i > 0; i--)
787
		{
788
			int r = 0;
789
			int g = 0;
790
			int b = 0;
791
			int a = 0;
792
			min = &src[4 * *contrib++];
793
			len = *contrib++;
794
			while (len-- > 0)
795
			{
796
				r += *min++ * *contrib;
797
				g += *min++ * *contrib;
798
				b += *min++ * *contrib;
799
				a += *min++ * *contrib++;
800
			}
801
			*dst++ = r;
802
			*dst++ = g;
803
			*dst++ = b;
804
			*dst++ = a;
805
		}
806
#endif
807
	}
808
}
809
 
810
static void
811
scale_row_from_temp(unsigned char *dst, int *src, fz_weights *weights, int width, int row)
812
{
813
	int *contrib = &weights->index[weights->index[row]];
814
	int len, x;
815
 
816
	contrib++; /* Skip min */
817
	len = *contrib++;
818
	for (x=width; x > 0; x--)
819
	{
820
		int *min = src;
821
		int val = 0;
822
		int len2 = len;
823
		int *contrib2 = contrib;
824
 
825
		while (len2-- > 0)
826
		{
827
			val += *min * *contrib2++;
828
			min += width;
829
		}
830
		val = (val+(1<<15))>>16;
831
		if (val < 0)
832
			val = 0;
833
		else if (val > 255)
834
			val = 255;
835
		*dst++ = val;
836
		src++;
837
	}
838
}
839
 
840
#ifdef SINGLE_PIXEL_SPECIALS
841
static void
842
duplicate_single_pixel(unsigned char *dst, unsigned char *src, int n, int w, int h)
843
{
844
	int i;
845
 
846
	for (i = n; i > 0; i--)
847
		*dst++ = *src++;
848
	for (i = (w*h-1)*n; i > 0; i--)
849
	{
850
		*dst = dst[-n];
851
		dst++;
852
	}
853
}
854
 
855
static void
856
scale_single_row(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int h)
857
{
858
	int *contrib = &weights->index[weights->index[0]];
859
	int min, len, i, j, val, n;
860
	int tmp[FZ_MAX_COLORS];
861
 
862
	n = weights->n;
863
	/* Scale a single row */
864
	if (weights->flip)
865
	{
866
		dst += (weights->count-1)*n;
867
		for (i=weights->count; i > 0; i--)
868
		{
869
			min = *contrib++;
870
			len = *contrib++;
871
			min *= n;
872
			for (j = 0; j < n; j++)
873
				tmp[j] = 0;
874
			while (len-- > 0)
875
			{
876
				for (j = 0; j < n; j++)
877
					tmp[j] += src[min++] * *contrib;
878
				contrib++;
879
			}
880
			for (j = 0; j < n; j++)
881
			{
882
				val = (tmp[j]+(1<<7))>>8;
883
				if (val < 0)
884
					val = 0;
885
				else if (val > 255)
886
					val = 255;
887
				*dst++ = val;
888
			}
889
			dst -= 2*n;
890
		}
891
		dst += n * (weights->count+1);
892
	}
893
	else
894
	{
895
		for (i=weights->count; i > 0; i--)
896
		{
897
			min = *contrib++;
898
			len = *contrib++;
899
			min *= n;
900
			for (j = 0; j < n; j++)
901
				tmp[j] = 0;
902
			while (len-- > 0)
903
			{
904
				for (j = 0; j < n; j++)
905
					tmp[j] += src[min++] * *contrib;
906
				contrib++;
907
			}
908
			for (j = 0; j < n; j++)
909
			{
910
				val = (tmp[j]+(1<<7))>>8;
911
				if (val < 0)
912
					val = 0;
913
				else if (val > 255)
914
					val = 255;
915
				*dst++ = val;
916
			}
917
		}
918
	}
919
	/* And then duplicate it h times */
920
	n *= weights->count;
921
	while (--h > 0)
922
	{
923
		memcpy(dst, dst-n, n);
924
		dst += n;
925
	}
926
}
927
 
928
static void
929
scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int n, int w, int flip_y)
930
{
931
	int *contrib = &weights->index[weights->index[0]];
932
	int min, len, i, j, val;
933
	int tmp[FZ_MAX_COLORS];
934
 
935
	if (flip_y)
936
	{
937
		src_w = (src_w-1)*n;
938
		w = (w-1)*n;
939
		for (i=weights->count; i > 0; i--)
940
		{
941
			/* Scale the next pixel in the column */
942
			min = *contrib++;
943
			len = *contrib++;
944
			min = src_w-min*n;
945
			for (j = 0; j < n; j++)
946
				tmp[j] = 0;
947
			while (len-- > 0)
948
			{
949
				for (j = 0; j < n; j++)
950
					tmp[j] += src[src_w-min+j] * *contrib;
951
				contrib++;
952
			}
953
			for (j = 0; j < n; j++)
954
			{
955
				val = (tmp[j]+(1<<7))>>8;
956
				if (val < 0)
957
					val = 0;
958
				else if (val > 255)
959
					val = 255;
960
				*dst++ = val;
961
			}
962
			/* And then duplicate it across the row */
963
			for (j = w; j > 0; j--)
964
			{
965
				*dst = dst[-n];
966
				dst++;
967
			}
968
		}
969
	}
970
	else
971
	{
972
		w = (w-1)*n;
973
		for (i=weights->count; i > 0; i--)
974
		{
975
			/* Scale the next pixel in the column */
976
			min = *contrib++;
977
			len = *contrib++;
978
			min *= n;
979
			for (j = 0; j < n; j++)
980
				tmp[j] = 0;
981
			while (len-- > 0)
982
			{
983
				for (j = 0; j < n; j++)
984
					tmp[j] += src[min++] * *contrib;
985
				contrib++;
986
			}
987
			for (j = 0; j < n; j++)
988
			{
989
				val = (tmp[j]+(1<<7))>>8;
990
				if (val < 0)
991
					val = 0;
992
				else if (val > 255)
993
					val = 255;
994
				*dst++ = val;
995
			}
996
			/* And then duplicate it across the row */
997
			for (j = w; j > 0; j--)
998
			{
999
				*dst = dst[-n];
1000
				dst++;
1001
			}
1002
		}
1003
	}
1004
}
1005
#endif /* SINGLE_PIXEL_SPECIALS */
1006
 
1007
fz_pixmap *
1008
fz_scale_pixmap_gridfit(fz_pixmap *src, float x, float y, float w, float h, int gridfit)
1009
{
1010
	if (gridfit) {
1011
		float n;
1012
		if (w > 0) {
1013
			/* Adjust the left hand edge, leftwards to a pixel boundary */
1014
			n = (float)(int)x;   /* n is now on a pixel boundary */
1015
			if (n > x)           /* Ensure it's the pixel boundary BELOW x */
1016
				n -= 1.0f;
1017
			w += x-n;            /* width gets wider as x >= n */
1018
			x = n;
1019
			/* Adjust the right hand edge rightwards to a pixel boundary */
1020
			n = (float)(int)w;   /* n is now the integer width <= w */
1021
			if (n != w)          /* If w isn't an integer already, bump it */
1022
				w = 1.0f + n;/* up to the next integer. */
1023
		} else {
1024
			/* Adjust the right hand edge, rightwards to a pixel boundary */
1025
			n = (float)(int)x;   /* n is now on a pixel boundary */
1026
			if (n > x)           /* Ensure it's the pixel boundary <= x */
1027
				n -= 1.0f;
1028
			if (n != x)          /* If x isn't on a pixel boundary already, */
1029
				n += 1.0f;   /* make n be the pixel boundary above x. */
1030
			w -= n-x;            /* Expand width (more negative!) as n >= x */
1031
			x = n;
1032
			/* Adjust the left hand edge leftwards to a pixel boundary */
1033
			n = (float)(int)w;
1034
			if (n != w)
1035
				w = n - 1.0f;
1036
		}
1037
		if (h > 0) {
1038
			/* Adjust the bottom edge, downwards to a pixel boundary */
1039
			n = (float)(int)y;   /* n is now on a pixel boundary */
1040
			if (n > y)           /* Ensure it's the pixel boundary BELOW y */
1041
				n -= 1.0f;
1042
			h += y-n;            /* height gets larger as y >= n */
1043
			y = n;
1044
			/* Adjust the top edge upwards to a pixel boundary */
1045
			n = (float)(int)h;   /* n is now the integer height <= h */
1046
			if (n != h)          /* If h isn't an integer already, bump it */
1047
				h = 1.0f + n;/* up to the next integer. */
1048
		} else {
1049
			/* Adjust the top edge, upwards to a pixel boundary */
1050
			n = (float)(int)y;   /* n is now on a pixel boundary */
1051
			if (n > y)           /* Ensure it's the pixel boundary <= y */
1052
			n -= 1.0f;
1053
			if (n != y)          /* If y isn't on a pixel boundary already, */
1054
				n += 1.0f;   /* make n be the pixel boundary above y. */
1055
			h -= n-y;            /* Expand height (more negative!) as n >= y */
1056
			y = n;
1057
			/* Adjust the bottom edge downwards to a pixel boundary */
1058
			n = (float)(int)h;
1059
			if (n != h)
1060
				h = n - 1.0f;
1061
		}
1062
	}
1063
	return fz_scale_pixmap(src, x, y, w, h);
1064
}
1065
 
1066
fz_pixmap *
1067
fz_scale_pixmap(fz_pixmap *src, float x, float y, float w, float h)
1068
{
1069
	fz_scale_filter *filter = &fz_scale_filter_simple;
1070
	fz_weights *contrib_rows = NULL;
1071
	fz_weights *contrib_cols = NULL;
1072
	fz_pixmap *output = NULL;
1073
	int *temp = NULL;
1074
	int max_row, temp_span, temp_rows, row;
1075
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
1076
	int flip_x, flip_y;
1077
 
1078
	DBUG(("Scale: (%d,%d) to (%g,%g) at (%g,%g)\n",src->w,src->h,w,h,x,y));
1079
 
1080
	/* Find the destination bbox, width/height, and sub pixel offset,
1081
	 * allowing for whether we're flipping or not. */
1082
	/* Note that the x and y sub pixel offsets here are different.
1083
	 * The (x,y) position given describes where the bottom left corner
1084
	 * of the source image should be mapped to (i.e. where (0,h) in image
1085
	 * space ends up, not the more logical and sane (0,0)). Also there
1086
	 * are differences in the way we scale horizontally and vertically.
1087
	 * When scaling rows horizontally, we always read forwards through
1088
	 * the source, and store either forwards or in reverse as required.
1089
	 * When scaling vertically, we always store out forwards, but may
1090
	 * feed source rows in in a different order.
1091
	 *
1092
	 * Consider the image rectange 'r' to which the image is mapped,
1093
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
1094
	 * complete pixels.
1095
	 *
1096
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
1097
	 * the image is x flipped or not. Whatever happens 0 <= x < 1.
1098
	 * y is always R.ymax - r.ymax.
1099
	 */
1100
	/* dst_x_int is calculated to be the left of the scaled image, and
1101
	 * x (the sub_pixel_offset) is the distance in from either the left
1102
	 * or right pixel expanded edge. */
1103
	flip_x = (w < 0);
1104
	if (flip_x)
1105
	{
1106
		float tmp;
1107
		w = -w;
1108
		dst_x_int = floor(x-w);
1109
		tmp = ceilf(x);
1110
		dst_w_int = (int)tmp;
1111
		x = tmp - x;
1112
		dst_w_int -= dst_x_int;
1113
	}
1114
	else
1115
	{
1116
		dst_x_int = floor(x);
1117
		x -= (float)dst_x_int;
1118
		dst_w_int = (int)ceilf(x + w);
1119
	}
1120
	flip_y = (h < 0);
1121
	/* dst_y_int is calculated to be the bottom of the scaled image, but
1122
	 * y (the sub pixel offset) has to end up being the value at the top.
1123
	 */
1124
	if (flip_y)
1125
	{
1126
		h = -h;
1127
		dst_y_int = floor(y-h);
1128
		dst_h_int = (int)ceilf(y) - dst_y_int;
1129
	} else {
1130
		dst_y_int = floor(y);
1131
		y += h;
1132
		dst_h_int = (int)ceilf(y) - dst_y_int;
1133
	}
1134
	/* y is the top edge position in floats. We want it to be the
1135
	 * distance down from the next pixel boundary. */
1136
	y = ceilf(y) - y;
1137
 
1138
	DBUG(("Result image: (%d,%d) at (%d,%d) (subpix=%g,%g)\n", dst_w_int, dst_h_int, dst_x_int, dst_y_int, x, y));
1139
 
1140
	/* Step 1: Calculate the weights for columns and rows */
1141
#ifdef SINGLE_PIXEL_SPECIALS
1142
	if (src->w == 1)
1143
	{
1144
		contrib_cols = NULL;
1145
	}
1146
	else
1147
#endif /* SINGLE_PIXEL_SPECIALS */
1148
	{
1149
		contrib_cols = make_weights(src->w, x, w, filter, 0, dst_w_int, src->n, flip_x);
1150
		if (contrib_cols == NULL)
1151
			goto cleanup;
1152
	}
1153
#ifdef SINGLE_PIXEL_SPECIALS
1154
	if (src->h == 1)
1155
	{
1156
		contrib_rows = NULL;
1157
	}
1158
	else
1159
#endif /* SINGLE_PIXEL_SPECIALS */
1160
	{
1161
		contrib_rows = make_weights(src->h, y, h, filter, 1, dst_h_int, src->n, flip_y);
1162
		if (contrib_rows == NULL)
1163
			goto cleanup;
1164
	}
1165
 
1166
	assert(contrib_cols == NULL || contrib_cols->count == dst_w_int);
1167
	assert(contrib_rows == NULL || contrib_rows->count == dst_h_int);
1168
	output = fz_new_pixmap(src->colorspace, dst_w_int, dst_h_int);
1169
	output->x = dst_x_int;
1170
	output->y = dst_y_int;
1171
 
1172
	/* Step 2: Apply the weights */
1173
#ifdef SINGLE_PIXEL_SPECIALS
1174
	if (contrib_rows == NULL)
1175
	{
1176
		/* Only 1 source pixel high. */
1177
		if (contrib_cols == NULL)
1178
		{
1179
			/* Only 1 pixel in the entire image! */
1180
			duplicate_single_pixel(output->samples, src->samples, src->n, dst_w_int, dst_h_int);
1181
		}
1182
		else
1183
		{
1184
			/* Scale the row once, then copy it. */
1185
			scale_single_row(output->samples, src->samples, contrib_cols, src->w, dst_h_int);
1186
		}
1187
	}
1188
	else if (contrib_cols == NULL)
1189
	{
1190
		/* Only 1 source pixel wide. Scale the col and duplicate. */
1191
		scale_single_col(output->samples, src->samples, contrib_rows, src->h, src->n, dst_w_int, flip_y);
1192
	}
1193
	else
1194
#endif /* SINGLE_PIXEL_SPECIALS */
1195
	{
1196
		void (*row_scale)(int *dst, unsigned char *src, fz_weights *weights);
1197
 
1198
		temp_span = contrib_cols->count * src->n;
1199
		temp_rows = contrib_rows->max_len;
1200
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
1201
			goto cleanup;
1202
		temp = fz_calloc(temp_span*temp_rows, sizeof(int));
1203
		if (temp == NULL)
1204
			goto cleanup;
1205
		switch (src->n)
1206
		{
1207
		default:
1208
			row_scale = scale_row_to_temp;
1209
			break;
1210
		case 1: /* Image mask case */
1211
			row_scale = scale_row_to_temp1;
1212
			break;
1213
		case 2: /* Greyscale with alpha case */
1214
			row_scale = scale_row_to_temp2;
1215
			break;
1216
		case 4: /* RGBA */
1217
			row_scale = scale_row_to_temp4;
1218
			break;
1219
		}
1220
		max_row = 0;
1221
		for (row = 0; row < contrib_rows->count; row++)
1222
		{
1223
			/*
1224
			Which source rows do we need to have scaled into the
1225
			temporary buffer in order to be able to do the final
1226
			scale?
1227
			*/
1228
			int row_index = contrib_rows->index[row];
1229
			int row_min = contrib_rows->index[row_index++];
1230
			int row_len = contrib_rows->index[row_index++];
1231
			while (max_row < row_min+row_len)
1232
			{
1233
				/* Scale another row */
1234
				assert(max_row < src->h);
1235
				DBUG(("scaling row %d to temp\n", max_row));
1236
				(*row_scale)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->w*src->n], contrib_cols);
1237
				max_row++;
1238
			}
1239
 
1240
			DBUG(("scaling row %d from temp\n", row));
1241
			scale_row_from_temp(&output->samples[row*output->w*output->n], temp, contrib_rows, temp_span, row);
1242
		}
1243
		fz_free(temp);
1244
	}
1245
 
1246
cleanup:
1247
	fz_free(contrib_rows);
1248
	fz_free(contrib_cols);
1249
	return output;
1250
}