Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4680 right-hear 1
/*
2
 * ARM specific render optims live here
3
 */
4
 
5
#include "fitz.h"
6
 
7
typedef unsigned char byte;
8
 
9
/* always surround cpu specific code with HAVE_XXX */
10
#ifdef ARCH_ARM
11
 
12
/* from imagescalearm.s */
13
extern void fz_srow4_arm(byte *src, byte *dst, int w, int denom);
14
extern void fz_scol4_arm(byte *src, byte *dst, int w, int denom);
15
 
16
/*
 * Blend one scanline span of a path raster into a 4-byte-per-pixel
 * (rgba-style) destination, using a 1-byte-per-pixel accumulated
 * coverage buffer.
 *
 * rgba - pointer to the source color; the 32-bit word at *rgba is
 *        loaded by the asm (note the first `ldr %0,[%0]`), its top
 *        byte is treated as alpha and then forced to 0xFF.
 * src  - coverage deltas; each byte is read, accumulated into `cov`,
 *        and zeroed behind us (`strb r11,[%1],#1`) so the buffer is
 *        clean for the next scanline.
 * cov  - running coverage carried in from the caller, kept mod 256.
 * len  - number of pixels; asm decrements it and exits at label 9.
 * dst  - destination pixels, written as whole 32-bit words.
 *
 * Operand map inside the asm: %0=rgba(word), %1=src, %2=cov, %3=len,
 * %4=dst.  Two main loops: labels 1/2 handle the general alpha case
 * (alpha != 256); labels 3/4/5 handle the alpha==256 fast path, with
 * label 3 storing the solid color directly when coverage is full.
 * The blend itself is the usual split of each pixel into two 16-bit
 * lanes masked by r8 = 0xFF00FF00 so two channels are interpolated
 * per MLA.
 *
 * NOTE(review): the asm comment at label 4 ("coverage*alpha == 0")
 * looks copy-pasted from label 1 — at that point alpha is known to be
 * 256, so it is really the "coverage == 0" re-entry for the solid
 * loop.  Likewise label 5's "r9 = dga = (drb<<8) & MASK" should read
 * "drb".  Left byte-identical here since they live inside the asm
 * string.
 */
static void
path_w4i1o4_arm(byte * restrict rgba, byte * restrict src, byte cov, int len, byte * restrict dst)
{
	/* The ARM code here is a hand coded implementation of the optimized C version. */

	/* Nothing to do for an empty span. */
	if (len <= 0)
		return;

	/* All five C operands are "+r" (read-write) because the asm
	   advances the pointers and consumes cov/len in place. */
	asm volatile(
	"ldr	%0, [%0]		@ %0 = rgba			\n"
	"mov	r11,#0							\n"
	"mov	r8, #0xFF00						\n"
	"mov	r14,%0,lsr #24		@ r14= alpha			\n"
	"orr	%0, %0, #0xFF000000	@ %0 = rgba |= 0xFF000000	\n"
	"orr	r8, r8, r8, LSL #16	@ r8 = 0xFF00FF00		\n"
	"adds	r14,r14,r14,LSR #7	@ r14 = alpha += alpha>>7	\n"
	"beq	9f			@ if (alpha == 0) bale		\n"
	"and	r6, %0, r8		@ r6 = ga<<8			\n"
	"bic	%0, %0, r8		@ %0 = rb			\n"
	"mov	r6, r6, LSR #8		@ r6 = ga			\n"
	"cmp	r14,#256		@ if (alpha == 256)		\n"
	"beq	4f			@	no-alpha loop		\n"
	"B	2f			@ enter the loop		\n"
	"1:	@ Loop used for when coverage*alpha == 0		\n"
	"subs	%3, %3, #1		@ len--				\n"
	"ble	9f							\n"
	"2:								\n"
	"ldrb	r12,[%1]		@ r12= *src			\n"
	"ldr	r9, [%4], #4		@ r9 = drb = *dst32++		\n"
	"strb	r11,[%1], #1		@ r11= *src++ = 0		\n"
	"add	%2, r12, %2		@ %2 = cov += r12		\n"
	"ands	%2, %2, #255		@ %2 = cov &= 255		\n"
	"beq	1b			@ if coverage == 0 loop back	\n"
	"add	r10,%2, %2, LSR #7	@ r10= ca = cov+(cov>>7)	\n"
	"mul	r10,r14,r10		@ r10= ca *= alpha		\n"
	"and	r7, r8, r9		@ r7 = dga = drb & MASK		\n"
	"mov	r10,r10,LSR #8		@ r10= ca >>= 8			\n"
	"and	r9, r8, r9, LSL #8	@ r9 = drb = (drb<<8) & MASK	\n"
	"sub	r12,r6, r7, LSR #8	@ r12= cga = ga - (dga>>8)	\n"
	"sub	r5, %0, r9, LSR #8	@ r5 = crb = rb - (drb>>8)	\n"
	"mla	r7, r12,r10,r7		@ r7 = dga += cga * ca		\n"
	"subs	%3, %3, #1		@ len--				\n"
	"mla	r9, r5, r10,r9		@ r9 = drb += crb * ca		\n"
	"and	r7, r8, r7		@ r7 = dga &= MASK		\n"
	"and	r9, r8, r9		@ r9 = drb &= MASK		\n"
	"orr	r9, r7, r9, LSR #8	@ r9 = drb = dga | (drb>>8)	\n"
	"str	r9, [%4, #-4]		@ dst32[-1] = r9		\n"
	"bgt	2b							\n"
	"b	9f							\n"
	"@ --- Solid alpha loop	---------------------------------------	\n"
	"3:	@ Loop used when coverage == 256			\n"
	"orr	r9, %0, r6, LSL #8	@ r9 = rgba			\n"
	"str	r9, [%4, #-4]		@ dst32[-1] = r9		\n"
	"4:	@ Loop used for when coverage*alpha == 0		\n"
	"subs	%3, %3, #1		@ len--				\n"
	"ble	9f							\n"
	"5:								\n"
	"ldrb	r12,[%1]		@ r12= *src			\n"
	"ldr	r9, [%4], #4		@ r9 = drb = *dst32++		\n"
	"strb	r11,[%1], #1		@ r11= *src++ = 0		\n"
	"add	%2, r12, %2		@ %2 = cov += r12		\n"
	"ands	%2, %2, #255		@ %2 = cov &= 255		\n"
	"beq	4b			@ if coverage == 0 loop back	\n"
	"cmp	%2, #255		@ if coverage == solid		\n"
	"beq	3b			@	loop back		\n"
	"add	r10,%2, %2, LSR #7	@ r10= ca = cov+(cov>>7)	\n"
	"and	r7, r8, r9		@ r7 = dga = drb & MASK		\n"
	"and	r9, r8, r9, LSL #8	@ r9 = dga = (drb<<8) & MASK	\n"
	"sub	r12,r6, r7, LSR #8	@ r12= cga = ga - (dga>>8)	\n"
	"sub	r5, %0, r9, LSR #8	@ r5 = crb = rb - (drb>>8)	\n"
	"mla	r7, r12,r10,r7		@ r7 = dga += cga * ca		\n"
	"subs	%3, %3, #1		@ len--				\n"
	"mla	r9, r5, r10,r9		@ r9 = drb += crb * ca		\n"
	"and	r7, r8, r7		@ r7 = dga &= MASK		\n"
	"and	r9, r8, r9		@ r9 = drb &= MASK		\n"
	"orr	r9, r7, r9, LSR #8	@ r9 = drb = dga | (drb>>8)	\n"
	"str	r9, [%4, #-4]		@ dst32[-1] = r9		\n"
	"bgt	5b							\n"
	"9:				@ End				\n"
	:
	"+r" (rgba),
	"+r" (src),
	"+r" (cov),
	"+r" (len),
	"+r" (dst)
	:
	:
	/* Every scratch register used above must be listed, plus
	   "memory" (we read/write through pointers) and "cc" (flag-
	   setting adds/subs/cmps). */
	"r5","r6","r7","r8","r9","r10","r11","r12","r14","memory","cc"
	);
}
106
 
107
/*
 * Copy a w-by-h tile of 8-bit samples from src (row stride sw) to dst
 * (row stride dw), inserting 0xFF filler bytes according to `pad`:
 *
 *   pad == 0 : straight row-by-row memcpy, no filler.
 *   pad == 1 : each source byte is followed by a 0xFF byte
 *              (e.g. grey -> grey+alpha); dst consumes 2*w per row.
 *   pad == 3 : every 3 source bytes are followed by one 0xFF byte
 *              (e.g. rgb -> rgba); assumes w is a multiple of 3 —
 *              TODO confirm, the inner loop counts down by 3.
 *   default  : generic loop emitting a 0xFF byte every `pad` source
 *              bytes, written through a temporary row pointer.
 *
 * All parameters are plain byte counts; the asm bodies pre-bias sw
 * (and dw in the pad==1 case) so the per-row "ADD" advances past the
 * bytes the inner loop did not consume.
 */
static void load_tile8_arm(byte * restrict src, int sw, byte * restrict dst, int dw, int w, int h, int pad)
{
	/* Degenerate tile: nothing to copy (asm loops below assume
	   at least one row and one column). */
	if ((h == 0) || (w == 0))
		return;

	switch (pad)
	{
	case 0:
		/* No padding: plain row copy. */
		while (h--)
		{
			memcpy(dst, src, w);
			src += sw;
			dst += dw;
		}
		break;

	case 1:
		/* Inner loop consumes w src bytes and writes 2*w dst
		   bytes, so bias the strides accordingly. */
		sw -= w;
		dw -= w<<1;
		asm volatile(
			"MOV	r11,#255				\n"
			"1:						\n"
			"MOV	r5, %[w]		@ r5 = x = w	\n"
			"2:						\n"
			"LDRB	r4, [%[src]], #1	@ r4 = *src++	\n"
			"SUBS	r5, r5, #1				\n"
			"STRB	r4, [%[dst]], #1	@ *dst++ = r4	\n"
			"STRB	r11,[%[dst]], #1	@ *dst++ = 255	\n"
			"BGT	2b					\n"
			"ADD	%[src],%[src],%[sw]	@ src += sw	\n"
			"ADD	%[dst],%[dst],%[dw]	@ dst += dw	\n"
			"SUBS	%[h],%[h],#1				\n"
			"BGT	1b					\n"
			:
			[src]	"+r" (src),
			[sw]	"+r" (sw),
			[dst]	"+r" (dst),
			[dw]	"+r" (dw),
			[h]	"+r" (h),
			[w]	"+r" (w)
			:
			:
			"r4","r5","r11","memory","cc"
			);
		break;

	case 3:
		/* dst is written through r8 (dp) and left at the row
		   start, so only sw needs biasing; dw stays the full
		   destination stride. */
		sw -= w;
		asm volatile(
			"MOV	r11,#255				\n"
			"1:						\n"
			"MOV	r5, %[w]		@ r5 = x = w	\n"
			"MOV	r8, %[dst]		@ r8 = dp = dst	\n"
			"2:						\n"
			"LDRB	r4, [%[src]], #1	@ r4 = *src++	\n"
			"LDRB	r6, [%[src]], #1	@ r6 = *src++	\n"
			"LDRB	r7, [%[src]], #1	@ r7 = *src++	\n"
			"SUBS	r5, r5, #3				\n"
			"STRB	r4, [r8], #1		@ *dp++ = r4	\n"
			"STRB	r6, [r8], #1		@ *dp++ = r6	\n"
			"STRB	r7, [r8], #1		@ *dp++ = r7	\n"
			"STRB	r11,[r8], #1		@ *dp++ = 255	\n"
			"BGT	2b					\n"
			"ADD	%[src],%[src],%[sw]	@ src += sw	\n"
			"ADD	%[dst],%[dst],%[dw]	@ dst += dw	\n"
			"SUBS	%[h],%[h],#1				\n"
			"BGT	1b					\n"
			:
			[src]	"+r" (src),
			[sw]	"+r" (sw),
			[dst]	"+r" (dst),
			[dw]	"+r" (dw),
			[h]	"+r" (h),
			[w]	"+r" (w)
			:
			:
			"r4","r5","r6","r7","r8","r11","memory","cc"
			);
		break;

	default:
		/* Generic pad: r8 counts down source bytes until the
		   next 0xFF filler is due (note the asm comment on the
		   "mov r14,%[w]" line says r11 but the code uses r14).
		   dst is written through r7 (dp), as in case 3. */
		sw -= w;
		asm volatile(
			"mov	r9,#255					\n"
			"1:						\n"
			"mov	r7, %[dst]	@ r7 = dp = dst		\n"
			"mov	r8, #1		@ r8 = tpad = 1		\n"
			"mov	r14,%[w]	@ r11= x = w		\n"
			"2:						\n"
			"ldrb	r10,[%[src]],#1				\n"
			"subs	r8, r8, #1				\n"
			"moveq	r8, %[pad]				\n"
			"streqb	r9, [r7], #1				\n"
			"strb	r10,[r7], #1				\n"
			"subs	r14,r14, #1				\n"
			"bgt	2b					\n"
			"add	%[src],%[src],%[sw]			\n"
			"add	%[dst],%[dst],%[dw]			\n"
			"subs	%[h], %[h], #1				\n"
			"bgt	1b					\n"
			:
			[src]	"+r" (src),
			[sw]	"+r" (sw),
			[dst]	"+r" (dst),
			[dw]	"+r" (dw),
			[h]	"+r" (h),
			[w]	"+r" (w),
			[pad]	"+r" (pad)
			:
			:
			"r7","r8","r9","r10","r14","memory","cc"
			);
		break;
	}
}
222
 
223
void
224
fz_accelerate_arch(void)
225
{
226
	fz_path_w4i1o4 = path_w4i1o4_arm;
227
	fz_loadtile8 = load_tile8_arm;
228
	fz_srow4 = fz_srow4_arm;
229
	fz_scol4 = fz_scol4_arm;
230
}
231
 
232
#endif