Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
/*
  C-like prototype :
        void j_rev_dct_arm(DCTBLOCK data)

  With DCTBLOCK being a pointer to an array of 64 'signed shorts'

  Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/
||
27 | |||
28 | #include "libavutil/arm/asm.S" |
||
29 | |||
/*
 * Fixed-point multipliers used by both IDCT passes.
 *
 * FIX_<x>   = round( x * 2^13)
 * FIX_M_<x> = round(-x * 2^13)   (the "M" variants are the negated factors)
 *
 * The 2^13 scale matches the "lsl #13" applied to the un-multiplied terms in
 * the butterflies.  Negative values are parenthesized so that the macros stay
 * safe when they are expanded inside a larger expression.
 */
#define FIX_0_298631336          2446
#define FIX_0_541196100          4433
#define FIX_0_765366865          6270
#define FIX_1_175875602          9633
#define FIX_1_501321110         12299
#define FIX_2_053119869         16819
#define FIX_3_072711026         25172
#define FIX_M_0_390180644      (-3196)
#define FIX_M_0_899976223      (-7373)
#define FIX_M_1_847759065     (-15137)
#define FIX_M_1_961570560     (-16069)
#define FIX_M_2_562915447     (-20995)
#define FIX_0xFFFF             0xFFFF

/*
 * Byte offset of each constant inside const_array (one 32-bit .word per
 * entry, hence the step of 4).  These offsets must be kept in sync with the
 * order of the .word entries of const_array.
 */
#define FIX_0_298631336_ID        0
#define FIX_0_541196100_ID        4
#define FIX_0_765366865_ID        8
#define FIX_1_175875602_ID       12
#define FIX_1_501321110_ID       16
#define FIX_2_053119869_ID       20
#define FIX_3_072711026_ID       24
#define FIX_M_0_390180644_ID     28
#define FIX_M_0_899976223_ID     32
#define FIX_M_1_847759065_ID     36
#define FIX_M_1_961570560_ID     40
#define FIX_M_2_562915447_ID     44
#define FIX_0xFFFF_ID            48
||
57 | |||
/*
 * void ff_j_rev_dct_arm(DCTBLOCK data)
 *
 * In-place 8x8 inverse DCT on 64 signed 16-bit coefficients.
 *
 * In:    r0 = data.  The row pass reads the rows with word loads, so the
 *        block has to be 4-byte aligned.
 * Out:   nothing; the block is overwritten with the result.
 * Saved: r4-r11, lr (restored on exit).
 * Clobb: r0-r3, r12, flags.
 * Stack: 10 words pushed on entry (r0 is saved as well so that the column
 *        pass can reload the block pointer), plus 4 temporary words while
 *        the odd part of a row or column is being computed.
 *
 * Pass 1 walks the 8 rows (16 bytes each).  Within a row the coefficients are
 * read from byte offsets 0/2/4/6 as d0/d2/d4/d6 and from 8/10/12/14 as
 * d1/d3/d5/d7; the eight results are written back to offsets 0..14 in that
 * order, rounded and shifted right by 11 bits.
 * Pass 2 walks the 8 columns (element stride 16 bytes, d0..d7 in consecutive
 * rows) and rounds/shifts the results right by 18 bits.
 *
 * r11 holds the address of const_array and r12 the loop counter for the
 * whole function; lr is the running row/column pointer.
 */
function ff_j_rev_dct_arm, export=1
        push    {r0, r4 - r11, lr}

        mov     lr, r0                  @ lr = pointer to the current row
        mov     r12, #8                 @ r12 = row-counter
        movrel  r11, const_array        @ r11 = base pointer to the constants array
row_loop:
        ldrsh   r0, [lr, # 0]           @ r0 = 'd0'
        ldrsh   r2, [lr, # 2]           @ r2 = 'd2'

        @ Optimization for row that have all items except the first set to 0
        @ (this works as the int16_t are always 4-byte aligned)
        ldr     r5, [lr, # 0]
        ldr     r6, [lr, # 4]
        ldr     r3, [lr, # 8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6              @ r3 = OR of the six halfwords at offsets 4..14
        orrs    r5, r3, r5
        beq     end_of_row_loop         @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2
        beq     empty_row               @ everything except 'd0' is zero

        ldrsh   r1, [lr, # 8]           @ r1 = 'd1'
        ldrsh   r4, [lr, # 4]           @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]           @ r6 = 'd6'

        @ Even part (d0, d2, d4, d6)
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7              @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r7          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13     @ r3 = tmp12

        push    {r0, r2, r3, r4}        @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part (d1, d3, d5, d7)
        ldrsh   r3, [lr, #10]           @ r3 = 'd3'
        ldrsh   r5, [lr, #12]           @ r5 = 'd5'
        ldrsh   r7, [lr, #14]           @ r7 = 'd7'

        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        pop     {r0, r2, r4, r6}        @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop
        b       start_column_loop       @ all rows done: skip the shortcut paths

empty_row:
        @ Only 'd0' is non-zero: all eight outputs of the row are d0 << 2
        @ (truncated to 16 bits), written as four identical words.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1
        add     r0, r0, r0, lsl #16     @ same halfword in both halves of r0
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop
        pop     {lr}                    @ lr = block pointer (the r0 pushed on entry)
        mov     r12, #8                 @ r12 = column-counter
column_loop:
        ldrsh   r0, [lr, #( 0*8)]       @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]       @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]       @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]       @ r6 = 'd6'

        @ Even part (d0, d2, d4, d6)
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1              @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r1          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13     @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]       @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]       @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]       @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]       @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        push    {r0, r2, r4, r6}        @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part (d1, d3, d5, d7)
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        pop     {r0, r2, r4, r6}        @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop
        b       the_end                 @ all columns done: skip the shortcut path

empty_odd_column:
        @ d1 = d3 = d5 = d7 = 0, so tmp0..tmp3 are all zero and each
        @ sum/difference pair below collapses to a single value.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        pop     {r4 - r11, pc}
endfunc
||
368 | |||
/*
 * Constant pool read through r11 by ff_j_rev_dct_arm.
 * The entries are addressed with the FIX_*_ID byte offsets, so the order of
 * the .word lines below must match those offsets (entry k at offset 4*k).
 */
const const_array
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF
endconst