Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
   C-like prototype :
        void ff_j_rev_dct_arm(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
27
 
28
#include "libavutil/arm/asm.S"
29
 
30
/*
 * Fixed-point multipliers. Each value is the constant spelled out in its
 * name, scaled by 2^13 and rounded to the nearest integer. A FIX_M_ prefix
 * marks a negated constant. FIX_0xFFFF is not a multiplier: it is a 16-bit
 * mask kept in the same table.
 */
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

/*
 * Byte offsets of the matching 32-bit words inside const_array. The order
 * and spacing (4 bytes per entry) must stay in sync with the .word list of
 * that table.
 */
#define FIX_0_298631336_ID      0
#define FIX_0_541196100_ID      4
#define FIX_0_765366865_ID      8
#define FIX_1_175875602_ID     12
#define FIX_1_501321110_ID     16
#define FIX_2_053119869_ID     20
#define FIX_3_072711026_ID     24
#define FIX_M_0_390180644_ID   28
#define FIX_M_0_899976223_ID   32
#define FIX_M_1_847759065_ID   36
#define FIX_M_1_961570560_ID   40
#define FIX_M_2_562915447_ID   44
#define FIX_0xFFFF_ID          48
57
 
58
@ ---------------------------------------------------------------------------
@ void ff_j_rev_dct_arm(DCTBLOCK data)
@
@ In-place inverse DCT of an 8x8 block of signed 16-bit coefficients.
@ Pass 1 walks the 8 rows (16 bytes each), pass 2 walks the 8 columns
@ (2 bytes apart, row stride 16 bytes).
@
@ In:     r0  = data (pointer to the 64 halfwords)
@ Saved:  r4-r11 and lr are pushed on entry and restored on exit
@ Uses:   r0-r3 and r12 as scratch, flags
@ Stack:  40 bytes for the entry push, plus 16 bytes of temporaries while the
@         odd part of a row or column is being computed
@ Roles:  lr  = pointer to the current row, then to the current column
@         r12 = remaining rows / columns
@         r11 = base of const_array (offsets are the FIX_*_ID defines)
@ Note:   the row pass reads the block with 32-bit loads, so it relies on
@         data being at least 4-byte aligned.
@ ---------------------------------------------------------------------------
function ff_j_rev_dct_arm, export=1
        push {r0, r4 - r11, lr}         @ r0 is saved too: it is popped into lr
                                        @ again at start_column_loop

        mov lr, r0                      @ lr = pointer to the current row
        mov r12, #8                     @ r12 = row-counter
        movrel r11, const_array         @ r11 = base pointer to the constants array
row_loop:
        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
        ldrsh r2, [lr, # 2]             @ r2 = 'd2'

        @ Shortcuts for sparse rows. The row is re-read as four 32-bit words
        @ (this works as the int16_t are always 4-byte aligned):
        @   - all eight items zero          -> row is left untouched
        @   - all items except the first zero -> empty_row
        ldr r5, [lr, # 0]               @ r5 = items 0 and 1
        ldr r6, [lr, # 4]               @ r6 = items 2 and 3
        ldr r3, [lr, # 8]               @ r3 = items 4 and 5
        ldr r4, [lr, #12]               @ r4 = items 6 and 7
        orr r3, r3, r4
        orr r3, r3, r6                  @ r3 = OR of items 2..7
        orrs r5, r3, r5                 @ Z set when the whole row is zero
        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
        orrs r3, r3, r2                 @ Z set when items 1..7 are all zero
        beq empty_row

        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
        ldrsh r6, [lr, # 6]             @ r6 = 'd6'

        @ Even part (inputs d0, d2, d4, d6). Constant loads are interleaved
        @ with the multiplies that consume them.
        ldr r3, [r11, #FIX_0_541196100_ID]
        add r7, r2, r6
        ldr r5, [r11, #FIX_M_1_847759065_ID]
        mul r7, r3, r7                      @ r7 = z1
        ldr r3, [r11, #FIX_0_765366865_ID]
        mla r6, r5, r6, r7                  @ r6 = tmp2
        add r5, r0, r4                      @ r5 = tmp0 (not yet shifted)
        mla r2, r3, r2, r7                  @ r2 = tmp3
        sub r3, r0, r4                      @ r3 = tmp1 (not yet shifted)

        add r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
        add r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb r3, r6, r3, lsl #13             @ r3 = tmp12

        push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11

        ldrsh r3, [lr, #10]             @ r3 = 'd3'
        ldrsh r5, [lr, #12]             @ r5 = 'd5'
        ldrsh r7, [lr, #14]             @ r7 = 'd7'

        @ Odd part (inputs d1, d3, d5, d7)
        add r0, r3, r5                  @ r0 = 'z2'
        add r2, r1, r7                  @ r2 = 'z1'
        add r4, r3, r7                  @ r4 = 'z3'
        add r6, r1, r5                  @ r6 = 'z4'
        ldr r9, [r11, #FIX_1_175875602_ID]
        add r8, r4, r6                  @ r8 = z3 + z4
        ldr r10, [r11, #FIX_M_0_899976223_ID]
        mul r8, r9, r8                  @ r8 = 'z5'
        ldr r9, [r11, #FIX_M_2_562915447_ID]
        mul r2, r10, r2                 @ r2 = 'z1'
        ldr r10, [r11, #FIX_M_1_961570560_ID]
        mul r0, r9, r0                  @ r0 = 'z2'
        ldr r9, [r11, #FIX_M_0_390180644_ID]
        mla r4, r10, r4, r8             @ r4 = 'z3'
        ldr r10, [r11, #FIX_0_298631336_ID]
        mla r6, r9, r6, r8              @ r6 = 'z4'
        ldr r9, [r11, #FIX_2_053119869_ID]
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
        ldr r10, [r11, #FIX_3_072711026_ID]
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
        ldr r9, [r11, #FIX_1_501321110_ID]
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
        add r7, r7, r4                  @ r7 = tmp0
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
        add r5, r5, r6                  @ r5 = tmp1
        add r3, r3, r4                  @ r3 = tmp2
        add r1, r1, r6                  @ r1 = tmp3

        @ The values pushed from r3/r4 come back in r4/r6.
        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Every DESCALE below adds half of the divisor (1 << 10) before the
        @ arithmetic shift right by 11, i.e. it rounds to nearest.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add r8, r0, r1
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub r8, r0, r1
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add r8, r6, r3
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub r8, r6, r3
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add r8, r4, r5
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub r8, r4, r5
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add r8, r2, r7
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub r8, r2, r7
        add r8, r8, #(1<<10)
        mov r8, r8, asr #11
        strh r8, [lr, # 8]

        @ End of row loop
        add lr, lr, #16                 @ next row (8 halfwords)
        subs r12, r12, #1
        bne row_loop
        b start_column_loop             @ all rows done: skip the shortcut code

empty_row:
        @ Only the first item of the row is non-zero. The general path then
        @ reduces to (d0 << 13) >> 11 for every output, so the row is filled
        @ with d0 << 2.
        ldr r1, [r11, #FIX_0xFFFF_ID]   @ r1 = 0xFFFF (loaded from the table)
        mov r0, r0, lsl #2
        and r0, r0, r1                  @ keep the low halfword only
        add r0, r0, r0, lsl #16         @ same halfword in both halves of r0
        str r0, [lr, # 0]
        str r0, [lr, # 4]
        str r0, [lr, # 8]
        str r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add lr, lr, #16                 @ next row (8 halfwords)
        subs r12, r12, #1
        bne row_loop

start_column_loop:
        @ Start of column loop
        pop {lr}                        @ lr = data pointer saved on entry (r0)
        mov r12, #8                     @ r12 = column-counter
column_loop:
        @ Offsets are in bytes: row k of this column lives at k*16 = (2*k)*8
        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'

        @ Even part (inputs d0, d2, d4, d6)
        ldr r3, [r11, #FIX_0_541196100_ID]
        add r1, r2, r6
        ldr r5, [r11, #FIX_M_1_847759065_ID]
        mul r1, r3, r1                      @ r1 = z1
        ldr r3, [r11, #FIX_0_765366865_ID]
        mla r6, r5, r6, r1                  @ r6 = tmp2
        add r5, r0, r4                      @ r5 = tmp0 (not yet shifted)
        mla r2, r3, r2, r1                  @ r2 = tmp3
        sub r3, r0, r4                      @ r3 = tmp1 (not yet shifted)

        add r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
        add r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb r6, r6, r3, lsl #13             @ r6 = tmp12

        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr r9, r1, r3
        orr r10, r5, r7
        orrs r10, r9, r10               @ Z set when d1, d3, d5 and d7 are all zero
        beq empty_odd_column

        push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part (inputs d1, d3, d5, d7)
        add r0, r3, r5                  @ r0 = 'z2'
        add r2, r1, r7                  @ r2 = 'z1'
        add r4, r3, r7                  @ r4 = 'z3'
        add r6, r1, r5                  @ r6 = 'z4'
        ldr r9, [r11, #FIX_1_175875602_ID]
        add r8, r4, r6                  @ r8 = z3 + z4
        ldr r10, [r11, #FIX_M_0_899976223_ID]
        mul r8, r9, r8                  @ r8 = 'z5'
        ldr r9, [r11, #FIX_M_2_562915447_ID]
        mul r2, r10, r2                 @ r2 = 'z1'
        ldr r10, [r11, #FIX_M_1_961570560_ID]
        mul r0, r9, r0                  @ r0 = 'z2'
        ldr r9, [r11, #FIX_M_0_390180644_ID]
        mla r4, r10, r4, r8             @ r4 = 'z3'
        ldr r10, [r11, #FIX_0_298631336_ID]
        mla r6, r9, r6, r8              @ r6 = 'z4'
        ldr r9, [r11, #FIX_2_053119869_ID]
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
        ldr r10, [r11, #FIX_3_072711026_ID]
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
        ldr r9, [r11, #FIX_1_501321110_ID]
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
        add r7, r7, r4                  @ r7 = tmp0
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
        add r5, r5, r6                  @ r5 = tmp1
        add r3, r3, r4                  @ r3 = tmp2
        add r1, r1, r6                  @ r1 = tmp3

        pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                             @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Every DESCALE below adds half of the divisor (1 << 17) before the
        @ arithmetic shift right by 18, i.e. it rounds to nearest.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add r8, r0, r1
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub r8, r0, r1
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add r8, r4, r3
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub r8, r4, r3
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add r8, r6, r5
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub r8, r6, r5
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add r8, r2, r7
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub r8, r2, r7
        add r8, r8, #(1<<17)
        mov r8, r8, asr #18
        strh r8, [lr, #( 8*8)]

        @ End of column loop
        add lr, lr, #2                  @ next column (one halfword to the right)
        subs r12, r12, #1
        bne column_loop
        b the_end                       @ all columns done: skip the shortcut code

empty_odd_column:
        @ d1, d3, d5 and d7 are all zero, so tmp0..tmp3 are zero and each
        @ sum/difference pair below yields the same value: compute it once
        @ and store it twice.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add r0, r0, #(1<<17)
        mov r0, r0, asr #18
        strh r0, [lr, #( 0*8)]
        strh r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add r4, r4, #(1<<17)
        mov r4, r4, asr #18
        strh r4, [lr, #( 2*8)]
        strh r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add r6, r6, #(1<<17)
        mov r6, r6, asr #18
        strh r6, [lr, #( 4*8)]
        strh r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add r2, r2, #(1<<17)
        mov r2, r2, asr #18
        strh r2, [lr, #( 6*8)]
        strh r2, [lr, #( 8*8)]

        @ End of column loop
        add lr, lr, #2                  @ next column (one halfword to the right)
        subs r12, r12, #1
        bne column_loop

the_end:
        @ The end....
        pop {r4 - r11, pc}              @ restore saved registers and return
endfunc
368
 
369
@ Table of 32-bit constants addressed through r11 in ff_j_rev_dct_arm.
@ Entries are 4 bytes apart; their order must match the byte offsets given
@ by the _ID defines at the top of the file.
const const_array
        .word FIX_0_298631336           @ byte offset  0
        .word FIX_0_541196100           @ byte offset  4
        .word FIX_0_765366865           @ byte offset  8
        .word FIX_1_175875602           @ byte offset 12
        .word FIX_1_501321110           @ byte offset 16
        .word FIX_2_053119869           @ byte offset 20
        .word FIX_3_072711026           @ byte offset 24
        .word FIX_M_0_390180644         @ byte offset 28
        .word FIX_M_0_899976223         @ byte offset 32
        .word FIX_M_1_847759065         @ byte offset 36
        .word FIX_M_1_961570560         @ byte offset 40
        .word FIX_M_2_562915447         @ byte offset 44
        .word FIX_0xFFFF                @ byte offset 48, 16-bit mask used by empty_row
endconst