/*
 * Loongson SIMD optimized h264pred
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong
 *                    Zhang Shuangshuang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "h264pred_mips.h"

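/* 16x16 vertical prediction: replicate the 16 pixels above the block into all 16 rows. */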
void ff_pred16x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "dli $8, 16                         \r\n"
        "gsldlc1 $f2, 7(%[srcA])            \r\n"
        "gsldrc1 $f2, 0(%[srcA])            \r\n"
        "gsldlc1 $f4, 15(%[srcA])           \r\n"
        "gsldrc1 $f4, 8(%[srcA])            \r\n"
        "1:                                 \r\n"
        "gssdlc1 $f2, 7(%[src])             \r\n"
        "gssdrc1 $f2, 0(%[src])             \r\n"
        "gssdlc1 $f4, 15(%[src])            \r\n"
        "gssdrc1 $f4, 8(%[src])             \r\n"
        "daddu %[src], %[src], %[stride]    \r\n"
        "daddi $8, $8, -1                   \r\n"
        "bnez $8, 1b                        \r\n"
        : [src]"+&r"(src)
        : [stride]"r"(stride),[srcA]"r"(src-stride)
        : "$8","$f2","$f4"
    );
}

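/* 16x16 horizontal prediction: splat each left-neighbour pixel across its row (ff_pb_1 broadcasts a byte). */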
void ff_pred16x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "daddiu $2, %[src], -1              \r\n"
        "daddu $3, %[src], $0               \r\n"
        "dli $6, 0x10                       \r\n"
        "1:                                 \r\n"
        "lbu $4, 0($2)                      \r\n"
        "dmul $5, $4, %[ff_pb_1]            \r\n"
        "sdl $5, 7($3)                      \r\n"
        "sdr $5, 0($3)                      \r\n"
        "sdl $5, 15($3)                     \r\n"
        "sdr $5, 8($3)                      \r\n"
        "daddu $2, %[stride]                \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddiu $6, -1                      \r\n"
        "bnez $6, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
        : "$2","$3","$4","$5","$6"
    );
}

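/* 16x16 DC prediction: sum the 16 left and 16 top neighbours, round, shift by 5 and fill the block. */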
void ff_pred16x16_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "daddiu $2, %[src], -1              \r\n"
        "dli $6, 0x10                       \r\n"
        "xor $8, $8, $8                     \r\n"
        "1:                                 \r\n"
        "lbu $4, 0($2)                      \r\n"
        "daddu $8, $8, $4                   \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "daddiu $6, $6, -1                  \r\n"
        "bnez $6, 1b                        \r\n"
        "dli $6, 0x10                       \r\n"
        "negu $3, %[stride]                 \r\n"
        "daddu $2, %[src], $3               \r\n"
        "2:                                 \r\n"
        "lbu $4, 0($2)                      \r\n"
        "daddu $8, $8, $4                   \r\n"
        "daddiu $2, $2, 1                   \r\n"
        "daddiu $6, $6, -1                  \r\n"
        "bnez $6, 2b                        \r\n"
        "daddiu $8, $8, 0x10                \r\n"
        "dsra $8, 5                         \r\n"
        "dmul $5, $8, %[ff_pb_1]            \r\n"
        "daddu $2, %[src], $0               \r\n"
        "dli $6, 0x10                       \r\n"
        "3:                                 \r\n"
        "sdl $5, 7($2)                      \r\n"
        "sdr $5, 0($2)                      \r\n"
        "sdl $5, 15($2)                     \r\n"
        "sdr $5, 8($2)                      \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "daddiu $6, $6, -1                  \r\n"
        "bnez $6, 3b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
        : "$2","$3","$4","$5","$6","$8"
    );
}

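/* 8x8 luma top-DC prediction: low-pass filter the row above (edge handling driven by
 * has_topleft/has_topright), average the filtered row and fill the 8x8 block with it. */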
void ff_pred8x8l_top_dc_8_mmi(uint8_t *src, int has_topleft,
        int has_topright, ptrdiff_t stride)
{
    uint32_t dc;

    __asm__ volatile (
        "ldl $8, 7(%[srcA])                 \r\n"
        "ldr $8, 0(%[srcA])                 \r\n"
        "ldl $9, 7(%[src0])                 \r\n"
        "ldr $9, 0(%[src0])                 \r\n"
        "ldl $10, 7(%[src1])                \r\n"
        "ldr $10, 0(%[src1])                \r\n"
        "dmtc1 $8, $f2                      \r\n"
        "dmtc1 $9, $f4                      \r\n"
        "dmtc1 $10, $f6                     \r\n"
        "dmtc1 $0, $f0                      \r\n"
        "punpcklbh $f8, $f2, $f0            \r\n"
        "punpckhbh $f10, $f2, $f0           \r\n"
        "punpcklbh $f12, $f4, $f0           \r\n"
        "punpckhbh $f14, $f4, $f0           \r\n"
        "punpcklbh $f16, $f6, $f0           \r\n"
        "punpckhbh $f18, $f6, $f0           \r\n"
        "bnez %[has_topleft], 1f            \r\n"
        "pinsrh_0 $f8, $f8, $f12            \r\n"
        "1:                                 \r\n"
        "bnez %[has_topright], 2f           \r\n"
        "pinsrh_3 $f18, $f18, $f14          \r\n"
        "2:                                 \r\n"
        "daddiu $8, $0, 2                   \r\n"
        "dmtc1 $8, $f20                     \r\n"
        "pshufh $f22, $f20, $f0             \r\n"
        "pmullh $f12, $f12, $f22            \r\n"
        "pmullh $f14, $f14, $f22            \r\n"
        "paddh $f8, $f8, $f12               \r\n"
        "paddh $f10, $f10, $f14             \r\n"
        "paddh $f8, $f8, $f16               \r\n"
        "paddh $f10, $f10, $f18             \r\n"
        "paddh $f8, $f8, $f22               \r\n"
        "paddh $f10, $f10, $f22             \r\n"
        "psrah $f8, $f8, $f20               \r\n"
        "psrah $f10, $f10, $f20             \r\n"
        "packushb $f4, $f8, $f10            \r\n"
        "biadd $f2, $f4                     \r\n"
        "mfc1 $9, $f2                       \r\n"
        "addiu $9, $9, 4                    \r\n"
        "dsrl $9, $9, 3                     \r\n"
        "mul %[dc], $9, %[ff_pb_1]          \r\n"
        : [dc]"=r"(dc)
        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
          [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
          [has_topright]"r"(has_topright),[ff_pb_1]"r"(ff_pb_1)
        : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18","$f20","$f22"
    );

    __asm__ volatile (
        "dli $8, 8                          \r\n"
        "1:                                 \r\n"
        "punpcklwd $f2, %[dc], %[dc]        \r\n"
        "gssdlc1 $f2, 7(%[src])             \r\n"
        "gssdrc1 $f2, 0(%[src])             \r\n"
        "daddu %[src], %[src], %[stride]    \r\n"
        "daddi $8, $8, -1                   \r\n"
        "bnez $8, 1b                        \r\n"
        : [src]"+&r"(src)
        : [dc]"f"(dc),[stride]"r"(stride)
        : "$8","$f2"
    );
}

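/* 8x8 luma DC prediction: the left column is filtered in C (l0..l7), the top row in MMI
 * code (dc2); dc is the rounded average of both sums, broadcast over the block. */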
void ff_pred8x8l_dc_8_mmi(uint8_t *src, int has_topleft,
        int has_topright, ptrdiff_t stride)
{
    uint32_t dc, dc1, dc2;

    const int l0 = ((has_topleft ? src[-1+-1*stride] : src[-1+0*stride]) + 2*src[-1+0*stride] + src[-1+1*stride] + 2) >> 2;
    const int l1 = (src[-1+0*stride] + 2*src[-1+1*stride] + src[-1+2*stride] + 2) >> 2;
    const int l2 = (src[-1+1*stride] + 2*src[-1+2*stride] + src[-1+3*stride] + 2) >> 2;
    const int l3 = (src[-1+2*stride] + 2*src[-1+3*stride] + src[-1+4*stride] + 2) >> 2;
    const int l4 = (src[-1+3*stride] + 2*src[-1+4*stride] + src[-1+5*stride] + 2) >> 2;
    const int l5 = (src[-1+4*stride] + 2*src[-1+5*stride] + src[-1+6*stride] + 2) >> 2;
    const int l6 = (src[-1+5*stride] + 2*src[-1+6*stride] + src[-1+7*stride] + 2) >> 2;
    const int l7 = (src[-1+6*stride] + 2*src[-1+7*stride] + src[-1+7*stride] + 2) >> 2;

    __asm__ volatile (
        "ldl $8, 7(%[srcA])                 \r\n"
        "ldr $8, 0(%[srcA])                 \r\n"
        "ldl $9, 7(%[src0])                 \r\n"
        "ldr $9, 0(%[src0])                 \r\n"
        "ldl $10, 7(%[src1])                \r\n"
        "ldr $10, 0(%[src1])                \r\n"
        "dmtc1 $8, $f2                      \r\n"
        "dmtc1 $9, $f4                      \r\n"
        "dmtc1 $10, $f6                     \r\n"
        "dmtc1 $0, $f0                      \r\n"
        "punpcklbh $f8, $f2, $f0            \r\n"
        "punpckhbh $f10, $f2, $f0           \r\n"
        "punpcklbh $f12, $f4, $f0           \r\n"
        "punpckhbh $f14, $f4, $f0           \r\n"
        "punpcklbh $f16, $f6, $f0           \r\n"
        "punpckhbh $f18, $f6, $f0           \r\n"
        "daddiu $8, $0, 3                   \r\n"
        "dmtc1 $8, $f20                     \r\n"
        "pshufh $f28, $f10, $f20            \r\n"
        "pshufh $f30, $f18, $f20            \r\n"
        "pinsrh_3 $f10, $f10, $f30          \r\n"
        "pinsrh_3 $f18, $f18, $f28          \r\n"
        "bnez %[has_topleft], 1f            \r\n"
        "pinsrh_0 $f8, $f8, $f12            \r\n"
        "1:                                 \r\n"
        "bnez %[has_topright], 2f           \r\n"
        "pshufh $f30, $f14, $f20            \r\n"
        "pinsrh_3 $f10, $f10, $f30          \r\n"
        "2:                                 \r\n"
        "daddiu $8, $0, 2                   \r\n"
        "dmtc1 $8, $f20                     \r\n"
        "pshufh $f22, $f20, $f0             \r\n"
        "pmullh $f12, $f12, $f22            \r\n"
        "pmullh $f14, $f14, $f22            \r\n"
        "paddh $f8, $f8, $f12               \r\n"
        "paddh $f10, $f10, $f14             \r\n"
        "paddh $f8, $f8, $f16               \r\n"
        "paddh $f10, $f10, $f18             \r\n"
        "paddh $f8, $f8, $f22               \r\n"
        "paddh $f10, $f10, $f22             \r\n"
        "psrah $f8, $f8, $f20               \r\n"
        "psrah $f10, $f10, $f20             \r\n"
        "packushb $f4, $f8, $f10            \r\n"
        "biadd $f2, $f4                     \r\n"
        "mfc1 %[dc2], $f2                   \r\n"
        : [dc2]"=r"(dc2)
        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
          [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
          [has_topright]"r"(has_topright)
        : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18","$f20","$f22"
    );

    dc1 = l0+l1+l2+l3+l4+l5+l6+l7;
    dc = ((dc1+dc2+8)>>4)*0x01010101U;

    __asm__ volatile (
        "dli $8, 8                          \r\n"
        "1:                                 \r\n"
        "punpcklwd $f2, %[dc], %[dc]        \r\n"
        "gssdlc1 $f2, 7(%[src])             \r\n"
        "gssdrc1 $f2, 0(%[src])             \r\n"
        "daddu %[src], %[src], %[stride]    \r\n"
        "daddi $8, $8, -1                   \r\n"
        "bnez $8, 1b                        \r\n"
        : [src]"+&r"(src)
        : [dc]"f"(dc),[stride]"r"(stride)
        : "$8","$f2"
    );
}

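/* 8x8 luma vertical prediction: compute the filtered top row once, then copy it into all 8 rows. */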
void ff_pred8x8l_vertical_8_mmi(uint8_t *src, int has_topleft,
        int has_topright, ptrdiff_t stride)
{
    __asm__ volatile (
        "ldl $8, 7(%[srcA])                 \r\n"
        "ldr $8, 0(%[srcA])                 \r\n"
        "ldl $9, 7(%[src0])                 \r\n"
        "ldr $9, 0(%[src0])                 \r\n"
        "ldl $10, 7(%[src1])                \r\n"
        "ldr $10, 0(%[src1])                \r\n"
        "dmtc1 $8, $f2                      \r\n"
        "dmtc1 $9, $f4                      \r\n"
        "dmtc1 $10, $f6                     \r\n"
        "dmtc1 $0, $f0                      \r\n"
        "punpcklbh $f8, $f2, $f0            \r\n"
        "punpckhbh $f10, $f2, $f0           \r\n"
        "punpcklbh $f12, $f4, $f0           \r\n"
        "punpckhbh $f14, $f4, $f0           \r\n"
        "punpcklbh $f16, $f6, $f0           \r\n"
        "punpckhbh $f18, $f6, $f0           \r\n"
        "bnez %[has_topleft], 1f            \r\n"
        "pinsrh_0 $f8, $f8, $f12            \r\n"
        "1:                                 \r\n"
        "bnez %[has_topright], 2f           \r\n"
        "pinsrh_3 $f18, $f18, $f14          \r\n"
        "2:                                 \r\n"
        "daddiu $8, $0, 2                   \r\n"
        "dmtc1 $8, $f20                     \r\n"
        "pshufh $f22, $f20, $f0             \r\n"
        "pmullh $f12, $f12, $f22            \r\n"
        "pmullh $f14, $f14, $f22            \r\n"
        "paddh $f8, $f8, $f12               \r\n"
        "paddh $f10, $f10, $f14             \r\n"
        "paddh $f8, $f8, $f16               \r\n"
        "paddh $f10, $f10, $f18             \r\n"
        "paddh $f8, $f8, $f22               \r\n"
        "paddh $f10, $f10, $f22             \r\n"
        "psrah $f8, $f8, $f20               \r\n"
        "psrah $f10, $f10, $f20             \r\n"
        "packushb $f4, $f8, $f10            \r\n"
        "sdc1 $f4, 0(%[src])                \r\n"
        : [src]"=r"(src)
        : [srcA]"r"(src-stride-1),[src0]"r"(src-stride),
          [src1]"r"(src-stride+1),[has_topleft]"r"(has_topleft),
          [has_topright]"r"(has_topright)
        : "$8","$9","$10","$f2","$f4","$f6","$f8","$f10","$f12","$f14","$f16",
          "$f18","$f20","$f22"
    );

    __asm__ volatile (
        "dli $8, 7                          \r\n"
        "gsldlc1 $f2, 7(%[src])             \r\n"
        "gsldrc1 $f2, 0(%[src])             \r\n"
        "dadd %[src], %[src], %[stride]     \r\n"
        "1:                                 \r\n"
        "gssdlc1 $f2, 7(%[src])             \r\n"
        "gssdrc1 $f2, 0(%[src])             \r\n"
        "daddu %[src], %[src], %[stride]    \r\n"
        "daddi $8, $8, -1                   \r\n"
        "bnez $8, 1b                        \r\n"
        : [src]"+&r"(src)
        : [stride]"r"(stride)
        : "$8","$f2"
    );
}

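/* 4x4 DC prediction: dc is computed in C from the 4 top and 4 left neighbours,
 * then broadcast to 4 bytes and stored to the 4 rows. */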
void ff_pred4x4_dc_8_mmi(uint8_t *src, const uint8_t *topright,
        ptrdiff_t stride)
{
    const int dc = (src[-stride] + src[1-stride] + src[2-stride]
                 + src[3-stride] + src[-1+0*stride] + src[-1+1*stride]
                 + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    __asm__ volatile (
        "daddu $2, %[dc], $0                \r\n"
        "dmul $3, $2, %[ff_pb_1]            \r\n"
        "xor $4, $4, $4                     \r\n"
        "gsswx $3, 0(%[src],$4)             \r\n"
        "daddu $4, %[stride]                \r\n"
        "gsswx $3, 0(%[src],$4)             \r\n"
        "daddu $4, %[stride]                \r\n"
        "gsswx $3, 0(%[src],$4)             \r\n"
        "daddu $4, %[stride]                \r\n"
        "gsswx $3, 0(%[src],$4)             \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[dc]"r"(dc),[ff_pb_1]"r"(ff_pb_1)
        : "$2","$3","$4"
    );
}

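/* 8x8 vertical prediction: copy the 8 pixels above the block into all 8 rows. */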
void ff_pred8x8_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "dsubu $2, %[src], %[stride]        \r\n"
        "daddu $3, %[src], $0               \r\n"
        "ldl $4, 7($2)                      \r\n"
        "ldr $4, 0($2)                      \r\n"
        "dli $5, 0x8                        \r\n"
        "1:                                 \r\n"
        "sdl $4, 7($3)                      \r\n"
        "sdr $4, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddiu $5, -1                      \r\n"
        "bnez $5, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride)
        : "$2","$3","$4","$5"
    );
}

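/* 8x8 horizontal prediction: splat each left-neighbour pixel across its row. */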
void ff_pred8x8_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "daddiu $2, %[src], -1              \r\n"
        "daddu $3, %[src], $0               \r\n"
        "dli $6, 0x8                        \r\n"
        "1:                                 \r\n"
        "lbu $4, 0($2)                      \r\n"
        "dmul $5, $4, %[ff_pb_1]            \r\n"
        "sdl $5, 7($3)                      \r\n"
        "sdr $5, 0($3)                      \r\n"
        "daddu $2, %[stride]                \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddiu $6, -1                      \r\n"
        "bnez $6, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
        : "$2","$3","$4","$5","$6"
    );
}

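/* 16x16 plane prediction shared by H.264, SVQ3 and RV40: horizontal and vertical
 * gradients are derived from the border pixels, then scaled according to the
 * svq3/rv40 flags before the plane is written out. */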
static void ff_pred16x16_plane_compat_8_mmi(uint8_t *src, ptrdiff_t stride,
        const int svq3, const int rv40)
{
    __asm__ volatile (
        "negu $2, %[stride]                 \r\n"
        "daddu $3, %[src], $2               \r\n"
        "xor $f8, $f8, $f8                  \r\n"
        "gslwlc1 $f0, 2($3)                 \r\n"
        "gslwrc1 $f0, -1($3)                \r\n"
        "gslwlc1 $f2, 6($3)                 \r\n"
        "gslwrc1 $f2, 3($3)                 \r\n"
        "gslwlc1 $f4, 11($3)                \r\n"
        "gslwrc1 $f4, 8($3)                 \r\n"
        "gslwlc1 $f6, 15($3)                \r\n"
        "gslwrc1 $f6, 12($3)                \r\n"
        "punpcklbh $f0, $f0, $f8            \r\n"
        "punpcklbh $f2, $f2, $f8            \r\n"
        "punpcklbh $f4, $f4, $f8            \r\n"
        "punpcklbh $f6, $f6, $f8            \r\n"
        "dmtc1 %[ff_pw_m8tom5], $f20        \r\n"
        "dmtc1 %[ff_pw_m4tom1], $f22        \r\n"
        "dmtc1 %[ff_pw_1to4], $f24          \r\n"
        "dmtc1 %[ff_pw_5to8], $f26          \r\n"
        "pmullh $f0, $f0, $f20              \r\n"
        "pmullh $f2, $f2, $f22              \r\n"
        "pmullh $f4, $f4, $f24              \r\n"
        "pmullh $f6, $f6, $f26              \r\n"
        "paddsh $f0, $f0, $f4               \r\n"
        "paddsh $f2, $f2, $f6               \r\n"
        "paddsh $f0, $f0, $f2               \r\n"
        "dli $4, 0xE                        \r\n"
        "dmtc1 $4, $f28                     \r\n"
        "pshufh $f2, $f0, $f28              \r\n"
        "paddsh $f0, $f0, $f2               \r\n"
        "dli $4, 0x1                        \r\n"
        "dmtc1 $4, $f30                     \r\n"
        "pshufh $f2, $f0, $f30              \r\n"
        "paddsh $f10, $f0, $f2              \r\n"
        "daddiu $3, %[src], -1              \r\n"
        "daddu $3, $2                       \r\n"
        "lbu $4, 0($3)                      \r\n"
        "lbu $8, 16($3)                     \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $5, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $6, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $7, 0($3)                      \r\n"
        "dsll $5, 16                        \r\n"
        "dsll $6, 32                        \r\n"
        "dsll $7, 48                        \r\n"
        "or $6, $7                          \r\n"
        "or $4, $5                          \r\n"
        "or $4, $6                          \r\n"
        "dmtc1 $4, $f0                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $4, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $5, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $6, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $7, 0($3)                      \r\n"
        "dsll $5, 16                        \r\n"
        "dsll $6, 32                        \r\n"
        "dsll $7, 48                        \r\n"
        "or $6, $7                          \r\n"
        "or $4, $5                          \r\n"
        "or $4, $6                          \r\n"
        "dmtc1 $4, $f2                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $4, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $5, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $6, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $7, 0($3)                      \r\n"
        "dsll $5, 16                        \r\n"
        "dsll $6, 32                        \r\n"
        "dsll $7, 48                        \r\n"
        "or $6, $7                          \r\n"
        "or $4, $5                          \r\n"
        "or $4, $6                          \r\n"
        "dmtc1 $4, $f4                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $4, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $5, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $6, 0($3)                      \r\n"
        "daddu $3, %[stride]                \r\n"
        "lbu $7, 0($3)                      \r\n"
        "daddu $8, $7                       \r\n"
        "daddiu $8, 1                       \r\n"
        "dsll $8, 4                         \r\n"
        "dsll $5, 16                        \r\n"
        "dsll $6, 32                        \r\n"
        "dsll $7, 48                        \r\n"
        "or $6, $7                          \r\n"
        "or $4, $5                          \r\n"
        "or $4, $6                          \r\n"
        "dmtc1 $4, $f6                      \r\n"
        "pmullh $f0, $f0, $f20              \r\n"
        "pmullh $f2, $f2, $f22              \r\n"
        "pmullh $f4, $f4, $f24              \r\n"
        "pmullh $f6, $f6, $f26              \r\n"
        "paddsh $f0, $f0, $f4               \r\n"
        "paddsh $f2, $f2, $f6               \r\n"
        "paddsh $f0, $f0, $f2               \r\n"
        "pshufh $f2, $f0, $f28              \r\n"
        "paddsh $f0, $f0, $f2               \r\n"
        "pshufh $f2, $f0, $f30              \r\n"
        "paddsh $f12, $f0, $f2              \r\n"
        "dmfc1 $2, $f10                     \r\n"
        "dsll $2, 48                        \r\n"
        "dsra $2, 48                        \r\n"
        "dmfc1 $3, $f12                     \r\n"
        "dsll $3, 48                        \r\n"
        "dsra $3, 48                        \r\n"
        "beqz %[svq3], 1f                   \r\n"
        "dli $4, 4                          \r\n"
        "ddiv $2, $4                        \r\n"
        "ddiv $3, $4                        \r\n"
        "dli $4, 5                          \r\n"
        "dmul $2, $4                        \r\n"
        "dmul $3, $4                        \r\n"
        "dli $4, 16                         \r\n"
        "ddiv $2, $4                        \r\n"
        "ddiv $3, $4                        \r\n"
        "daddu $4, $2, $0                   \r\n"
        "daddu $2, $3, $0                   \r\n"
        "daddu $3, $4, $0                   \r\n"
        "b 2f                               \r\n"
        "1:                                 \r\n"
        "beqz %[rv40], 1f                   \r\n"
        "dsra $4, $2, 2                     \r\n"
        "daddu $2, $4                       \r\n"
        "dsra $4, $3, 2                     \r\n"
        "daddu $3, $4                       \r\n"
        "dsra $2, 4                         \r\n"
        "dsra $3, 4                         \r\n"
        "b 2f                               \r\n"
        "1:                                 \r\n"
        "dli $4, 5                          \r\n"
        "dmul $2, $4                        \r\n"
        "dmul $3, $4                        \r\n"
        "daddiu $2, 32                      \r\n"
        "daddiu $3, 32                      \r\n"
        "dsra $2, 6                         \r\n"
        "dsra $3, 6                         \r\n"
        "2:                                 \r\n"
        "daddu $5, $2, $3                   \r\n"
        "dli $4, 7                          \r\n"
        "dmul $5, $4                        \r\n"
        "dsubu $8, $5                       \r\n"
        "dmtc1 $0, $f8                      \r\n"
        "dmtc1 $2, $f0                      \r\n"
        "pshufh $f0, $f0, $f8               \r\n"
        "dmtc1 $3, $f10                     \r\n"
        "pshufh $f10, $f10, $f8             \r\n"
        "dmtc1 $8, $f12                     \r\n"
        "pshufh $f12, $f12, $f8             \r\n"
        "dli $4, 5                          \r\n"
        "dmtc1 $4, $f14                     \r\n"
        "pmullh $f2, %[ff_pw_0to3], $f0     \r\n"
        "pmullh $f4, %[ff_pw_4to7], $f0     \r\n"
        "pmullh $f6, %[ff_pw_8tob], $f0     \r\n"
        "pmullh $f8, %[ff_pw_ctof], $f0     \r\n"
        "daddu $3, %[src], $0               \r\n"
        "dli $2, 16                         \r\n"
        "1:                                 \r\n"
        "paddsh $f16, $f2, $f12             \r\n"
        "psrah $f16, $f16, $f14             \r\n"
        "paddsh $f18, $f4, $f12             \r\n"
        "psrah $f18, $f18, $f14             \r\n"
        "packushb $f20, $f16, $f18          \r\n"
        "gssdlc1 $f20, 7($3)                \r\n"
        "gssdrc1 $f20, 0($3)                \r\n"
        "paddsh $f16, $f6, $f12             \r\n"
        "psrah $f16, $f16, $f14             \r\n"
        "paddsh $f18, $f8, $f12             \r\n"
        "psrah $f18, $f18, $f14             \r\n"
        "packushb $f20, $f16, $f18          \r\n"
        "gssdlc1 $f20, 15($3)               \r\n"
        "gssdrc1 $f20, 8($3)                \r\n"
        "paddsh $f12, $f12, $f10            \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddiu $2, -1                      \r\n"
        "bnez $2, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[svq3]"r"(svq3),[rv40]"r"(rv40),
          [ff_pw_m8tom5]"r"(ff_pw_m8tom5),[ff_pw_m4tom1]"r"(ff_pw_m4tom1),
          [ff_pw_1to4]"r"(ff_pw_1to4),[ff_pw_5to8]"r"(ff_pw_5to8),
          [ff_pw_0to3]"f"(ff_pw_0to3),[ff_pw_4to7]"f"(ff_pw_4to7),
          [ff_pw_8tob]"f"(ff_pw_8tob),[ff_pw_ctof]"f"(ff_pw_ctof)
        : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f8",
          "$f10","$f12","$f14","$f16","$f18","$f20","$f22","$f24","$f26",
          "$f28","$f30"
    );
}

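/* Thin wrappers selecting the SVQ3, RV40 and plain H.264 variants of the shared plane routine. */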
void ff_pred16x16_plane_svq3_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 1, 0);
}

void ff_pred16x16_plane_rv40_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 1);
}

void ff_pred16x16_plane_h264_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    ff_pred16x16_plane_compat_8_mmi(src, stride, 0, 0);
}

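/* 8x8 top-DC prediction: one DC value per 4-pixel half of the row above, filled down 8 rows. */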
void ff_pred8x8_top_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "dli $2, 2                          \r\n"
        "xor $f0, $f0, $f0                  \r\n"
        "xor $f2, $f2, $f2                  \r\n"
        "xor $f30, $f30, $f30               \r\n"
        "negu $3, %[stride]                 \r\n"
        "daddu $3, $3, %[src]               \r\n"
        "gsldlc1 $f4, 7($3)                 \r\n"
        "gsldrc1 $f4, 0($3)                 \r\n"
        "punpcklbh $f0, $f4, $f30           \r\n"
        "punpckhbh $f2, $f4, $f30           \r\n"
        "biadd $f0, $f0                     \r\n"
        "biadd $f2, $f2                     \r\n"
        "pshufh $f0, $f0, $f30              \r\n"
        "pshufh $f2, $f2, $f30              \r\n"
        "dmtc1 $2, $f4                      \r\n"
        "pshufh $f4, $f4, $f30              \r\n"
        "paddush $f0, $f0, $f4              \r\n"
        "paddush $f2, $f2, $f4              \r\n"
        "dmtc1 $2, $f4                      \r\n"
        "psrlh $f0, $f0, $f4                \r\n"
        "psrlh $f2, $f2, $f4                \r\n"
        "packushb $f4, $f0, $f2             \r\n"
        "dli $2, 8                          \r\n"
        "1:                                 \r\n"
        "gssdlc1 $f4, 7(%[src])             \r\n"
        "gssdrc1 $f4, 0(%[src])             \r\n"
        "daddu %[src], %0, %[stride]        \r\n"
        "daddiu $2, $2, -1                  \r\n"
        "bnez $2, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride)
        : "$2","$3","$f0","$f2","$f4","$f30"
    );
}

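/* 8x8 DC prediction: four quadrant DC values computed from the 4+4 top and 4+4 left neighbours. */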
void ff_pred8x8_dc_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "negu $2, %[stride]                 \r\n"
        "daddu $2, $2, %[src]               \r\n"
        "daddiu $5, $2, 4                   \r\n"
        "lbu $6, 0($2)                      \r\n"
        "daddu $3, $0, $6                   \r\n"
        "daddiu $2, 1                       \r\n"
        "lbu $6, 0($5)                      \r\n"
        "daddu $4, $0, $6                   \r\n"
        "daddiu $5, 1                       \r\n"
        "lbu $6, 0($2)                      \r\n"
        "daddu $3, $3, $6                   \r\n"
        "daddiu $2, 1                       \r\n"
        "lbu $6, 0($5)                      \r\n"
        "daddu $4, $4, $6                   \r\n"
        "daddiu $5, 1                       \r\n"
        "lbu $6, 0($2)                      \r\n"
        "daddu $3, $3, $6                   \r\n"
        "daddiu $2, 1                       \r\n"
        "lbu $6, 0($5)                      \r\n"
        "daddu $4, $4, $6                   \r\n"
        "daddiu $5, 1                       \r\n"
        "lbu $6, 0($2)                      \r\n"
        "daddu $3, $3, $6                   \r\n"
        "daddiu $2, 1                       \r\n"
        "lbu $6, 0($5)                      \r\n"
        "daddu $4, $4, $6                   \r\n"
        "daddiu $5, 1                       \r\n"
        "dli $6, -1                         \r\n"
        "daddu $6, $6, %[src]               \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $7, $0, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $7, $7, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $7, $7, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $7, $7, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $8, $0, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $8, $8, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $8, $8, $5                   \r\n"
        "daddu $6, $6, %[stride]            \r\n"
        "lbu $5, 0($6)                      \r\n"
        "daddu $8, $8, $5                   \r\n"
        "daddu $3, $3, $7                   \r\n"
        "daddiu $3, $3, 4                   \r\n"
        "daddiu $4, $4, 2                   \r\n"
        "daddiu $5, $8, 2                   \r\n"
        "daddu $6, $4, $5                   \r\n"
        "dsrl $3, 3                         \r\n"
        "dsrl $4, 2                         \r\n"
        "dsrl $5, 2                         \r\n"
        "dsrl $6, 3                         \r\n"
        "xor $f30, $f30, $f30               \r\n"
        "dmtc1 $3, $f0                      \r\n"
        "pshufh $f0, $f0, $f30              \r\n"
        "dmtc1 $4, $f2                      \r\n"
        "pshufh $f2, $f2, $f30              \r\n"
        "dmtc1 $5, $f4                      \r\n"
        "pshufh $f4, $f4, $f30              \r\n"
        "dmtc1 $6, $f6                      \r\n"
        "pshufh $f6, $f6, $f30              \r\n"
        "packushb $f0, $f0, $f2             \r\n"
        "packushb $f2, $f4, $f6             \r\n"
        "daddu $2, $0, %[src]               \r\n"
        "sdc1 $f0, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f0, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f0, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f0, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f2, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f2, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f2, 0($2)                    \r\n"
        "daddu $2, $2, %[stride]            \r\n"
        "sdc1 $f2, 0($2)                    \r\n"
        ::[src]"r"(src),[stride]"r"(stride)
        : "$2","$3","$4","$5","$6","$7","$8","$f0","$f2","$f4","$f6","$f30"
    );
}

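/* 8x16 vertical prediction: copy the 8 pixels above the block into all 16 rows. */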
void ff_pred8x16_vertical_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "gsldlc1 $f2, 7(%[srcA])            \r\n"
        "gsldrc1 $f2, 0(%[srcA])            \r\n"
        "dli $8, 16                         \r\n"
        "1:                                 \r\n"
        "gssdlc1 $f2, 7(%[src])             \r\n"
        "gssdrc1 $f2, 0(%[src])             \r\n"
        "daddu %[src], %[src], %[stride]    \r\n"
        "daddi $8, $8, -1                   \r\n"
        "bnez $8, 1b                        \r\n"
        : [src]"+&r"(src)
        : [stride]"r"(stride),[srcA]"r"(src-stride)
        : "$8","$f2"
    );
}

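/* 8x16 horizontal prediction: splat each left-neighbour pixel across its row for 16 rows. */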
void ff_pred8x16_horizontal_8_mmi(uint8_t *src, ptrdiff_t stride)
{
    __asm__ volatile (
        "daddiu $2, %[src], -1              \r\n"
        "daddu $3, %[src], $0               \r\n"
        "dli $6, 0x10                       \r\n"
        "1:                                 \r\n"
        "lbu $4, 0($2)                      \r\n"
        "dmul $5, $4, %[ff_pb_1]            \r\n"
        "sdl $5, 7($3)                      \r\n"
        "sdr $5, 0($3)                      \r\n"
        "daddu $2, %[stride]                \r\n"
        "daddu $3, %[stride]                \r\n"
        "daddiu $6, -1                      \r\n"
        "bnez $6, 1b                        \r\n"
        ::[src]"r"(src),[stride]"r"(stride),[ff_pb_1]"r"(ff_pb_1)
        : "$2","$3","$4","$5","$6"
    );
}