/*
 * RV40 decoder motion compensation functions
 * Copyright (c) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * RV40 decoder motion compensation functions
 */

#include <stddef.h>
#include <stdint.h>

#include "avcodec.h"
#include "h264qpel.h"
#include "rv34dsp.h"
#include "libavutil/avassert.h"
#include "libavutil/common.h"

/**
 * Generate the 8- and 16-pixel lowpass helpers used by the quarter-pel
 * motion compensation functions.
 *
 * Along the filtering direction every output sample is
 *   (s[-2] + s[3] - 5*(s[-1] + s[2]) + s[0]*C1 + s[1]*C2 + (1 << (SHIFT-1))) >> SHIFT
 * and is passed to OP together with the destination sample. The OP macros
 * used in this file index the local table `cm` (ff_cropTbl + MAX_NEG_CROP)
 * with that value (presumably to clamp it to the 8-bit range).
 *
 * The horizontal helpers process `h` rows of 8 samples; the vertical helpers
 * process `w` columns of 8 samples. The 16-pixel variants run the 8-pixel
 * helper on the left and right halves for the first 8 rows and then again,
 * 8 rows further down, with the remaining count (h-8 or w-8).
 *
 * @param OPNAME prefix of the generated function names (put_ or avg_ here)
 * @param OP     store macro invoked as OP(destination, filtered value)
 */
#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        /* one column: rows -2..10 feed the 8 output rows */\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}

/**
 * Generate the quarter-pel motion compensation entry points
 * OPNAME rv40_qpel SIZE _mcXY_c for one block size.
 *
 * In the generated names the first digit (X) selects the horizontal filter
 * and the second digit (Y) the vertical filter. The (C1, C2, SHIFT) triples
 * passed to the lowpass helpers are:
 *   1 -> (52, 20, 6),  2 -> (20, 20, 5),  3 -> (20, 52, 6).
 *
 * Functions with both X and Y non-zero first run the horizontal "put"
 * filter over SIZE+5 rows, starting two rows above src, into the temporary
 * buffer `full` (row pitch SIZE). The vertical filter then reads from
 * `full_mid`, which points at row 2 of that buffer, and writes to dst using
 * the OPNAME store operation.
 *
 * mc00, mc20, mc02 and mc33 are not generated by this macro.
 *
 * @param OPNAME prefix selecting the store operation (put_ or avg_ here)
 * @param SIZE   block width/height in pixels (8 or 16 here)
 */
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}

192 | #define op_avg(a, b) a = (((a)+cm[b]+1)>>1) |
||
193 | #define op_put(a, b) a = cm[b] |
||
194 | |||
195 | RV40_LOWPASS(put_ , op_put) |
||
196 | RV40_LOWPASS(avg_ , op_avg) |
||
197 | |||
198 | #undef op_avg |
||
199 | #undef op_put |
||
200 | |||
201 | RV40_MC(put_, 8) |
||
202 | RV40_MC(put_, 16) |
||
203 | RV40_MC(avg_, 8) |
||
204 | RV40_MC(avg_, 16) |
||
205 | |||
/**
 * Rounding bias added before the final >> 6 in chroma motion compensation.
 * The chroma functions index it as rv40_bias[y >> 1][x >> 1].
 */
static const int rv40_bias[4][4] = {
    {  0, 16, 32, 16 },
    { 32, 28, 32, 28 },
    {  0, 32, 16, 32 },
    { 32, 28, 32, 28 }
};

/**
 * Generate the 4- and 8-pixel-wide chroma motion compensation functions.
 *
 * x and y are asserted (av_assert2) to lie in 0..7. The four weights
 *   A = (8-x)*(8-y), B = x*(8-y), C = (8-x)*y, D = x*y
 * always sum to 64. Each output is the weighted sum of the 2x2 source
 * neighbourhood plus rv40_bias[y>>1][x>>1], handed to OP (the OP macros
 * used in this file shift it right by 6).
 *
 * When D is zero (x == 0 or y == 0) only two taps are needed: A on the
 * current sample and E = B + C on the neighbour one row down (if C != 0)
 * or one pixel to the right (otherwise).
 *
 * @param OPNAME prefix of the generated function names (put_ or avg_ here)
 * @param OP     store macro invoked as OP(destination, weighted sum)
 */
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}

288 | #define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1) |
||
289 | #define op_put(a, b) a = ((b)>>6) |
||
290 | |||
291 | RV40_CHROMA_MC(put_, op_put) |
||
292 | RV40_CHROMA_MC(avg_, op_avg) |
||
293 | |||
/**
 * Generate the two weighted-prediction functions for a size x size block.
 *
 * Both blend src1 (weighted by w2) with src2 (weighted by w1), add 0x10
 * and shift right by 5. src1, src2 and dst all advance by `stride` per row.
 *
 *  - rv40_weight_func_rnd_<size>:   each product is shifted right by 9
 *                                   before the two are summed.
 *  - rv40_weight_func_nornd_<size>: the products are summed unscaled.
 *
 * The result is stored to a uint8_t without explicit clipping.
 *
 * @param size block width and height in pixels
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (w2 * src1[i] + w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)

/**
 * dither values for deblocking filter - left/top values
 *
 * Indexed with dmode + i (i = 0..3) by the strong loop filter.
 */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/**
 * dither values for deblocking filter - right/bottom values
 *
 * Indexed with dmode + i (i = 0..3) by the strong loop filter.
 */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};

339 | #define CLIP_SYMM(a, b) av_clip(a, -(b), b) |
||
340 | /** |
||
341 | * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1 |
||
342 | */ |
||
343 | static av_always_inline void rv40_weak_loop_filter(uint8_t *src, |
||
344 | const int step, |
||
345 | const ptrdiff_t stride, |
||
346 | const int filter_p1, |
||
347 | const int filter_q1, |
||
348 | const int alpha, |
||
349 | const int beta, |
||
350 | const int lim_p0q0, |
||
351 | const int lim_q1, |
||
352 | const int lim_p1) |
||
353 | { |
||
354 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
||
355 | int i, t, u, diff; |
||
356 | |||
357 | for (i = 0; i < 4; i++, src += stride) { |
||
358 | int diff_p1p0 = src[-2*step] - src[-1*step]; |
||
359 | int diff_q1q0 = src[ 1*step] - src[ 0*step]; |
||
360 | int diff_p1p2 = src[-2*step] - src[-3*step]; |
||
361 | int diff_q1q2 = src[ 1*step] - src[ 2*step]; |
||
362 | |||
363 | t = src[0*step] - src[-1*step]; |
||
364 | if (!t) |
||
365 | continue; |
||
366 | |||
367 | u = (alpha * FFABS(t)) >> 7; |
||
368 | if (u > 3 - (filter_p1 && filter_q1)) |
||
369 | continue; |
||
370 | |||
371 | t <<= 2; |
||
372 | if (filter_p1 && filter_q1) |
||
373 | t += src[-2*step] - src[1*step]; |
||
374 | |||
375 | diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0); |
||
376 | src[-1*step] = cm[src[-1*step] + diff]; |
||
377 | src[ 0*step] = cm[src[ 0*step] - diff]; |
||
378 | |||
379 | if (filter_p1 && FFABS(diff_p1p2) <= beta) { |
||
380 | t = (diff_p1p0 + diff_p1p2 - diff) >> 1; |
||
381 | src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)]; |
||
382 | } |
||
383 | |||
384 | if (filter_q1 && FFABS(diff_q1q2) <= beta) { |
||
385 | t = (diff_q1q0 + diff_q1q2 + diff) >> 1; |
||
386 | src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)]; |
||
387 | } |
||
388 | } |
||
389 | } |
||
390 | |||
/**
 * Weak loop filter, "h" variant: calls rv40_weak_loop_filter() with
 * step = stride and a position increment of 1, so the filter taps are
 * `stride` bytes apart and the four filtered positions are adjacent bytes.
 */
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}

/**
 * Weak loop filter, "v" variant: calls rv40_weak_loop_filter() with
 * step = 1 and a position increment of `stride`, so the filter taps are
 * adjacent bytes and the four filtered positions are `stride` bytes apart.
 */
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}

411 | static av_always_inline void rv40_strong_loop_filter(uint8_t *src, |
||
412 | const int step, |
||
413 | const ptrdiff_t stride, |
||
414 | const int alpha, |
||
415 | const int lims, |
||
416 | const int dmode, |
||
417 | const int chroma) |
||
418 | { |
||
419 | int i; |
||
420 | |||
421 | for(i = 0; i < 4; i++, src += stride){ |
||
422 | int sflag, p0, q0, p1, q1; |
||
423 | int t = src[0*step] - src[-1*step]; |
||
424 | |||
425 | if (!t) |
||
426 | continue; |
||
427 | |||
428 | sflag = (alpha * FFABS(t)) >> 7; |
||
429 | if (sflag > 1) |
||
430 | continue; |
||
431 | |||
432 | p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] + |
||
433 | 26*src[ 0*step] + 25*src[ 1*step] + |
||
434 | rv40_dither_l[dmode + i]) >> 7; |
||
435 | |||
436 | q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] + |
||
437 | 26*src[ 1*step] + 25*src[ 2*step] + |
||
438 | rv40_dither_r[dmode + i]) >> 7; |
||
439 | |||
440 | if (sflag) { |
||
441 | p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims); |
||
442 | q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims); |
||
443 | } |
||
444 | |||
445 | p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 + |
||
446 | 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7; |
||
447 | q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] + |
||
448 | 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7; |
||
449 | |||
450 | if (sflag) { |
||
451 | p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims); |
||
452 | q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims); |
||
453 | } |
||
454 | |||
455 | src[-2*step] = p1; |
||
456 | src[-1*step] = p0; |
||
457 | src[ 0*step] = q0; |
||
458 | src[ 1*step] = q1; |
||
459 | |||
460 | if(!chroma){ |
||
461 | src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + |
||
462 | 51*src[-3*step] + 26*src[-4*step] + 64) >> 7; |
||
463 | src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + |
||
464 | 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7; |
||
465 | } |
||
466 | } |
||
467 | } |
||
468 | |||
/**
 * Strong loop filter, "h" variant: calls rv40_strong_loop_filter() with
 * step = stride and a position increment of 1.
 */
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}

/**
 * Strong loop filter, "v" variant: calls rv40_strong_loop_filter() with
 * step = 1 and a position increment of `stride`.
 */
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}

483 | static av_always_inline int rv40_loop_filter_strength(uint8_t *src, |
||
484 | int step, ptrdiff_t stride, |
||
485 | int beta, int beta2, |
||
486 | int edge, |
||
487 | int *p1, int *q1) |
||
488 | { |
||
489 | int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0; |
||
490 | int strong0 = 0, strong1 = 0; |
||
491 | uint8_t *ptr; |
||
492 | int i; |
||
493 | |||
494 | for (i = 0, ptr = src; i < 4; i++, ptr += stride) { |
||
495 | sum_p1p0 += ptr[-2*step] - ptr[-1*step]; |
||
496 | sum_q1q0 += ptr[ 1*step] - ptr[ 0*step]; |
||
497 | } |
||
498 | |||
499 | *p1 = FFABS(sum_p1p0) < (beta << 2); |
||
500 | *q1 = FFABS(sum_q1q0) < (beta << 2); |
||
501 | |||
502 | if(!*p1 && !*q1) |
||
503 | return 0; |
||
504 | |||
505 | if (!edge) |
||
506 | return 0; |
||
507 | |||
508 | for (i = 0, ptr = src; i < 4; i++, ptr += stride) { |
||
509 | sum_p1p2 += ptr[-2*step] - ptr[-3*step]; |
||
510 | sum_q1q2 += ptr[ 1*step] - ptr[ 2*step]; |
||
511 | } |
||
512 | |||
513 | strong0 = *p1 && (FFABS(sum_p1p2) < beta2); |
||
514 | strong1 = *q1 && (FFABS(sum_q1q2) < beta2); |
||
515 | |||
516 | return strong0 && strong1; |
||
517 | } |
||
518 | |||
/**
 * Loop filter strength, "h" variant: calls rv40_loop_filter_strength()
 * with step = stride and a position increment of 1.
 */
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}

/**
 * Loop filter strength, "v" variant: calls rv40_loop_filter_strength()
 * with step = 1 and a position increment of `stride`.
 */
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}

533 | av_cold void ff_rv40dsp_init(RV34DSPContext *c) |
||
534 | { |
||
535 | H264QpelContext qpel; |
||
536 | |||
537 | ff_rv34dsp_init(c); |
||
538 | ff_h264qpel_init(&qpel, 8); |
||
539 | |||
540 | c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0]; |
||
541 | c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; |
||
542 | c->put_pixels_tab[0][ 2] = qpel.put_h264_qpel_pixels_tab[0][2]; |
||
543 | c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c; |
||
544 | c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c; |
||
545 | c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c; |
||
546 | c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c; |
||
547 | c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c; |
||
548 | c->put_pixels_tab[0][ 8] = qpel.put_h264_qpel_pixels_tab[0][8]; |
||
549 | c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c; |
||
550 | c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c; |
||
551 | c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c; |
||
552 | c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c; |
||
553 | c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c; |
||
554 | c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c; |
||
555 | c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_c; |
||
556 | c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0]; |
||
557 | c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c; |
||
558 | c->avg_pixels_tab[0][ 2] = qpel.avg_h264_qpel_pixels_tab[0][2]; |
||
559 | c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c; |
||
560 | c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c; |
||
561 | c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c; |
||
562 | c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c; |
||
563 | c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c; |
||
564 | c->avg_pixels_tab[0][ 8] = qpel.avg_h264_qpel_pixels_tab[0][8]; |
||
565 | c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c; |
||
566 | c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c; |
||
567 | c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c; |
||
568 | c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c; |
||
569 | c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c; |
||
570 | c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c; |
||
571 | c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_c; |
||
572 | c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0]; |
||
573 | c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c; |
||
574 | c->put_pixels_tab[1][ 2] = qpel.put_h264_qpel_pixels_tab[1][2]; |
||
575 | c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c; |
||
576 | c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c; |
||
577 | c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c; |
||
578 | c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c; |
||
579 | c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c; |
||
580 | c->put_pixels_tab[1][ 8] = qpel.put_h264_qpel_pixels_tab[1][8]; |
||
581 | c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c; |
||
582 | c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c; |
||
583 | c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c; |
||
584 | c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c; |
||
585 | c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c; |
||
586 | c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c; |
||
587 | c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_c; |
||
588 | c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0]; |
||
589 | c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c; |
||
590 | c->avg_pixels_tab[1][ 2] = qpel.avg_h264_qpel_pixels_tab[1][2]; |
||
591 | c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c; |
||
592 | c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c; |
||
593 | c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c; |
||
594 | c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c; |
||
595 | c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c; |
||
596 | c->avg_pixels_tab[1][ 8] = qpel.avg_h264_qpel_pixels_tab[1][8]; |
||
597 | c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c; |
||
598 | c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c; |
||
599 | c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c; |
||
600 | c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c; |
||
601 | c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c; |
||
602 | c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c; |
||
603 | c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_c; |
||
604 | |||
605 | c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c; |
||
606 | c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c; |
||
607 | c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; |
||
608 | c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; |
||
609 | |||
610 | c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16; |
||
611 | c->rv40_weight_pixels_tab[0][1] = rv40_weight_func_rnd_8; |
||
612 | c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16; |
||
613 | c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8; |
||
614 | |||
615 | c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter; |
||
616 | c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter; |
||
617 | c->rv40_strong_loop_filter[0] = rv40_h_strong_loop_filter; |
||
618 | c->rv40_strong_loop_filter[1] = rv40_v_strong_loop_filter; |
||
619 | c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength; |
||
620 | c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength; |
||
621 | |||
622 | if (ARCH_ARM) |
||
623 | ff_rv40dsp_init_arm(c); |
||
624 | if (ARCH_X86) |
||
625 | ff_rv40dsp_init_x86(c); |
||
626 | }>>>><>>><>>>>=>=>=><=>>>>>>>>8>8>>>8>8>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>(SHIFT-1)))><(SHIFT-1)))>> |