Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * VIS optimized software YUV to RGB converter |
||
3 | * Copyright (c) 2007 Denes Balatoni |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #include |
||
23 | #include |
||
24 | |||
25 | #include "libavutil/attributes.h" |
||
26 | #include "libswscale/swscale.h" |
||
27 | #include "libswscale/swscale_internal.h" |
||
28 | |||
/*
 * Asm prologue shared by the VIS converters in this file.
 *
 * Writes 0x10 to %gsr (the VIS graphics status register, which governs the
 * fpack16 instructions issued later) and then loads ten consecutive 64-bit
 * words from the table addressed by asm operand %5 into the double registers
 * %f32, %f34, ... %f50, one word per register in table order.
 *
 * Both users in this file pass c->sparc_coeffs as operand %5, so after this
 * prologue %f32..%f40 hold table entries 0..4 and %f42..%f50 hold entries
 * 5..9.
 */
#define YUV2RGB_INIT                                   \
    /* graphics status register */                     \
    "wr %%g0, 0x10, %%gsr \n\t"                        \
    /* table entries 0..4 */                           \
    "ldd [%5], %%f32 \n\t"                             \
    "ldd [%5 + 8], %%f34 \n\t"                         \
    "ldd [%5 + 16], %%f36 \n\t"                        \
    "ldd [%5 + 24], %%f38 \n\t"                        \
    "ldd [%5 + 32], %%f40 \n\t"                        \
    /* table entries 5..9 */                           \
    "ldd [%5 + 40], %%f42 \n\t"                        \
    "ldd [%5 + 48], %%f44 \n\t"                        \
    "ldd [%5 + 56], %%f46 \n\t"                        \
    "ldd [%5 + 64], %%f48 \n\t"                        \
    "ldd [%5 + 72], %%f50 \n\t"
||
41 | |||
/*
 * Core of the YUV -> RGB arithmetic, expressed as a VIS asm fragment.
 *
 * Two groups of pixels are handled side by side; every instruction appears
 * twice, once per group:
 *   group A: Y in %f0,  u in %f3,  v in %f5
 *   group B: Y in %f16, u in %f19, v in %f21
 *
 * Constants are expected in %f32..%f50 as loaded by YUV2RGB_INIT
 * (%f32..%f40 = table entries 0..4, %f42..%f50 = table entries 5..9).
 *
 * On exit, after fpack16:
 *   R in %f4 / %f20, B in %f6 / %f22, G in %f2 / %f18.
 * %f0, %f8, %f16 and %f24 are used as scratch.
 */
#define YUV2RGB_KERNEL                                              \
    /* multiply each sample by its coefficient */                   \
    "fmul8x16 %%f3, %%f48, %%f6 \n\t"    /* u * entry 8 */          \
    "fmul8x16 %%f19, %%f48, %%f22 \n\t"                             \
    "fmul8x16 %%f5, %%f44, %%f8 \n\t"    /* v * entry 6 */          \
    "fmul8x16 %%f21, %%f44, %%f24 \n\t"                             \
    "fmul8x16 %%f0, %%f42, %%f0 \n\t"    /* Y * entry 5 */          \
    "fmul8x16 %%f16, %%f42, %%f16 \n\t"                             \
    "fmul8x16 %%f3, %%f50, %%f2 \n\t"    /* u * entry 9 */          \
    "fmul8x16 %%f19, %%f50, %%f18 \n\t"                             \
    "fmul8x16 %%f5, %%f46, %%f4 \n\t"    /* v * entry 7 */          \
    "fmul8x16 %%f21, %%f46, %%f20 \n\t"                             \
                                                                    \
    /* subtract the matching offset term (table entries 0..4) */    \
    "fpsub16 %%f6, %%f34, %%f6 \n\t"     /* entry 1 */              \
    "fpsub16 %%f22, %%f34, %%f22 \n\t"   /* entry 1 */              \
    "fpsub16 %%f8, %%f38, %%f8 \n\t"     /* entry 3 */              \
    "fpsub16 %%f24, %%f38, %%f24 \n\t"   /* entry 3 */              \
    "fpsub16 %%f0, %%f32, %%f0 \n\t"     /* entry 0 */              \
    "fpsub16 %%f16, %%f32, %%f16 \n\t"   /* entry 0 */              \
    "fpsub16 %%f2, %%f36, %%f2 \n\t"     /* entry 2 */              \
    "fpsub16 %%f18, %%f36, %%f18 \n\t"   /* entry 2 */              \
    "fpsub16 %%f4, %%f40, %%f4 \n\t"     /* entry 4 */              \
    "fpsub16 %%f20, %%f40, %%f20 \n\t"   /* entry 4 */              \
                                                                    \
    /* combine luma with the chroma contributions */                \
    "fpadd16 %%f0, %%f8, %%f8 \n\t"      /* partial G: Y + v term */ \
    "fpadd16 %%f16, %%f24, %%f24 \n\t"   /* partial G */            \
    "fpadd16 %%f0, %%f4, %%f4 \n\t"      /* R = Y + v term */       \
    "fpadd16 %%f16, %%f20, %%f20 \n\t"   /* R */                    \
    "fpadd16 %%f0, %%f6, %%f6 \n\t"      /* B = Y + u term */       \
    "fpadd16 %%f16, %%f22, %%f22 \n\t"   /* B */                    \
    "fpadd16 %%f8, %%f2, %%f2 \n\t"      /* G = partial G + u term */ \
    "fpadd16 %%f24, %%f18, %%f18 \n\t"   /* G */                    \
                                                                    \
    /* narrow the 16-bit results to bytes, in place */              \
    "fpack16 %%f4, %%f4 \n\t"                                       \
    "fpack16 %%f20, %%f20 \n\t"                                     \
    "fpack16 %%f6, %%f6 \n\t"                                       \
    "fpack16 %%f22, %%f22 \n\t"                                     \
    "fpack16 %%f2, %%f2 \n\t"                                       \
    "fpack16 %%f18, %%f18 \n\t"
||
81 | |||
82 | // FIXME: must be changed to set alpha to 255 instead of 0 |
||
83 | static int vis_420P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[], |
||
84 | int srcSliceY, int srcSliceH, |
||
85 | uint8_t *dst[], int dstStride[]) |
||
86 | { |
||
87 | int y, out1, out2, out3, out4, out5, out6; |
||
88 | |||
89 | for (y = 0; y < srcSliceH; ++y) |
||
90 | __asm__ volatile ( |
||
91 | YUV2RGB_INIT |
||
92 | "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ |
||
93 | "1: \n\t" |
||
94 | "ldda [%1] %%asi, %%f2 \n\t" |
||
95 | "ldda [%1 + 2] %%asi, %%f18 \n\t" |
||
96 | "ldda [%2] %%asi, %%f4 \n\t" |
||
97 | "ldda [%2 + 2] %%asi, %%f20 \n\t" |
||
98 | "ld [%0], %%f0 \n\t" |
||
99 | "ld [%0+4], %%f16 \n\t" |
||
100 | "fpmerge %%f3, %%f3, %%f2 \n\t" |
||
101 | "fpmerge %%f19, %%f19, %%f18 \n\t" |
||
102 | "fpmerge %%f5, %%f5, %%f4 \n\t" |
||
103 | "fpmerge %%f21, %%f21, %%f20 \n\t" |
||
104 | YUV2RGB_KERNEL |
||
105 | "fzero %%f0 \n\t" |
||
106 | "fpmerge %%f4, %%f6, %%f8 \n\t" // r, b, t1 |
||
107 | "fpmerge %%f20, %%f22, %%f24 \n\t" // r, b, t1 |
||
108 | "fpmerge %%f0, %%f2, %%f10 \n\t" // 0, g, t2 |
||
109 | "fpmerge %%f0, %%f18, %%f26 \n\t" // 0, g, t2 |
||
110 | "fpmerge %%f10, %%f8, %%f4 \n\t" // t2, t1, msb |
||
111 | "fpmerge %%f26, %%f24, %%f20 \n\t" // t2, t1, msb |
||
112 | "fpmerge %%f11, %%f9, %%f6 \n\t" // t2, t1, lsb |
||
113 | "fpmerge %%f27, %%f25, %%f22 \n\t" // t2, t1, lsb |
||
114 | "std %%f4, [%3] \n\t" |
||
115 | "std %%f20, [%3 + 16] \n\t" |
||
116 | "std %%f6, [%3 + 8] \n\t" |
||
117 | "std %%f22, [%3 + 24] \n\t" |
||
118 | |||
119 | "add %0, 8, %0 \n\t" |
||
120 | "add %1, 4, %1 \n\t" |
||
121 | "add %2, 4, %2 \n\t" |
||
122 | "subcc %4, 8, %4 \n\t" |
||
123 | "bne 1b \n\t" |
||
124 | "add %3, 32, %3 \n\t" // delay slot |
||
125 | : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) |
||
126 | : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + ((y + srcSliceY) >> 1) * srcStride[1]), |
||
127 | "2" (src[2] + ((y + srcSliceY) >> 1) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]), |
||
128 | "4" (c->dstW), |
||
129 | "5" (c->sparc_coeffs) |
||
130 | ); |
||
131 | |||
132 | return srcSliceH; |
||
133 | } |
||
134 | |||
135 | // FIXME: must be changed to set alpha to 255 instead of 0 |
||
136 | static int vis_422P_ARGB32(SwsContext *c, uint8_t *src[], int srcStride[], |
||
137 | int srcSliceY, int srcSliceH, |
||
138 | uint8_t *dst[], int dstStride[]) |
||
139 | { |
||
140 | int y, out1, out2, out3, out4, out5, out6; |
||
141 | |||
142 | for (y = 0; y < srcSliceH; ++y) |
||
143 | __asm__ volatile ( |
||
144 | YUV2RGB_INIT |
||
145 | "wr %%g0, 0xd2, %%asi \n\t" /* ASI_FL16_P */ |
||
146 | "1: \n\t" |
||
147 | "ldda [%1] %%asi, %%f2 \n\t" |
||
148 | "ldda [%1 + 2] %%asi, %%f18 \n\t" |
||
149 | "ldda [%2] %%asi, %%f4 \n\t" |
||
150 | "ldda [%2 + 2] %%asi, %%f20 \n\t" |
||
151 | "ld [%0], %%f0 \n\t" |
||
152 | "ld [%0 + 4], %%f16 \n\t" |
||
153 | "fpmerge %%f3, %%f3, %%f2 \n\t" |
||
154 | "fpmerge %%f19, %%f19, %%f18 \n\t" |
||
155 | "fpmerge %%f5, %%f5, %%f4 \n\t" |
||
156 | "fpmerge %%f21, %%f21, %%f20 \n\t" |
||
157 | YUV2RGB_KERNEL |
||
158 | "fzero %%f0 \n\t" |
||
159 | "fpmerge %%f4, %%f6, %%f8 \n\t" // r,b,t1 |
||
160 | "fpmerge %%f20, %%f22, %%f24 \n\t" // r,b,t1 |
||
161 | "fpmerge %%f0, %%f2, %%f10 \n\t" // 0,g,t2 |
||
162 | "fpmerge %%f0, %%f18, %%f26 \n\t" // 0,g,t2 |
||
163 | "fpmerge %%f10, %%f8, %%f4 \n\t" // t2,t1,msb |
||
164 | "fpmerge %%f26, %%f24, %%f20 \n\t" // t2,t1,msb |
||
165 | "fpmerge %%f11, %%f9, %%f6 \n\t" // t2,t1,lsb |
||
166 | "fpmerge %%f27, %%f25, %%f22 \n\t" // t2,t1,lsb |
||
167 | "std %%f4, [%3] \n\t" |
||
168 | "std %%f20, [%3 + 16] \n\t" |
||
169 | "std %%f6, [%3 + 8] \n\t" |
||
170 | "std %%f22, [%3 + 24] \n\t" |
||
171 | |||
172 | "add %0, 8, %0 \n\t" |
||
173 | "add %1, 4, %1 \n\t" |
||
174 | "add %2, 4, %2 \n\t" |
||
175 | "subcc %4, 8, %4 \n\t" |
||
176 | "bne 1b \n\t" |
||
177 | "add %3, 32, %3 \n\t" //delay slot |
||
178 | : "=r" (out1), "=r" (out2), "=r" (out3), "=r" (out4), "=r" (out5), "=r" (out6) |
||
179 | : "0" (src[0] + (y + srcSliceY) * srcStride[0]), "1" (src[1] + (y + srcSliceY) * srcStride[1]), |
||
180 | "2" (src[2] + (y + srcSliceY) * srcStride[2]), "3" (dst[0] + (y + srcSliceY) * dstStride[0]), |
||
181 | "4" (c->dstW), |
||
182 | "5" (c->sparc_coeffs) |
||
183 | ); |
||
184 | |||
185 | return srcSliceH; |
||
186 | } |
||
187 | |||
188 | av_cold SwsFunc ff_yuv2rgb_init_vis(SwsContext *c) |
||
189 | { |
||
190 | c->sparc_coeffs[5] = c->yCoeff; |
||
191 | c->sparc_coeffs[6] = c->vgCoeff; |
||
192 | c->sparc_coeffs[7] = c->vrCoeff; |
||
193 | c->sparc_coeffs[8] = c->ubCoeff; |
||
194 | c->sparc_coeffs[9] = c->ugCoeff; |
||
195 | |||
196 | c->sparc_coeffs[0] = (((int16_t)c->yOffset * (int16_t)c->yCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; |
||
197 | c->sparc_coeffs[1] = (((int16_t)c->uOffset * (int16_t)c->ubCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; |
||
198 | c->sparc_coeffs[2] = (((int16_t)c->uOffset * (int16_t)c->ugCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; |
||
199 | c->sparc_coeffs[3] = (((int16_t)c->vOffset * (int16_t)c->vgCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; |
||
200 | c->sparc_coeffs[4] = (((int16_t)c->vOffset * (int16_t)c->vrCoeff >> 11) & 0xffff) * 0x0001000100010001ULL; |
||
201 | |||
202 | if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV422P && (c->dstW & 7) == 0) { |
||
203 | av_log(c, AV_LOG_INFO, |
||
204 | "SPARC VIS accelerated YUV422P -> RGB32 (WARNING: alpha value is wrong)\n"); |
||
205 | return vis_422P_ARGB32; |
||
206 | } else if (c->dstFormat == AV_PIX_FMT_RGB32 && c->srcFormat == AV_PIX_FMT_YUV420P && (c->dstW & 7) == 0) { |
||
207 | av_log(c, AV_LOG_INFO, |
||
208 | "SPARC VIS accelerated YUV420P -> RGB32 (WARNING: alpha value is wrong)\n"); |
||
209 | return vis_420P_ARGB32; |
||
210 | } |
||
211 | return NULL; |
||
212 | }>> |