Rev 4359 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4304 | Serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
21 | * SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Chris Wilson |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | #ifdef HAVE_CONFIG_H |
||
29 | #include "config.h" |
||
30 | #endif |
||
31 | |||
32 | #include "sna.h" |
||
33 | #include "sna_render.h" |
||
34 | #include "sna_render_inline.h" |
||
35 | #include "gen4_vertex.h" |
||
36 | |||
/*
 * sse2 is used below as a prefix on function definitions.  When nothing has
 * defined it before this point, make it expand to nothing.
 */
#if !defined(sse2)
#define sse2
#endif
||
40 | |||
4501 | Serge | 41 | void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op) |
42 | { |
||
43 | int vertex_index; |
||
44 | |||
45 | assert(op->floats_per_rect == 3*op->floats_per_vertex); |
||
46 | |||
47 | vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; |
||
48 | if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) { |
||
49 | DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n", |
||
50 | __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex)); |
||
51 | if (gen4_vertex_finish(sna) < op->floats_per_rect) { |
||
52 | kgem_submit(&sna->kgem); |
||
53 | _kgem_set_mode(&sna->kgem, KGEM_RENDER); |
||
54 | } |
||
55 | |||
56 | vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; |
||
57 | assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size); |
||
58 | } |
||
59 | |||
60 | sna->render.vertex_index = vertex_index; |
||
61 | sna->render.vertex_used = vertex_index * op->floats_per_vertex; |
||
62 | } |
||
63 | |||
4304 | Serge | 64 | void gen4_vertex_flush(struct sna *sna) |
65 | { |
||
66 | DBG(("%s[%x] = %d\n", __FUNCTION__, |
||
67 | 4*sna->render.vertex_offset, |
||
68 | sna->render.vertex_index - sna->render.vertex_start)); |
||
69 | |||
70 | assert(sna->render.vertex_offset); |
||
4501 | Serge | 71 | assert(sna->render.vertex_offset <= sna->kgem.nbatch); |
4304 | Serge | 72 | assert(sna->render.vertex_index > sna->render.vertex_start); |
4501 | Serge | 73 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
4304 | Serge | 74 | |
75 | sna->kgem.batch[sna->render.vertex_offset] = |
||
76 | sna->render.vertex_index - sna->render.vertex_start; |
||
77 | sna->render.vertex_offset = 0; |
||
78 | } |
||
79 | |||
80 | int gen4_vertex_finish(struct sna *sna) |
||
81 | { |
||
82 | struct kgem_bo *bo; |
||
83 | unsigned int i; |
||
84 | unsigned hint, size; |
||
85 | |||
86 | DBG(("%s: used=%d / %d\n", __FUNCTION__, |
||
87 | sna->render.vertex_used, sna->render.vertex_size)); |
||
88 | assert(sna->render.vertex_offset == 0); |
||
89 | assert(sna->render.vertex_used); |
||
4501 | Serge | 90 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
4304 | Serge | 91 | |
92 | sna_vertex_wait__locked(&sna->render); |
||
93 | |||
94 | /* Note: we only need dword alignment (currently) */ |
||
95 | |||
4501 | Serge | 96 | hint = CREATE_GTT_MAP; |
97 | |||
4304 | Serge | 98 | bo = sna->render.vbo; |
99 | if (bo) { |
||
100 | for (i = 0; i < sna->render.nvertex_reloc; i++) { |
||
101 | DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, |
||
102 | i, sna->render.vertex_reloc[i])); |
||
103 | |||
104 | sna->kgem.batch[sna->render.vertex_reloc[i]] = |
||
105 | kgem_add_reloc(&sna->kgem, |
||
106 | sna->render.vertex_reloc[i], bo, |
||
107 | I915_GEM_DOMAIN_VERTEX << 16, |
||
108 | 0); |
||
109 | } |
||
110 | |||
111 | assert(!sna->render.active); |
||
112 | sna->render.nvertex_reloc = 0; |
||
113 | sna->render.vertex_used = 0; |
||
114 | sna->render.vertex_index = 0; |
||
115 | sna->render.vbo = NULL; |
||
116 | sna->render.vb_id = 0; |
||
117 | |||
118 | kgem_bo_destroy(&sna->kgem, bo); |
||
4501 | Serge | 119 | hint |= CREATE_CACHED | CREATE_NO_THROTTLE; |
120 | } else { |
||
121 | if (kgem_is_idle(&sna->kgem)) { |
||
122 | sna->render.vertices = sna->render.vertex_data; |
||
123 | sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
||
124 | return 0; |
||
125 | } |
||
4304 | Serge | 126 | } |
127 | |||
128 | |||
129 | size = 256*1024; |
||
130 | assert(!sna->render.active); |
||
131 | sna->render.vertices = NULL; |
||
132 | sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); |
||
133 | while (sna->render.vbo == NULL && size > 16*1024) { |
||
134 | size /= 2; |
||
135 | sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint); |
||
136 | } |
||
137 | if (sna->render.vbo == NULL) |
||
138 | sna->render.vbo = kgem_create_linear(&sna->kgem, |
||
139 | 256*1024, CREATE_GTT_MAP); |
||
140 | if (sna->render.vbo) |
||
141 | sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo); |
||
142 | if (sna->render.vertices == NULL) { |
||
143 | if (sna->render.vbo) { |
||
144 | kgem_bo_destroy(&sna->kgem, sna->render.vbo); |
||
145 | sna->render.vbo = NULL; |
||
146 | } |
||
147 | sna->render.vertices = sna->render.vertex_data; |
||
148 | sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
||
149 | return 0; |
||
150 | } |
||
151 | |||
152 | if (sna->render.vertex_used) { |
||
153 | DBG(("%s: copying initial buffer x %d to handle=%d\n", |
||
154 | __FUNCTION__, |
||
155 | sna->render.vertex_used, |
||
156 | sna->render.vbo->handle)); |
||
157 | assert(sizeof(float)*sna->render.vertex_used <= |
||
158 | __kgem_bo_size(sna->render.vbo)); |
||
159 | memcpy(sna->render.vertices, |
||
160 | sna->render.vertex_data, |
||
161 | sizeof(float)*sna->render.vertex_used); |
||
162 | } |
||
163 | |||
164 | size = __kgem_bo_size(sna->render.vbo)/4; |
||
165 | if (size >= UINT16_MAX) |
||
166 | size = UINT16_MAX - 1; |
||
167 | |||
168 | DBG(("%s: create vbo handle=%d, size=%d\n", |
||
169 | __FUNCTION__, sna->render.vbo->handle, size)); |
||
170 | |||
171 | sna->render.vertex_size = size; |
||
172 | return sna->render.vertex_size - sna->render.vertex_used; |
||
173 | } |
||
174 | |||
175 | void gen4_vertex_close(struct sna *sna) |
||
176 | { |
||
177 | struct kgem_bo *bo, *free_bo = NULL; |
||
178 | unsigned int i, delta = 0; |
||
179 | |||
180 | assert(sna->render.vertex_offset == 0); |
||
181 | if (!sna->render.vb_id) |
||
182 | return; |
||
183 | |||
184 | DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n", |
||
185 | __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0, |
||
186 | sna->render.vb_id, sna->render.nvertex_reloc)); |
||
187 | |||
188 | assert(!sna->render.active); |
||
189 | |||
190 | bo = sna->render.vbo; |
||
191 | if (bo) { |
||
192 | if (sna->render.vertex_size - sna->render.vertex_used < 64) { |
||
193 | DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); |
||
194 | sna->render.vbo = NULL; |
||
195 | sna->render.vertices = sna->render.vertex_data; |
||
196 | sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
||
197 | free_bo = bo; |
||
4501 | Serge | 198 | } else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) { |
4304 | Serge | 199 | DBG(("%s: converting CPU map to GTT\n", __FUNCTION__)); |
200 | sna->render.vertices = |
||
201 | kgem_bo_map__gtt(&sna->kgem, sna->render.vbo); |
||
202 | if (sna->render.vertices == NULL) { |
||
203 | sna->render.vbo = NULL; |
||
204 | sna->render.vertices = sna->render.vertex_data; |
||
205 | sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
||
206 | free_bo = bo; |
||
207 | } |
||
208 | |||
209 | } |
||
210 | } else { |
||
4501 | Serge | 211 | int size; |
212 | |||
213 | size = sna->kgem.nbatch; |
||
214 | size += sna->kgem.batch_size - sna->kgem.surface; |
||
215 | size += sna->render.vertex_used; |
||
216 | |||
217 | if (size <= 1024) { |
||
4304 | Serge | 218 | DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, |
219 | sna->render.vertex_used, sna->kgem.nbatch)); |
||
4501 | Serge | 220 | assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface); |
4304 | Serge | 221 | memcpy(sna->kgem.batch + sna->kgem.nbatch, |
222 | sna->render.vertex_data, |
||
223 | sna->render.vertex_used * 4); |
||
224 | delta = sna->kgem.nbatch * 4; |
||
225 | bo = NULL; |
||
226 | sna->kgem.nbatch += sna->render.vertex_used; |
||
227 | } else { |
||
4501 | Serge | 228 | size = 256 * 1024; |
229 | do { |
||
230 | bo = kgem_create_linear(&sna->kgem, size, |
||
231 | CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED); |
||
232 | } while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used); |
||
233 | |||
234 | sna->render.vertices = NULL; |
||
235 | if (bo) |
||
236 | sna->render.vertices = kgem_bo_map(&sna->kgem, bo); |
||
237 | if (sna->render.vertices != NULL) { |
||
238 | DBG(("%s: new vbo: %d / %d\n", __FUNCTION__, |
||
239 | sna->render.vertex_used, __kgem_bo_size(bo)/4)); |
||
240 | |||
241 | assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo)); |
||
242 | memcpy(sna->render.vertices, |
||
243 | sna->render.vertex_data, |
||
244 | sizeof(float)*sna->render.vertex_used); |
||
245 | |||
246 | size = __kgem_bo_size(bo)/4; |
||
247 | if (size >= UINT16_MAX) |
||
248 | size = UINT16_MAX - 1; |
||
249 | |||
250 | sna->render.vbo = bo; |
||
251 | sna->render.vertex_size = size; |
||
252 | } else { |
||
253 | DBG(("%s: tmp vbo: %d\n", __FUNCTION__, |
||
254 | sna->render.vertex_used)); |
||
255 | |||
256 | if (bo) |
||
257 | kgem_bo_destroy(&sna->kgem, bo); |
||
258 | |||
4304 | Serge | 259 | bo = kgem_create_linear(&sna->kgem, |
260 | 4*sna->render.vertex_used, |
||
261 | CREATE_NO_THROTTLE); |
||
262 | if (bo && !kgem_bo_write(&sna->kgem, bo, |
||
263 | sna->render.vertex_data, |
||
264 | 4*sna->render.vertex_used)) { |
||
265 | kgem_bo_destroy(&sna->kgem, bo); |
||
266 | bo = NULL; |
||
267 | } |
||
4501 | Serge | 268 | |
269 | assert(sna->render.vbo == NULL); |
||
270 | sna->render.vertices = sna->render.vertex_data; |
||
271 | sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); |
||
4304 | Serge | 272 | free_bo = bo; |
273 | } |
||
274 | } |
||
4501 | Serge | 275 | } |
4304 | Serge | 276 | |
277 | assert(sna->render.nvertex_reloc); |
||
278 | for (i = 0; i < sna->render.nvertex_reloc; i++) { |
||
279 | DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, |
||
280 | i, sna->render.vertex_reloc[i])); |
||
281 | |||
282 | sna->kgem.batch[sna->render.vertex_reloc[i]] = |
||
283 | kgem_add_reloc(&sna->kgem, |
||
284 | sna->render.vertex_reloc[i], bo, |
||
285 | I915_GEM_DOMAIN_VERTEX << 16, |
||
286 | delta); |
||
287 | } |
||
288 | sna->render.nvertex_reloc = 0; |
||
289 | sna->render.vb_id = 0; |
||
290 | |||
291 | if (sna->render.vbo == NULL) { |
||
292 | assert(!sna->render.active); |
||
293 | sna->render.vertex_used = 0; |
||
294 | sna->render.vertex_index = 0; |
||
295 | assert(sna->render.vertices == sna->render.vertex_data); |
||
296 | assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); |
||
297 | } |
||
298 | |||
299 | if (free_bo) |
||
300 | kgem_bo_destroy(&sna->kgem, free_bo); |
||
301 | } |
||
302 | |||
303 | /* specialised vertex emission routines */ |
||
304 | |||
/* Shorthands for the vertex emitters; both expect a variable `sna` in scope. */
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, (x), (y)) /* XXX assert(!too_large(x, y)); */
#define OUT_VERTEX_F(v) vertex_emit(sna, (v))
||
307 | |||
308 | force_inline static float |
||
309 | compute_linear(const struct sna_composite_channel *channel, |
||
310 | int16_t x, int16_t y) |
||
311 | { |
||
312 | return ((x+channel->offset[0]) * channel->u.linear.dx + |
||
313 | (y+channel->offset[1]) * channel->u.linear.dy + |
||
314 | channel->u.linear.offset); |
||
315 | } |
||
316 | |||
317 | sse2 inline static void |
||
318 | emit_texcoord(struct sna *sna, |
||
319 | const struct sna_composite_channel *channel, |
||
320 | int16_t x, int16_t y) |
||
321 | { |
||
322 | if (channel->is_solid) { |
||
323 | OUT_VERTEX_F(x); |
||
324 | return; |
||
325 | } |
||
326 | |||
327 | x += channel->offset[0]; |
||
328 | y += channel->offset[1]; |
||
329 | |||
330 | if (channel->is_affine) { |
||
331 | float s, t; |
||
332 | |||
333 | sna_get_transformed_coordinates(x, y, |
||
334 | channel->transform, |
||
335 | &s, &t); |
||
336 | OUT_VERTEX_F(s * channel->scale[0]); |
||
337 | OUT_VERTEX_F(t * channel->scale[1]); |
||
338 | } else { |
||
339 | float s, t, w; |
||
340 | |||
341 | sna_get_transformed_coordinates_3d(x, y, |
||
342 | channel->transform, |
||
343 | &s, &t, &w); |
||
344 | OUT_VERTEX_F(s * channel->scale[0]); |
||
345 | OUT_VERTEX_F(t * channel->scale[1]); |
||
346 | OUT_VERTEX_F(w); |
||
347 | } |
||
348 | } |
||
349 | |||
350 | sse2 force_inline static void |
||
351 | emit_vertex(struct sna *sna, |
||
352 | const struct sna_composite_op *op, |
||
353 | int16_t srcX, int16_t srcY, |
||
354 | int16_t mskX, int16_t mskY, |
||
355 | int16_t dstX, int16_t dstY) |
||
356 | { |
||
357 | OUT_VERTEX(dstX, dstY); |
||
358 | emit_texcoord(sna, &op->src, srcX, srcY); |
||
359 | } |
||
360 | |||
361 | sse2 fastcall static void |
||
362 | emit_primitive(struct sna *sna, |
||
363 | const struct sna_composite_op *op, |
||
364 | const struct sna_composite_rectangles *r) |
||
365 | { |
||
366 | emit_vertex(sna, op, |
||
367 | r->src.x + r->width, r->src.y + r->height, |
||
368 | r->mask.x + r->width, r->mask.y + r->height, |
||
369 | r->dst.x + r->width, r->dst.y + r->height); |
||
370 | emit_vertex(sna, op, |
||
371 | r->src.x, r->src.y + r->height, |
||
372 | r->mask.x, r->mask.y + r->height, |
||
373 | r->dst.x, r->dst.y + r->height); |
||
374 | emit_vertex(sna, op, |
||
375 | r->src.x, r->src.y, |
||
376 | r->mask.x, r->mask.y, |
||
377 | r->dst.x, r->dst.y); |
||
378 | } |
||
379 | |||
380 | sse2 force_inline static void |
||
381 | emit_vertex_mask(struct sna *sna, |
||
382 | const struct sna_composite_op *op, |
||
383 | int16_t srcX, int16_t srcY, |
||
384 | int16_t mskX, int16_t mskY, |
||
385 | int16_t dstX, int16_t dstY) |
||
386 | { |
||
387 | OUT_VERTEX(dstX, dstY); |
||
388 | emit_texcoord(sna, &op->src, srcX, srcY); |
||
389 | emit_texcoord(sna, &op->mask, mskX, mskY); |
||
390 | } |
||
391 | |||
392 | sse2 fastcall static void |
||
393 | emit_primitive_mask(struct sna *sna, |
||
394 | const struct sna_composite_op *op, |
||
395 | const struct sna_composite_rectangles *r) |
||
396 | { |
||
397 | emit_vertex_mask(sna, op, |
||
398 | r->src.x + r->width, r->src.y + r->height, |
||
399 | r->mask.x + r->width, r->mask.y + r->height, |
||
400 | r->dst.x + r->width, r->dst.y + r->height); |
||
401 | emit_vertex_mask(sna, op, |
||
402 | r->src.x, r->src.y + r->height, |
||
403 | r->mask.x, r->mask.y + r->height, |
||
404 | r->dst.x, r->dst.y + r->height); |
||
405 | emit_vertex_mask(sna, op, |
||
406 | r->src.x, r->src.y, |
||
407 | r->mask.x, r->mask.y, |
||
408 | r->dst.x, r->dst.y); |
||
409 | } |
||
410 | |||
411 | sse2 fastcall static void |
||
412 | emit_primitive_solid(struct sna *sna, |
||
413 | const struct sna_composite_op *op, |
||
414 | const struct sna_composite_rectangles *r) |
||
415 | { |
||
416 | float *v; |
||
417 | union { |
||
418 | struct sna_coordinate p; |
||
419 | float f; |
||
420 | } dst; |
||
421 | |||
422 | assert(op->floats_per_rect == 6); |
||
423 | assert((sna->render.vertex_used % 2) == 0); |
||
424 | v = sna->render.vertices + sna->render.vertex_used; |
||
425 | sna->render.vertex_used += 6; |
||
426 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
||
427 | |||
428 | dst.p.x = r->dst.x + r->width; |
||
429 | dst.p.y = r->dst.y + r->height; |
||
430 | v[0] = dst.f; |
||
431 | dst.p.x = r->dst.x; |
||
432 | v[2] = dst.f; |
||
433 | dst.p.y = r->dst.y; |
||
434 | v[4] = dst.f; |
||
435 | |||
436 | v[5] = v[3] = v[1] = .5; |
||
437 | } |
||
438 | |||
439 | sse2 fastcall static void |
||
440 | emit_boxes_solid(const struct sna_composite_op *op, |
||
441 | const BoxRec *box, int nbox, |
||
442 | float *v) |
||
443 | { |
||
444 | do { |
||
445 | union { |
||
446 | struct sna_coordinate p; |
||
447 | float f; |
||
448 | } dst; |
||
449 | |||
450 | dst.p.x = box->x2; |
||
451 | dst.p.y = box->y2; |
||
452 | v[0] = dst.f; |
||
453 | dst.p.x = box->x1; |
||
454 | v[2] = dst.f; |
||
455 | dst.p.y = box->y1; |
||
456 | v[4] = dst.f; |
||
457 | |||
458 | v[5] = v[3] = v[1] = .5; |
||
459 | box++; |
||
460 | v += 6; |
||
461 | } while (--nbox); |
||
462 | } |
||
463 | |||
464 | sse2 fastcall static void |
||
465 | emit_primitive_linear(struct sna *sna, |
||
466 | const struct sna_composite_op *op, |
||
467 | const struct sna_composite_rectangles *r) |
||
468 | { |
||
469 | float *v; |
||
470 | union { |
||
471 | struct sna_coordinate p; |
||
472 | float f; |
||
473 | } dst; |
||
474 | |||
475 | assert(op->floats_per_rect == 6); |
||
476 | assert((sna->render.vertex_used % 2) == 0); |
||
477 | v = sna->render.vertices + sna->render.vertex_used; |
||
478 | sna->render.vertex_used += 6; |
||
479 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
||
480 | |||
481 | dst.p.x = r->dst.x + r->width; |
||
482 | dst.p.y = r->dst.y + r->height; |
||
483 | v[0] = dst.f; |
||
484 | dst.p.x = r->dst.x; |
||
485 | v[2] = dst.f; |
||
486 | dst.p.y = r->dst.y; |
||
487 | v[4] = dst.f; |
||
488 | |||
489 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
490 | v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
491 | v[5] = compute_linear(&op->src, r->src.x, r->src.y); |
||
492 | } |
||
493 | |||
494 | sse2 fastcall static void |
||
495 | emit_boxes_linear(const struct sna_composite_op *op, |
||
496 | const BoxRec *box, int nbox, |
||
497 | float *v) |
||
498 | { |
||
499 | union { |
||
500 | struct sna_coordinate p; |
||
501 | float f; |
||
502 | } dst; |
||
503 | |||
504 | do { |
||
505 | dst.p.x = box->x2; |
||
506 | dst.p.y = box->y2; |
||
507 | v[0] = dst.f; |
||
508 | dst.p.x = box->x1; |
||
509 | v[2] = dst.f; |
||
510 | dst.p.y = box->y1; |
||
511 | v[4] = dst.f; |
||
512 | |||
513 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
514 | v[3] = compute_linear(&op->src, box->x1, box->y2); |
||
515 | v[5] = compute_linear(&op->src, box->x1, box->y1); |
||
516 | |||
517 | v += 6; |
||
518 | box++; |
||
519 | } while (--nbox); |
||
520 | } |
||
521 | |||
522 | sse2 fastcall static void |
||
523 | emit_primitive_identity_source(struct sna *sna, |
||
524 | const struct sna_composite_op *op, |
||
525 | const struct sna_composite_rectangles *r) |
||
526 | { |
||
527 | union { |
||
528 | struct sna_coordinate p; |
||
529 | float f; |
||
530 | } dst; |
||
531 | float *v; |
||
532 | |||
533 | assert(op->floats_per_rect == 9); |
||
534 | assert((sna->render.vertex_used % 3) == 0); |
||
535 | v = sna->render.vertices + sna->render.vertex_used; |
||
536 | sna->render.vertex_used += 9; |
||
537 | |||
538 | dst.p.x = r->dst.x + r->width; |
||
539 | dst.p.y = r->dst.y + r->height; |
||
540 | v[0] = dst.f; |
||
541 | dst.p.x = r->dst.x; |
||
542 | v[3] = dst.f; |
||
543 | dst.p.y = r->dst.y; |
||
544 | v[6] = dst.f; |
||
545 | |||
546 | v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; |
||
547 | v[1] = v[4] + r->width * op->src.scale[0]; |
||
548 | |||
549 | v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; |
||
550 | v[5] = v[2] = v[8] + r->height * op->src.scale[1]; |
||
551 | } |
||
552 | |||
553 | sse2 fastcall static void |
||
554 | emit_boxes_identity_source(const struct sna_composite_op *op, |
||
555 | const BoxRec *box, int nbox, |
||
556 | float *v) |
||
557 | { |
||
558 | do { |
||
559 | union { |
||
560 | struct sna_coordinate p; |
||
561 | float f; |
||
562 | } dst; |
||
563 | |||
564 | dst.p.x = box->x2; |
||
565 | dst.p.y = box->y2; |
||
566 | v[0] = dst.f; |
||
567 | dst.p.x = box->x1; |
||
568 | v[3] = dst.f; |
||
569 | dst.p.y = box->y1; |
||
570 | v[6] = dst.f; |
||
571 | |||
572 | v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; |
||
573 | v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; |
||
574 | |||
575 | v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; |
||
576 | v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; |
||
577 | |||
578 | v += 9; |
||
579 | box++; |
||
580 | } while (--nbox); |
||
581 | } |
||
582 | |||
583 | sse2 fastcall static void |
||
584 | emit_primitive_simple_source(struct sna *sna, |
||
585 | const struct sna_composite_op *op, |
||
586 | const struct sna_composite_rectangles *r) |
||
587 | { |
||
588 | float *v; |
||
589 | union { |
||
590 | struct sna_coordinate p; |
||
591 | float f; |
||
592 | } dst; |
||
593 | |||
594 | float xx = op->src.transform->matrix[0][0]; |
||
595 | float x0 = op->src.transform->matrix[0][2]; |
||
596 | float yy = op->src.transform->matrix[1][1]; |
||
597 | float y0 = op->src.transform->matrix[1][2]; |
||
598 | float sx = op->src.scale[0]; |
||
599 | float sy = op->src.scale[1]; |
||
600 | int16_t tx = op->src.offset[0]; |
||
601 | int16_t ty = op->src.offset[1]; |
||
602 | |||
603 | assert(op->floats_per_rect == 9); |
||
604 | assert((sna->render.vertex_used % 3) == 0); |
||
605 | v = sna->render.vertices + sna->render.vertex_used; |
||
606 | sna->render.vertex_used += 3*3; |
||
607 | |||
608 | dst.p.x = r->dst.x + r->width; |
||
609 | dst.p.y = r->dst.y + r->height; |
||
610 | v[0] = dst.f; |
||
611 | v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; |
||
612 | v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; |
||
613 | |||
614 | dst.p.x = r->dst.x; |
||
615 | v[3] = dst.f; |
||
616 | v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; |
||
617 | |||
618 | dst.p.y = r->dst.y; |
||
619 | v[6] = dst.f; |
||
620 | v[8] = ((r->src.y + ty) * yy + y0) * sy; |
||
621 | } |
||
622 | |||
623 | sse2 fastcall static void |
||
624 | emit_boxes_simple_source(const struct sna_composite_op *op, |
||
625 | const BoxRec *box, int nbox, |
||
626 | float *v) |
||
627 | { |
||
628 | float xx = op->src.transform->matrix[0][0]; |
||
629 | float x0 = op->src.transform->matrix[0][2]; |
||
630 | float yy = op->src.transform->matrix[1][1]; |
||
631 | float y0 = op->src.transform->matrix[1][2]; |
||
632 | float sx = op->src.scale[0]; |
||
633 | float sy = op->src.scale[1]; |
||
634 | int16_t tx = op->src.offset[0]; |
||
635 | int16_t ty = op->src.offset[1]; |
||
636 | |||
637 | do { |
||
638 | union { |
||
639 | struct sna_coordinate p; |
||
640 | float f; |
||
641 | } dst; |
||
642 | |||
643 | dst.p.x = box->x2; |
||
644 | dst.p.y = box->y2; |
||
645 | v[0] = dst.f; |
||
646 | v[1] = ((box->x2 + tx) * xx + x0) * sx; |
||
647 | v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; |
||
648 | |||
649 | dst.p.x = box->x1; |
||
650 | v[3] = dst.f; |
||
651 | v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; |
||
652 | |||
653 | dst.p.y = box->y1; |
||
654 | v[6] = dst.f; |
||
655 | v[8] = ((box->y1 + ty) * yy + y0) * sy; |
||
656 | |||
657 | v += 9; |
||
658 | box++; |
||
659 | } while (--nbox); |
||
660 | } |
||
661 | |||
662 | sse2 fastcall static void |
||
663 | emit_primitive_affine_source(struct sna *sna, |
||
664 | const struct sna_composite_op *op, |
||
665 | const struct sna_composite_rectangles *r) |
||
666 | { |
||
667 | union { |
||
668 | struct sna_coordinate p; |
||
669 | float f; |
||
670 | } dst; |
||
671 | float *v; |
||
672 | |||
673 | assert(op->floats_per_rect == 9); |
||
674 | assert((sna->render.vertex_used % 3) == 0); |
||
675 | v = sna->render.vertices + sna->render.vertex_used; |
||
676 | sna->render.vertex_used += 9; |
||
677 | |||
678 | dst.p.x = r->dst.x + r->width; |
||
679 | dst.p.y = r->dst.y + r->height; |
||
680 | v[0] = dst.f; |
||
681 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, |
||
682 | op->src.offset[1] + r->src.y + r->height, |
||
683 | op->src.transform, op->src.scale, |
||
684 | &v[1], &v[2]); |
||
685 | |||
686 | dst.p.x = r->dst.x; |
||
687 | v[3] = dst.f; |
||
688 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, |
||
689 | op->src.offset[1] + r->src.y + r->height, |
||
690 | op->src.transform, op->src.scale, |
||
691 | &v[4], &v[5]); |
||
692 | |||
693 | dst.p.y = r->dst.y; |
||
694 | v[6] = dst.f; |
||
695 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, |
||
696 | op->src.offset[1] + r->src.y, |
||
697 | op->src.transform, op->src.scale, |
||
698 | &v[7], &v[8]); |
||
699 | } |
||
700 | |||
701 | sse2 fastcall static void |
||
702 | emit_boxes_affine_source(const struct sna_composite_op *op, |
||
703 | const BoxRec *box, int nbox, |
||
704 | float *v) |
||
705 | { |
||
706 | do { |
||
707 | union { |
||
708 | struct sna_coordinate p; |
||
709 | float f; |
||
710 | } dst; |
||
711 | |||
712 | dst.p.x = box->x2; |
||
713 | dst.p.y = box->y2; |
||
714 | v[0] = dst.f; |
||
715 | _sna_get_transformed_scaled(op->src.offset[0] + box->x2, |
||
716 | op->src.offset[1] + box->y2, |
||
717 | op->src.transform, op->src.scale, |
||
718 | &v[1], &v[2]); |
||
719 | |||
720 | dst.p.x = box->x1; |
||
721 | v[3] = dst.f; |
||
722 | _sna_get_transformed_scaled(op->src.offset[0] + box->x1, |
||
723 | op->src.offset[1] + box->y2, |
||
724 | op->src.transform, op->src.scale, |
||
725 | &v[4], &v[5]); |
||
726 | |||
727 | dst.p.y = box->y1; |
||
728 | v[6] = dst.f; |
||
729 | _sna_get_transformed_scaled(op->src.offset[0] + box->x1, |
||
730 | op->src.offset[1] + box->y1, |
||
731 | op->src.transform, op->src.scale, |
||
732 | &v[7], &v[8]); |
||
733 | box++; |
||
734 | v += 9; |
||
735 | } while (--nbox); |
||
736 | } |
||
737 | |||
738 | sse2 fastcall static void |
||
739 | emit_primitive_identity_mask(struct sna *sna, |
||
740 | const struct sna_composite_op *op, |
||
741 | const struct sna_composite_rectangles *r) |
||
742 | { |
||
743 | union { |
||
744 | struct sna_coordinate p; |
||
745 | float f; |
||
746 | } dst; |
||
747 | float msk_x, msk_y; |
||
748 | float w, h; |
||
749 | float *v; |
||
750 | |||
751 | msk_x = r->mask.x + op->mask.offset[0]; |
||
752 | msk_y = r->mask.y + op->mask.offset[1]; |
||
753 | w = r->width; |
||
754 | h = r->height; |
||
755 | |||
756 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
757 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
758 | |||
759 | assert(op->floats_per_rect == 12); |
||
760 | assert((sna->render.vertex_used % 4) == 0); |
||
761 | v = sna->render.vertices + sna->render.vertex_used; |
||
762 | sna->render.vertex_used += 12; |
||
763 | |||
764 | dst.p.x = r->dst.x + r->width; |
||
765 | dst.p.y = r->dst.y + r->height; |
||
766 | v[0] = dst.f; |
||
767 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
768 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
769 | |||
770 | dst.p.x = r->dst.x; |
||
771 | v[4] = dst.f; |
||
772 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
773 | |||
774 | dst.p.y = r->dst.y; |
||
775 | v[8] = dst.f; |
||
776 | v[11] = msk_y * op->mask.scale[1]; |
||
777 | |||
778 | v[9] = v[5] = v[1] = .5; |
||
779 | } |
||
780 | |||
781 | sse2 fastcall static void |
||
782 | emit_boxes_identity_mask(const struct sna_composite_op *op, |
||
783 | const BoxRec *box, int nbox, |
||
784 | float *v) |
||
785 | { |
||
786 | float msk_x = op->mask.offset[0]; |
||
787 | float msk_y = op->mask.offset[1]; |
||
788 | |||
789 | do { |
||
790 | union { |
||
791 | struct sna_coordinate p; |
||
792 | float f; |
||
793 | } dst; |
||
794 | |||
795 | dst.p.x = box->x2; |
||
796 | dst.p.y = box->y2; |
||
797 | v[0] = dst.f; |
||
798 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
799 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
800 | |||
801 | dst.p.x = box->x1; |
||
802 | v[4] = dst.f; |
||
803 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
804 | |||
805 | dst.p.y = box->y1; |
||
806 | v[8] = dst.f; |
||
807 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
808 | |||
809 | v[9] = v[5] = v[1] = .5; |
||
810 | v += 12; |
||
811 | box++; |
||
812 | } while (--nbox); |
||
813 | } |
||
814 | |||
815 | sse2 fastcall static void |
||
816 | emit_primitive_linear_identity_mask(struct sna *sna, |
||
817 | const struct sna_composite_op *op, |
||
818 | const struct sna_composite_rectangles *r) |
||
819 | { |
||
820 | union { |
||
821 | struct sna_coordinate p; |
||
822 | float f; |
||
823 | } dst; |
||
824 | float msk_x, msk_y; |
||
825 | float w, h; |
||
826 | float *v; |
||
827 | |||
828 | msk_x = r->mask.x + op->mask.offset[0]; |
||
829 | msk_y = r->mask.y + op->mask.offset[1]; |
||
830 | w = r->width; |
||
831 | h = r->height; |
||
832 | |||
833 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
834 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
835 | |||
836 | assert(op->floats_per_rect == 12); |
||
837 | assert((sna->render.vertex_used % 4) == 0); |
||
838 | v = sna->render.vertices + sna->render.vertex_used; |
||
839 | sna->render.vertex_used += 12; |
||
840 | |||
841 | dst.p.x = r->dst.x + r->width; |
||
842 | dst.p.y = r->dst.y + r->height; |
||
843 | v[0] = dst.f; |
||
844 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
845 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
846 | |||
847 | dst.p.x = r->dst.x; |
||
848 | v[4] = dst.f; |
||
849 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
850 | |||
851 | dst.p.y = r->dst.y; |
||
852 | v[8] = dst.f; |
||
853 | v[11] = msk_y * op->mask.scale[1]; |
||
854 | |||
855 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
856 | v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
857 | v[9] = compute_linear(&op->src, r->src.x, r->src.y); |
||
858 | } |
||
859 | |||
860 | sse2 fastcall static void |
||
861 | emit_boxes_linear_identity_mask(const struct sna_composite_op *op, |
||
862 | const BoxRec *box, int nbox, |
||
863 | float *v) |
||
864 | { |
||
865 | float msk_x = op->mask.offset[0]; |
||
866 | float msk_y = op->mask.offset[1]; |
||
867 | |||
868 | do { |
||
869 | union { |
||
870 | struct sna_coordinate p; |
||
871 | float f; |
||
872 | } dst; |
||
873 | |||
874 | dst.p.x = box->x2; |
||
875 | dst.p.y = box->y2; |
||
876 | v[0] = dst.f; |
||
877 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
878 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
879 | |||
880 | dst.p.x = box->x1; |
||
881 | v[4] = dst.f; |
||
882 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
883 | |||
884 | dst.p.y = box->y1; |
||
885 | v[8] = dst.f; |
||
886 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
887 | |||
888 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
889 | v[5] = compute_linear(&op->src, box->x1, box->y2); |
||
890 | v[9] = compute_linear(&op->src, box->x1, box->y1); |
||
891 | |||
892 | v += 12; |
||
893 | box++; |
||
894 | } while (--nbox); |
||
895 | } |
||
896 | |||
897 | sse2 fastcall static void |
||
898 | emit_primitive_identity_source_mask(struct sna *sna, |
||
899 | const struct sna_composite_op *op, |
||
900 | const struct sna_composite_rectangles *r) |
||
901 | { |
||
902 | union { |
||
903 | struct sna_coordinate p; |
||
904 | float f; |
||
905 | } dst; |
||
906 | float src_x, src_y; |
||
907 | float msk_x, msk_y; |
||
908 | float w, h; |
||
909 | float *v; |
||
910 | |||
911 | src_x = r->src.x + op->src.offset[0]; |
||
912 | src_y = r->src.y + op->src.offset[1]; |
||
913 | msk_x = r->mask.x + op->mask.offset[0]; |
||
914 | msk_y = r->mask.y + op->mask.offset[1]; |
||
915 | w = r->width; |
||
916 | h = r->height; |
||
917 | |||
918 | assert(op->floats_per_rect == 15); |
||
919 | assert((sna->render.vertex_used % 5) == 0); |
||
920 | v = sna->render.vertices + sna->render.vertex_used; |
||
921 | sna->render.vertex_used += 15; |
||
922 | |||
923 | dst.p.x = r->dst.x + r->width; |
||
924 | dst.p.y = r->dst.y + r->height; |
||
925 | v[0] = dst.f; |
||
926 | v[1] = (src_x + w) * op->src.scale[0]; |
||
927 | v[2] = (src_y + h) * op->src.scale[1]; |
||
928 | v[3] = (msk_x + w) * op->mask.scale[0]; |
||
929 | v[4] = (msk_y + h) * op->mask.scale[1]; |
||
930 | |||
931 | dst.p.x = r->dst.x; |
||
932 | v[5] = dst.f; |
||
933 | v[6] = src_x * op->src.scale[0]; |
||
934 | v[7] = v[2]; |
||
935 | v[8] = msk_x * op->mask.scale[0]; |
||
936 | v[9] = v[4]; |
||
937 | |||
938 | dst.p.y = r->dst.y; |
||
939 | v[10] = dst.f; |
||
940 | v[11] = v[6]; |
||
941 | v[12] = src_y * op->src.scale[1]; |
||
942 | v[13] = v[8]; |
||
943 | v[14] = msk_y * op->mask.scale[1]; |
||
944 | } |
||
945 | |||
946 | sse2 fastcall static void |
||
947 | emit_primitive_simple_source_identity(struct sna *sna, |
||
948 | const struct sna_composite_op *op, |
||
949 | const struct sna_composite_rectangles *r) |
||
950 | { |
||
951 | float *v; |
||
952 | union { |
||
953 | struct sna_coordinate p; |
||
954 | float f; |
||
955 | } dst; |
||
956 | |||
957 | float xx = op->src.transform->matrix[0][0]; |
||
958 | float x0 = op->src.transform->matrix[0][2]; |
||
959 | float yy = op->src.transform->matrix[1][1]; |
||
960 | float y0 = op->src.transform->matrix[1][2]; |
||
961 | float sx = op->src.scale[0]; |
||
962 | float sy = op->src.scale[1]; |
||
963 | int16_t tx = op->src.offset[0]; |
||
964 | int16_t ty = op->src.offset[1]; |
||
965 | float msk_x = r->mask.x + op->mask.offset[0]; |
||
966 | float msk_y = r->mask.y + op->mask.offset[1]; |
||
967 | float w = r->width, h = r->height; |
||
968 | |||
969 | assert(op->floats_per_rect == 15); |
||
970 | assert((sna->render.vertex_used % 5) == 0); |
||
971 | v = sna->render.vertices + sna->render.vertex_used; |
||
972 | sna->render.vertex_used += 3*5; |
||
973 | |||
974 | dst.p.x = r->dst.x + r->width; |
||
975 | dst.p.y = r->dst.y + r->height; |
||
976 | v[0] = dst.f; |
||
977 | v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; |
||
978 | v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; |
||
979 | v[3] = (msk_x + w) * op->mask.scale[0]; |
||
980 | v[4] = (msk_y + h) * op->mask.scale[1]; |
||
981 | |||
982 | dst.p.x = r->dst.x; |
||
983 | v[5] = dst.f; |
||
984 | v[6] = ((r->src.x + tx) * xx + x0) * sx; |
||
985 | v[7] = v[2]; |
||
986 | v[8] = msk_x * op->mask.scale[0]; |
||
987 | v[9] = v[4]; |
||
988 | |||
989 | dst.p.y = r->dst.y; |
||
990 | v[10] = dst.f; |
||
991 | v[11] = v[6]; |
||
992 | v[12] = ((r->src.y + ty) * yy + y0) * sy; |
||
993 | v[13] = v[8]; |
||
994 | v[14] = msk_y * op->mask.scale[1]; |
||
995 | } |
||
996 | |||
997 | sse2 fastcall static void |
||
998 | emit_primitive_affine_source_identity(struct sna *sna, |
||
999 | const struct sna_composite_op *op, |
||
1000 | const struct sna_composite_rectangles *r) |
||
1001 | { |
||
1002 | float *v; |
||
1003 | union { |
||
1004 | struct sna_coordinate p; |
||
1005 | float f; |
||
1006 | } dst; |
||
1007 | float msk_x = r->mask.x + op->mask.offset[0]; |
||
1008 | float msk_y = r->mask.y + op->mask.offset[1]; |
||
1009 | float w = r->width, h = r->height; |
||
1010 | |||
1011 | assert(op->floats_per_rect == 15); |
||
1012 | assert((sna->render.vertex_used % 5) == 0); |
||
1013 | v = sna->render.vertices + sna->render.vertex_used; |
||
1014 | sna->render.vertex_used += 3*5; |
||
1015 | |||
1016 | dst.p.x = r->dst.x + r->width; |
||
1017 | dst.p.y = r->dst.y + r->height; |
||
1018 | v[0] = dst.f; |
||
1019 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width, |
||
1020 | op->src.offset[1] + r->src.y + r->height, |
||
1021 | op->src.transform, op->src.scale, |
||
1022 | &v[1], &v[2]); |
||
1023 | v[3] = (msk_x + w) * op->mask.scale[0]; |
||
1024 | v[4] = (msk_y + h) * op->mask.scale[1]; |
||
1025 | |||
1026 | dst.p.x = r->dst.x; |
||
1027 | v[5] = dst.f; |
||
1028 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, |
||
1029 | op->src.offset[1] + r->src.y + r->height, |
||
1030 | op->src.transform, op->src.scale, |
||
1031 | &v[6], &v[7]); |
||
1032 | v[8] = msk_x * op->mask.scale[0]; |
||
1033 | v[9] = v[4]; |
||
1034 | |||
1035 | dst.p.y = r->dst.y; |
||
1036 | v[10] = dst.f; |
||
1037 | _sna_get_transformed_scaled(op->src.offset[0] + r->src.x, |
||
1038 | op->src.offset[1] + r->src.y, |
||
1039 | op->src.transform, op->src.scale, |
||
1040 | &v[11], &v[12]); |
||
1041 | v[13] = v[8]; |
||
1042 | v[14] = msk_y * op->mask.scale[1]; |
||
1043 | } |
||
1044 | |||
1045 | /* SSE4_2 */ |
||
1046 | #if defined(sse4_2) |
||
1047 | |||
1048 | sse4_2 fastcall static void |
||
1049 | emit_primitive_linear__sse4_2(struct sna *sna, |
||
1050 | const struct sna_composite_op *op, |
||
1051 | const struct sna_composite_rectangles *r) |
||
1052 | { |
||
1053 | float *v; |
||
1054 | union { |
||
1055 | struct sna_coordinate p; |
||
1056 | float f; |
||
1057 | } dst; |
||
1058 | |||
1059 | assert(op->floats_per_rect == 6); |
||
1060 | assert((sna->render.vertex_used % 2) == 0); |
||
1061 | v = sna->render.vertices + sna->render.vertex_used; |
||
1062 | sna->render.vertex_used += 6; |
||
1063 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
||
1064 | |||
1065 | dst.p.x = r->dst.x + r->width; |
||
1066 | dst.p.y = r->dst.y + r->height; |
||
1067 | v[0] = dst.f; |
||
1068 | dst.p.x = r->dst.x; |
||
1069 | v[2] = dst.f; |
||
1070 | dst.p.y = r->dst.y; |
||
1071 | v[4] = dst.f; |
||
1072 | |||
1073 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
1074 | v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
1075 | v[5] = compute_linear(&op->src, r->src.x, r->src.y); |
||
1076 | } |
||
1077 | |||
1078 | sse4_2 fastcall static void |
||
1079 | emit_boxes_linear__sse4_2(const struct sna_composite_op *op, |
||
1080 | const BoxRec *box, int nbox, |
||
1081 | float *v) |
||
1082 | { |
||
1083 | union { |
||
1084 | struct sna_coordinate p; |
||
1085 | float f; |
||
1086 | } dst; |
||
1087 | |||
1088 | do { |
||
1089 | dst.p.x = box->x2; |
||
1090 | dst.p.y = box->y2; |
||
1091 | v[0] = dst.f; |
||
1092 | dst.p.x = box->x1; |
||
1093 | v[2] = dst.f; |
||
1094 | dst.p.y = box->y1; |
||
1095 | v[4] = dst.f; |
||
1096 | |||
1097 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
1098 | v[3] = compute_linear(&op->src, box->x1, box->y2); |
||
1099 | v[5] = compute_linear(&op->src, box->x1, box->y1); |
||
1100 | |||
1101 | v += 6; |
||
1102 | box++; |
||
1103 | } while (--nbox); |
||
1104 | } |
||
1105 | |||
1106 | sse4_2 fastcall static void |
||
1107 | emit_primitive_identity_source__sse4_2(struct sna *sna, |
||
1108 | const struct sna_composite_op *op, |
||
1109 | const struct sna_composite_rectangles *r) |
||
1110 | { |
||
1111 | union { |
||
1112 | struct sna_coordinate p; |
||
1113 | float f; |
||
1114 | } dst; |
||
1115 | float *v; |
||
1116 | |||
1117 | assert(op->floats_per_rect == 9); |
||
1118 | assert((sna->render.vertex_used % 3) == 0); |
||
1119 | v = sna->render.vertices + sna->render.vertex_used; |
||
1120 | sna->render.vertex_used += 9; |
||
1121 | |||
1122 | dst.p.x = r->dst.x + r->width; |
||
1123 | dst.p.y = r->dst.y + r->height; |
||
1124 | v[0] = dst.f; |
||
1125 | dst.p.x = r->dst.x; |
||
1126 | v[3] = dst.f; |
||
1127 | dst.p.y = r->dst.y; |
||
1128 | v[6] = dst.f; |
||
1129 | |||
1130 | v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; |
||
1131 | v[1] = v[4] + r->width * op->src.scale[0]; |
||
1132 | |||
1133 | v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; |
||
1134 | v[5] = v[2] = v[8] + r->height * op->src.scale[1]; |
||
1135 | } |
||
1136 | |||
1137 | sse4_2 fastcall static void |
||
1138 | emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, |
||
1139 | const BoxRec *box, int nbox, |
||
1140 | float *v) |
||
1141 | { |
||
1142 | do { |
||
1143 | union { |
||
1144 | struct sna_coordinate p; |
||
1145 | float f; |
||
1146 | } dst; |
||
1147 | |||
1148 | dst.p.x = box->x2; |
||
1149 | dst.p.y = box->y2; |
||
1150 | v[0] = dst.f; |
||
1151 | dst.p.x = box->x1; |
||
1152 | v[3] = dst.f; |
||
1153 | dst.p.y = box->y1; |
||
1154 | v[6] = dst.f; |
||
1155 | |||
1156 | v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; |
||
1157 | v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; |
||
1158 | |||
1159 | v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; |
||
1160 | v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; |
||
1161 | |||
1162 | v += 9; |
||
1163 | box++; |
||
1164 | } while (--nbox); |
||
1165 | } |
||
1166 | |||
1167 | sse4_2 fastcall static void |
||
1168 | emit_primitive_simple_source__sse4_2(struct sna *sna, |
||
1169 | const struct sna_composite_op *op, |
||
1170 | const struct sna_composite_rectangles *r) |
||
1171 | { |
||
1172 | float *v; |
||
1173 | union { |
||
1174 | struct sna_coordinate p; |
||
1175 | float f; |
||
1176 | } dst; |
||
1177 | |||
1178 | float xx = op->src.transform->matrix[0][0]; |
||
1179 | float x0 = op->src.transform->matrix[0][2]; |
||
1180 | float yy = op->src.transform->matrix[1][1]; |
||
1181 | float y0 = op->src.transform->matrix[1][2]; |
||
1182 | float sx = op->src.scale[0]; |
||
1183 | float sy = op->src.scale[1]; |
||
1184 | int16_t tx = op->src.offset[0]; |
||
1185 | int16_t ty = op->src.offset[1]; |
||
1186 | |||
1187 | assert(op->floats_per_rect == 9); |
||
1188 | assert((sna->render.vertex_used % 3) == 0); |
||
1189 | v = sna->render.vertices + sna->render.vertex_used; |
||
1190 | sna->render.vertex_used += 3*3; |
||
1191 | |||
1192 | dst.p.x = r->dst.x + r->width; |
||
1193 | dst.p.y = r->dst.y + r->height; |
||
1194 | v[0] = dst.f; |
||
1195 | v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; |
||
1196 | v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; |
||
1197 | |||
1198 | dst.p.x = r->dst.x; |
||
1199 | v[3] = dst.f; |
||
1200 | v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; |
||
1201 | |||
1202 | dst.p.y = r->dst.y; |
||
1203 | v[6] = dst.f; |
||
1204 | v[8] = ((r->src.y + ty) * yy + y0) * sy; |
||
1205 | } |
||
1206 | |||
1207 | sse4_2 fastcall static void |
||
1208 | emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, |
||
1209 | const BoxRec *box, int nbox, |
||
1210 | float *v) |
||
1211 | { |
||
1212 | float xx = op->src.transform->matrix[0][0]; |
||
1213 | float x0 = op->src.transform->matrix[0][2]; |
||
1214 | float yy = op->src.transform->matrix[1][1]; |
||
1215 | float y0 = op->src.transform->matrix[1][2]; |
||
1216 | float sx = op->src.scale[0]; |
||
1217 | float sy = op->src.scale[1]; |
||
1218 | int16_t tx = op->src.offset[0]; |
||
1219 | int16_t ty = op->src.offset[1]; |
||
1220 | |||
1221 | do { |
||
1222 | union { |
||
1223 | struct sna_coordinate p; |
||
1224 | float f; |
||
1225 | } dst; |
||
1226 | |||
1227 | dst.p.x = box->x2; |
||
1228 | dst.p.y = box->y2; |
||
1229 | v[0] = dst.f; |
||
1230 | v[1] = ((box->x2 + tx) * xx + x0) * sx; |
||
1231 | v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; |
||
1232 | |||
1233 | dst.p.x = box->x1; |
||
1234 | v[3] = dst.f; |
||
1235 | v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; |
||
1236 | |||
1237 | dst.p.y = box->y1; |
||
1238 | v[6] = dst.f; |
||
1239 | v[8] = ((box->y1 + ty) * yy + y0) * sy; |
||
1240 | |||
1241 | v += 9; |
||
1242 | box++; |
||
1243 | } while (--nbox); |
||
1244 | } |
||
1245 | |||
1246 | sse4_2 fastcall static void |
||
1247 | emit_primitive_identity_mask__sse4_2(struct sna *sna, |
||
1248 | const struct sna_composite_op *op, |
||
1249 | const struct sna_composite_rectangles *r) |
||
1250 | { |
||
1251 | union { |
||
1252 | struct sna_coordinate p; |
||
1253 | float f; |
||
1254 | } dst; |
||
1255 | float msk_x, msk_y; |
||
1256 | float w, h; |
||
1257 | float *v; |
||
1258 | |||
1259 | msk_x = r->mask.x + op->mask.offset[0]; |
||
1260 | msk_y = r->mask.y + op->mask.offset[1]; |
||
1261 | w = r->width; |
||
1262 | h = r->height; |
||
1263 | |||
1264 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
1265 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
1266 | |||
1267 | assert(op->floats_per_rect == 12); |
||
1268 | assert((sna->render.vertex_used % 4) == 0); |
||
1269 | v = sna->render.vertices + sna->render.vertex_used; |
||
1270 | sna->render.vertex_used += 12; |
||
1271 | |||
1272 | dst.p.x = r->dst.x + r->width; |
||
1273 | dst.p.y = r->dst.y + r->height; |
||
1274 | v[0] = dst.f; |
||
1275 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
1276 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
1277 | |||
1278 | dst.p.x = r->dst.x; |
||
1279 | v[4] = dst.f; |
||
1280 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
1281 | |||
1282 | dst.p.y = r->dst.y; |
||
1283 | v[8] = dst.f; |
||
1284 | v[11] = msk_y * op->mask.scale[1]; |
||
1285 | |||
1286 | v[9] = v[5] = v[1] = .5; |
||
1287 | } |
||
1288 | |||
1289 | sse4_2 fastcall static void |
||
1290 | emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op, |
||
1291 | const BoxRec *box, int nbox, |
||
1292 | float *v) |
||
1293 | { |
||
1294 | float msk_x = op->mask.offset[0]; |
||
1295 | float msk_y = op->mask.offset[1]; |
||
1296 | |||
1297 | do { |
||
1298 | union { |
||
1299 | struct sna_coordinate p; |
||
1300 | float f; |
||
1301 | } dst; |
||
1302 | |||
1303 | dst.p.x = box->x2; |
||
1304 | dst.p.y = box->y2; |
||
1305 | v[0] = dst.f; |
||
1306 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
1307 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
1308 | |||
1309 | dst.p.x = box->x1; |
||
1310 | v[4] = dst.f; |
||
1311 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
1312 | |||
1313 | dst.p.y = box->y1; |
||
1314 | v[8] = dst.f; |
||
1315 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
1316 | |||
1317 | v[9] = v[5] = v[1] = .5; |
||
1318 | v += 12; |
||
1319 | box++; |
||
1320 | } while (--nbox); |
||
1321 | } |
||
1322 | |||
1323 | sse4_2 fastcall static void |
||
1324 | emit_primitive_linear_identity_mask__sse4_2(struct sna *sna, |
||
1325 | const struct sna_composite_op *op, |
||
1326 | const struct sna_composite_rectangles *r) |
||
1327 | { |
||
1328 | union { |
||
1329 | struct sna_coordinate p; |
||
1330 | float f; |
||
1331 | } dst; |
||
1332 | float msk_x, msk_y; |
||
1333 | float w, h; |
||
1334 | float *v; |
||
1335 | |||
1336 | msk_x = r->mask.x + op->mask.offset[0]; |
||
1337 | msk_y = r->mask.y + op->mask.offset[1]; |
||
1338 | w = r->width; |
||
1339 | h = r->height; |
||
1340 | |||
1341 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
1342 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
1343 | |||
1344 | assert(op->floats_per_rect == 12); |
||
1345 | assert((sna->render.vertex_used % 4) == 0); |
||
1346 | v = sna->render.vertices + sna->render.vertex_used; |
||
1347 | sna->render.vertex_used += 12; |
||
1348 | |||
1349 | dst.p.x = r->dst.x + r->width; |
||
1350 | dst.p.y = r->dst.y + r->height; |
||
1351 | v[0] = dst.f; |
||
1352 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
1353 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
1354 | |||
1355 | dst.p.x = r->dst.x; |
||
1356 | v[4] = dst.f; |
||
1357 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
1358 | |||
1359 | dst.p.y = r->dst.y; |
||
1360 | v[8] = dst.f; |
||
1361 | v[11] = msk_y * op->mask.scale[1]; |
||
1362 | |||
1363 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
1364 | v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
1365 | v[9] = compute_linear(&op->src, r->src.x, r->src.y); |
||
1366 | } |
||
1367 | |||
1368 | sse4_2 fastcall static void |
||
1369 | emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op, |
||
1370 | const BoxRec *box, int nbox, |
||
1371 | float *v) |
||
1372 | { |
||
1373 | float msk_x = op->mask.offset[0]; |
||
1374 | float msk_y = op->mask.offset[1]; |
||
1375 | |||
1376 | do { |
||
1377 | union { |
||
1378 | struct sna_coordinate p; |
||
1379 | float f; |
||
1380 | } dst; |
||
1381 | |||
1382 | dst.p.x = box->x2; |
||
1383 | dst.p.y = box->y2; |
||
1384 | v[0] = dst.f; |
||
1385 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
1386 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
1387 | |||
1388 | dst.p.x = box->x1; |
||
1389 | v[4] = dst.f; |
||
1390 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
1391 | |||
1392 | dst.p.y = box->y1; |
||
1393 | v[8] = dst.f; |
||
1394 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
1395 | |||
1396 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
1397 | v[5] = compute_linear(&op->src, box->x1, box->y2); |
||
1398 | v[9] = compute_linear(&op->src, box->x1, box->y1); |
||
1399 | |||
1400 | v += 12; |
||
1401 | box++; |
||
1402 | } while (--nbox); |
||
1403 | } |
||
1404 | |||
1405 | #endif |
||
1406 | |||
1407 | /* AVX2 */ |
||
1408 | #if defined(avx2) |
||
1409 | |||
1410 | avx2 fastcall static void |
||
1411 | emit_primitive_linear__avx2(struct sna *sna, |
||
1412 | const struct sna_composite_op *op, |
||
1413 | const struct sna_composite_rectangles *r) |
||
1414 | { |
||
1415 | float *v; |
||
1416 | union { |
||
1417 | struct sna_coordinate p; |
||
1418 | float f; |
||
1419 | } dst; |
||
1420 | |||
1421 | assert(op->floats_per_rect == 6); |
||
1422 | assert((sna->render.vertex_used % 2) == 0); |
||
1423 | v = sna->render.vertices + sna->render.vertex_used; |
||
1424 | sna->render.vertex_used += 6; |
||
1425 | assert(sna->render.vertex_used <= sna->render.vertex_size); |
||
1426 | |||
1427 | dst.p.x = r->dst.x + r->width; |
||
1428 | dst.p.y = r->dst.y + r->height; |
||
1429 | v[0] = dst.f; |
||
1430 | dst.p.x = r->dst.x; |
||
1431 | v[2] = dst.f; |
||
1432 | dst.p.y = r->dst.y; |
||
1433 | v[4] = dst.f; |
||
1434 | |||
1435 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
1436 | v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
1437 | v[5] = compute_linear(&op->src, r->src.x, r->src.y); |
||
1438 | } |
||
1439 | |||
1440 | avx2 fastcall static void |
||
1441 | emit_boxes_linear__avx2(const struct sna_composite_op *op, |
||
1442 | const BoxRec *box, int nbox, |
||
1443 | float *v) |
||
1444 | { |
||
1445 | union { |
||
1446 | struct sna_coordinate p; |
||
1447 | float f; |
||
1448 | } dst; |
||
1449 | |||
1450 | do { |
||
1451 | dst.p.x = box->x2; |
||
1452 | dst.p.y = box->y2; |
||
1453 | v[0] = dst.f; |
||
1454 | dst.p.x = box->x1; |
||
1455 | v[2] = dst.f; |
||
1456 | dst.p.y = box->y1; |
||
1457 | v[4] = dst.f; |
||
1458 | |||
1459 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
1460 | v[3] = compute_linear(&op->src, box->x1, box->y2); |
||
1461 | v[5] = compute_linear(&op->src, box->x1, box->y1); |
||
1462 | |||
1463 | v += 6; |
||
1464 | box++; |
||
1465 | } while (--nbox); |
||
1466 | } |
||
1467 | |||
1468 | avx2 fastcall static void |
||
1469 | emit_primitive_identity_source__avx2(struct sna *sna, |
||
1470 | const struct sna_composite_op *op, |
||
1471 | const struct sna_composite_rectangles *r) |
||
1472 | { |
||
1473 | union { |
||
1474 | struct sna_coordinate p; |
||
1475 | float f; |
||
1476 | } dst; |
||
1477 | float *v; |
||
1478 | |||
1479 | assert(op->floats_per_rect == 9); |
||
1480 | assert((sna->render.vertex_used % 3) == 0); |
||
1481 | v = sna->render.vertices + sna->render.vertex_used; |
||
1482 | sna->render.vertex_used += 9; |
||
1483 | |||
1484 | dst.p.x = r->dst.x + r->width; |
||
1485 | dst.p.y = r->dst.y + r->height; |
||
1486 | v[0] = dst.f; |
||
1487 | dst.p.x = r->dst.x; |
||
1488 | v[3] = dst.f; |
||
1489 | dst.p.y = r->dst.y; |
||
1490 | v[6] = dst.f; |
||
1491 | |||
1492 | v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; |
||
1493 | v[1] = v[4] + r->width * op->src.scale[0]; |
||
1494 | |||
1495 | v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; |
||
1496 | v[5] = v[2] = v[8] + r->height * op->src.scale[1]; |
||
1497 | } |
||
1498 | |||
1499 | avx2 fastcall static void |
||
1500 | emit_boxes_identity_source__avx2(const struct sna_composite_op *op, |
||
1501 | const BoxRec *box, int nbox, |
||
1502 | float *v) |
||
1503 | { |
||
1504 | do { |
||
1505 | union { |
||
1506 | struct sna_coordinate p; |
||
1507 | float f; |
||
1508 | } dst; |
||
1509 | |||
1510 | dst.p.x = box->x2; |
||
1511 | dst.p.y = box->y2; |
||
1512 | v[0] = dst.f; |
||
1513 | dst.p.x = box->x1; |
||
1514 | v[3] = dst.f; |
||
1515 | dst.p.y = box->y1; |
||
1516 | v[6] = dst.f; |
||
1517 | |||
1518 | v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; |
||
1519 | v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; |
||
1520 | |||
1521 | v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; |
||
1522 | v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; |
||
1523 | |||
1524 | v += 9; |
||
1525 | box++; |
||
1526 | } while (--nbox); |
||
1527 | } |
||
1528 | |||
1529 | avx2 fastcall static void |
||
1530 | emit_primitive_simple_source__avx2(struct sna *sna, |
||
1531 | const struct sna_composite_op *op, |
||
1532 | const struct sna_composite_rectangles *r) |
||
1533 | { |
||
1534 | float *v; |
||
1535 | union { |
||
1536 | struct sna_coordinate p; |
||
1537 | float f; |
||
1538 | } dst; |
||
1539 | |||
1540 | float xx = op->src.transform->matrix[0][0]; |
||
1541 | float x0 = op->src.transform->matrix[0][2]; |
||
1542 | float yy = op->src.transform->matrix[1][1]; |
||
1543 | float y0 = op->src.transform->matrix[1][2]; |
||
1544 | float sx = op->src.scale[0]; |
||
1545 | float sy = op->src.scale[1]; |
||
1546 | int16_t tx = op->src.offset[0]; |
||
1547 | int16_t ty = op->src.offset[1]; |
||
1548 | |||
1549 | assert(op->floats_per_rect == 9); |
||
1550 | assert((sna->render.vertex_used % 3) == 0); |
||
1551 | v = sna->render.vertices + sna->render.vertex_used; |
||
1552 | sna->render.vertex_used += 3*3; |
||
1553 | |||
1554 | dst.p.x = r->dst.x + r->width; |
||
1555 | dst.p.y = r->dst.y + r->height; |
||
1556 | v[0] = dst.f; |
||
1557 | v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; |
||
1558 | v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; |
||
1559 | |||
1560 | dst.p.x = r->dst.x; |
||
1561 | v[3] = dst.f; |
||
1562 | v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; |
||
1563 | |||
1564 | dst.p.y = r->dst.y; |
||
1565 | v[6] = dst.f; |
||
1566 | v[8] = ((r->src.y + ty) * yy + y0) * sy; |
||
1567 | } |
||
1568 | |||
1569 | avx2 fastcall static void |
||
1570 | emit_boxes_simple_source__avx2(const struct sna_composite_op *op, |
||
1571 | const BoxRec *box, int nbox, |
||
1572 | float *v) |
||
1573 | { |
||
1574 | float xx = op->src.transform->matrix[0][0]; |
||
1575 | float x0 = op->src.transform->matrix[0][2]; |
||
1576 | float yy = op->src.transform->matrix[1][1]; |
||
1577 | float y0 = op->src.transform->matrix[1][2]; |
||
1578 | float sx = op->src.scale[0]; |
||
1579 | float sy = op->src.scale[1]; |
||
1580 | int16_t tx = op->src.offset[0]; |
||
1581 | int16_t ty = op->src.offset[1]; |
||
1582 | |||
1583 | do { |
||
1584 | union { |
||
1585 | struct sna_coordinate p; |
||
1586 | float f; |
||
1587 | } dst; |
||
1588 | |||
1589 | dst.p.x = box->x2; |
||
1590 | dst.p.y = box->y2; |
||
1591 | v[0] = dst.f; |
||
1592 | v[1] = ((box->x2 + tx) * xx + x0) * sx; |
||
1593 | v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; |
||
1594 | |||
1595 | dst.p.x = box->x1; |
||
1596 | v[3] = dst.f; |
||
1597 | v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; |
||
1598 | |||
1599 | dst.p.y = box->y1; |
||
1600 | v[6] = dst.f; |
||
1601 | v[8] = ((box->y1 + ty) * yy + y0) * sy; |
||
1602 | |||
1603 | v += 9; |
||
1604 | box++; |
||
1605 | } while (--nbox); |
||
1606 | } |
||
1607 | |||
1608 | avx2 fastcall static void |
||
1609 | emit_primitive_identity_mask__avx2(struct sna *sna, |
||
1610 | const struct sna_composite_op *op, |
||
1611 | const struct sna_composite_rectangles *r) |
||
1612 | { |
||
1613 | union { |
||
1614 | struct sna_coordinate p; |
||
1615 | float f; |
||
1616 | } dst; |
||
1617 | float msk_x, msk_y; |
||
1618 | float w, h; |
||
1619 | float *v; |
||
1620 | |||
1621 | msk_x = r->mask.x + op->mask.offset[0]; |
||
1622 | msk_y = r->mask.y + op->mask.offset[1]; |
||
1623 | w = r->width; |
||
1624 | h = r->height; |
||
1625 | |||
1626 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
1627 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
1628 | |||
1629 | assert(op->floats_per_rect == 12); |
||
1630 | assert((sna->render.vertex_used % 4) == 0); |
||
1631 | v = sna->render.vertices + sna->render.vertex_used; |
||
1632 | sna->render.vertex_used += 12; |
||
1633 | |||
1634 | dst.p.x = r->dst.x + r->width; |
||
1635 | dst.p.y = r->dst.y + r->height; |
||
1636 | v[0] = dst.f; |
||
1637 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
1638 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
1639 | |||
1640 | dst.p.x = r->dst.x; |
||
1641 | v[4] = dst.f; |
||
1642 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
1643 | |||
1644 | dst.p.y = r->dst.y; |
||
1645 | v[8] = dst.f; |
||
1646 | v[11] = msk_y * op->mask.scale[1]; |
||
1647 | |||
1648 | v[9] = v[5] = v[1] = .5; |
||
1649 | } |
||
1650 | |||
1651 | avx2 fastcall static void |
||
1652 | emit_boxes_identity_mask__avx2(const struct sna_composite_op *op, |
||
1653 | const BoxRec *box, int nbox, |
||
1654 | float *v) |
||
1655 | { |
||
1656 | float msk_x = op->mask.offset[0]; |
||
1657 | float msk_y = op->mask.offset[1]; |
||
1658 | |||
1659 | do { |
||
1660 | union { |
||
1661 | struct sna_coordinate p; |
||
1662 | float f; |
||
1663 | } dst; |
||
1664 | |||
1665 | dst.p.x = box->x2; |
||
1666 | dst.p.y = box->y2; |
||
1667 | v[0] = dst.f; |
||
1668 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
1669 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
1670 | |||
1671 | dst.p.x = box->x1; |
||
1672 | v[4] = dst.f; |
||
1673 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
1674 | |||
1675 | dst.p.y = box->y1; |
||
1676 | v[8] = dst.f; |
||
1677 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
1678 | |||
1679 | v[9] = v[5] = v[1] = .5; |
||
1680 | v += 12; |
||
1681 | box++; |
||
1682 | } while (--nbox); |
||
1683 | } |
||
1684 | |||
1685 | avx2 fastcall static void |
||
1686 | emit_primitive_linear_identity_mask__avx2(struct sna *sna, |
||
1687 | const struct sna_composite_op *op, |
||
1688 | const struct sna_composite_rectangles *r) |
||
1689 | { |
||
1690 | union { |
||
1691 | struct sna_coordinate p; |
||
1692 | float f; |
||
1693 | } dst; |
||
1694 | float msk_x, msk_y; |
||
1695 | float w, h; |
||
1696 | float *v; |
||
1697 | |||
1698 | msk_x = r->mask.x + op->mask.offset[0]; |
||
1699 | msk_y = r->mask.y + op->mask.offset[1]; |
||
1700 | w = r->width; |
||
1701 | h = r->height; |
||
1702 | |||
1703 | DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n", |
||
1704 | __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h)); |
||
1705 | |||
1706 | assert(op->floats_per_rect == 12); |
||
1707 | assert((sna->render.vertex_used % 4) == 0); |
||
1708 | v = sna->render.vertices + sna->render.vertex_used; |
||
1709 | sna->render.vertex_used += 12; |
||
1710 | |||
1711 | dst.p.x = r->dst.x + r->width; |
||
1712 | dst.p.y = r->dst.y + r->height; |
||
1713 | v[0] = dst.f; |
||
1714 | v[2] = (msk_x + w) * op->mask.scale[0]; |
||
1715 | v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; |
||
1716 | |||
1717 | dst.p.x = r->dst.x; |
||
1718 | v[4] = dst.f; |
||
1719 | v[10] = v[6] = msk_x * op->mask.scale[0]; |
||
1720 | |||
1721 | dst.p.y = r->dst.y; |
||
1722 | v[8] = dst.f; |
||
1723 | v[11] = msk_y * op->mask.scale[1]; |
||
1724 | |||
1725 | v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); |
||
1726 | v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height); |
||
1727 | v[9] = compute_linear(&op->src, r->src.x, r->src.y); |
||
1728 | } |
||
1729 | |||
1730 | avx2 fastcall static void |
||
1731 | emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op, |
||
1732 | const BoxRec *box, int nbox, |
||
1733 | float *v) |
||
1734 | { |
||
1735 | float msk_x = op->mask.offset[0]; |
||
1736 | float msk_y = op->mask.offset[1]; |
||
1737 | |||
1738 | do { |
||
1739 | union { |
||
1740 | struct sna_coordinate p; |
||
1741 | float f; |
||
1742 | } dst; |
||
1743 | |||
1744 | dst.p.x = box->x2; |
||
1745 | dst.p.y = box->y2; |
||
1746 | v[0] = dst.f; |
||
1747 | v[2] = (msk_x + box->x2) * op->mask.scale[0]; |
||
1748 | v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; |
||
1749 | |||
1750 | dst.p.x = box->x1; |
||
1751 | v[4] = dst.f; |
||
1752 | v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; |
||
1753 | |||
1754 | dst.p.y = box->y1; |
||
1755 | v[8] = dst.f; |
||
1756 | v[11] = (msk_y + box->y1) * op->mask.scale[1]; |
||
1757 | |||
1758 | v[1] = compute_linear(&op->src, box->x2, box->y2); |
||
1759 | v[5] = compute_linear(&op->src, box->x1, box->y2); |
||
1760 | v[9] = compute_linear(&op->src, box->x1, box->y1); |
||
1761 | |||
1762 | v += 12; |
||
1763 | box++; |
||
1764 | } while (--nbox); |
||
1765 | } |
||
1766 | |||
1767 | #endif |
||
1768 | |||
1769 | unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) |
||
1770 | { |
||
1771 | unsigned vb; |
||
1772 | |||
1773 | if (tmp->mask.bo) { |
||
1774 | if (tmp->mask.transform == NULL) { |
||
1775 | if (tmp->src.is_solid) { |
||
1776 | DBG(("%s: solid, identity mask\n", __FUNCTION__)); |
||
1777 | #if defined(avx2) |
||
1778 | if (sna->cpu_features & AVX2) { |
||
1779 | tmp->prim_emit = emit_primitive_identity_mask__avx2; |
||
1780 | } else |
||
1781 | #endif |
||
1782 | #if defined(sse4_2) |
||
1783 | if (sna->cpu_features & SSE4_2) { |
||
1784 | tmp->prim_emit = emit_primitive_identity_mask__sse4_2; |
||
1785 | } else |
||
1786 | #endif |
||
1787 | { |
||
1788 | tmp->prim_emit = emit_primitive_identity_mask; |
||
1789 | } |
||
1790 | tmp->floats_per_vertex = 4; |
||
1791 | vb = 1 | 2 << 2; |
||
1792 | } else if (tmp->src.is_linear) { |
||
1793 | DBG(("%s: linear, identity mask\n", __FUNCTION__)); |
||
1794 | #if defined(avx2) |
||
1795 | if (sna->cpu_features & AVX2) { |
||
1796 | tmp->prim_emit = emit_primitive_linear_identity_mask__avx2; |
||
1797 | } else |
||
1798 | #endif |
||
1799 | #if defined(sse4_2) |
||
1800 | if (sna->cpu_features & SSE4_2) { |
||
1801 | tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2; |
||
1802 | } else |
||
1803 | #endif |
||
1804 | { |
||
1805 | tmp->prim_emit = emit_primitive_linear_identity_mask; |
||
1806 | } |
||
1807 | tmp->floats_per_vertex = 4; |
||
1808 | vb = 1 | 2 << 2; |
||
1809 | } else if (tmp->src.transform == NULL) { |
||
1810 | DBG(("%s: identity source, identity mask\n", __FUNCTION__)); |
||
1811 | tmp->prim_emit = emit_primitive_identity_source_mask; |
||
1812 | tmp->floats_per_vertex = 5; |
||
1813 | vb = 2 << 2 | 2; |
||
1814 | } else if (tmp->src.is_affine) { |
||
1815 | tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
||
1816 | tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
||
1817 | if (!sna_affine_transform_is_rotation(tmp->src.transform)) { |
||
1818 | DBG(("%s: simple src, identity mask\n", __FUNCTION__)); |
||
1819 | tmp->prim_emit = emit_primitive_simple_source_identity; |
||
1820 | } else { |
||
1821 | DBG(("%s: affine src, identity mask\n", __FUNCTION__)); |
||
1822 | tmp->prim_emit = emit_primitive_affine_source_identity; |
||
1823 | } |
||
1824 | tmp->floats_per_vertex = 5; |
||
1825 | vb = 2 << 2 | 2; |
||
1826 | } else { |
||
1827 | DBG(("%s: projective source, identity mask\n", __FUNCTION__)); |
||
1828 | tmp->prim_emit = emit_primitive_mask; |
||
1829 | tmp->floats_per_vertex = 6; |
||
1830 | vb = 2 << 2 | 3; |
||
1831 | } |
||
1832 | } else { |
||
1833 | tmp->prim_emit = emit_primitive_mask; |
||
1834 | tmp->floats_per_vertex = 1; |
||
1835 | vb = 0; |
||
1836 | if (tmp->mask.is_solid) { |
||
1837 | tmp->floats_per_vertex += 1; |
||
1838 | vb |= 1 << 2; |
||
1839 | } else if (tmp->mask.is_affine) { |
||
1840 | tmp->floats_per_vertex += 2; |
||
1841 | vb |= 2 << 2; |
||
1842 | }else { |
||
1843 | tmp->floats_per_vertex += 3; |
||
1844 | vb |= 3 << 2; |
||
1845 | } |
||
1846 | if (tmp->src.is_solid) { |
||
1847 | tmp->floats_per_vertex += 1; |
||
1848 | vb |= 1; |
||
1849 | } else if (tmp->src.is_affine) { |
||
1850 | tmp->floats_per_vertex += 2; |
||
1851 | vb |= 2 ; |
||
1852 | }else { |
||
1853 | tmp->floats_per_vertex += 3; |
||
1854 | vb |= 3; |
||
1855 | } |
||
1856 | DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n", |
||
1857 | __FUNCTION__,tmp->floats_per_vertex, vb)); |
||
1858 | } |
||
1859 | } else { |
||
1860 | #if 0 |
||
1861 | if (tmp->src.is_solid) { |
||
1862 | DBG(("%s: solid, no mask\n", __FUNCTION__)); |
||
1863 | tmp->prim_emit = emit_primitive_solid; |
||
1864 | if (tmp->src.is_opaque && tmp->op == PictOpOver) |
||
1865 | tmp->op = PictOpSrc; |
||
1866 | tmp->floats_per_vertex = 2; |
||
1867 | vb = 1; |
||
1868 | } else if (tmp->src.is_linear) { |
||
1869 | DBG(("%s: linear, no mask\n", __FUNCTION__)); |
||
1870 | #if defined(avx2) |
||
1871 | if (sna->cpu_features & AVX2) { |
||
1872 | tmp->prim_emit = emit_primitive_linear__avx2; |
||
1873 | } else |
||
1874 | #endif |
||
1875 | #if defined(sse4_2) |
||
1876 | if (sna->cpu_features & SSE4_2) { |
||
1877 | tmp->prim_emit = emit_primitive_linear__sse4_2; |
||
1878 | } else |
||
1879 | #endif |
||
1880 | { |
||
1881 | tmp->prim_emit = emit_primitive_linear; |
||
1882 | } |
||
1883 | tmp->floats_per_vertex = 2; |
||
1884 | vb = 1; |
||
1885 | } else if (tmp->src.transform == NULL) { |
||
1886 | DBG(("%s: identity src, no mask\n", __FUNCTION__)); |
||
1887 | #if defined(avx2) |
||
1888 | if (sna->cpu_features & AVX2) { |
||
1889 | tmp->prim_emit = emit_primitive_identity_source__avx2; |
||
1890 | } else |
||
1891 | #endif |
||
1892 | #if defined(sse4_2) |
||
1893 | if (sna->cpu_features & SSE4_2) { |
||
1894 | tmp->prim_emit = emit_primitive_identity_source__sse4_2; |
||
1895 | } else |
||
1896 | #endif |
||
1897 | { |
||
1898 | tmp->prim_emit = emit_primitive_identity_source; |
||
1899 | } |
||
1900 | tmp->floats_per_vertex = 3; |
||
1901 | vb = 2; |
||
1902 | } else if (tmp->src.is_affine) { |
||
1903 | tmp->src.scale[0] /= tmp->src.transform->matrix[2][2]; |
||
1904 | tmp->src.scale[1] /= tmp->src.transform->matrix[2][2]; |
||
1905 | if (!sna_affine_transform_is_rotation(tmp->src.transform)) { |
||
1906 | DBG(("%s: simple src, no mask\n", __FUNCTION__)); |
||
1907 | #if defined(avx2) |
||
1908 | if (sna->cpu_features & AVX2) { |
||
1909 | tmp->prim_emit = emit_primitive_simple_source__avx2; |
||
1910 | } else |
||
1911 | #endif |
||
1912 | #if defined(sse4_2) |
||
1913 | if (sna->cpu_features & SSE4_2) { |
||
1914 | tmp->prim_emit = emit_primitive_simple_source__sse4_2; |
||
1915 | } else |
||
1916 | #endif |
||
1917 | { |
||
1918 | tmp->prim_emit = emit_primitive_simple_source; |
||
1919 | } |
||
1920 | } else { |
||
1921 | DBG(("%s: affine src, no mask\n", __FUNCTION__)); |
||
1922 | tmp->prim_emit = emit_primitive_affine_source; |
||
1923 | } |
||
1924 | tmp->floats_per_vertex = 3; |
||
1925 | vb = 2; |
||
1926 | } else { |
||
1927 | DBG(("%s: projective src, no mask\n", __FUNCTION__)); |
||
1928 | assert(!tmp->src.is_solid); |
||
1929 | tmp->prim_emit = emit_primitive; |
||
1930 | tmp->floats_per_vertex = 4; |
||
1931 | vb = 3; |
||
1932 | } |
||
1933 | #endif |
||
1934 | } |
||
1935 | tmp->floats_per_rect = 3 * tmp->floats_per_vertex; |
||
1936 | |||
1937 | return vb; |
||
1938 | } |
||
1939 | |||
1940 |