Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
22 | * OTHER DEALINGS IN THE SOFTWARE. |
||
23 | */ |
||
24 | |||
25 | /* |
||
26 | * - insert PREFETCH instructions to avoid cache-misses ! |
||
27 | * - some more optimizations are possible... |
||
28 | * - for 40-50% more performance in the SSE-functions, the |
||
29 | * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "assyntax.h" |
||
34 | #include "matypes.h" |
||
35 | #include "xform_args.h" |
||
36 | |||
37 | |||
38 | |||
39 | |||
40 | #define D(i) REGOFF(i * 4, EDI) |
||
41 | #define M(i) REGOFF(i * 4, EDX) |
||
42 | |||
43 | |||
44 | |||
/*
 * _mesa_sse_transform_points2_general
 *
 * SSE loop that stores one four-float result per source vertex with
 * MOVAPS.  As visible below, the stored value is
 *     ox * (m0..m3) + (m12..m15)
 * and oy * (m4..m7) is also computed into XMM4.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor (read through S()),  EAX = added to ESI per vertex
 *   EDI = dest cursor (written through D()), ECX = value EDI is compared to
 *   EDX = base register of M(),  XMM0/XMM1/XMM2 = M(0..3), M(4..7), M(12..15)
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP2GR_finish label, the scaling of ECX, the
 * load of the dest data pointer into EDI, and the instruction that folds
 * XMM4 into XMM3 before the final add.
 */
45 | GLOBL GLNAME(_mesa_sse_transform_points2_general) |
||
4632 | Serge | 46 | HIDDEN (_mesa_sse_transform_points2_general) |
47 | GLNAME( _mesa_sse_transform_points2_general ): |
||
48 | |||
4358 | Serge | 49 | |
50 | PUSH_L ( ESI ) |
||
51 | PUSH_L ( EDI ) |
||
52 | |||
53 | |||
54 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
55 | |||
56 | |||
57 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
58 | |||
59 | |||
60 | JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */ |
||
61 | |||
62 | |||
63 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
64 | |||
65 | |||
66 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
67 | |||
68 | |||
69 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
70 | |||
71 | |||
72 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
73 | |||
74 | |||
75 | MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
||
76 | MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ |
||
77 | MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ |
||
78 | |||
79 | |||
80 | LLBL(K_GTP2GR_top): |
||
81 | MOVSS( S(0), XMM3 ) /* ox */ |
||
82 | SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */ |
||
83 | MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
||
84 | MOVSS( S(1), XMM4 ) /* oy */ |
||
85 | SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */ |
||
86 | MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ |
||
87 | |||
88 | |||
89 | ADDPS( XMM2, XMM3 ) |
||
90 | MOVAPS( XMM3, D(0) ) |
||
91 | |||
92 | |||
/* Advance: dest by 16 bytes (four floats), source by EAX; repeat until EDI equals ECX. */
93 | ADD_L ( CONST(16), EDI ) |
||
94 | ADD_L ( EAX, ESI ) |
||
95 | CMP_L ( ECX, EDI ) |
||
96 | JNE ( LLBL(K_GTP2GR_top) ) |
||
97 | |||
98 | |||
99 | POP_L ( EDI ) |
||
100 | POP_L ( ESI ) |
||
101 | RET |
||
102 | #undef FRAME_OFFSET |
||
103 | |||
104 | |||
105 | |||
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 32-bit components of each source vertex, S(0) and
 * S(1), unchanged into D(0) and D(1), using EDX as the scratch register.
 * The dest vector is marked VEC_SIZE_2 with size 2.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to.
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, ECX and EAX, whatever sets the flags tested by JZ and by
 * JE, the definition of S(), the K_GTP2IR_finish label, and the load of
 * the dest data pointer into EDI.
 */
106 | GLOBL GLNAME(_mesa_sse_transform_points2_identity) |
||
4632 | Serge | 107 | HIDDEN(_mesa_sse_transform_points2_identity) |
108 | GLNAME( _mesa_sse_transform_points2_identity ): |
||
109 | |||
4358 | Serge | 110 | |
111 | PUSH_L ( ESI ) |
||
112 | PUSH_L ( EDI ) |
||
113 | |||
114 | |||
115 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
116 | |||
117 | |||
118 | |||
119 | |||
120 | JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; go to finish */ |
||
121 | |||
122 | |||
123 | OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
124 | |||
125 | |||
126 | MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
127 | |||
128 | |||
129 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
130 | |||
131 | |||
132 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
133 | |||
134 | |||
/* NOTE(review): the comparison whose result this JE consumes is not visible in this listing. */
135 | JE( LLBL(K_GTP2IR_finish) ) |
||
136 | |||
137 | |||
138 | |||
139 | LLBL(K_GTP2IR_top): |
||
/* Plain integer moves: the two components are copied bit-for-bit through EDX. */
140 | MOV_L ( S(0), EDX ) |
||
141 | MOV_L ( EDX, D(0) ) |
||
142 | MOV_L ( S(1), EDX ) |
||
143 | MOV_L ( EDX, D(1) ) |
||
144 | |||
145 | |||
/* Advance: dest by 16 bytes, source by EAX; repeat until EDI equals ECX. */
146 | ADD_L ( CONST(16), EDI ) |
||
147 | ADD_L ( EAX, ESI ) |
||
148 | CMP_L ( ECX, EDI ) |
||
149 | JNE ( LLBL(K_GTP2IR_top) ) |
||
150 | |||
151 | |||
152 | POP_L ( EDI ) |
||
153 | POP_L ( ESI ) |
||
154 | RET |
||
155 | #undef FRAME_OFFSET |
||
156 | |||
157 | |||
158 | |||
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * Per source vertex, the visible loop stores
 *     D(0) = ox * m0 + m12
 *     D(1) = oy * m5 + m13
 * using the low two lanes of XMM0.  The dest vector is marked VEC_SIZE_3
 * with size 3.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to,
 *   EDX = base register of M(),
 *   XMM1 = m5 | m0,  XMM2 = m13 | m12,  XMM3 = m14.
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP23DNRR_finish label, the load of the dest
 * data pointer into EDI, and any store of XMM3 (m14) to D(2) - XMM3 is
 * loaded before the loop but never used in the rows shown.
 */
159 | GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot) |
||
4632 | Serge | 160 | HIDDEN(_mesa_sse_transform_points2_3d_no_rot) |
161 | GLNAME(_mesa_sse_transform_points2_3d_no_rot): |
||
162 | |||
4358 | Serge | 163 | |
164 | PUSH_L( ESI ) |
||
165 | PUSH_L( EDI ) |
||
166 | |||
167 | |||
168 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
169 | |||
170 | |||
171 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
172 | |||
173 | |||
174 | JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; go to finish */ |
||
175 | |||
176 | |||
177 | OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
178 | |||
179 | |||
180 | MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
181 | |||
182 | |||
183 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
184 | |||
185 | |||
186 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
187 | |||
188 | |||
189 | |||
190 | |||
191 | MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
||
192 | MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
||
193 | UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
||
194 | MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ |
||
195 | MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */ |
||
196 | |||
197 | |||
198 | LLBL(K_GTP23DNRR_top): |
||
199 | MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */ |
||
200 | MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */ |
||
201 | ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */ |
||
202 | MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */ |
||
203 | |||
204 | |||
205 | |||
206 | |||
/* Advance: dest by 16 bytes, source by EAX; repeat until EDI equals ECX. */
207 | ADD_L ( CONST(16), EDI ) |
||
208 | ADD_L ( EAX, ESI ) |
||
209 | CMP_L ( ECX, EDI ) |
||
210 | JNE ( LLBL(K_GTP23DNRR_top) ) |
||
211 | |||
212 | |||
213 | POP_L ( EDI ) |
||
214 | POP_L ( ESI ) |
||
215 | RET |
||
216 | #undef FRAME_OFFSET |
||
217 | |||
218 | |||
219 | |||
/*
 * _mesa_sse_transform_points2_perspective
 *
 * Per source vertex, the visible loop stores
 *     D(0) = ox * m0
 *     D(1) = oy * m5
 *     D(2) = m14
 *     D(3) = 0
 * The dest vector is marked VEC_SIZE_4 with size 4.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to,
 *   EDX = base register of M(),
 *   XMM1 = m5 | m0,  XMM3 = m14,  XMM0 = all zero (XORPS with itself).
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP23PR_finish label, and the load of the dest
 * data pointer into EDI.
 */
220 | GLOBL GLNAME(_mesa_sse_transform_points2_perspective) |
||
4632 | Serge | 221 | HIDDEN(_mesa_sse_transform_points2_perspective) |
222 | GLNAME(_mesa_sse_transform_points2_perspective): |
||
223 | |||
4358 | Serge | 224 | |
225 | PUSH_L ( ESI ) |
||
226 | PUSH_L ( EDI ) |
||
227 | |||
228 | |||
229 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
230 | |||
231 | |||
232 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
233 | |||
234 | |||
235 | JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; go to finish */ |
||
236 | |||
237 | |||
238 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
239 | |||
240 | |||
241 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
242 | |||
243 | |||
244 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
245 | |||
246 | |||
247 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
248 | |||
249 | |||
250 | MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */ |
||
251 | MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */ |
||
252 | UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ |
||
253 | MOVSS ( M(14), XMM3 ) /* m14 */ |
||
254 | XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
||
255 | |||
256 | |||
257 | LLBL(K_GTP23PR_top): |
||
258 | MOVLPS( S(0), XMM4 ) /* oy | ox */ |
||
259 | MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */ |
||
260 | MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */ |
||
261 | MOVSS( XMM3, D(2) ) /* ->D(2) */ |
||
262 | MOVSS( XMM0, D(3) ) /* ->D(3) */ |
||
263 | |||
264 | |||
/* Advance: dest by 16 bytes (four floats), source by EAX; repeat until EDI equals ECX. */
265 | ADD_L( CONST(16), EDI ) |
||
266 | ADD_L( EAX, ESI ) |
||
267 | CMP_L( ECX, EDI ) |
||
268 | JNE( LLBL(K_GTP23PR_top) ) |
||
269 | |||
270 | |||
271 | POP_L ( EDI ) |
||
272 | POP_L ( ESI ) |
||
273 | RET |
||
274 | #undef FRAME_OFFSET |
||
275 | |||
276 | |||
277 | |||
278 | |||
/*
 * _mesa_sse_transform_points2_2d
 *
 * Per source vertex, the visible loop stores the low two lanes of XMM3
 * to D(0) and D(1), where XMM3 = ox * (m0, m1) + (m12, m13).  A second
 * product, XMM4 * (m4, m5), is also formed in XMM4.  The dest vector is
 * marked VEC_SIZE_2 with size 2.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to,
 *   EDX = base register of M(),
 *   XMM0 = m1 | m0,  XMM1 = m5 | m4,  XMM2 = m13 | m12.
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP23P2DR_finish label, the load of the dest
 * data pointer into EDI, the load of oy into XMM4 before it is shuffled,
 * and the instruction that folds XMM4 into XMM3 before the final add.
 */
279 | GLOBL GLNAME(_mesa_sse_transform_points2_2d) |
||
4632 | Serge | 280 | HIDDEN(_mesa_sse_transform_points2_2d) |
281 | GLNAME(_mesa_sse_transform_points2_2d): |
||
282 | |||
4358 | Serge | 283 | |
284 | PUSH_L( ESI ) |
||
285 | PUSH_L( EDI ) |
||
286 | |||
287 | |||
288 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
289 | |||
290 | |||
291 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
292 | |||
293 | |||
294 | JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; go to finish */ |
||
295 | |||
296 | |||
297 | OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
298 | |||
299 | |||
300 | MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
301 | |||
302 | |||
303 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
304 | |||
305 | |||
306 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
307 | |||
308 | |||
309 | MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
||
310 | MOVLPS( M(4), XMM1 ) /* m5 | m4 */ |
||
311 | MOVLPS( M(12), XMM2 ) /* m13 | m12 */ |
||
312 | |||
313 | |||
314 | LLBL(K_GTP23P2DR_top): |
||
315 | MOVSS( S(0), XMM3 ) /* ox */ |
||
316 | SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */ |
||
317 | MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */ |
||
318 | |||
319 | |||
320 | SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */ |
||
321 | MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */ |
||
322 | |||
323 | |||
324 | ADDPS( XMM2, XMM3 ) |
||
325 | MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ |
||
326 | |||
327 | |||
/* Advance: dest by 16 bytes, source by EAX; repeat until EDI equals ECX. */
328 | ADD_L ( CONST(16), EDI ) |
||
329 | ADD_L ( EAX, ESI ) |
||
330 | CMP_L ( ECX, EDI ) |
||
331 | JNE ( LLBL(K_GTP23P2DR_top) ) |
||
332 | |||
333 | |||
334 | POP_L ( EDI ) |
||
335 | POP_L ( ESI ) |
||
336 | RET |
||
337 | #undef FRAME_OFFSET |
||
338 | |||
339 | |||
340 | |||
341 | |||
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Per source vertex, the visible loop stores
 *     D(0) = ox * m0 + m12
 *     D(1) = oy * m5 + m13
 * using the low two lanes of XMM0.  The dest vector is marked VEC_SIZE_2
 * with size 2.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to,
 *   EDX = base register of M(),
 *   XMM1 = m5 | m0,  XMM2 = m13 | m12.
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP23P2DNRR_finish label, and the load of the
 * dest data pointer into EDI.
 */
342 | GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot) |
||
4632 | Serge | 343 | HIDDEN(_mesa_sse_transform_points2_2d_no_rot) |
344 | GLNAME(_mesa_sse_transform_points2_2d_no_rot): |
||
345 | |||
4358 | Serge | 346 | |
347 | PUSH_L( ESI ) |
||
348 | PUSH_L( EDI ) |
||
349 | |||
350 | |||
351 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
352 | |||
353 | |||
354 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
355 | |||
356 | |||
357 | JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; go to finish */ |
||
358 | |||
359 | |||
360 | OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
361 | |||
362 | |||
363 | MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
364 | |||
365 | |||
366 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
367 | |||
368 | |||
369 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
370 | |||
371 | |||
372 | MOVSS ( M(0), XMM1 ) /* m0 */ |
||
373 | MOVSS ( M(5), XMM2 ) /* m5 */ |
||
374 | UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ |
||
375 | MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ |
||
376 | |||
377 | |||
378 | LLBL(K_GTP23P2DNRR_top): |
||
379 | MOVLPS( S(0), XMM0 ) /* oy | ox */ |
||
380 | MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ |
||
381 | ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ |
||
382 | MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ |
||
383 | |||
384 | |||
/* Advance: dest by 16 bytes, source by EAX; repeat until EDI equals ECX. */
385 | ADD_L( CONST(16), EDI ) |
||
386 | ADD_L( EAX, ESI ) |
||
387 | CMP_L( ECX, EDI ) |
||
388 | JNE( LLBL(K_GTP23P2DNRR_top) ) |
||
389 | |||
390 | |||
391 | POP_L( EDI ) |
||
392 | POP_L( ESI ) |
||
393 | RET |
||
394 | #undef FRAME_OFFSET |
||
395 | |||
396 | |||
397 | |||
398 | |||
/*
 * _mesa_sse_transform_points2_3d
 *
 * Per source vertex, the visible loop forms
 *     XMM3 = ox * (m0, m1, m2, ..) + (m12, m13, m14, ..)
 * then uses UNPCKHPS to bring the third lane down to the lowest lane and
 * stores it to D(2) with MOVSS.  A second product, XMM4 * (m4, m5, m6, ..),
 * is also formed in XMM4.  The dest vector is marked VEC_SIZE_3 with
 * size 3.
 *
 * Register roles as used by the visible code:
 *   ESI = source cursor, EAX = added to ESI per vertex,
 *   EDI = dest cursor,   ECX = value EDI is compared to,
 *   EDX = base register of M(),
 *   XMM0/XMM1/XMM2 = 16 bytes each from M(0), M(4) and M(12).
 * ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing is a blame-table rendering with rows missing.
 * Not visible here and to be confirmed against the real source file: the
 * loads of ESI, EDX and EAX, whatever sets the flags tested by JZ, the
 * definition of S(), the K_GTP23P3DR_finish label, the load of the dest
 * data pointer into EDI, the load of oy into XMM4 before it is shuffled,
 * the instruction that folds XMM4 into XMM3, and the store of the low two
 * lanes to D(0)/D(1) before UNPCKHPS overwrites them.
 */
399 | GLOBL GLNAME(_mesa_sse_transform_points2_3d) |
||
4632 | Serge | 400 | HIDDEN(_mesa_sse_transform_points2_3d) |
401 | GLNAME(_mesa_sse_transform_points2_3d): |
||
402 | |||
4358 | Serge | 403 | |
404 | PUSH_L( ESI ) |
||
405 | PUSH_L( EDI ) |
||
406 | |||
407 | |||
408 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f; the +8 steps over the two registers pushed above */ |
||
409 | |||
410 | |||
411 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
412 | |||
413 | |||
414 | JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; go to finish */ |
||
415 | |||
416 | |||
417 | OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
418 | |||
419 | |||
420 | MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
421 | |||
422 | |||
423 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
424 | |||
425 | |||
426 | ADD_L( EDI, ECX ) /* count += dest ptr; ECX is the value the loop compares EDI against */ |
||
427 | |||
428 | |||
429 | MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ |
||
430 | MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ |
||
431 | MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */ |
||
432 | |||
433 | |||
434 | LLBL(K_GTP23P3DR_top): |
||
435 | MOVSS( S(0), XMM3 ) /* ox */ |
||
436 | SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */ |
||
437 | MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */ |
||
438 | |||
439 | |||
440 | SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */ |
||
441 | MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */ |
||
442 | |||
443 | |||
444 | ADDPS( XMM2, XMM3 ) |
||
445 | |||
446 | |||
/* UNPCKHPS of a register with itself duplicates its upper two lanes, so the third lane lands in lane 0 for the scalar store. */
447 | UNPCKHPS( XMM3, XMM3 ) |
||
448 | MOVSS( XMM3, D(2) ) /* ->D(2) */ |
||
449 | |||
450 | |||
/* Advance: dest by 16 bytes, source by EAX; repeat until EDI equals ECX. */
451 | ADD_L( CONST(16), EDI ) |
||
452 | ADD_L( EAX, ESI ) |
||
453 | CMP_L( ECX, EDI ) |
||
454 | JNE( LLBL(K_GTP23P3DR_top) ) |
||
455 | |||
456 | |||
457 | POP_L( EDI ) |
||
458 | POP_L( ESI ) |
||
459 | RET |
||
460 | #undef FRAME_OFFSET |
||
461 | #endif |
||
462 | |||
463 | |||
464 | .section .note.GNU-stack,"",%progbits |
||
465 | #endif |
||
466 |