Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
17 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
22 | * OTHER DEALINGS IN THE SOFTWARE. |
||
23 | */ |
||
24 | |||
25 | /** TODO: |
26 | * - insert PREFETCH instructions to avoid cache-misses ! |
||
27 | * - some more optimizations are possible... |
||
28 | * - for 40-50% more performance in the SSE-functions, the |
||
29 | * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "assyntax.h" |
||
34 | #include "matypes.h" |
||
35 | #include "xform_args.h" |
||
36 | |||
37 | |||
38 | |||
39 | |||
/*
 * Operand shorthands used by every routine in this file.  Element i is
 * addressed at byte offset i*4 from a base register (4 bytes matches the
 * single-precision MOVSS accesses that use these macros):
 *   D(i) - relative to EDI
 *   M(i) - relative to EDX
 * REGOFF is not defined in this listing; presumably it comes from
 * assyntax.h and builds a register+displacement operand -- confirm.
 *
 * NOTE(review): the loops also use S(i), but no definition of S is visible
 * in this listing.  It was probably on one of the empty rows above; verify
 * against the repository copy.
 */
40 | #define D(i) REGOFF(i * 4, EDI) |
||
41 | #define M(i) REGOFF(i * 4, EDX) |
||
42 | |||
43 | |||
44 | |||
/*
 * _mesa_sse_transform_points1_general
 *
 * For each source vertex, reads one float ox = S(0) and writes four floats
 * to the destination:
 *     D(0..3) = ox * M(0..3) + M(12..15)
 * The destination pointer advances 16 bytes per vertex, the source pointer
 * advances by EAX bytes, and the loop ends when EDI equals ECX.  The
 * destination vector gets VEC_SIZE_4 OR-ed into its flags and its size set
 * to 4.  ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JE; the steps that turn ECX into an end address
 * (count scaled to bytes) and EDI into the destination data pointer; the
 * FRAME_OFFSET definition matched by the #undef; and the K_GTP1GR_finish
 * label.  Verify against the repository copy before relying on this text.
 */
45 | GLOBL GLNAME(_mesa_sse_transform_points1_general) |
||
4632 | Serge | 46 | HIDDEN( _mesa_sse_transform_points1_general ) |
47 | GLNAME( _mesa_sse_transform_points1_general ): |
||
48 | |||
4358 | Serge | 49 | |
50 | PUSH_L ( ESI ) |
||
51 | PUSH_L ( EDI ) |
||
52 | |||
53 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
54 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
55 | |||
56 | |||
57 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
58 | |||
59 | |||
60 | JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */ |
||
61 | |||
62 | |||
63 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
64 | |||
65 | |||
66 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
67 | |||
68 | |||
69 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
70 | |||
71 | |||
72 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
73 | |||
74 | |||
75 | |||
/* MOVAPS faults on a memory operand that is not 16-byte aligned, so the
 * matrix behind M() must be 16-byte aligned. */
76 | MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
||
77 | MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
||
78 | |||
79 | |||
80 | LLBL(K_GTP1GR_top): |
||
81 | MOVSS( S(0), XMM2 ) /* ox */ |
||
82 | SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
||
83 | MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
||
84 | ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */ |
||
85 | MOVUPS( XMM2, D(0) ) /* unaligned 16-byte store of all four results */ |
||
86 | |||
87 | |||
88 | ADD_L ( CONST(16), EDI ) |
||
89 | ADD_L ( EAX, ESI ) |
||
90 | CMP_L ( ECX, EDI ) |
||
91 | JNE ( LLBL(K_GTP1GR_top) ) |
||
92 | |||
93 | |||
94 | POP_L ( EDI ) |
||
95 | POP_L ( ESI ) |
||
96 | RET |
||
97 | #undef FRAME_OFFSET |
||
98 | |||
99 | |||
100 | |||
101 | |||
/*
 * _mesa_sse_transform_points1_identity
 *
 * For each source vertex, copies one 32-bit word from S(0) to D(0) through
 * EDX; no arithmetic is applied and no SSE register is used.  The
 * destination pointer advances 16 bytes per vertex, the source pointer
 * advances by EAX bytes, and the loop ends when EDI equals ECX.  The
 * destination vector gets VEC_SIZE_1 OR-ed into its flags and its size set
 * to 1.  ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), ECX (count) and EAX (per-vertex source
 * step, presumably the stride); the instruction that sets the flags tested
 * by JZ; the steps that turn ECX into an end address and EDI into the
 * destination data pointer; the FRAME_OFFSET definition matched by the
 * #undef; and the K_GTP1IR_finish label.  As listed, the JE after ADD_L
 * tests the flags of that addition; a compare was presumably dropped in
 * front of it.  Verify against the repository copy.
 */
102 | GLOBL GLNAME(_mesa_sse_transform_points1_identity) |
||
4632 | Serge | 103 | HIDDEN(_mesa_sse_transform_points1_identity) |
104 | GLNAME( _mesa_sse_transform_points1_identity ): |
||
105 | |||
4358 | Serge | 106 | |
107 | PUSH_L ( ESI ) |
||
108 | PUSH_L ( EDI ) |
||
109 | |||
110 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
111 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
112 | |||
113 | |||
114 | |||
115 | |||
116 | JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */ |
||
117 | |||
118 | |||
119 | OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
120 | |||
121 | |||
122 | MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
123 | |||
124 | |||
125 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
126 | |||
127 | |||
128 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
129 | |||
130 | |||
131 | JE( LLBL(K_GTP1IR_finish) ) |
||
132 | |||
133 | |||
134 | |||
135 | LLBL(K_GTP1IR_top): |
||
136 | MOV_L( S(0), EDX ) /* fetch ox as a raw 32-bit word */ |
||
137 | MOV_L( EDX, D(0) ) /* store it unchanged */ |
||
138 | |||
139 | |||
140 | ADD_L ( CONST(16), EDI ) |
||
141 | ADD_L ( EAX, ESI ) |
||
142 | CMP_L ( ECX, EDI ) |
||
143 | JNE ( LLBL(K_GTP1IR_top) ) |
||
144 | |||
145 | |||
146 | POP_L ( EDI ) |
||
147 | POP_L ( ESI ) |
||
148 | RET |
||
149 | #undef FRAME_OFFSET |
||
150 | |||
151 | |||
152 | |||
153 | |||
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * For each source vertex, reads one float ox = S(0) and, as far as this
 * listing shows, writes:
 *     D(0) = ox * M(0) + M(12)
 *     D(2) = M(14)
 * All arithmetic is scalar (MOVSS/MULSS/ADDSS).  The destination pointer
 * advances 16 bytes per vertex, the source pointer advances by EAX bytes,
 * and the loop ends when EDI equals ECX.  The destination vector gets
 * VEC_SIZE_3 OR-ed into its flags and its size set to 3.  ESI and EDI are
 * pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JZ; the steps that turn ECX into an end address
 * and EDI into the destination data pointer; the FRAME_OFFSET definition
 * matched by the #undef; and the K_GTP13DNRR_finish label.  M(13) is
 * loaded into XMM2 but never stored here; a store to D(1) was presumably
 * on one of the empty rows.  Verify against the repository copy.
 */
154 | GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot) |
||
4632 | Serge | 155 | HIDDEN(_mesa_sse_transform_points1_3d_no_rot) |
156 | GLNAME(_mesa_sse_transform_points1_3d_no_rot): |
||
157 | |||
4358 | Serge | 158 | |
159 | PUSH_L( ESI ) |
||
160 | PUSH_L( EDI ) |
||
161 | |||
162 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
163 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
164 | |||
165 | |||
166 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
167 | |||
168 | |||
169 | JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */ |
||
170 | |||
171 | |||
172 | OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
173 | |||
174 | |||
175 | MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
176 | |||
177 | |||
178 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
179 | |||
180 | |||
181 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
182 | |||
183 | |||
184 | |||
/* The matrix scalars are loaded once here, outside the loop. */
185 | MOVSS( M(0), XMM0 ) /* m0 */ |
||
186 | MOVSS( M(12), XMM1 ) /* m12 */ |
||
187 | MOVSS( M(13), XMM2 ) /* m13 */ |
||
188 | MOVSS( M(14), XMM3 ) /* m14 */ |
||
189 | |||
190 | |||
191 | LLBL(K_GTP13DNRR_top): |
||
192 | MOVSS( S(0), XMM4 ) /* ox */ |
||
193 | MULSS( XMM0, XMM4 ) /* ox*m0 */ |
||
194 | ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */ |
||
195 | MOVSS( XMM4, D(0) ) /* ->D(0) */ |
||
196 | |||
197 | |||
198 | MOVSS( XMM3, D(2) ) /* m14->D(2) */ |
||
199 | |||
200 | |||
201 | ADD_L ( CONST(16), EDI ) |
||
202 | ADD_L ( EAX, ESI ) |
||
203 | CMP_L ( ECX, EDI ) |
||
204 | JNE ( LLBL(K_GTP13DNRR_top) ) |
||
205 | |||
206 | |||
207 | POP_L ( EDI ) |
||
208 | POP_L ( ESI ) |
||
209 | RET |
||
210 | #undef FRAME_OFFSET |
||
211 | |||
212 | |||
213 | |||
214 | |||
/*
 * _mesa_sse_transform_points1_perspective
 *
 * For each source vertex, reads one float ox = S(0) and, as far as this
 * listing shows, writes:
 *     D(0) = ox * M(0)
 *     D(2) = M(14)
 *     D(3) = 0.0   (XMM0 is zeroed once with XORPS)
 * The destination pointer advances 16 bytes per vertex, the source pointer
 * advances by EAX bytes, and the loop ends when EDI equals ECX.  The
 * destination vector gets VEC_SIZE_4 OR-ed into its flags and its size set
 * to 4.  ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JZ; the steps that turn ECX into an end address
 * and EDI into the destination data pointer; the FRAME_OFFSET definition
 * matched by the #undef; and the K_GTP13PR_finish label.  No write to D(1)
 * is visible although the size is declared as 4; a store was presumably on
 * one of the empty rows.  Verify against the repository copy.
 */
215 | GLOBL GLNAME(_mesa_sse_transform_points1_perspective) |
||
4632 | Serge | 216 | HIDDEN(_mesa_sse_transform_points1_perspective) |
217 | GLNAME(_mesa_sse_transform_points1_perspective): |
||
218 | |||
4358 | Serge | 219 | |
220 | PUSH_L ( ESI ) |
||
221 | PUSH_L ( EDI ) |
||
222 | |||
223 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
224 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
225 | |||
226 | |||
227 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
228 | |||
229 | |||
230 | JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */ |
||
231 | |||
232 | |||
233 | OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
234 | |||
235 | |||
236 | MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
237 | |||
238 | |||
239 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
240 | |||
241 | |||
242 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
243 | |||
244 | |||
245 | |||
/* The constants are set up once here, outside the loop. */
246 | XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ |
||
247 | MOVSS( M(0), XMM1 ) /* m0 */ |
||
248 | MOVSS( M(14), XMM2 ) /* m14 */ |
||
249 | |||
250 | |||
251 | LLBL(K_GTP13PR_top): |
||
252 | MOVSS( S(0), XMM3 ) /* ox */ |
||
253 | MULSS( XMM1, XMM3 ) /* ox*m0 */ |
||
254 | MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */ |
||
255 | MOVSS( XMM2, D(2) ) /* m14->D(2) */ |
||
256 | |||
257 | |||
258 | MOVSS( XMM0, D(3) ) /* 0->D(3) */ |
||
259 | |||
260 | |||
261 | ADD_L( CONST(16), EDI ) |
||
262 | ADD_L( EAX, ESI ) |
||
263 | CMP_L( ECX, EDI ) |
||
264 | JNE( LLBL(K_GTP13PR_top) ) |
||
265 | |||
266 | |||
267 | POP_L ( EDI ) |
||
268 | POP_L ( ESI ) |
||
269 | RET |
||
270 | #undef FRAME_OFFSET |
||
271 | |||
272 | |||
273 | |||
/*
 * _mesa_sse_transform_points1_2d
 *
 * For each source vertex, reads one float ox = S(0) and writes two floats:
 *     D(0) = ox * M(0) + M(12)
 *     D(1) = ox * M(1) + M(13)
 * Only the low two lanes of the packed multiply/add are meaningful: MOVLPS
 * loads and stores just the low 64 bits of an XMM register.  The
 * destination pointer advances 16 bytes per vertex, the source pointer
 * advances by EAX bytes, and the loop ends when EDI equals ECX.  The
 * destination vector gets VEC_SIZE_2 OR-ed into its flags and its size set
 * to 2.  ESI and EDI are pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JZ; the steps that turn ECX into an end address
 * and EDI into the destination data pointer; the FRAME_OFFSET definition
 * matched by the #undef; and the K_GTP13P2DR_finish label.  Verify against
 * the repository copy.
 */
274 | GLOBL GLNAME(_mesa_sse_transform_points1_2d) |
||
4632 | Serge | 275 | HIDDEN(_mesa_sse_transform_points1_2d) |
276 | GLNAME(_mesa_sse_transform_points1_2d): |
||
277 | |||
4358 | Serge | 278 | |
279 | PUSH_L( ESI ) |
||
280 | PUSH_L( EDI ) |
||
281 | |||
282 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
283 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
284 | |||
285 | |||
286 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
287 | |||
288 | |||
289 | JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */ |
||
290 | |||
291 | |||
292 | OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
293 | |||
294 | |||
295 | MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
296 | |||
297 | |||
298 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
299 | |||
300 | |||
301 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
302 | |||
303 | |||
/* MOVLPS leaves the upper two lanes of XMM0/XMM1 untouched; those lanes
 * take part in MULPS/ADDPS below but are never stored. */
304 | MOVLPS( M(0), XMM0 ) /* m1 | m0 */ |
||
305 | MOVLPS( M(12), XMM1 ) /* m13 | m12 */ |
||
306 | |||
307 | |||
308 | LLBL(K_GTP13P2DR_top): |
||
309 | MOVSS( S(0), XMM2 ) /* ox */ |
||
310 | SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
||
311 | MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */ |
||
312 | ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */ |
||
313 | MOVLPS( XMM2, D(0) ) /* low two lanes ->D(1) | ->D(0) */ |
||
314 | |||
315 | |||
316 | ADD_L ( CONST(16), EDI ) |
||
317 | ADD_L ( EAX, ESI ) |
||
318 | CMP_L ( ECX, EDI ) |
||
319 | JNE ( LLBL(K_GTP13P2DR_top) ) |
||
320 | |||
321 | |||
322 | POP_L ( EDI ) |
||
323 | POP_L ( ESI ) |
||
324 | RET |
||
325 | #undef FRAME_OFFSET |
||
326 | |||
327 | |||
328 | |||
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * For each source vertex, reads one float ox = S(0) and writes two floats:
 *     D(0) = ox * M(0) + M(12)
 *     D(1) = M(13)
 * All arithmetic is scalar (MOVSS/MULSS/ADDSS).  The destination pointer
 * advances 16 bytes per vertex, the source pointer advances by EAX bytes,
 * and the loop ends when EDI equals ECX.  The destination vector gets
 * VEC_SIZE_2 OR-ed into its flags and its size set to 2.  ESI and EDI are
 * pushed on entry and popped before RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JZ; the steps that turn ECX into an end address
 * and EDI into the destination data pointer; the FRAME_OFFSET definition
 * matched by the #undef; and the K_GTP13P2DNRR_finish label.  Verify
 * against the repository copy.
 */
329 | GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot) |
||
4632 | Serge | 330 | HIDDEN(_mesa_sse_transform_points1_2d_no_rot) |
331 | GLNAME(_mesa_sse_transform_points1_2d_no_rot): |
||
332 | |||
4358 | Serge | 333 | |
334 | PUSH_L( ESI ) |
||
335 | PUSH_L( EDI ) |
||
336 | |||
337 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
338 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
339 | |||
340 | |||
341 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
342 | |||
343 | |||
344 | JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */ |
||
345 | |||
346 | |||
347 | OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
348 | |||
349 | |||
350 | MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
351 | |||
352 | |||
353 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
354 | |||
355 | |||
356 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
357 | |||
358 | |||
/* The matrix scalars are loaded once here, outside the loop. */
359 | MOVSS( M(0), XMM0 ) /* m0 */ |
||
360 | MOVSS( M(12), XMM1 ) /* m12 */ |
||
361 | MOVSS( M(13), XMM2 ) /* m13 */ |
||
362 | |||
363 | |||
364 | LLBL(K_GTP13P2DNRR_top): |
||
365 | MOVSS( S(0), XMM3 ) /* ox */ |
||
366 | MULSS( XMM0, XMM3 ) /* ox*m0 */ |
||
367 | ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */ |
||
368 | MOVSS( XMM3, D(0) ) /* ->D(0) */ |
||
369 | MOVSS( XMM2, D(1) ) /* m13->D(1) */ |
||
370 | |||
371 | |||
372 | ADD_L( CONST(16), EDI ) |
||
373 | ADD_L( EAX, ESI ) |
||
374 | CMP_L( ECX, EDI ) |
||
375 | JNE( LLBL(K_GTP13P2DNRR_top) ) |
||
376 | |||
377 | |||
378 | POP_L( EDI ) |
||
379 | POP_L( ESI ) |
||
380 | RET |
||
381 | #undef FRAME_OFFSET |
||
382 | |||
383 | |||
384 | |||
385 | |||
/*
 * _mesa_sse_transform_points1_3d
 *
 * For each source vertex, reads one float ox = S(0) and writes three
 * floats:
 *     D(0) = ox * M(0) + M(12)
 *     D(1) = ox * M(1) + M(13)
 *     D(2) = ox * M(2) + M(14)
 * All four lanes are computed with packed SSE; the fourth lane is never
 * stored.  The destination pointer advances 16 bytes per vertex, the
 * source pointer advances by EAX bytes, and the loop ends when EDI equals
 * ECX.  The destination vector gets VEC_SIZE_3 OR-ed into its flags and
 * its size set to 3.  ESI and EDI are pushed on entry and popped before
 * RET.
 *
 * NOTE(review): this listing looks incomplete -- several rows are empty
 * exactly where the code below needs an instruction.  Not visible here:
 * the loads of ESI (source vector), EDX (base register of M()) and EAX
 * (per-vertex source step, presumably the stride); the instruction that
 * sets the flags tested by JZ; the steps that turn ECX into an end address
 * and EDI into the destination data pointer; the FRAME_OFFSET definition
 * matched by the #undef; and the K_GTP13P3DR_finish label.  Verify against
 * the repository copy.
 */
386 | GLOBL GLNAME(_mesa_sse_transform_points1_3d) |
||
4632 | Serge | 387 | HIDDEN(_mesa_sse_transform_points1_3d) |
388 | GLNAME(_mesa_sse_transform_points1_3d): |
||
389 | |||
4358 | Serge | 390 | |
391 | PUSH_L( ESI ) |
||
392 | PUSH_L( EDI ) |
||
393 | |||
394 | |||
/* The +8 presumably compensates for the two pushes above -- confirm. */
395 | MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ |
||
396 | |||
397 | |||
398 | MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ |
||
399 | |||
400 | |||
401 | JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */ |
||
402 | |||
403 | |||
404 | OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ |
||
405 | |||
406 | |||
407 | MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ |
||
408 | |||
409 | |||
410 | MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ |
||
411 | |||
412 | |||
413 | ADD_L( EDI, ECX ) /* count += dest ptr */ |
||
414 | |||
415 | |||
416 | |||
/* MOVAPS faults on a memory operand that is not 16-byte aligned, so the
 * matrix behind M() must be 16-byte aligned. */
417 | MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ |
||
418 | MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ |
||
419 | |||
420 | |||
421 | LLBL(K_GTP13P3DR_top): |
||
422 | MOVSS( S(0), XMM2 ) /* ox */ |
||
423 | SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ |
||
424 | MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ |
||
425 | ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */ |
||
426 | MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/ |
||
/* Bring lane 2 down to lane 0 so the scalar store below can write it. */
427 | UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */ |
||
428 | MOVSS( XMM2, D(2) ) /* ox*m2+m14 ->D(2) */ |
||
429 | |||
430 | |||
431 | ADD_L( CONST(16), EDI ) |
||
432 | ADD_L( EAX, ESI ) |
||
433 | CMP_L( ECX, EDI ) |
||
434 | JNE( LLBL(K_GTP13P3DR_top) ) |
||
435 | |||
436 | |||
437 | POP_L( EDI ) |
||
438 | POP_L( ESI ) |
||
439 | RET |
||
440 | #undef FRAME_OFFSET |
||
/*
 * NOTE(review): this trailer has two #endif directives, but no matching
 * #if/#ifdef is visible anywhere in this listing.  The opening conditionals
 * were presumably on rows lost in extraction; verify against the
 * repository copy.
 */
441 | #endif |
||
442 | |||
443 | |||
/* Emits an empty .note.GNU-stack section; GNU toolchains read this as
 * "this object does not need an executable stack". */
444 | .section .note.GNU-stack,"",%progbits |
||
445 | #endif |
||
446 |