Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
 
2
/*
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
 * OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
25
 
26
/** TODO:
  * - insert PREFETCH instructions to avoid cache-misses !
27
  * - some more optimizations are possible...
28
  * - for 40-50% more performance in the SSE-functions, the
29
  *   data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
30
  */
31
32
 
33
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

   SEG_TEXT

/*
 * Addressing helpers: float number i (4 bytes each) relative to
 *   S(i) - the current source vertex (ESI)
 *   D(i) - the current destination vertex (EDI)
 *   M(i) - the transformation matrix (EDX)
 */
#define S(i) 	REGOFF(i * 4, ESI)
#define D(i) 	REGOFF(i * 4, EDI)
#define M(i) 	REGOFF(i * 4, EDX)
42
43
 
44
 
45
/*
 * _mesa_sse_transform_points2_general
 *
 * Transforms 2-component source vertices (ox, oy) by a general matrix and
 * writes 4-component results:
 *     dst[0..3] = ox * m[0..3] + oy * m[4..7] + m[12..15]
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM4 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     EDX = matrix                    XMM0..XMM2 = m[0..3], m[4..7], m[12..15]
 *
 * MOVAPS is used on the matrix and on every dest vertex, so both must be
 * 16-byte aligned.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
HIDDEN (_mesa_sse_transform_points2_general)
GLNAME( _mesa_sse_transform_points2_general ):

#define FRAME_OFFSET 8				/* two registers pushed below */
    PUSH_L    ( ESI )
    PUSH_L    ( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

    TEST_L( ECX, ECX )				/* MOV does not set flags */
    JZ( LLBL(K_GTP2GR_finish) )			/* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
    ADD_L( EDI, ECX ) 				/* count += dest ptr */

    MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  | m0 */
    MOVAPS( M(4), XMM1 )			/* m7  | m6  | m5  | m4 */
    MOVAPS( M(12), XMM2 )			/* m15 | m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP2GR_top):
    MOVSS( S(0), XMM3 )				/* ox */
    SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox */
    MULPS( XMM0, XMM3 )				/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
    MOVSS( S(1), XMM4 )				/* oy */
    SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy */
    MULPS( XMM1, XMM4 )				/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */

    ADDPS( XMM4, XMM3 )				/* ox*row0 + oy*row1 */
    ADDPS( XMM2, XMM3 )				/* + m15 | m14 | m13 | m12 */
    MOVAPS( XMM3, D(0) )

    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
    ADD_L     ( EAX, ESI )			/* next source vertex */
    CMP_L     ( ECX, EDI )
    JNE       ( LLBL(K_GTP2GR_top) )

LLBL(K_GTP2GR_finish):
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#undef FRAME_OFFSET
103
104
 
105
 
106
/*
 * _mesa_sse_transform_points2_identity
 *
 * Identity transform for 2-component vertices: copies the first two
 * floats of every source vertex to the matching dest vertex.  Nothing is
 * copied when source and dest share the same data pointer.
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix
 * (unused here), source GLvector4f.  ESI and EDI are saved and restored;
 * EAX, ECX and EDX are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     EDX = scratch for the 32-bit copies
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
HIDDEN(_mesa_sse_transform_points2_identity)
GLNAME( _mesa_sse_transform_points2_identity ):

#define FRAME_OFFSET 8				/* two registers pushed below */
    PUSH_L    ( ESI )
    PUSH_L    ( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

    TEST_L( ECX, ECX )				/* MOV does not set flags */
    JZ( LLBL(K_GTP2IR_finish) )			/* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
    ADD_L( EDI, ECX ) 				/* count += dest ptr */

    CMP_L( ESI, EDI )				/* in-place? then nothing to copy */
    JE( LLBL(K_GTP2IR_finish) )

ALIGNTEXT32
LLBL(K_GTP2IR_top):
    MOV_L     ( S(0), EDX )			/* copy x */
    MOV_L     ( EDX, D(0) )
    MOV_L     ( S(1), EDX )			/* copy y */
    MOV_L     ( EDX, D(1) )

    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
    ADD_L     ( EAX, ESI )			/* next source vertex */
    CMP_L     ( ECX, EDI )
    JNE       ( LLBL(K_GTP2IR_top) )

LLBL(K_GTP2IR_finish):
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
#undef FRAME_OFFSET
156
157
 
158
 
159
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * Transforms 2-component source vertices (ox, oy) by a scale+translate
 * matrix and writes 3-component results:
 *     dst[0] = ox * m0 + m12
 *     dst[1] = oy * m5 + m13
 *     dst[2] = m14
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM3 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     XMM1 = m5 | m0    XMM2 = m13 | m12    XMM3 = m14
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
HIDDEN(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):

#define FRAME_OFFSET 8				/* two registers pushed below */
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

    TEST_L( ECX, ECX )				/* MOV does not set flags */
    JZ( LLBL(K_GTP23DNRR_finish) ) 		/* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
    ADD_L( EDI, ECX ) 				/* count += dest ptr */

    XORPS    ( XMM0, XMM0 )			/* clean the working register */

    MOVSS    ( M(0), XMM1 )			/* - | - |  -  | m0  */
    MOVSS    ( M(5), XMM2 )			/* - | - |  -  | m5  */
    UNPCKLPS ( XMM2, XMM1 )			/* - | - | m5  | m0  */
    MOVLPS   ( M(12), XMM2 )			/* - | - | m13 | m12 */
    MOVSS    ( M(14), XMM3 )			/* - | - |  -  | m14 */

ALIGNTEXT32
LLBL(K_GTP23DNRR_top):
    MOVLPS   ( S(0), XMM0 )			/* - | - |  oy   | ox */
    MULPS    ( XMM1, XMM0 )			/* - | - | oy*m5 | ox*m0 */
    ADDPS    ( XMM2, XMM0 )			/* - | - | +m13  | +m12 */
    MOVLPS   ( XMM0, D(0) )			/* -> D(1) | -> D(0) */

    MOVSS    ( XMM3, D(2) )			/* -> D(2) */

    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
    ADD_L    ( EAX, ESI )			/* next source vertex */
    CMP_L    ( ECX, EDI )
    JNE      ( LLBL(K_GTP23DNRR_top) )

LLBL(K_GTP23DNRR_finish):
    POP_L    ( EDI )
    POP_L    ( ESI )
    RET
#undef FRAME_OFFSET
217
218
 
219
 
220
/*
 * _mesa_sse_transform_points2_perspective
 *
 * Transforms 2-component source vertices (ox, oy) by a perspective
 * matrix and writes 4-component results:
 *     dst[0] = ox * m0
 *     dst[1] = oy * m5
 *     dst[2] = m14
 *     dst[3] = 0
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM4 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     XMM0 = 0    XMM1 = m5 | m0    XMM3 = m14
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
HIDDEN(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):

#define FRAME_OFFSET 8				/* two registers pushed below */
    PUSH_L   ( ESI )
    PUSH_L   ( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

    TEST_L( ECX, ECX )				/* MOV does not set flags */
    JZ( LLBL(K_GTP23PR_finish) )		/* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
    ADD_L( EDI, ECX ) 				/* count += dest ptr */

    MOVSS    ( M(0), XMM1 )			/* -  | -  |  -  | m0  */
    MOVSS    ( M(5), XMM2 )			/* -  | -  |  -  | m5  */
    UNPCKLPS ( XMM2, XMM1 )			/* -  | -  | m5  | m0  */
    MOVSS    ( M(14), XMM3 )			/* m14 */
    XORPS    ( XMM0, XMM0 )			/* 0 | 0 | 0 | 0 */

ALIGNTEXT32
LLBL(K_GTP23PR_top):
    MOVLPS( S(0), XMM4 )			/* oy | ox */
    MULPS( XMM1, XMM4 )				/* oy*m5 | ox*m0 */
    MOVLPS( XMM4, D(0) )			/* ->D(1) | ->D(0) */
    MOVSS( XMM3, D(2) )				/* ->D(2) */
    MOVSS( XMM0, D(3) )				/* ->D(3) */

    ADD_L( CONST(16), EDI )			/* next dest vertex */
    ADD_L( EAX, ESI )				/* next source vertex */
    CMP_L( ECX, EDI )
    JNE( LLBL(K_GTP23PR_top) )

LLBL(K_GTP23PR_finish):
    POP_L    ( EDI )
    POP_L    ( ESI )
    RET
#undef FRAME_OFFSET
275
276
 
277
 
278
 
279
/*
 * _mesa_sse_transform_points2_2d
 *
 * Transforms 2-component source vertices (ox, oy) by a 2D matrix and
 * writes 2-component results:
 *     dst[0] = ox * m0 + oy * m4 + m12
 *     dst[1] = ox * m1 + oy * m5 + m13
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM4 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     XMM0 = m1 | m0    XMM1 = m5 | m4    XMM2 = m13 | m12
 * Only the low two lanes of the XMM registers carry meaningful data.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
HIDDEN(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):

#define FRAME_OFFSET 8				/* two registers pushed below */
    PUSH_L( ESI )
    PUSH_L( EDI )

    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

    TEST_L( ECX, ECX )				/* MOV does not set flags */
    JZ( LLBL(K_GTP23P2DR_finish) ) 		/* count was zero; go to finish */

    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
    ADD_L( EDI, ECX ) 				/* count += dest ptr */

    MOVLPS( M(0), XMM0 )			/* m1  | m0 */
    MOVLPS( M(4), XMM1 )			/* m5  | m4 */
    MOVLPS( M(12), XMM2 )			/* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DR_top):
    MOVSS( S(0), XMM3 )				/* ox */
    SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox */
    MULPS( XMM0, XMM3 )				/* ox*m1 | ox*m0 */

    MOVSS( S(1), XMM4 )				/* oy */
    SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy */
    MULPS( XMM1, XMM4 )				/* oy*m5 | oy*m4 */

    ADDPS( XMM4, XMM3 )				/* ox term + oy term */
    ADDPS( XMM2, XMM3 )				/* + m13 | m12 */
    MOVLPS( XMM3, D(0) )			/* ->D(1) | ->D(0) */

    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
    ADD_L    ( EAX, ESI )			/* next source vertex */
    CMP_L    ( ECX, EDI )
    JNE      ( LLBL(K_GTP23P2DR_top) )

LLBL(K_GTP23P2DR_finish):
    POP_L    ( EDI )
    POP_L    ( ESI )
    RET
#undef FRAME_OFFSET
338
339
 
340
 
341
 
342
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * Transforms 2-component source vertices (ox, oy) by a 2D scale+translate
 * matrix and writes 2-component results:
 *     dst[0] = ox * m0 + m12
 *     dst[1] = oy * m5 + m13
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM2 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     XMM1 = m5 | m0    XMM2 = m13 | m12
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
HIDDEN(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):

#define FRAME_OFFSET 8				/* two registers pushed below */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

	TEST_L( ECX, ECX )			/* MOV does not set flags */
	JZ( LLBL(K_GTP23P2DNRR_finish) ) 	/* count was zero; go to finish */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
	ADD_L( EDI, ECX ) 			/* count += dest ptr */

	MOVSS    ( M(0), XMM1 )			/* m0 */
	MOVSS    ( M(5), XMM2 )			/* m5 */
	UNPCKLPS ( XMM2, XMM1 )			/* m5 | m0 */
	MOVLPS   ( M(12), XMM2 )		/* m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P2DNRR_top):
	MOVLPS( S(0), XMM0 )			/* oy | ox */
	MULPS( XMM1, XMM0 )			/* oy*m5 | ox*m0 */
	ADDPS( XMM2, XMM0 )			/* +m13 | +m12 */
	MOVLPS( XMM0, D(0) )			/* ->D(1) | ->D(0) */

	ADD_L( CONST(16), EDI )			/* next dest vertex */
	ADD_L( EAX, ESI )			/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P2DNRR_top) )

LLBL(K_GTP23P2DNRR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
395
396
 
397
 
398
 
399
/*
 * _mesa_sse_transform_points2_3d
 *
 * Transforms 2-component source vertices (ox, oy) by a 3D matrix and
 * writes 3-component results:
 *     dst[0] = ox * m0 + oy * m4 + m12
 *     dst[1] = ox * m1 + oy * m5 + m13
 *     dst[2] = ox * m2 + oy * m6 + m14
 *
 * Stack arguments (offsets from xform_args.h): dest GLvector4f, matrix,
 * source GLvector4f.  ESI and EDI are saved and restored; EAX, ECX, EDX
 * and XMM0-XMM4 are used as scratch.
 *
 * Register roles inside the loop:
 *     ESI = current source vertex     EAX = source stride in bytes
 *     EDI = current dest vertex       ECX = one-past-the-end dest pointer
 *     XMM0..XMM2 = matrix floats 0-3, 4-7, 12-15 (top lane unused)
 *
 * The matrix is read with MOVAPS and must therefore be 16-byte aligned.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
HIDDEN(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):

#define FRAME_OFFSET 8				/* two registers pushed below */
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */

	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix */
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */

	TEST_L( ECX, ECX )			/* MOV does not set flags */
	JZ( LLBL(K_GTP23P3DR_finish) ) 	/* count was zero; go to finish */

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */

	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */

	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
	ADD_L( EDI, ECX ) 			/* count += dest ptr */

	MOVAPS( M(0), XMM0 )			/* m2  | m1  | m0 */
	MOVAPS( M(4), XMM1 )			/* m6  | m5  | m4 */
	MOVAPS( M(12), XMM2 )			/* m14 | m13 | m12 */

ALIGNTEXT32
LLBL(K_GTP23P3DR_top):
	MOVSS( S(0), XMM3 )			/* ox */
	SHUFPS( CONST(0x0), XMM3, XMM3 )	/* ox | ox | ox */
	MULPS( XMM0, XMM3 )			/* ox*m2 | ox*m1 | ox*m0 */

	MOVSS( S(1), XMM4 )			/* oy */
	SHUFPS( CONST(0x0), XMM4, XMM4 )	/* oy | oy | oy */
	MULPS( XMM1, XMM4 )			/* oy*m6 | oy*m5 | oy*m4 */

	ADDPS( XMM4, XMM3 )			/* ox term + oy term */
	ADDPS( XMM2, XMM3 )			/* + m14 | m13 | m12 */

	MOVLPS( XMM3, D(0) )			/* ->D(1) | ->D(0) */
	UNPCKHPS( XMM3, XMM3 )			/* bring lane 2 down to lane 0 */
	MOVSS( XMM3, D(2) )			/* ->D(2) */

	ADD_L( CONST(16), EDI )			/* next dest vertex */
	ADD_L( EAX, ESI )			/* next source vertex */
	CMP_L( ECX, EDI )
	JNE( LLBL(K_GTP23P3DR_top) )

LLBL(K_GTP23P3DR_finish):
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
461
#endif
462
463
 
464
	.section .note.GNU-stack,"",%progbits
465
#endif
466