// Extracted from the Kolibri OS Subversion repository (rev 5131, author clevermous).
/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

//----------------------------------------------------------------------
// 8-bpp particle queueing code.
//----------------------------------------------------------------------

	.text

// Offset from %esp to the stack argument once the prologue below has run:
// 12 bytes of saved registers (%ebp, %edi, %ebx) + 4 bytes of return address.
#define P	12+4

//----------------------------------------------------------------------
// D_DrawParticle
//
// Syntax: AT&T (GNU as), 32-bit x86, run through the C preprocessor.
//
// In:     P(%esp) = pointer to the particle record. The fields read are
//         pt_org (three single-precision floats) and pt_color (a
//         single-precision float).
// Saved:  %ebp, %edi, %ebx are pushed here and popped at LDone; the
//         2x2/3x3/4x4 cases additionally push/pop %esi.
// Clobb:  %eax, %ecx, %edx, flags, and the scratch variables DP_u, DP_v,
//         DP_Color, izi (DP_Pix as well on the LDefault path).
// x87:    every value pushed on the FPU stack is popped again on each
//         exit path (LPop6AndDone / LPop1AndDone handle the early outs).
//
// Flow:   local = p->org - r_origin; z/y/x = dot products with r_ppn,
//         r_pup, r_pright; reject if z < float_particle_z_clip; project to
//         integer (u,v); reject if outside the d_vrect* bounds; compute
//         the pixel and z-buffer addresses; clamp the size "pix"; then
//         dispatch to an unrolled 1x1..4x4 drawer or to LDefault.
//----------------------------------------------------------------------
	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer argument

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4	// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8	// r_ppn[2] | dot1 | dot0 | local[0] |
						//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
						  //  local[2]
	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
	fld		%st(0)		// z | z | local[0] | local[1] |
						//  local[2]
	fdivrs	float_1		// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
// The compare is issued here; its result is only read back (fnstsw) after
// the next dot product has been started.
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
						//  local[1] | 1/z

// Bit 0 of %ah is bit 8 of the FPU status word (C0), which fcomps sets
// when z is below the clip value (and also for an unordered compare).
// Six values are on the FPU stack at this point, hence LPop6AndDone.
	fnstsw	%ax
	testb	$1,%ah
	jnz		LPop6AndDone

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
						//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
						//  local[1] | 1/z
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1)	// x | y | 1/z
	fxch	%st(1)			// y | x | 1/z

// project the point
//	u = xcenter + x/z + 0.5,  v = ycenter - y/z + 0.5
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)			// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)			// y/z | x/z | 1/z
	fsubrs	C(ycenter)		// v | x/z | 1/z
	fxch	%st(1)			// x/z | v | 1/z
	fadds	C(xcenter)		// u | v | 1/z
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)			// v | u | 1/z
	fadds	float_point5	// v | u | 1/z
	fxch	%st(1)			// u | v | 1/z
	fadds	float_point5	// u | v | 1/z
	fxch	%st(2)			// 1/z | v | u
	fmuls	DP_32768		// 1/z * 0x8000 | v | u
	fxch	%st(2)			// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u			// v | 1/z * 0x8000
	fistpl	DP_v			// 1/z * 0x8000

	movl	DP_u,%eax		// %eax = u
	movl	DP_v,%edx		// %edx = v

// if ((v > d_vrectbottom_particle) ||
// 	(u > d_vrectright_particle) ||
// 	(v < d_vrecty) ||
// 	(u < d_vrectx))
// {
// 	continue;
// }
// One value (1/z * 0x8000) is still on the FPU stack, hence LPop1AndDone.

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

	flds	pt_color(%edi)	// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color		// 1/z * 0x8000

// %edi = d_viewbuffer + d_scantable[v] + u          (destination pixel)
// %edx = d_pzbuffer + v * d_zrowbytes + u * 2       (16-bit z entry)
	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx
	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel

	imull	C(d_zrowbytes),%edx		// point to the z pixel

	leal	(%edx,%eax,2),%edx
	movl	C(d_pzbuffer),%eax

	fistpl	izi				// FPU stack is empty from here on

	addl	%ebx,%edi
	addl	%eax,%edx

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp		// %bp is the z value compared/stored by the drawers

// if (pix < d_pix_min)
// 		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

// A non-positive pix can only come out of the clamp above if d_pix_min is
// not positive (its value is set outside this file). Zero would index the
// slot just before DP_EntryTable, and any non-positive value would make
// the decrement-to-zero counters of the generic loop wrap, so draw nothing.
	testl	%eax,%eax
	jle		LDone

	movb	DP_Color,%ch	// %ch = low byte of the converted color

// Register state handed to DP_1x1..DP_4x4 and LDefault:
//   %eax = pix, %edx -> z-buffer entry, %edi -> destination pixel,
//   %bp  = low 16 bits of izi, %ch = color byte.
// The unrolled drawers are used only when d_y_aspect_shift is 0 and
// pix <= 4; everything else goes through LDefault.
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// Indirect jump through a table of 4-byte code addresses indexed by
// pix - 1 (pix is 1..4 here). AT&T syntax requires the '*' prefix on an
// indirect jump operand. DP_EntryTable is defined outside this file;
// presumably its entries are DP_1x1..DP_4x4 -- confirm against its definition.
	jmp		*DP_EntryTable-4(,%eax,4)

// 1x1
// Entry (as set up before the dispatch jump in D_DrawParticle):
//   %edx -> 16-bit z-buffer entry, %edi -> destination pixel,
//   %bp = z value to test/store, %ch = color byte.
// The pixel is written only if the stored z is <= %bp (signed 16-bit
// compare); the stored z is updated together with the pixel.
.globl	DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone			// stored z > new z: leave the pixel alone
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
// Entry: %edx -> z-buffer entry of the top-left pixel, %edi -> top-left
// destination pixel, %bp = z value to test/store, %ch = color byte.
// Uses %ebx = screenwidth (pixel row stride in bytes) and
// %esi = d_zrowbytes (z-buffer row stride in bytes); %esi is saved and
// restored here because the common prologue/epilogue does not cover it.
// Each pixel is z-tested independently: it is written only when the
// stored z is <= %bp (signed 16-bit compare).
.globl	DP_2x2
DP_2x2:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
// Entry: %edx -> z-buffer entry of the top-left pixel, %edi -> top-left
// destination pixel, %bp = z value to test/store, %ch = color byte.
// Uses %ebx = screenwidth and %esi = d_zrowbytes as row strides; rows 1
// and 2 are reached with index scales 1 and 2 on those strides. %esi is
// saved and restored here. Each pixel is written only when the stored z
// is <= %bp (signed 16-bit compare).
.globl	DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

// row 2
	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
// Entry: %edx -> z-buffer entry of the top-left pixel, %edi -> top-left
// destination pixel, %bp = z value to test/store, %ch = color byte.
// Uses %ebx = screenwidth and %esi = d_zrowbytes as row strides; %esi is
// saved and restored here. Rows 0 and 1 are addressed from the entry
// pointers; both pointers are then advanced by two rows and the same
// addressing is reused for rows 2 and 3. Each pixel is written only when
// the stored z is <= %bp (signed 16-bit compare).
.globl	DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// step both pointers down two rows
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

// row 3
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
// Entry: %eax = pix, %edx -> z-buffer entry of the top-left pixel,
// %edi -> top-left destination pixel, %bp = z value to test/store,
// %ch = color byte.
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix		// kept in memory to reload the column counter per row
	movb	C(d_y_aspect_shift),%cl	// only %cl is written; the color in %ch survives
	shll	%cl,%ebx		// %ebx = count (number of rows)

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// 	for (i=0 ; i<pix ; i++)
// 	{
// 		if (pz[i] <= izi)
// 		{
// 			pz[i] = izi;
// 			pdest[i] = color;
// 		}
// 	}
// }

LGenRowLoop:
	movl	DP_Pix,%eax

// %eax counts down from pix to 1 and addresses column %eax - 1, so each
// row is filled from its rightmost pixel to its leftmost.
LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip			// stored z > new z: leave the pixel alone
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax			// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx			// --count
	jnz		LGenRowLoop

// Common exit: undo the three pushes made at function entry.
LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// Early-out exits that still have values on the FPU stack:
// LPop6AndDone discards six of them, LPop1AndDone discards one.
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// id386