Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5131 clevermous 1
/*
2
Copyright (C) 1996-1997 Id Software, Inc.
3
 
4
This program is free software; you can redistribute it and/or
5
modify it under the terms of the GNU General Public License
6
as published by the Free Software Foundation; either version 2
7
of the License, or (at your option) any later version.
8
 
9
This program is distributed in the hope that it will be useful,
10
but WITHOUT ANY WARRANTY; without even the implied warranty of
11
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
 
13
See the GNU General Public License for more details.
14
 
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software
17
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18
 
19
*/
20
//
21
// d_spr8.s
22
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
23
//
24
 
25
#include "asm_i386.h"
26
#include "quakeasm.h"
27
#include "asm_draw.h"
28
 
29
#if id386
30
 
31
//----------------------------------------------------------------------
32
// 8-bpp horizontal span drawing code for polygons, with transparency.
33
//----------------------------------------------------------------------
34
 
35
	.text
36
 
37
// out-of-line, rarely-needed clamping code
38
 
39
// ----------------------------------------------------------------------
// Clamp stubs. The span code branches here only when a texture
// coordinate is out of range; each stub writes the clamped value into
// the register that was being tested and jumps back to the matching
// LClampReentryN label in the main path.
// ----------------------------------------------------------------------

// Stubs 0 and 1: s (%esi) and t (%edx) at the start of a span. They are
// entered through a "ja" whose compare flags are still intact, so a
// signed test on those same flags separates "above the extent" from
// "negative".
LClampHighOrLow0:
	jle		LClampZero0
LClampHigh0:
	movl	C(bbextents),%esi	// s above range: use bbextents
	jmp		LClampReentry0
LClampZero0:
	xorl	%esi,%esi			// s negative: use 0
	jmp		LClampReentry0

LClampHighOrLow1:
	jle		LClampZero1
LClampHigh1:
	movl	C(bbextentt),%edx	// t above range: use bbextentt
	jmp		LClampReentry1
LClampZero1:
	xorl	%edx,%edx			// t negative: use 0
	jmp		LClampReentry1

// Stubs 2 and 3: snext (%ebp) and tnext (%ecx) at the end of a full
// 8-pixel segment. The lower bound used by the callers is 2048.
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2
LClampLow2:
	movl	$2048,%ebp
	jmp		LClampReentry2

LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3
LClampLow3:
	movl	$2048,%ecx
	jmp		LClampReentry3

// Stubs 4 and 5: snext (%eax) and tnext (%ebx) at the end of the last,
// variable-length segment. Same bounds as stubs 2 and 3.
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4
LClampLow4:
	movl	$2048,%eax
	jmp		LClampReentry4

LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5
LClampLow5:
	movl	$2048,%ebx
	jmp		LClampReentry5
82
 
83
 
84
// pspans: %esp-relative offset of the span-list argument once the prologue
// below has run: 4 bytes of return address + 16 bytes for the four pushed
// registers.
#define pspans	4+16
85
 
86
	.align 4
87
.globl C(D_SpriteDrawSpans)
88
// D_SpriteDrawSpans: takes one stack argument, a pointer to a list of span
// descriptors (read through the sspan_t_u / sspan_t_v / sspan_t_count
// offsets). For every span with a positive count it walks the pixels,
// z-tests each one against the z-buffer, skips texels equal to
// TRANSPARENT_COLOR, and otherwise stores both the texel and its z value.
// A negative count ends the list. %ebp, %edi, %esi and %ebx are saved here
// and restored before returning.
C(D_SpriteDrawSpans):
89
	pushl	%ebp				// preserve caller's stack frame
90
	pushl	%edi
91
	pushl	%esi				// preserve register variables
92
	pushl	%ebx
93
 
94
//
95
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
96
// and span list pointers, and 1/z step in 0.32 fixed-point
97
//
98
// FIXME: any overlap from rearranging?
99
	flds	C(d_sdivzstepu)
100
	fmuls	fp_8				// sstep*8
101
	movl	C(cacheblock),%edx
102
	flds	C(d_tdivzstepu)
103
	fmuls	fp_8				// tstep*8 | sstep*8
104
	movl	pspans(%esp),%ebx	// point to the first span descriptor
105
	flds	C(d_zistepu)
106
	fmuls	fp_8				// zistep*8 | tstep*8 | sstep*8
107
	movl	%edx,pbase			// pbase = cacheblock
108
	flds	C(d_zistepu)
109
	fmuls	fp_64kx64k			// zistep scaled for the integer izistep
110
	fxch	%st(3)				// sstep*8 | zistep*8 | tstep*8 | scaled zistep
111
	fstps	sdivz8stepu
112
	fstps	zi8stepu
113
	fstps	tdivz8stepu
114
	fistpl	izistep				// FP stack is empty again after this store
115
	movl	izistep,%eax
116
	rorl	$16,%eax		// put upper 16 bits in low word
117
	movl	sspan_t_count(%ebx),%ecx
118
	movl	%eax,izistep		// izistep is kept in this rotated form
119
 
120
	cmpl	$0,%ecx
121
	jle		LNextSpan			// first span has no pixels: skip to the next
122
 
123
// Per-span setup. On entry %ebx points at the current span descriptor and
// %ecx holds its (positive) pixel count.
LSpanLoop:
124
 
125
//
126
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
127
// initial s and t values
128
//
129
// FIXME: pipeline FILD?
130
	fildl	sspan_t_v(%ebx)
131
	fildl	sspan_t_u(%ebx)
132
 
133
	fld		%st(1)			// dv | du | dv
134
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
135
	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
136
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
137
	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
138
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
139
							//  dv*d_sdivzstepv | du | dv
140
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
141
							//  dv*d_sdivzstepv | du | dv
142
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
143
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
144
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
145
							//  du*d_tdivzstepu | du | dv
146
	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
147
							//  du*d_tdivzstepu | du | dv
148
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
149
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
150
							//  du*d_tdivzstepu | du | dv
151
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
152
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
153
	fadds	C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
154
							//  du*d_sdivzstepu; stays in %st(2) at end
155
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
156
							//  s/z
157
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
158
							//  du*d_tdivzstepu | du | s/z
159
	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
160
							//  du*d_tdivzstepu | du | s/z
161
	faddp	%st(0),%st(2)	// dv*d_zistepv |
162
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
163
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
164
							//  dv*d_zistepv | s/z
165
	fmuls	C(d_zistepu)		// du*d_zistepu |
166
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
167
							//  dv*d_zistepv | s/z
168
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
169
							//  du*d_zistepu | dv*d_zistepv | s/z
170
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
171
							//  du*d_tdivzstepu; stays in %st(1) at end
172
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
173
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
174
 
175
	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
176
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
177
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
178
							//  du*d_zistepu; stays in %st(0) at end
179
							// 1/z | fp_64k | t/z | s/z
180
 
181
	fld		%st(0)			// FIXME: get rid of stall on FMUL?
182
	fmuls	fp_64kx64k		// scaled copy of 1/z, stored to izi below
183
	fxch	%st(1)
184
 
185
//
186
// calculate and clamp s & t
187
//
188
	fdivr	%st(0),%st(2)	// 1/z | z*64k | t/z | s/z
189
	fxch	%st(1)
190
 
191
	fistpl	izi				// 0.32 fixed-point 1/z
192
	movl	izi,%ebp
193
 
194
//
195
// set pz to point to the first z-buffer pixel in the span
196
//
197
	rorl	$16,%ebp		// put upper 16 bits in low word
198
	movl	sspan_t_v(%ebx),%eax
199
	movl	%ebp,izi		// izi is kept in this rotated form
200
	movl	sspan_t_u(%ebx),%ebp
201
	imull	C(d_zrowbytes)	// %eax = v * d_zrowbytes (one-operand form,
							//  so %edx is overwritten too)
202
	shll	$1,%ebp					// a word per pixel
203
	addl	C(d_pzbuffer),%eax
204
	addl	%ebp,%eax
205
	movl	%eax,pz
206
 
207
//
208
// point %edi to the first pixel in the span
209
//
210
	movl	C(d_viewbuffer),%ebp
211
	movl	sspan_t_v(%ebx),%eax
212
	pushl	%ebx		// preserve spans pointer
213
	movl	C(tadjust),%edx		// %edx / %esi get s and t added at
214
	movl	C(sadjust),%esi		//  LFDIVInFlight1
215
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
216
	addl	%ebp,%edi
217
	movl	sspan_t_u(%ebx),%ebp
218
	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];
219
 
220
//
221
// now start the FDIV for the end of the span
222
//
// Three cases, chosen by the pixel count in %ecx:
//   more than 8 pixels -> LSetupNotLast1 (step 1/z, t/z, s/z by 8 pixels)
//   exactly 1 pixel    -> LCleanup1 (no divide is started)
//   2 to 8 pixels      -> fall through (step by count-1 pixels)
// In the last two cases %ecx is left holding count-1.
223
	cmpl	$8,%ecx
224
	ja		LSetupNotLast1
225
 
226
	decl	%ecx
227
	jz		LCleanup1		// if only one pixel, no need to start an FDIV
228
	movl	%ecx,spancountminus1
229
 
230
// finish up the s and t calcs
231
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
232
 
233
	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
234
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
235
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
236
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
237
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
238
	fistpl	s				// 1/z | t | t/z | s/z
239
	fistpl	t				// 1/z | t/z | s/z
240
 
241
	fildl	spancountminus1
242
 
// advance 1/z, t/z and s/z by spancountminus1 steps, to the last pixel
243
	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
244
	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
245
	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
246
	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
247
	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
248
	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
249
	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
250
							//  _d_tdivzstepu*scm1
251
	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
252
							//  _d_tdivzstepu*scm1
253
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
254
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
255
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
256
	faddp	%st(0),%st(3)	// 1/z | t/z | s/z, all advanced
257
 
258
	flds	fp_64k
259
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
260
							//  overlap
261
	jmp		LFDIVInFlight1
262
 
// One-pixel span: compute s and t only. The FP stack is left at
// 1/z | t/z | s/z with no divide in flight.
263
LCleanup1:
264
// finish up the s and t calcs
265
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
266
 
267
	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
268
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
269
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
270
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
271
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
272
	fistpl	s				// 1/z | t | t/z | s/z
273
	fistpl	t				// 1/z | t/z | s/z
274
	jmp		LFDIVInFlight1
275
 
276
	.align	4
// More than 8 pixels: compute s and t, then step 1/z, t/z and s/z by the
// precomputed 8-pixel steps and start the divide for the segment end.
277
LSetupNotLast1:
278
// finish up the s and t calcs
279
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
280
 
281
	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
282
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
283
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
284
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
285
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
286
	fistpl	s				// 1/z | t | t/z | s/z
287
	fistpl	t				// 1/z | t/z | s/z
288
 
289
	fadds	zi8stepu		// 1/z += zi8stepu
290
	fxch	%st(2)
291
	fadds	sdivz8stepu		// s/z += sdivz8stepu
292
	fxch	%st(2)
293
	flds	tdivz8stepu
294
	faddp	%st(0),%st(2)	// t/z += tdivz8stepu
295
	flds	fp_64k
296
	fdiv	%st(1),%st(0)	// z = 1/1/z
297
							// this is what we've gone to all this trouble to
298
							//  overlap
299
// Integer work done while the divide (if one was started) completes:
// add the adjust values to s and t, clamp them, split them into integer and
// fractional parts, and compute the address of the first source texel.
// %esi / %edx arrive holding sadjust / tadjust.
LFDIVInFlight1:
300
 
301
	addl	s,%esi
302
	addl	t,%edx
303
	movl	C(bbextents),%ebx
304
	movl	C(bbextentt),%ebp	// %ebp keeps bbextentt until it is next loaded
305
	cmpl	%ebx,%esi
306
	ja		LClampHighOrLow0	// unsigned test catches both > extent and < 0
307
LClampReentry0:
308
	movl	%esi,s
309
	movl	pbase,%ebx
310
	shll	$16,%esi			// low 16 bits of s, left-justified
311
	cmpl	%ebp,%edx
312
	movl	%esi,sfracf			// movl leaves the cmpl flags intact for the ja
313
	ja		LClampHighOrLow1
314
LClampReentry1:
315
	movl	%edx,t
316
	movl	s,%esi					// sfrac = scans->sfrac;
317
	shll	$16,%edx			// low 16 bits of t, left-justified
318
	movl	t,%eax					// tfrac = scans->tfrac;
319
	sarl	$16,%esi			// integer part of s
320
	movl	%edx,tfracf
321
 
322
//
323
// calculate the texture starting address
324
//
325
	sarl	$16,%eax			// integer part of t
326
	addl	%ebx,%esi
327
	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
328
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
329
									//           ((tfrac >> 16) * cachewidth);
330
 
331
//
332
// determine whether last span or not
333
//
// %ecx is either the full count (more than 8) or count-1 (8 or fewer)
334
	cmpl	$8,%ecx
335
	jna		LLastSegment
336
 
337
//
338
// not the last segment; do full 8-wide segment
339
//
340
LNotLastSegment:
341
 
342
//
343
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
344
// get there
345
//
346
 
347
// pick up after the FDIV that was left in flight previously
348
 
// FP stack on entry: z*64k | 1/z | t/z | s/z, with the last three already
// advanced to the end of this segment
349
	fld		%st(0)			// duplicate it
350
	fmul	%st(4),%st(0)	// s = s/z * z
351
	fxch	%st(1)
352
	fmul	%st(3),%st(0)	// t = t/z * z
353
	fxch	%st(1)
354
	fistpl	snext
355
	fistpl	tnext			// FP stack is now 1/z | t/z | s/z
356
	movl	snext,%eax
357
	movl	tnext,%edx
358
 
359
	subl	$8,%ecx		// count off this segments' pixels
360
	movl	C(sadjust),%ebp
361
	pushl	%ecx		// remember count of remaining pixels
362
	movl	C(tadjust),%ecx
363
 
364
	addl	%eax,%ebp		// %ebp = snext + sadjust
365
	addl	%edx,%ecx		// %ecx = tnext + tadjust
366
 
367
	movl	C(bbextents),%eax
368
	movl	C(bbextentt),%edx
369
 
// clamp snext to the range 2048..bbextents
370
	cmpl	$2048,%ebp
371
	jl		LClampLow2
372
	cmpl	%eax,%ebp
373
	ja		LClampHigh2
374
LClampReentry2:
375
 
// clamp tnext to the range 2048..bbextentt
376
	cmpl	$2048,%ecx
377
	jl		LClampLow3
378
	cmpl	%edx,%ecx
379
	ja		LClampHigh3
380
LClampReentry3:
381
 
382
	movl	%ebp,snext
383
	movl	%ecx,tnext
384
 
385
	subl	s,%ebp			// s delta across the 8-pixel segment
386
	subl	t,%ecx			// t delta across the 8-pixel segment
387
 
388
//
389
// set up advancetable
390
//
391
	movl	%ecx,%eax
392
	movl	%ebp,%edx
393
	sarl	$19,%edx			// integer part of sstep: >> 16, and >> 3
								//  more to divide the 8-pixel delta by 8
394
	movl	C(cachewidth),%ebx
395
	sarl	$19,%eax			// integer part of tstep, same scaling
396
	jz		LIsZero				// skip the multiply when it is zero
397
	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
398
LIsZero:
399
	addl	%edx,%eax			// add in sstep
400
								// (tstep >> 16) * cachewidth + (sstep >> 16);
401
	movl	tfracf,%edx
402
	movl	%eax,advancetable+4	// advance base in t
403
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
404
								//  (sstep >> 16);
405
	shll	$13,%ebp			// left-justify sstep fractional part
406
	movl	%ebp,sstep
407
	movl	sfracf,%ebx
408
	shll	$13,%ecx			// left-justify tstep fractional part
409
	movl	%eax,advancetable	// advance extra in t
410
	movl	%ecx,tstep
411
 
412
	// Pixels 0-4 of a full 8-pixel segment.
	// Registers from here on: %ecx = pz, %bp = z value to test and store,
	// %esi = source texel, %edi = destination, %ebx / %edx = s / t fractions.
	movl	pz,%ecx
413
	movl	izi,%ebp
414
 
415
	cmpw	(%ecx),%bp
416
	jl		Lp1					// %bp below the stored z (signed): skip
417
	movb	(%esi),%al			// get first source texel
418
	cmpb	$(TRANSPARENT_COLOR),%al
419
	jz		Lp1					// transparent texel: skip
420
	movw	%bp,(%ecx)			// update the z-buffer
421
	movb	%al,(%edi)			// store first dest pixel
422
Lp1:
423
	addl	izistep,%ebp
424
	adcl	$0,%ebp				// fold the carry back into the low bit; izi
								//  and izistep are both stored rotated by 16
425
	addl	tstep,%edx			// advance tfrac fractional part by tstep frac
426
 
427
	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
428
	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
429
	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel
430
 
// pixels 1-4 repeat the same test / store / step sequence at fixed offsets
431
	cmpw	2(%ecx),%bp
432
	jl		Lp2
433
	movb	(%esi),%al
434
	cmpb	$(TRANSPARENT_COLOR),%al
435
	jz		Lp2
436
	movw	%bp,2(%ecx)
437
	movb	%al,1(%edi)
438
Lp2:
439
	addl	izistep,%ebp
440
	adcl	$0,%ebp
441
	addl	tstep,%edx
442
	sbbl	%eax,%eax
443
	addl	sstep,%ebx
444
	adcl	advancetable+4(,%eax,4),%esi
445
 
446
	cmpw	4(%ecx),%bp
447
	jl		Lp3
448
	movb	(%esi),%al
449
	cmpb	$(TRANSPARENT_COLOR),%al
450
	jz		Lp3
451
	movw	%bp,4(%ecx)
452
	movb	%al,2(%edi)
453
Lp3:
454
	addl	izistep,%ebp
455
	adcl	$0,%ebp
456
	addl	tstep,%edx
457
	sbbl	%eax,%eax
458
	addl	sstep,%ebx
459
	adcl	advancetable+4(,%eax,4),%esi
460
 
461
	cmpw	6(%ecx),%bp
462
	jl		Lp4
463
	movb	(%esi),%al
464
	cmpb	$(TRANSPARENT_COLOR),%al
465
	jz		Lp4
466
	movw	%bp,6(%ecx)
467
	movb	%al,3(%edi)
468
Lp4:
469
	addl	izistep,%ebp
470
	adcl	$0,%ebp
471
	addl	tstep,%edx
472
	sbbl	%eax,%eax
473
	addl	sstep,%ebx
474
	adcl	advancetable+4(,%eax,4),%esi
475
 
476
	cmpw	8(%ecx),%bp
477
	jl		Lp5
478
	movb	(%esi),%al
479
	cmpb	$(TRANSPARENT_COLOR),%al
480
	jz		Lp5
481
	movw	%bp,8(%ecx)
482
	movb	%al,4(%edi)
483
Lp5:
484
	addl	izistep,%ebp
485
	adcl	$0,%ebp
486
	addl	tstep,%edx
487
	sbbl	%eax,%eax
488
	addl	sstep,%ebx
489
	adcl	advancetable+4(,%eax,4),%esi
490
 
491
//
492
// start FDIV for end of next segment in flight, so it can overlap
493
//
// %eax receives the remaining pixel count pushed at the top of the segment.
// If it is 8 or fewer it is decremented here, so the value pushed again at
// LFDIVInFlight2 is count-1 in that case.
494
	popl	%eax
495
	cmpl	$8,%eax			// more than one segment after this?
496
	ja		LSetupNotLast2	// yes
497
 
498
	decl	%eax
499
	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
500
	movl	%eax,spancountminus1
501
	fildl	spancountminus1
502
 
// advance 1/z, t/z and s/z by spancountminus1 steps, to the last pixel
503
	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
504
	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
505
	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
506
	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
507
	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
508
	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
509
	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
510
	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
511
	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
512
	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
513
	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
514
	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
515
	faddp	%st(0),%st(4)	// 64k
516
 
517
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
518
							//  overlap
519
	jmp		LFDIVInFlight2
520
 
521
	.align	4
// another full segment follows: step by the precomputed 8-pixel steps
522
LSetupNotLast2:
523
	fadds	zi8stepu		// 1/z += zi8stepu
524
	fxch	%st(2)
525
	fadds	sdivz8stepu		// s/z += sdivz8stepu
526
	fxch	%st(2)
527
	flds	tdivz8stepu
528
	faddp	%st(0),%st(2)	// t/z += tdivz8stepu
529
	flds	fp_64k
530
	fdiv	%st(1),%st(0)	// z = 1/1/z
531
							// this is what we've gone to all this trouble to
532
							//  overlap
533
// Pixels 5-7 of a full 8-pixel segment, then the bookkeeping that carries
// state over to the next segment.
LFDIVInFlight2:
534
	pushl	%eax			// save the remaining count; %eax is reused below
535
 
536
	cmpw	10(%ecx),%bp
537
	jl		Lp6
538
	movb	(%esi),%al
539
	cmpb	$(TRANSPARENT_COLOR),%al
540
	jz		Lp6
541
	movw	%bp,10(%ecx)
542
	movb	%al,5(%edi)
543
Lp6:
544
	addl	izistep,%ebp
545
	adcl	$0,%ebp
546
	addl	tstep,%edx
547
	sbbl	%eax,%eax
548
	addl	sstep,%ebx
549
	adcl	advancetable+4(,%eax,4),%esi
550
 
551
	cmpw	12(%ecx),%bp
552
	jl		Lp7
553
	movb	(%esi),%al
554
	cmpb	$(TRANSPARENT_COLOR),%al
555
	jz		Lp7
556
	movw	%bp,12(%ecx)
557
	movb	%al,6(%edi)
558
Lp7:
559
	addl	izistep,%ebp
560
	adcl	$0,%ebp
561
	addl	tstep,%edx
562
	sbbl	%eax,%eax
563
	addl	sstep,%ebx
564
	adcl	advancetable+4(,%eax,4),%esi
565
 
566
	cmpw	14(%ecx),%bp
567
	jl		Lp8
568
	movb	(%esi),%al
569
	cmpb	$(TRANSPARENT_COLOR),%al
570
	jz		Lp8
571
	movw	%bp,14(%ecx)
572
	movb	%al,7(%edi)
573
Lp8:
574
	addl	izistep,%ebp
575
	adcl	$0,%ebp
576
	addl	tstep,%edx
577
	sbbl	%eax,%eax
578
	addl	sstep,%ebx
579
	adcl	advancetable+4(,%eax,4),%esi
580
 
581
	addl	$8,%edi				// 8 destination pixels consumed
582
	addl	$16,%ecx			// 8 z-buffer words consumed
583
	movl	%edx,tfracf
584
	movl	snext,%edx
585
	movl	%ebx,sfracf
586
	movl	tnext,%ebx
587
	movl	%edx,s				// s = snext
588
	movl	%ebx,t				// t = tnext
589
 
590
	movl	%ecx,pz
591
	movl	%ebp,izi
592
 
593
	popl	%ecx				// retrieve count
594
 
595
//
596
// determine whether last span or not
597
//
598
	cmpl	$8,%ecx				// are there multiple segments remaining?
599
	ja		LNotLastSegment		// yes
600
 
601
//
602
// last segment of scan
603
//
// %ecx holds the number of pixels left minus one (0..7)
604
LLastSegment:
605
 
606
//
607
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
608
// get there. The number of pixels left is variable, and we want to land on the
609
// last pixel, not step one past it, so we can't run into arithmetic problems
610
//
611
	testl	%ecx,%ecx
612
	jz		LNoSteps		// just draw the last pixel and we're done
613
 
614
// pick up after the FDIV that was left in flight previously
615
 
616
 
617
	fld		%st(0)			// duplicate it
618
	fmul	%st(4),%st(0)	// s = s/z * z
619
	fxch	%st(1)
620
	fmul	%st(3),%st(0)	// t = t/z * z
621
	fxch	%st(1)
622
	fistpl	snext
623
	fistpl	tnext			// FP stack is now 1/z | t/z | s/z
624
 
625
	movl	C(tadjust),%ebx
626
	movl	C(sadjust),%eax
627
 
628
	addl	snext,%eax		// %eax = snext + sadjust
629
	addl	tnext,%ebx		// %ebx = tnext + tadjust
630
 
631
	movl	C(bbextents),%ebp
632
	movl	C(bbextentt),%edx
633
 
// clamp snext to the range 2048..bbextents
634
	cmpl	$2048,%eax
635
	jl		LClampLow4
636
	cmpl	%ebp,%eax
637
	ja		LClampHigh4
638
LClampReentry4:
639
	movl	%eax,snext
640
 
// clamp tnext to the range 2048..bbextentt
641
	cmpl	$2048,%ebx
642
	jl		LClampLow5
643
	cmpl	%edx,%ebx
644
	ja		LClampHigh5
645
LClampReentry5:
646
 
647
	cmpl	$1,%ecx			// don't bother
648
	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
649
							//  of the segment length
650
	subl	s,%eax
651
	subl	t,%ebx
652
 
653
	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
654
	addl	%ebx,%ebx		//  reciprocal yields 16.48
// one-operand imull leaves the 64-bit product in %edx:%eax; the high half
// in %edx is the step that is kept
655
	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
656
	movl	%edx,%ebp
657
 
658
	movl	%ebx,%eax
659
	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
660
 
661
// On entry: %ebp = sstep, %edx = tstep, %ecx = pixels left minus one.
// Pushes the handler address taken from spr8entryvec_table, builds
// advancetable and the fractional steps, then "returns" into that handler.
LSetEntryvec:
662
//
663
// set up advancetable
664
//
665
	movl	spr8entryvec_table(,%ecx,4),%ebx
666
	movl	%edx,%eax
667
	pushl	%ebx				// entry point into code for RET later
668
	movl	%ebp,%ecx
669
	sarl	$16,%ecx			// sstep >>= 16;
670
	movl	C(cachewidth),%ebx
671
	sarl	$16,%edx			// tstep >>= 16;
672
	jz		LIsZeroLast			// skip the multiply when it is zero
673
	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
674
LIsZeroLast:
675
	addl	%ecx,%edx			// add in sstep
676
								// (tstep >> 16) * cachewidth + (sstep >> 16);
677
	movl	tfracf,%ecx
678
	movl	%edx,advancetable+4	// advance base in t
679
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
680
								//  (sstep >> 16);
681
	shll	$16,%ebp			// left-justify sstep fractional part
682
	movl	sfracf,%ebx
683
	shll	$16,%eax			// left-justify tstep fractional part
684
	movl	%edx,advancetable	// advance extra in t
685
 
686
	movl	%eax,tstep
687
	movl	%ebp,sstep
688
	movl	%ecx,%edx			// %edx = tfracf, as the pixel code expects
689
 
690
	movl	pz,%ecx
691
	movl	izi,%ebp
692
 
693
	ret							// jump to the number-of-pixels handler
694
 
695
//----------------------------------------
696
 
697
// Exactly one pixel is left, so there is nothing to step: bias the
// destination and z-buffer pointers for the hardwired +7 / +14 offsets used
// at LEndSpan and draw that pixel there.
//
// LEndSpan z-tests and updates the z-buffer with %bp, so %ebp must hold izi.
// That is already true when we get here from the 8-pixel segment loop (which
// stores %ebp to izi just before leaving), but a span that is one pixel long
// from the start arrives with %ebp still holding bbextentt from the clamp
// code at LFDIVInFlight1. Reloading izi is correct on both paths.
LNoSteps:
	movl	pz,%ecx
	movl	izi,%ebp		// z value for the compare/store at LEndSpan
	subl	$7,%edi			// adjust for hardwired offset
	subl	$14,%ecx
	jmp		LEndSpan
702
 
703
 
704
// Two pixels left, so one step: the step is simply the whole delta and no
// reciprocal multiply is needed. Leaves sstep in %ebp and tstep in %edx,
// which is what LSetEntryvec reads.
LOnlyOneStep:
705
	subl	s,%eax			// snext - s
706
	subl	t,%ebx			// tnext - t
707
	movl	%eax,%ebp
708
	movl	%ebx,%edx
709
	jmp		LSetEntryvec
710
 
711
//----------------------------------------
712
 
713
// Entry points for a last segment of 2..7 pixels, reached through the
// address pushed at LSetEntryvec. The shared pixel code below uses fixed
// offsets that finish at 7(%edi) / 14(%ecx), so each entry moves %edi and
// %ecx (the z-buffer pointer) back by the number of pixels it skips before
// jumping into the chain.
.globl	Spr8Entry2_8
714
Spr8Entry2_8:
715
	subl	$6,%edi		// adjust for hardwired offsets
716
	subl	$12,%ecx
717
	movb	(%esi),%al	// the texel is read again at LLEntry2_8 if the
						//  z test there passes
718
	jmp		LLEntry2_8
719
 
720
//----------------------------------------
721
 
722
.globl	Spr8Entry3_8
723
Spr8Entry3_8:
724
	subl	$5,%edi		// adjust for hardwired offsets
725
	subl	$10,%ecx
726
	jmp		LLEntry3_8
727
 
728
//----------------------------------------
729
 
730
.globl	Spr8Entry4_8
731
Spr8Entry4_8:
732
	subl	$4,%edi		// adjust for hardwired offsets
733
	subl	$8,%ecx
734
	jmp		LLEntry4_8
735
 
736
//----------------------------------------
737
 
738
.globl	Spr8Entry5_8
739
Spr8Entry5_8:
740
	subl	$3,%edi		// adjust for hardwired offsets
741
	subl	$6,%ecx
742
	jmp		LLEntry5_8
743
 
744
//----------------------------------------
745
 
746
.globl	Spr8Entry6_8
747
Spr8Entry6_8:
748
	subl	$2,%edi		// adjust for hardwired offsets
749
	subl	$4,%ecx
750
	jmp		LLEntry6_8
751
 
752
//----------------------------------------
753
 
754
.globl	Spr8Entry7_8
755
Spr8Entry7_8:
756
	decl	%edi		// adjust for hardwired offsets
757
	subl	$2,%ecx
758
	jmp		LLEntry7_8
759
 
760
//----------------------------------------
761
 
762
// Eight-pixel last segment, and the shared chain that the shorter entry
// points jump into. Each stage z-tests one pixel, skips it if %bp is below
// the stored z or the texel equals TRANSPARENT_COLOR, otherwise stores z and
// the texel; it then steps izi, the t and s fractions and the source
// pointer. The chain falls through to LEndSpan, which handles the final
// pixel.
.globl	Spr8Entry8_8
763
Spr8Entry8_8:
764
	cmpw	(%ecx),%bp
765
	jl		Lp9
766
	movb	(%esi),%al
767
	cmpb	$(TRANSPARENT_COLOR),%al
768
	jz		Lp9
769
	movw	%bp,(%ecx)
770
	movb	%al,(%edi)
771
Lp9:
772
	addl	izistep,%ebp
773
	adcl	$0,%ebp				// fold the carry back into the low bit
774
	addl	tstep,%edx
775
	sbbl	%eax,%eax			// -1 if the t fraction carried, else 0
776
	addl	sstep,%ebx
777
	adcl	advancetable+4(,%eax,4),%esi	// pick the table entry by t
											//  carry, add the s carry
778
LLEntry7_8:
779
	cmpw	2(%ecx),%bp
780
	jl		Lp10
781
	movb	(%esi),%al
782
	cmpb	$(TRANSPARENT_COLOR),%al
783
	jz		Lp10
784
	movw	%bp,2(%ecx)
785
	movb	%al,1(%edi)
786
Lp10:
787
	addl	izistep,%ebp
788
	adcl	$0,%ebp
789
	addl	tstep,%edx
790
	sbbl	%eax,%eax
791
	addl	sstep,%ebx
792
	adcl	advancetable+4(,%eax,4),%esi
793
LLEntry6_8:
794
	cmpw	4(%ecx),%bp
795
	jl		Lp11
796
	movb	(%esi),%al
797
	cmpb	$(TRANSPARENT_COLOR),%al
798
	jz		Lp11
799
	movw	%bp,4(%ecx)
800
	movb	%al,2(%edi)
801
Lp11:
802
	addl	izistep,%ebp
803
	adcl	$0,%ebp
804
	addl	tstep,%edx
805
	sbbl	%eax,%eax
806
	addl	sstep,%ebx
807
	adcl	advancetable+4(,%eax,4),%esi
808
LLEntry5_8:
809
	cmpw	6(%ecx),%bp
810
	jl		Lp12
811
	movb	(%esi),%al
812
	cmpb	$(TRANSPARENT_COLOR),%al
813
	jz		Lp12
814
	movw	%bp,6(%ecx)
815
	movb	%al,3(%edi)
816
Lp12:
817
	addl	izistep,%ebp
818
	adcl	$0,%ebp
819
	addl	tstep,%edx
820
	sbbl	%eax,%eax
821
	addl	sstep,%ebx
822
	adcl	advancetable+4(,%eax,4),%esi
823
LLEntry4_8:
824
	cmpw	8(%ecx),%bp
825
	jl		Lp13
826
	movb	(%esi),%al
827
	cmpb	$(TRANSPARENT_COLOR),%al
828
	jz		Lp13
829
	movw	%bp,8(%ecx)
830
	movb	%al,4(%edi)
831
Lp13:
832
	addl	izistep,%ebp
833
	adcl	$0,%ebp
834
	addl	tstep,%edx
835
	sbbl	%eax,%eax
836
	addl	sstep,%ebx
837
	adcl	advancetable+4(,%eax,4),%esi
838
LLEntry3_8:
839
	cmpw	10(%ecx),%bp
840
	jl		Lp14
841
	movb	(%esi),%al
842
	cmpb	$(TRANSPARENT_COLOR),%al
843
	jz		Lp14
844
	movw	%bp,10(%ecx)
845
	movb	%al,5(%edi)
846
Lp14:
847
	addl	izistep,%ebp
848
	adcl	$0,%ebp
849
	addl	tstep,%edx
850
	sbbl	%eax,%eax
851
	addl	sstep,%ebx
852
	adcl	advancetable+4(,%eax,4),%esi
853
LLEntry2_8:
854
	cmpw	12(%ecx),%bp
855
	jl		Lp15
856
	movb	(%esi),%al
857
	cmpb	$(TRANSPARENT_COLOR),%al
858
	jz		Lp15
859
	movw	%bp,12(%ecx)
860
	movb	%al,6(%edi)
861
Lp15:
862
	addl	izistep,%ebp
863
	adcl	$0,%ebp
864
	addl	tstep,%edx
865
	sbbl	%eax,%eax
866
	addl	sstep,%ebx
867
	adcl	advancetable+4(,%eax,4),%esi
868
 
869
// Final pixel of the span, at the hardwired offsets 14(%ecx) / 7(%edi),
// followed by per-span cleanup, the advance to the next span, and the
// function epilogue.
LEndSpan:
870
	cmpw	14(%ecx),%bp
871
	jl		Lp16
872
	movb	(%esi),%al		// load the texel for the final pixel
873
	cmpb	$(TRANSPARENT_COLOR),%al
874
	jz		Lp16
875
	movw	%bp,14(%ecx)
876
	movb	%al,7(%edi)
877
Lp16:
878
 
879
//
880
// clear s/z, t/z, 1/z from FP stack
881
//
882
	fstp %st(0)
883
	fstp %st(0)
884
	fstp %st(0)
885
 
886
	popl	%ebx				// restore spans pointer
887
LNextSpan:
888
	addl	$(sspan_t_size),%ebx // point to next span
889
	movl	sspan_t_count(%ebx),%ecx
890
	cmpl	$0,%ecx				// any more spans?
891
	jg		LSpanLoop			// yes
892
	jz		LNextSpan			// yes, but this one's empty
893
 
// a negative count falls through both branches: end of the span list
894
	popl	%ebx				// restore register variables
895
	popl	%esi
896
	popl	%edi
897
	popl	%ebp				// restore the caller's stack frame
898
	ret
899
 
900
#endif	// id386