/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_draw16.s
// x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
// subdivision.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with no transparency and
// 16-pixel subdivision.
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//----------------------------------------------------------------------

	.data

	.text

// out-of-line, rarely-needed clamping code

LClampHigh0:
	movl	C(bbextents),%esi
	jmp		LClampReentry0
LClampHighOrLow0:
	jg		LClampHigh0
	xorl	%esi,%esi
	jmp		LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp		LClampReentry1
LClampHighOrLow1:
	jg		LClampHigh1
	xorl	%edx,%edx
	jmp		LClampReentry1

LClampLow2:
	movl	$4096,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

LClampLow3:
	movl	$4096,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

LClampLow4:
	movl	$4096,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

LClampLow5:
	movl	$4096,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5


#define pspans	4+16

//----------------------------------------------------------------------
// void D_DrawSpans16 (espan_t *pspans)
//
// Perspective-correct texture mapping: 1/z, s/z, t/z are evaluated with
// the FPU only once per 16-pixel segment (one FDIV, started early so it
// overlaps the integer pixel loop), and s,t are stepped linearly across
// each segment with 16.16 fixed-point adds.
//
// Register usage inside the pixel-stepping loops (established below):
//   %esi = current source texel pointer
//   %edi = current destination pixel pointer
//   %ebx = sfrac fractional part, left-justified
//   %edx = tfrac fractional part, left-justified
//   %ebp = sstep fractional part, left-justified
//   %ecx = 0/-1 scratch for the sbb/adc advancetable select (see below)
//----------------------------------------------------------------------

	.align 4
.globl C(D_DrawSpans16)
C(D_DrawSpans16):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_16
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_16
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_16
	movl	%edx,pbase			// pbase = cacheblock
	fstps	zi16stepu
	fstps	tdivz16stepu
	fstps	sdivz16stepu

LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	espan_t_v(%ebx)
	fildl	espan_t_u(%ebx)

	fld		%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
							//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
							//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
							//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
							//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
							//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
							//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
							//  du*d_zistepu; stays in %st(0) at end
							// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ecx
	movl	espan_t_v(%ebx),%eax
	movl	%ebx,pspantemp	// preserve spans pointer

	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ecx,%edi
	movl	espan_t_u(%ebx),%ecx
	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
	movl	espan_t_count(%ebx),%ecx

//
// now start the FDIV for the end of the span
//
	cmpl	$16,%ecx
	ja		LSetupNotLast1

	decl	%ecx
	jz		LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp		LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight1:

	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi					// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax					// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	movl	C(cachewidth),%edx
	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
	addl	%ebx,%esi
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
									//           ((tfrac >> 16) * cachewidth);
//
// determine whether last span or not
//
	cmpl	$16,%ecx
	jna		LLastSegment

//
// not the last segment; do full 16-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	movb	(%esi),%bl	// get first source texel
	subl	$16,%ecx		// count off this segments' pixels
	movl	C(sadjust),%ebp
	movl	%ecx,counttemp	// remember count of remaining pixels

	movl	C(tadjust),%ecx
	movb	%bl,(%edi)	// store first dest pixel

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$4096,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$4096,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp
	subl	t,%ecx

//
// set up advancetable
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$20,%eax			// tstep = delta>>20: whole part of delta/16
								//  (>>16 for 16.16, then /16 per pixel)
	jz		LZero
	sarl	$20,%edx			// sstep = delta>>20: whole part of delta/16
	movl	C(cachewidth),%ebx
	imull	%ebx,%eax
	jmp		LSetUp1

LZero:
	sarl	$20,%edx			// sstep = delta>>20: whole part of delta/16
	movl	C(cachewidth),%ebx

LSetUp1:

	addl	%edx,%eax			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$12,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$12,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t

// Per-pixel stepping idiom used throughout the unrolled loops below:
// adding the left-justified tstep fraction to %edx sets carry exactly
// when tfrac overflows into the integer part.  sbbl %ecx,%ecx turns that
// carry into 0 (no overflow) or -1 (overflow), so the scaled-index adcl
// picks advancetable+4 (base advance) or advancetable+0 (advance plus one
// extra cachewidth row), and the adc also folds in the carry from the
// sfrac add (addl %ebp,%ebx) as the +1 texel step in s.
	movl	%ecx,tstep
	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac

	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	(%esi),%al
	addl	%ebp,%ebx
	movb	%al,1(%edi)
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi


//
// start FDIV for end of next segment in flight, so it can overlap
//
	movl	counttemp,%ecx
	cmpl	$16,%ecx			// more than one segment after this?
	ja		LSetupNotLast2	// yes

	decl	%ecx
	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight2:
	movl	%ecx,counttemp

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,14(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	$16,%edi
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	counttemp,%ecx		// retrieve count

//
// determine whether last span or not
//
	cmpl	$16,%ecx				// are there multiple segments remaining?
	movb	%al,-1(%edi)
	ja		LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
	testl	%ecx,%ecx
	jz		LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movb	(%esi),%al		// load first texel in segment
	movl	C(tadjust),%ebx
	movb	%al,(%edi)		// store first pixel in segment
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$4096,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$4096,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
							//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48

	imull	reciprocal_table_16-8(,%ecx,4)	// sstep = (snext - s) /
											//  (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table_16-8(,%ecx,4)	// tstep = (tnext - t) /
											//  (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
	movl	entryvec_table_16(,%ecx,4),%ebx
	movl	%edx,%eax
	movl	%ebx,jumptemp		// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%edx			// tstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%ecx			// sstep >>= 16;
	imull	%ebx,%edx

	addl	%ecx,%edx			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ecx,%edx
	addl	%eax,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	jmp		*jumptemp			// jump to the number-of-pixels handler

//----------------------------------------

LNoSteps:
	movb	(%esi),%al		// load first texel in segment
	subl	$15,%edi			// adjust for hardwired offset
	jmp		LEndSpan


LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp		LSetEntryvec

//----------------------------------------
// EntryN_16 handles a final segment of exactly N pixels.  Each entry
// pre-adjusts %edi so the shared fall-through chain below (LEntry15_16 ..
// LEntry2_16), which uses hardwired 1..15(%edi) offsets, lands the last
// pixel at 15(%edi), stored at LEndSpan.
//----------------------------------------

.globl	Entry2_16, Entry3_16, Entry4_16, Entry5_16
.globl	Entry6_16, Entry7_16, Entry8_16, Entry9_16
.globl	Entry10_16, Entry11_16, Entry12_16, Entry13_16
.globl	Entry14_16, Entry15_16, Entry16_16

Entry2_16:
	subl	$14,%edi		// adjust for hardwired offsets
	movb	(%esi),%al
	jmp		LEntry2_16

//----------------------------------------

Entry3_16:
	subl	$13,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	jmp		LEntry3_16

//----------------------------------------

Entry4_16:
	subl	$12,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry4_16

//----------------------------------------

Entry5_16:
	subl	$11,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry5_16

//----------------------------------------

Entry6_16:
	subl	$10,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry6_16

//----------------------------------------

Entry7_16:
	subl	$9,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry7_16

//----------------------------------------

Entry8_16:
	subl	$8,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry8_16

//----------------------------------------

Entry9_16:
	subl	$7,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry9_16

//----------------------------------------

Entry10_16:
	subl	$6,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry10_16

//----------------------------------------

Entry11_16:
	subl	$5,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry11_16

//----------------------------------------

Entry12_16:
	subl	$4,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry12_16

//----------------------------------------

Entry13_16:
	subl	$3,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry13_16

//----------------------------------------

Entry14_16:
	subl	$2,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry14_16

//----------------------------------------

Entry15_16:
	decl	%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry15_16

//----------------------------------------

Entry16_16:
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,1(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry15_16:
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry14_16:
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry13_16:
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry12_16:
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry11_16:
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry10_16:
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry9_16:
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry8_16:
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry7_16:
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry6_16:
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry5_16:
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry4_16:
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
LEntry3_16:
	movb	%al,14(%edi)
	movb	(%esi),%al
LEntry2_16:

LEndSpan:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp %st(0)
	fstp %st(0)
	fstp %st(0)

	movl	pspantemp,%ebx				// restore spans pointer
	movl	espan_t_pnext(%ebx),%ebx	// point to next span
	testl	%ebx,%ebx			// any more spans?
	movb	%al,15(%edi)
	jnz		LSpanLoop			// more spans

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386