Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5131 | clevermous | 1 | /* |
2 | Copyright (C) 1996-1997 Id Software, Inc. |
||
3 | |||
4 | This program is free software; you can redistribute it and/or |
||
5 | modify it under the terms of the GNU General Public License |
||
6 | as published by the Free Software Foundation; either version 2 |
||
7 | of the License, or (at your option) any later version. |
||
8 | |||
9 | This program is distributed in the hope that it will be useful, |
||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
||
12 | |||
13 | See the GNU General Public License for more details. |
||
14 | |||
15 | You should have received a copy of the GNU General Public License |
||
16 | along with this program; if not, write to the Free Software |
||
17 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
18 | |||
19 | */ |
||
20 | // |
||
21 | // d_spr8.s |
||
22 | // x86 assembly-language horizontal 8-bpp transparent span-drawing code. |
||
23 | // |
||
24 | |||
25 | #include "asm_i386.h" |
||
26 | #include "quakeasm.h" |
||
27 | #include "asm_draw.h" |
||
28 | |||
29 | #if id386 |
||
30 | |||
31 | //---------------------------------------------------------------------- |
||
32 | // 8-bpp horizontal span drawing code for polygons, with transparency. |
||
33 | //---------------------------------------------------------------------- |
||
34 | |||
// Everything that follows is assembled into the code section.
	.text

//
// Out-of-line clamp fix-ups.  The span code only branches here when a
// texture coordinate fails its range check.  Each stub loads the clamped
// value into the register that was being tested and jumps back to the
// matching LClampReentryN label in the main path.
//

// s at the start of a span (%esi).  Entered through an unsigned "above"
// branch with the flags of the compare against bbextents still live; the
// signed test below separates "greater than bbextents" from the rest.
LClampHighOrLow0:
	jle		LClampLow0
	movl	C(bbextents),%esi		// signed-greater: pin to bbextents
	jmp		LClampReentry0
LClampLow0:
	xorl	%esi,%esi				// otherwise: pin to 0
	jmp		LClampReentry0

// t at the start of a span (%edx); same scheme against bbextentt.
LClampHighOrLow1:
	jle		LClampLow1
	movl	C(bbextentt),%edx		// signed-greater: pin to bbextentt
	jmp		LClampReentry1
LClampLow1:
	xorl	%edx,%edx				// otherwise: pin to 0
	jmp		LClampReentry1

// s at the end of a full 8-pixel segment (%ebp).
LClampLow2:
	movl	$2048,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

// t at the end of a full 8-pixel segment (%ecx).
LClampLow3:
	movl	$2048,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

// s at the end of the final segment (%eax).
LClampLow4:
	movl	$2048,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

// t at the end of the final segment (%ebx).
LClampLow5:
	movl	$2048,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5
||
82 | |||
83 | |||
// Offset from %esp of the span-list argument once the prologue below has
// run: 4 bytes of return address plus 16 bytes for the four pushed
// registers.
#define pspans	4+16

//----------------------------------------------------------------------
// D_SpriteDrawSpans
//
// Takes one stack argument: a pointer to the first span descriptor.
// Descriptors are walked in steps of sspan_t_size; a descriptor with a
// zero count is skipped and one with a negative count ends the walk
// (see LNextSpan).
// Saves and restores %ebp, %edi, %esi and %ebx; %eax, %ecx, %edx and
// the flags are used as scratch.  The x87 stack is left as it was found.
//----------------------------------------------------------------------
	.align 4
.globl C(D_SpriteDrawSpans)
C(D_SpriteDrawSpans):
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx

//
// One-time setup, with integer loads interleaved between the FP ops:
//   sdivz8stepu / tdivz8stepu / zi8stepu = per-pixel u steps times fp_8
//   izistep = d_zistepu * fp_64kx64k converted to an integer
//   pbase   = cacheblock
//   %ebx    = span list pointer
//
	flds	C(d_sdivzstepu)
	fmuls	fp_8					// sdivz8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8					// tdivz8 | sdivz8
	movl	pspans(%esp),%ebx		// first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8					// zi8 | tdivz8 | sdivz8
	movl	%edx,pbase
	flds	C(d_zistepu)
	fmuls	fp_64kx64k				// zistep_fx | zi8 | tdivz8 | sdivz8
	fxch	%st(3)					// sdivz8 | zi8 | tdivz8 | zistep_fx
	fstps	sdivz8stepu
	fstps	zi8stepu
	fstps	tdivz8stepu
	fistpl	izistep
	movl	izistep,%eax
	rorl	$16,%eax				// keep izistep rotated: upper 16 bits in
									// the low word
	movl	sspan_t_count(%ebx),%ecx
	movl	%eax,izistep

// A first span with count <= 0 goes straight to the advance-to-next-span
// code without being drawn.
	cmpl	$0,%ecx
	jle		LNextSpan
||
122 | |||
//
// Per-span setup.  On entry %ebx points at the span descriptor and %ecx
// holds its (positive) pixel count.
//
// Stack notes use: du/dv = span u/v; su,sv,tu,tv,zu,zv = the d_*stepu /
// d_*stepv gradients for s/z, t/z and 1/z.
//
LSpanLoop:
	fildl	sspan_t_v(%ebx)			// dv
	fildl	sspan_t_u(%ebx)			// du | dv

	fld		%st(1)					// dv | du | dv
	fmuls	C(d_sdivzstepv)			// dv*sv | du | dv
	fld		%st(1)					// du | dv*sv | du | dv
	fmuls	C(d_sdivzstepu)			// du*su | dv*sv | du | dv
	fld		%st(2)					// du | du*su | dv*sv | du | dv
	fmuls	C(d_tdivzstepu)			// du*tu | du*su | dv*sv | du | dv
	fxch	%st(1)					// du*su | du*tu | dv*sv | du | dv
	faddp	%st(0),%st(2)			// du*tu | du*su+dv*sv | du | dv
	fxch	%st(1)					// du*su+dv*sv | du*tu | du | dv
	fld		%st(3)					// dv | du*su+dv*sv | du*tu | du | dv
	fmuls	C(d_tdivzstepv)			// dv*tv | du*su+dv*sv | du*tu | du | dv
	fxch	%st(1)					// du*su+dv*sv | dv*tv | du*tu | du | dv
	fadds	C(d_sdivzorigin)		// s/z | dv*tv | du*tu | du | dv
	fxch	%st(4)					// dv | dv*tv | du*tu | du | s/z
	fmuls	C(d_zistepv)			// dv*zv | dv*tv | du*tu | du | s/z
	fxch	%st(1)					// dv*tv | dv*zv | du*tu | du | s/z
	faddp	%st(0),%st(2)			// dv*zv | dv*tv+du*tu | du | s/z
	fxch	%st(2)					// du | dv*tv+du*tu | dv*zv | s/z
	fmuls	C(d_zistepu)			// du*zu | dv*tv+du*tu | dv*zv | s/z
	fxch	%st(1)					// dv*tv+du*tu | du*zu | dv*zv | s/z
	fadds	C(d_tdivzorigin)		// t/z | du*zu | dv*zv | s/z
	fxch	%st(2)					// dv*zv | du*zu | t/z | s/z
	faddp	%st(0),%st(1)			// dv*zv+du*zu | t/z | s/z

	flds	fp_64k					// fp_64k | dv*zv+du*zu | t/z | s/z
	fxch	%st(1)					// dv*zv+du*zu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)			// 1/z | fp_64k | t/z | s/z

	fld		%st(0)					// 1/z | 1/z | fp_64k | t/z | s/z
	fmuls	fp_64kx64k				// 1/z*fp_64kx64k | 1/z | fp_64k | t/z | s/z
	fxch	%st(1)					// 1/z | 1/z*fp_64kx64k | fp_64k | t/z | s/z

//
// Start the reciprocal that turns fp_64k into z*64k (in %st(2)), then
// store the scaled 1/z as an integer while the divide runs.
//
	fdivr	%st(0),%st(2)			// 1/z | 1/z*fp_64kx64k | z*64k | t/z | s/z
	fxch	%st(1)

	fistpl	izi						// 1/z | z*64k | t/z | s/z
	movl	izi,%ebp

//
// pz = d_pzbuffer + v * d_zrowbytes + u * 2   (z-buffer has a word per pixel)
//
	rorl	$16,%ebp				// izi is kept rotated: upper 16 bits in
									// the low word
	movl	sspan_t_v(%ebx),%eax
	movl	%ebp,izi
	movl	sspan_t_u(%ebx),%ebp
	imull	C(d_zrowbytes)			// one-operand form: result in %edx:%eax
	shll	$1,%ebp
	addl	C(d_pzbuffer),%eax
	addl	%ebp,%eax
	movl	%eax,pz

//
// %edi = d_viewbuffer + d_scantable[v] + u.  Also picks up tadjust/sadjust
// in %edx/%esi for LFDIVInFlight1 and parks the span pointer on the stack.
//
	movl	C(d_viewbuffer),%ebp
	movl	sspan_t_v(%ebx),%eax
	pushl	%ebx					// span pointer, popped at the end of the span
	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi
	addl	%ebp,%edi
	movl	sspan_t_u(%ebx),%ebp
	addl	%ebp,%edi

//
// Decide how far ahead the next s/t sample lies and start its divide.
//
	cmpl	$8,%ecx
	ja		LSetupNotLast1			// more than 8 pixels: sample 8 ahead

	decl	%ecx					// %ecx = count - 1 from here on
	jz		LCleanup1				// single pixel: no end sample needed
	movl	%ecx,spancountminus1

// s and t at the span start
	fxch	%st(1)					// z*64k | 1/z | t/z | s/z

	fld		%st(0)					// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)			// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)					// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)			// t | s | 1/z | t/z | s/z
	fxch	%st(1)					// s | t | 1/z | t/z | s/z
	fistpl	s						// t | 1/z | t/z | s/z
	fistpl	t						// 1/z | t/z | s/z

// advance s/z, t/z and 1/z by (count - 1) u steps
	fildl	spancountminus1			// scm1 | 1/z | t/z | s/z

	flds	C(d_tdivzstepu)			// tu | scm1 | ...
	flds	C(d_zistepu)			// zu | tu | scm1 | ...
	fmul	%st(2),%st(0)			// zu*scm1 | tu | scm1 | ...
	fxch	%st(1)					// tu | zu*scm1 | scm1 | ...
	fmul	%st(2),%st(0)			// tu*scm1 | zu*scm1 | scm1 | ...
	fxch	%st(2)					// scm1 | zu*scm1 | tu*scm1 | ...
	fmuls	C(d_sdivzstepu)			// su*scm1 | zu*scm1 | tu*scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// zu*scm1 | su*scm1 | tu*scm1 | 1/z | t/z | s/z
	faddp	%st(0),%st(3)			// su*scm1 | tu*scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// tu*scm1 | su*scm1 | 1/z | t/z | s/z
	faddp	%st(0),%st(3)			// su*scm1 | 1/z | t/z | s/z
	faddp	%st(0),%st(3)			// 1/z | t/z | s/z   (all three advanced)

	flds	fp_64k
	fdiv	%st(1),%st(0)			// left in flight so the integer code that
									// follows overlaps it
	jmp		LFDIVInFlight1
||
262 | |||
//
// One-pixel span: only the starting s and t are needed, so no divide is
// started.  Leaves 1/z | t/z | s/z on the FP stack.
//
LCleanup1:
	fxch	%st(1)					// z*64k | 1/z | t/z | s/z

	fld		%st(0)					// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)			// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)					// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)			// t | s | 1/z | t/z | s/z
	fxch	%st(1)					// s | t | 1/z | t/z | s/z
	fistpl	s						// t | 1/z | t/z | s/z
	fistpl	t						// 1/z | t/z | s/z
	jmp		LFDIVInFlight1
||
275 | |||
//
// Span longer than 8 pixels: compute the starting s and t, step s/z, t/z
// and 1/z by the precomputed 8-pixel increments, and start the divide for
// the far end of the first segment.
//
	.align 4
LSetupNotLast1:
	fxch	%st(1)					// z*64k | 1/z | t/z | s/z

	fld		%st(0)					// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)			// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)					// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)			// t | s | 1/z | t/z | s/z
	fxch	%st(1)					// s | t | 1/z | t/z | s/z
	fistpl	s						// t | 1/z | t/z | s/z
	fistpl	t						// 1/z | t/z | s/z

	fadds	zi8stepu				// 1/z advanced
	fxch	%st(2)					// s/z | t/z | 1/z
	fadds	sdivz8stepu				// s/z advanced
	fxch	%st(2)					// 1/z | t/z | s/z
	flds	tdivz8stepu
	faddp	%st(0),%st(2)			// t/z advanced
	flds	fp_64k
	fdiv	%st(1),%st(0)			// fp_64k / (1/z), left in flight; falls
									// through into LFDIVInFlight1
||
//
// Integer work done while the divide runs.  On entry %esi = sadjust and
// %edx = tadjust; s and t hold the values just stored from the FP stack.
//
LFDIVInFlight1:
	addl	s,%esi					// s + sadjust
	addl	t,%edx					// t + tadjust
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0		// unsigned test catches both ends
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi				// low 16 bits of s, left-justified
	cmpl	%ebp,%edx
	movl	%esi,sfracf				// (mov leaves the flags for the ja below)
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi
	shll	$16,%edx				// low 16 bits of t, left-justified
	movl	t,%eax
	sarl	$16,%esi				// s >> 16
	movl	%edx,tfracf

//
// %esi = pbase + (s >> 16) + (t >> 16) * cachewidth
//
	sarl	$16,%eax				// t >> 16
	addl	%ebx,%esi
	imull	C(cachewidth),%eax
	addl	%eax,%esi

//
// 8 or fewer in %ecx means this is the only (last) segment
//
	cmpl	$8,%ecx
	jbe		LLastSegment
||
336 | |||
337 | // |
||
338 | // not the last segment; do full 8-wide segment |
||
339 | // |
||
//
// Full 8-pixel segment.  On entry the FP stack is z*64k | 1/z | t/z | s/z
// (the divide started earlier), %esi is the source texel pointer, %edi the
// destination pixel pointer and %ecx the number of pixels left.
//
LNotLastSegment:

// collect the divide result and turn it into s and t at the segment end
	fld		%st(0)					// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)			// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)
	fmul	%st(3),%st(0)			// t | s | 1/z | t/z | s/z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext					// 1/z | t/z | s/z
	movl	snext,%eax
	movl	tnext,%edx

	subl	$8,%ecx					// count off this segment's pixels
	movl	C(sadjust),%ebp
	pushl	%ecx					// remaining count, popped before pixel 5
	movl	C(tadjust),%ecx

	addl	%eax,%ebp				// snext + sadjust
	addl	%edx,%ecx				// tnext + tadjust

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

// clamp each end coordinate to [2048, bbextent]
	cmpl	$2048,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp					// s delta across the segment
	subl	t,%ecx					// t delta across the segment

//
// Build the per-pixel steps.  The deltas cover 8 pixels, so the shifts are
// by 19 and 13 rather than 16: 19 = 16 + 3 gives the whole-texel step per
// pixel, and shifting left by 13 = 16 - 3 left-justifies the fractional
// step per pixel.
//   advancetable+4 = whole t step * cachewidth + whole s step
//   advancetable   = the same plus one extra cachewidth
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%edx				// whole s step
	movl	C(cachewidth),%ebx
	sarl	$19,%eax				// whole t step
	jz		LIsZero					// skip the multiply when it is 0
	imull	%ebx,%eax
LIsZero:
	addl	%edx,%eax
	movl	tfracf,%edx
	movl	%eax,advancetable+4
	addl	%ebx,%eax
	shll	$13,%ebp
	movl	%ebp,sstep
	movl	sfracf,%ebx
	shll	$13,%ecx
	movl	%eax,advancetable
	movl	%ecx,tstep

	movl	pz,%ecx
	movl	izi,%ebp

//
// Pixels 0-4.  Per pixel: skip if the low word of %ebp is (signed) less
// than the z-buffer word, or if the texel equals TRANSPARENT_COLOR;
// otherwise store both the z word and the texel.  Then step:
//   - izi and izistep are stored rotated by 16, so the carry out of the
//     add is folded back into bit 0 by the adc of 0;
//   - the carry from the t fraction becomes 0 / -1 in %eax and selects
//     advancetable+4 or advancetable, and the carry from the s fraction
//     is added in by the adc.
// Register use: %ebx = s fraction, %edx = t fraction, %ecx = z pointer.
//
	cmpw	(%ecx),%bp
	jl		LSkip1
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip1
	movw	%bp,(%ecx)
	movb	%al,(%edi)
LSkip1:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	2(%ecx),%bp
	jl		LSkip2
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip2
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
LSkip2:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	4(%ecx),%bp
	jl		LSkip3
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip3
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
LSkip3:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	6(%ecx),%bp
	jl		LSkip4
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip4
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
LSkip4:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	8(%ecx),%bp
	jl		LSkip5
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip5
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
LSkip5:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
||
490 | |||
//
// Partway through the segment, start the divide for the end of the NEXT
// segment so it overlaps the remaining pixels.  The remaining pixel count
// pushed at the top of the segment is popped into %eax here.
// FP stack on entry: 1/z | t/z | s/z.
//
	popl	%eax
	cmpl	$8,%eax
	ja		LSetupNotLast2			// more than one segment still to come

	decl	%eax					// %eax = remaining - 1
	jz		LFDIVInFlight2			// one pixel left: nothing to divide
	movl	%eax,spancountminus1
	fildl	spancountminus1			// scm1 | 1/z | t/z | s/z

// advance 1/z, t/z and s/z by scm1 u steps (zu, tu, su = d_*stepu)
	flds	C(d_zistepu)			// zu | scm1 | 1/z | t/z | s/z
	fmul	%st(1),%st(0)			// zu*scm1 | scm1 | 1/z | t/z | s/z
	flds	C(d_tdivzstepu)			// tu | zu*scm1 | scm1 | 1/z | t/z | s/z
	fmul	%st(2),%st(0)			// tu*scm1 | zu*scm1 | scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// zu*scm1 | tu*scm1 | scm1 | 1/z | t/z | s/z
	faddp	%st(0),%st(3)			// tu*scm1 | scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// scm1 | tu*scm1 | 1/z | t/z | s/z
	fmuls	C(d_sdivzstepu)			// su*scm1 | tu*scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// tu*scm1 | su*scm1 | 1/z | t/z | s/z
	faddp	%st(0),%st(3)			// su*scm1 | 1/z | t/z | s/z
	flds	fp_64k					// fp_64k | su*scm1 | 1/z | t/z | s/z
	fxch	%st(1)					// su*scm1 | fp_64k | 1/z | t/z | s/z
	faddp	%st(0),%st(4)			// fp_64k | 1/z | t/z | s/z

	fdiv	%st(1),%st(0)			// fp_64k / (1/z), left in flight
	jmp		LFDIVInFlight2

// At least one more full segment follows: step by the 8-pixel increments.
	.align 4
LSetupNotLast2:
	fadds	zi8stepu				// 1/z advanced
	fxch	%st(2)					// s/z | t/z | 1/z
	fadds	sdivz8stepu				// s/z advanced
	fxch	%st(2)					// 1/z | t/z | s/z
	flds	tdivz8stepu
	faddp	%st(0),%st(2)			// t/z advanced
	flds	fp_64k
	fdiv	%st(1),%st(0)			// fp_64k / (1/z), left in flight; falls
									// through into LFDIVInFlight2
||
//
// Pixels 5-7 of a full segment, drawn while the divide runs, followed by
// the end-of-segment bookkeeping.  %eax holds the remaining-pixel value
// for the next pass; it is saved on the stack because the pixel code uses
// %eax as scratch.
//
LFDIVInFlight2:
	pushl	%eax

	cmpw	10(%ecx),%bp
	jl		LSkip6
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip6
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
LSkip6:
	addl	izistep,%ebp			// rotated 1/z step ...
	adcl	$0,%ebp					// ... with the carry folded back in
	addl	tstep,%edx
	sbbl	%eax,%eax				// 0 or -1 from the t fraction carry
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	12(%ecx),%bp
	jl		LSkip7
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip7
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
LSkip7:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	14(%ecx),%bp
	jl		LSkip8
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip8
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
LSkip8:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// move on 8 pixels (8 bytes of destination, 16 bytes of z-buffer) and make
// this segment's end coordinates the next segment's start
	addl	$8,%edi
	addl	$16,%ecx
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	%ecx,pz
	movl	%ebp,izi

	popl	%ecx					// value saved at the top of this block

// more than 8 left means another full segment; otherwise fall through
// into the last-segment code
	cmpl	$8,%ecx
	ja		LNotLastSegment
||
600 | |||
601 | // |
||
602 | // last segment of scan |
||
603 | // |
||
//
// Final segment of a span.  %ecx holds the pixel count minus one (0..7),
// i.e. the number of steps still to take.  The steps are computed so the
// last pixel lands exactly on the end sample rather than one step past it.
//
LLastSegment:
	testl	%ecx,%ecx
	jz		LNoSteps				// a single pixel: nothing to step

// collect the divide result: FP stack is z*64k | 1/z | t/z | s/z
	fld		%st(0)
	fmul	%st(4),%st(0)			// s at the span end
	fxch	%st(1)
	fmul	%st(3),%st(0)			// t at the span end
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext					// 1/z | t/z | s/z

	movl	C(tadjust),%ebx
	movl	C(sadjust),%eax

	addl	snext,%eax				// snext + sadjust
	addl	tnext,%ebx				// tnext + tadjust

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

// clamp each end coordinate to [2048, bbextent]
	cmpl	$2048,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

// with exactly one step, the step is simply the whole delta
	cmpl	$1,%ecx
	je		LOnlyOneStep

	subl	s,%eax					// s delta
	subl	t,%ebx					// t delta

// Divide each delta by the step count through reciprocal_table (indexed
// from a step count of 2).  The deltas are doubled first; the original
// note describes this as 15.17 times a 1.31 reciprocal giving 16.48, so
// the step is taken from the high dword (%edx) of each product.
	addl	%eax,%eax
	addl	%ebx,%ebx
	imull	reciprocal_table-8(,%ecx,4)		// %edx = sstep
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4)		// %edx = tstep

//
// Common tail: %ebp = sstep, %edx = tstep, %ecx = step count.
// Builds advancetable as in the full-segment case, loads the registers the
// pixel code expects, and dispatches through spr8entryvec_table by pushing
// the handler address and executing ret.
//
LSetEntryvec:
	movl	spr8entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax				// keep tstep for its fraction
	pushl	%ebx					// handler address, consumed by the ret
	movl	%ebp,%ecx
	sarl	$16,%ecx				// whole s step
	movl	C(cachewidth),%ebx
	sarl	$16,%edx				// whole t step
	jz		LIsZeroLast				// skip the multiply when it is 0
	imull	%ebx,%edx
LIsZeroLast:
	addl	%ecx,%edx
	movl	tfracf,%ecx
	movl	%edx,advancetable+4		// whole t step * cachewidth + whole s step
	addl	%ebx,%edx
	shll	$16,%ebp				// s step fraction, left-justified
	movl	sfracf,%ebx
	shll	$16,%eax				// t step fraction, left-justified
	movl	%edx,advancetable		// same, plus one extra cachewidth

	movl	%eax,tstep
	movl	%ebp,sstep
	movl	%ecx,%edx				// %edx = t fraction

	movl	pz,%ecx
	movl	izi,%ebp

	ret								// not a return: jumps to the pushed handler
||
694 | |||
695 | //---------------------------------------- |
||
696 | |||
//
// Single-pixel tail: bias both pointers so that the fixed offsets used at
// LEndSpan (7 bytes of destination, 14 bytes of z-buffer) address the
// current pixel.
//
LNoSteps:
	subl	$7,%edi
	movl	pz,%ecx
	subl	$14,%ecx
	jmp		LEndSpan

//
// Two-pixel tail: there is exactly one step, so it equals the full delta.
// Hands sstep in %ebp and tstep in %edx to the common setup code.
//
LOnlyOneStep:
	subl	s,%eax
	movl	%eax,%ebp
	subl	t,%ebx
	movl	%ebx,%edx
	jmp		LSetEntryvec
||
710 | |||
711 | //---------------------------------------- |
||
712 | |||
//
// Entry for a final segment of 2 pixels.  Biases the destination (%edi)
// and z-buffer (%ecx) pointers so the hardwired offsets used from
// LLEntry2_8 onward (6/7 bytes and 12/14 bytes) address the two pixels.
//
// The texel load that used to sit here was dead: LLEntry2_8 reloads %al
// from (%esi) before testing it, and the path that skips the pixel
// overwrites %eax with sbbl.  None of the other entry stubs preload %al.
//
.globl	Spr8Entry2_8
Spr8Entry2_8:
	subl	$6,%edi
	subl	$12,%ecx
	jmp		LLEntry2_8
||
719 | |||
720 | //---------------------------------------- |
||
721 | |||
//
// Entries for final segments of 3 to 7 pixels.  Each one pulls the
// destination (%edi) and z-buffer (%ecx) pointers back by the number of
// pixels it does not draw (one byte and one word per pixel), so that the
// hardwired offsets in the shared pixel chain line up, then joins the
// chain at the matching LLEntryN_8 label.
//

.globl	Spr8Entry3_8
Spr8Entry3_8:
	subl	$5,%edi
	subl	$10,%ecx
	jmp		LLEntry3_8

.globl	Spr8Entry4_8
Spr8Entry4_8:
	subl	$4,%edi
	subl	$8,%ecx
	jmp		LLEntry4_8

.globl	Spr8Entry5_8
Spr8Entry5_8:
	subl	$3,%edi
	subl	$6,%ecx
	jmp		LLEntry5_8

.globl	Spr8Entry6_8
Spr8Entry6_8:
	subl	$2,%edi
	subl	$4,%ecx
	jmp		LLEntry6_8

.globl	Spr8Entry7_8
Spr8Entry7_8:
	decl	%edi
	subl	$2,%ecx
	jmp		LLEntry7_8
||
759 | |||
760 | //---------------------------------------- |
||
761 | |||
//
// Shared pixel chain for the final segment.  Spr8Entry8_8 draws from
// offset 0; the LLEntryN_8 labels are the join points for shorter tails.
// The chain falls out of the bottom into LEndSpan, which handles the last
// pixel without stepping.
//
// Per pixel: skip if the low word of %ebp is (signed) less than the
// z-buffer word or the texel equals TRANSPARENT_COLOR; otherwise store the
// z word and the texel.  Then step: the rotated 1/z add folds its carry
// back in, the t fraction carry (0 / -1 in %eax) picks advancetable+4 or
// advancetable, and the s fraction carry is added by the adc.
//
.globl	Spr8Entry8_8
Spr8Entry8_8:
	cmpw	(%ecx),%bp
	jl		LSkip9
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip9
	movw	%bp,(%ecx)
	movb	%al,(%edi)
LSkip9:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry7_8:
	cmpw	2(%ecx),%bp
	jl		LSkip10
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip10
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
LSkip10:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry6_8:
	cmpw	4(%ecx),%bp
	jl		LSkip11
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip11
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
LSkip11:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry5_8:
	cmpw	6(%ecx),%bp
	jl		LSkip12
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip12
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
LSkip12:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry4_8:
	cmpw	8(%ecx),%bp
	jl		LSkip13
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip13
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
LSkip13:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry3_8:
	cmpw	10(%ecx),%bp
	jl		LSkip14
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip14
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
LSkip14:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry2_8:
	cmpw	12(%ecx),%bp
	jl		LSkip15
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip15
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
LSkip15:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
||
868 | |||
//
// Last pixel of the span (fixed offsets 14 / 7), drawn with the same
// z and transparency tests as every other pixel but with no stepping
// afterwards.
//
LEndSpan:
	cmpw	14(%ecx),%bp
	jl		LSkip16
	movb	(%esi),%al				// texel for the final pixel
	cmpb	$(TRANSPARENT_COLOR),%al
	je		LSkip16
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
LSkip16:

// drop 1/z, t/z and s/z from the FP stack
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	popl	%ebx					// span pointer saved during span setup

//
// Advance to the next descriptor: positive count draws it, zero skips it,
// negative ends the list.
//
LNextSpan:
	addl	$(sspan_t_size),%ebx
	movl	sspan_t_count(%ebx),%ecx
	cmpl	$0,%ecx
	jg		LSpanLoop
	jz		LNextSpan

// epilogue: restore the registers saved on entry, in reverse order
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
||
899 | |||
900 | #endif // id386 |