Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5131 | clevermous | 1 | /* |
2 | Copyright (C) 1996-1997 Id Software, Inc. |
||
3 | |||
4 | This program is free software; you can redistribute it and/or |
||
5 | modify it under the terms of the GNU General Public License |
||
6 | as published by the Free Software Foundation; either version 2 |
||
7 | of the License, or (at your option) any later version. |
||
8 | |||
9 | This program is distributed in the hope that it will be useful, |
||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
||
12 | |||
13 | See the GNU General Public License for more details. |
||
14 | |||
15 | You should have received a copy of the GNU General Public License |
||
16 | along with this program; if not, write to the Free Software |
||
17 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
18 | |||
19 | */ |
||
20 | // |
||
21 | // d_parta.s |
||
22 | // x86 assembly-language 8-bpp particle-drawing code. |
||
23 | // |
||
24 | |||
25 | #include "asm_i386.h" |
||
26 | #include "quakeasm.h" |
||
27 | #include "d_ifacea.h" |
||
28 | #include "asm_draw.h" |
||
29 | |||
30 | #if id386 |
||
31 | |||
32 | //---------------------------------------------------------------------- |
||
33 | // 8-bpp particle drawing code. |
||
34 | //---------------------------------------------------------------------- |
||
35 | |||
36 | //FIXME: comments, full optimization |
||
37 | |||
38 | //---------------------------------------------------------------------- |
||
39 | // 8-bpp particle queueing code. |
||
40 | //---------------------------------------------------------------------- |
||
41 | |||
42 | .text |
||
43 | |||
44 | #define P 12+4 |
||
45 | |||
46 | .align 4 |
||
47 | .globl C(D_DrawParticle) |
||
48 | C(D_DrawParticle): |
||
49 | pushl %ebp // preserve caller's stack frame (%ebp is reused below to hold izi) |
||
50 | pushl %edi // preserve register variables (%edi and %ebx) |
||
51 | pushl %ebx |
||
52 | |||
53 | movl P(%esp),%edi | // P = 12+4: three 4-byte pushes plus the return address, so %edi = first stack argument (the p of the pseudo-code below)
||
54 | |||
55 | // FIXME: better FP overlap in general here |
||
56 | |||
57 | // transform point: local = p->org - r_origin, then z / y / x = dot products of local with r_ppn / r_pup / r_pright |
||
58 | // VectorSubtract (p->org, r_origin, local); |
||
59 | flds C(r_origin) |
||
60 | fsubrs pt_org(%edi) |
||
61 | flds pt_org+4(%edi) |
||
62 | fsubs C(r_origin)+4 |
||
63 | flds pt_org+8(%edi) |
||
64 | fsubs C(r_origin)+8 |
||
65 | fxch %st(2) // local[0] | local[1] | local[2] |
||
66 | |||
67 | // transformed[2] = DotProduct(local, r_ppn); |
||
68 | flds C(r_ppn) // r_ppn[0] | local[0] | local[1] | local[2] |
||
69 | fmul %st(1),%st(0) // dot0 | local[0] | local[1] | local[2] |
||
70 | flds C(r_ppn)+4 // r_ppn[1] | dot0 | local[0] | local[1] | local[2] |
||
71 | fmul %st(3),%st(0) // dot1 | dot0 | local[0] | local[1] | local[2] |
||
72 | flds C(r_ppn)+8 // r_ppn[2] | dot1 | dot0 | local[0] | |
||
73 | // local[1] | local[2] |
||
74 | fmul %st(5),%st(0) // dot2 | dot1 | dot0 | local[0] | local[1] | local[2] |
||
75 | fxch %st(2) // dot0 | dot1 | dot2 | local[0] | local[1] | local[2] |
||
76 | faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] | |
||
77 | // local[2] |
||
78 | faddp %st(0),%st(1) // z | local[0] | local[1] | local[2] |
||
79 | fld %st(0) // z | z | local[0] | local[1] | |
||
80 | // local[2] |
||
81 | fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2] |
||
82 | fxch %st(1) // z | 1/z | local[0] | local[1] | local[2] |
||
83 | |||
84 | // if (transformed[2] < PARTICLE_Z_CLIP) |
||
85 | // return; (the compare is issued here; its result is read later by fnstsw/testb, after further FP instructions have been issued) |
||
86 | fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2] |
||
87 | fxch %st(3) // local[2] | local[0] | local[1] | 1/z |
||
88 | |||
89 | flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z |
||
90 | fmul %st(2),%st(0) // dot0 | local[2] | local[0] | local[1] | 1/z |
||
91 | flds C(r_pup)+4 // r_pup[1] | dot0 | local[2] | local[0] | |
||
92 | // local[1] | 1/z |
||
93 | |||
94 | fnstsw %ax |
||
95 | testb $1,%ah |
||
96 | jnz LPop6AndDone | // taken when bit 0 of %ah (status-word bit 8, C0) is set, i.e. z compared below float_particle_z_clip; six FP values are on the stack here
||
97 | |||
98 | // transformed[1] = DotProduct(local, r_pup); |
||
99 | fmul %st(4),%st(0) // dot1 | dot0 | local[2] | local[0] | local[1] | 1/z |
||
100 | flds C(r_pup)+8 // r_pup[2] | dot1 | dot0 | local[2] | |
||
101 | // local[0] | local[1] | 1/z |
||
102 | fmul %st(3),%st(0) // dot2 | dot1 | dot0 | local[2] | local[0] | |
||
103 | // local[1] | 1/z |
||
104 | fxch %st(2) // dot0 | dot1 | dot2 | local[2] | local[0] | |
||
105 | // local[1] | 1/z |
||
106 | faddp %st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] | |
||
107 | // local[1] | 1/z |
||
108 | faddp %st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z |
||
109 | fxch %st(3) // local[1] | local[2] | local[0] | y | 1/z |
||
110 | |||
111 | // transformed[0] = DotProduct(local, r_pright); |
||
112 | fmuls C(r_pright)+4 // dot1 | local[2] | local[0] | y | 1/z |
||
113 | fxch %st(2) // local[0] | local[2] | dot1 | y | 1/z |
||
114 | fmuls C(r_pright) // dot0 | local[2] | dot1 | y | 1/z |
||
115 | fxch %st(1) // local[2] | dot0 | dot1 | y | 1/z |
||
116 | fmuls C(r_pright)+8 // dot2 | dot0 | dot1 | y | 1/z |
||
117 | fxch %st(2) // dot1 | dot0 | dot2 | y | 1/z |
||
118 | faddp %st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z |
||
119 | |||
120 | faddp %st(0),%st(1) // x | y | 1/z |
||
121 | fxch %st(1) // y | x | 1/z |
||
122 | |||
123 | // project the point: u = xcenter + x/z, v = ycenter - y/z, then float_point5 is added to both |
||
124 | fmul %st(2),%st(0) // y/z | x | 1/z |
||
125 | fxch %st(1) // x | y/z | 1/z |
||
126 | fmul %st(2),%st(0) // x/z | y/z | 1/z |
||
127 | fxch %st(1) // y/z | x/z | 1/z |
||
128 | fsubrs C(ycenter) // v | x/z | 1/z |
||
129 | fxch %st(1) // x/z | v | 1/z |
||
130 | fadds C(xcenter) // u | v | 1/z |
||
131 | // FIXME: preadjust xcenter and ycenter |
||
132 | fxch %st(1) // v | u | 1/z |
||
133 | fadds float_point5 // v | u | 1/z |
||
134 | fxch %st(1) // u | v | 1/z |
||
135 | fadds float_point5 // u | v | 1/z |
||
136 | fxch %st(2) // 1/z | v | u |
||
137 | fmuls DP_32768 // 1/z * 0x8000 | v | u |
||
138 | fxch %st(2) // u | v | 1/z * 0x8000 |
||
139 | |||
140 | // FIXME: use Terje's fp->int trick here? |
||
141 | // FIXME: check we're getting proper rounding here |
||
142 | fistpl DP_u // v | 1/z * 0x8000 |
||
143 | fistpl DP_v // 1/z * 0x8000 |
||
144 | |||
145 | movl DP_u,%eax |
||
146 | movl DP_v,%edx |
||
147 | |||
148 | // if ((v > d_vrectbottom_particle) || |
||
149 | // (u > d_vrectright_particle) || |
||
150 | // (v < d_vrecty) || |
||
151 | // (u < d_vrectx)) |
||
152 | // { |
||
153 | // continue; |
||
154 | // } (signed compares with %eax = u, %edx = v; each rejection pops the one remaining FP value via LPop1AndDone) |
||
155 | |||
156 | movl C(d_vrectbottom_particle),%ebx |
||
157 | movl C(d_vrectright_particle),%ecx |
||
158 | cmpl %ebx,%edx |
||
159 | jg LPop1AndDone |
||
160 | cmpl %ecx,%eax |
||
161 | jg LPop1AndDone |
||
162 | movl C(d_vrecty),%ebx |
||
163 | movl C(d_vrectx),%ecx |
||
164 | cmpl %ebx,%edx |
||
165 | jl LPop1AndDone |
||
166 | |||
167 | cmpl %ecx,%eax |
||
168 | jl LPop1AndDone |
||
169 | |||
170 | flds pt_color(%edi) // color | 1/z * 0x8000 |
||
171 | // FIXME: use Terje's fast fp->int trick? |
||
172 | fistpl DP_Color // 1/z * 0x8000 |
||
173 | |||
174 | movl C(d_viewbuffer),%ebx |
||
175 | |||
176 | addl %eax,%ebx |
||
177 | movl C(d_scantable)(,%edx,4),%edi // point to the pixel: %edi = d_scantable[v]; %ebx (d_viewbuffer + u) is added below |
||
178 | |||
179 | imull C(d_zrowbytes),%edx // point to the z pixel: %edx = v * d_zrowbytes; u*2 and d_pzbuffer are added below |
||
180 | |||
181 | leal (%edx,%eax,2),%edx |
||
182 | movl C(d_pzbuffer),%eax |
||
183 | |||
184 | fistpl izi |
||
185 | |||
186 | addl %ebx,%edi |
||
187 | addl %eax,%edx |
||
188 | |||
189 | // pix = izi >> d_pix_shift; (%eax = pix, %ebp keeps the unshifted izi) |
||
190 | |||
191 | movl izi,%eax |
||
192 | movl C(d_pix_shift),%ecx |
||
193 | shrl %cl,%eax |
||
194 | movl izi,%ebp |
||
195 | |||
196 | // if (pix < d_pix_min) |
||
197 | // pix = d_pix_min; |
||
198 | // else if (pix > d_pix_max) |
||
199 | // pix = d_pix_max; |
||
200 | |||
201 | movl C(d_pix_min),%ebx |
||
202 | movl C(d_pix_max),%ecx |
||
203 | cmpl %ebx,%eax |
||
204 | jnl LTestPixMax |
||
205 | movl %ebx,%eax |
||
206 | jmp LTestDone |
||
207 | |||
208 | LTestPixMax: |
||
209 | cmpl %ecx,%eax |
||
210 | jng LTestDone |
||
211 | movl %ecx,%eax |
||
212 | LTestDone: |
||
213 | |||
214 | movb DP_Color,%ch | // %ch = low byte of DP_Color, the byte the drawing code stores to each pixel
||
215 | |||
216 | movl C(d_y_aspect_shift),%ebx |
||
217 | testl %ebx,%ebx |
||
218 | jnz LDefault |
||
219 | |||
220 | cmpl $4,%eax |
||
221 | ja LDefault |
||
222 | |||
223 | jmp DP_EntryTable-4(,%eax,4) | // d_y_aspect_shift == 0 and pix <= 4 (unsigned): jump through entry pix-1 of DP_EntryTable (table not visible here). NOTE(review): pix == 0 would read one entry before the table; presumably d_pix_min >= 1, confirm
||
224 | |||
225 | // 1x1: in: %edx = z-buffer address, %edi = pixel address, %bp = low 16 bits of izi, %ch = color byte |
||
226 | .globl DP_1x1 |
||
227 | DP_1x1: |
||
228 | cmpw %bp,(%edx) // just one pixel to do; skipped when the stored 16-bit z is greater than %bp (signed compare) |
||
229 | jg LDone |
||
230 | movw %bp,(%edx) |
||
231 | movb %ch,(%edi) |
||
232 | jmp LDone |
||
233 | |||
234 | // 2x2: depth-test and plot 2 pixels on each of 2 rows; %ebx = screenwidth (pixel row step), %esi = d_zrowbytes (z row step); %esi is saved and restored locally; each pixel is skipped when its stored z is greater than %bp (signed 16-bit compare) |
||
235 | .globl DP_2x2 |
||
236 | DP_2x2: |
||
237 | pushl %esi |
||
238 | movl C(screenwidth),%ebx |
||
239 | movl C(d_zrowbytes),%esi |
||
240 | |||
241 | cmpw %bp,(%edx) |
||
242 | jg L2x2_1 |
||
243 | movw %bp,(%edx) |
||
244 | movb %ch,(%edi) |
||
245 | L2x2_1: |
||
246 | cmpw %bp,2(%edx) |
||
247 | jg L2x2_2 |
||
248 | movw %bp,2(%edx) |
||
249 | movb %ch,1(%edi) |
||
250 | L2x2_2: |
||
251 | cmpw %bp,(%edx,%esi,1) |
||
252 | jg L2x2_3 |
||
253 | movw %bp,(%edx,%esi,1) |
||
254 | movb %ch,(%edi,%ebx,1) |
||
255 | L2x2_3: |
||
256 | cmpw %bp,2(%edx,%esi,1) |
||
257 | jg L2x2_4 |
||
258 | movw %bp,2(%edx,%esi,1) |
||
259 | movb %ch,1(%edi,%ebx,1) |
||
260 | L2x2_4: |
||
261 | |||
262 | popl %esi |
||
263 | jmp LDone |
||
264 | |||
265 | // 3x3: depth-test and plot 3 pixels on each of 3 rows; rows 1 and 2 are addressed by scaling %esi (d_zrowbytes) and %ebx (screenwidth) by 1 and 2; %esi is saved and restored locally; each pixel is skipped when its stored z is greater than %bp (signed 16-bit compare) |
||
266 | .globl DP_3x3 |
||
267 | DP_3x3: |
||
268 | pushl %esi |
||
269 | movl C(screenwidth),%ebx |
||
270 | movl C(d_zrowbytes),%esi |
||
271 | |||
272 | cmpw %bp,(%edx) |
||
273 | jg L3x3_1 |
||
274 | movw %bp,(%edx) |
||
275 | movb %ch,(%edi) |
||
276 | L3x3_1: |
||
277 | cmpw %bp,2(%edx) |
||
278 | jg L3x3_2 |
||
279 | movw %bp,2(%edx) |
||
280 | movb %ch,1(%edi) |
||
281 | L3x3_2: |
||
282 | cmpw %bp,4(%edx) |
||
283 | jg L3x3_3 |
||
284 | movw %bp,4(%edx) |
||
285 | movb %ch,2(%edi) |
||
286 | L3x3_3: |
||
287 | |||
288 | cmpw %bp,(%edx,%esi,1) |
||
289 | jg L3x3_4 |
||
290 | movw %bp,(%edx,%esi,1) |
||
291 | movb %ch,(%edi,%ebx,1) |
||
292 | L3x3_4: |
||
293 | cmpw %bp,2(%edx,%esi,1) |
||
294 | jg L3x3_5 |
||
295 | movw %bp,2(%edx,%esi,1) |
||
296 | movb %ch,1(%edi,%ebx,1) |
||
297 | L3x3_5: |
||
298 | cmpw %bp,4(%edx,%esi,1) |
||
299 | jg L3x3_6 |
||
300 | movw %bp,4(%edx,%esi,1) |
||
301 | movb %ch,2(%edi,%ebx,1) |
||
302 | L3x3_6: |
||
303 | |||
304 | cmpw %bp,(%edx,%esi,2) |
||
305 | jg L3x3_7 |
||
306 | movw %bp,(%edx,%esi,2) |
||
307 | movb %ch,(%edi,%ebx,2) |
||
308 | L3x3_7: |
||
309 | cmpw %bp,2(%edx,%esi,2) |
||
310 | jg L3x3_8 |
||
311 | movw %bp,2(%edx,%esi,2) |
||
312 | movb %ch,1(%edi,%ebx,2) |
||
313 | L3x3_8: |
||
314 | cmpw %bp,4(%edx,%esi,2) |
||
315 | jg L3x3_9 |
||
316 | movw %bp,4(%edx,%esi,2) |
||
317 | movb %ch,2(%edi,%ebx,2) |
||
318 | L3x3_9: |
||
319 | |||
320 | popl %esi |
||
321 | jmp LDone |
||
322 | |||
323 | |||
324 | // 4x4: depth-test and plot 4 pixels on each of 4 rows; rows 0-1 are done first, then %edx and %edi are advanced by two rows (leal with scale 2) and the same pattern is repeated for rows 2-3; %esi is saved and restored locally; each pixel is skipped when its stored z is greater than %bp (signed 16-bit compare) |
||
325 | .globl DP_4x4 |
||
326 | DP_4x4: |
||
327 | pushl %esi |
||
328 | movl C(screenwidth),%ebx |
||
329 | movl C(d_zrowbytes),%esi |
||
330 | |||
331 | cmpw %bp,(%edx) |
||
332 | jg L4x4_1 |
||
333 | movw %bp,(%edx) |
||
334 | movb %ch,(%edi) |
||
335 | L4x4_1: |
||
336 | cmpw %bp,2(%edx) |
||
337 | jg L4x4_2 |
||
338 | movw %bp,2(%edx) |
||
339 | movb %ch,1(%edi) |
||
340 | L4x4_2: |
||
341 | cmpw %bp,4(%edx) |
||
342 | jg L4x4_3 |
||
343 | movw %bp,4(%edx) |
||
344 | movb %ch,2(%edi) |
||
345 | L4x4_3: |
||
346 | cmpw %bp,6(%edx) |
||
347 | jg L4x4_4 |
||
348 | movw %bp,6(%edx) |
||
349 | movb %ch,3(%edi) |
||
350 | L4x4_4: |
||
351 | |||
352 | cmpw %bp,(%edx,%esi,1) |
||
353 | jg L4x4_5 |
||
354 | movw %bp,(%edx,%esi,1) |
||
355 | movb %ch,(%edi,%ebx,1) |
||
356 | L4x4_5: |
||
357 | cmpw %bp,2(%edx,%esi,1) |
||
358 | jg L4x4_6 |
||
359 | movw %bp,2(%edx,%esi,1) |
||
360 | movb %ch,1(%edi,%ebx,1) |
||
361 | L4x4_6: |
||
362 | cmpw %bp,4(%edx,%esi,1) |
||
363 | jg L4x4_7 |
||
364 | movw %bp,4(%edx,%esi,1) |
||
365 | movb %ch,2(%edi,%ebx,1) |
||
366 | L4x4_7: |
||
367 | cmpw %bp,6(%edx,%esi,1) |
||
368 | jg L4x4_8 |
||
369 | movw %bp,6(%edx,%esi,1) |
||
370 | movb %ch,3(%edi,%ebx,1) |
||
371 | L4x4_8: |
||
372 | |||
373 | leal (%edx,%esi,2),%edx |
||
374 | leal (%edi,%ebx,2),%edi |
||
375 | |||
376 | cmpw %bp,(%edx) |
||
377 | jg L4x4_9 |
||
378 | movw %bp,(%edx) |
||
379 | movb %ch,(%edi) |
||
380 | L4x4_9: |
||
381 | cmpw %bp,2(%edx) |
||
382 | jg L4x4_10 |
||
383 | movw %bp,2(%edx) |
||
384 | movb %ch,1(%edi) |
||
385 | L4x4_10: |
||
386 | cmpw %bp,4(%edx) |
||
387 | jg L4x4_11 |
||
388 | movw %bp,4(%edx) |
||
389 | movb %ch,2(%edi) |
||
390 | L4x4_11: |
||
391 | cmpw %bp,6(%edx) |
||
392 | jg L4x4_12 |
||
393 | movw %bp,6(%edx) |
||
394 | movb %ch,3(%edi) |
||
395 | L4x4_12: |
||
396 | |||
397 | cmpw %bp,(%edx,%esi,1) |
||
398 | jg L4x4_13 |
||
399 | movw %bp,(%edx,%esi,1) |
||
400 | movb %ch,(%edi,%ebx,1) |
||
401 | L4x4_13: |
||
402 | cmpw %bp,2(%edx,%esi,1) |
||
403 | jg L4x4_14 |
||
404 | movw %bp,2(%edx,%esi,1) |
||
405 | movb %ch,1(%edi,%ebx,1) |
||
406 | L4x4_14: |
||
407 | cmpw %bp,4(%edx,%esi,1) |
||
408 | jg L4x4_15 |
||
409 | movw %bp,4(%edx,%esi,1) |
||
410 | movb %ch,2(%edi,%ebx,1) |
||
411 | L4x4_15: |
||
412 | cmpw %bp,6(%edx,%esi,1) |
||
413 | jg L4x4_16 |
||
414 | movw %bp,6(%edx,%esi,1) |
||
415 | movb %ch,3(%edi,%ebx,1) |
||
416 | L4x4_16: |
||
417 | |||
418 | popl %esi |
||
419 | jmp LDone |
||
420 | |||
421 | // default case, handling any size particle; also taken whenever d_y_aspect_shift is non-zero or pix > 4 |
||
422 | LDefault: |
||
423 | |||
424 | // count = pix << d_y_aspect_shift; (%ebx = count, pix is saved in DP_Pix and reloaded for every row) |
||
425 | |||
426 | movl %eax,%ebx |
||
427 | movl %eax,DP_Pix |
||
428 | movb C(d_y_aspect_shift),%cl |
||
429 | shll %cl,%ebx |
||
430 | |||
431 | // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth) |
||
432 | // { |
||
433 | // for (i=0 ; i < pix ; i++) |
||
434 | // { |
||
435 | // if (pz[i] <= izi) |
||
436 | // { |
||
437 | // pz[i] = izi; |
||
438 | // pdest[i] = color; |
||
439 | // } |
||
440 | // } |
||
441 | // } |
||
442 | |||
443 | LGenRowLoop: |
||
444 | movl DP_Pix,%eax |
||
445 | |||
446 | LGenColLoop: |
||
447 | cmpw %bp,-2(%edx,%eax,2) |
||
448 | jg LGSkip |
||
449 | movw %bp,-2(%edx,%eax,2) |
||
450 | movb %ch,-1(%edi,%eax,1) |
||
451 | LGSkip: |
||
452 | decl %eax // --pix; %eax counts down from pix to 1, so element %eax-1 of the z row and pixel row is visited from right to left |
||
453 | jnz LGenColLoop |
||
454 | |||
455 | addl C(d_zrowbytes),%edx |
||
456 | addl C(screenwidth),%edi |
||
457 | |||
458 | decl %ebx // --count; falls through to LDone when the last row is finished |
||
459 | jnz LGenRowLoop |
||
460 | |||
461 | LDone: |
||
462 | popl %ebx // restore register variables (reverse order of the pushes at entry) |
||
463 | popl %edi |
||
464 | popl %ebp // restore the caller's stack frame |
||
465 | ret |
||
466 | |||
467 | LPop6AndDone: | // discard six x87 stack entries: five fstp here, then fall through to LPop1AndDone for the sixth
||
468 | fstp %st(0) |
||
469 | fstp %st(0) |
||
470 | fstp %st(0) |
||
471 | fstp %st(0) |
||
472 | fstp %st(0) |
||
473 | LPop1AndDone: | // discard one x87 stack entry, then leave through the common epilogue at LDone
||
474 | fstp %st(0) |
||
475 | jmp LDone |
||
476 | |||
477 | #endif // id386 |