Subversion Repositories Kolibri OS

Rev

Rev 6619 | Rev 9237 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1245 hidnplayr 1
 
2
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
3
;ROUND equ 8
4
;Ext = NON
5
;MMX = 1
6
;NON = 0
7
;------- Big thanks to Majuma (www.majuma.xt.pl) for the absolutely ----
8
;------- great DOS mode 13h demos --------------------------------------
9
;------- Procedure draws a bump-mapped triangle with texture, using ----
10
;------- the Catmull Z-buffer algorithm (Z coordinate interpolation) ---
11
;------- Texture pixel is calculated this way: col1*col2/256 -----------
12
bump_tex_triangle_z:
13
;------------------in - eax - x1 shl 16 + y1  (x in high word, y in low word)
14
;---------------------- ebx - x2 shl 16 + y2 -----------
15
;---------------------- ecx - x3 shl 16 + y3 -----------
16
;---------------------- edx - pointer to bump map-------
17
;---------------------- esi - pointer to env map--------
18
;---------------------- edi - pointer to screen buffer--
19
;---------------------- stack : bump coordinates         - 3 (x,y) word pairs, [ebp+4]..[ebp+14]
20
;----------------------         environment coordinates - 3 (x,y) word pairs, [ebp+16]..[ebp+26]
21
;----------------------         Z position coordinates  - 3 words,            [ebp+28]..[ebp+32]
22
;----------------------         pointer to Z buffer     - dword,              [ebp+34]
23
;----------------------         pointer to texture      - dword,              [ebp+38]
24
;----------------------         texture coordinates     - 3 (x,y) word pairs, [ebp+42]..[ebp+52]
25
;-- Z-buffer - filled with coordinates as dword --------
26
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
;-- Offsets above are relative to ebp = esp at entry ([ebp] = return address).
;-- The procedure does not preserve registers and removes its 50 bytes of
;-- stack arguments itself (ret 50).
27
.b_x1	equ ebp+4   ; procedure don't save registers !!!
28
.b_y1	equ ebp+6   ; each coordinate as word
29
.b_x2	equ ebp+8
30
.b_y2	equ ebp+10	 ; b - bump map coords
31
.b_x3	equ ebp+12	 ; e - env map coords
32
.b_y3	equ ebp+14
33
.e_x1	equ ebp+16
34
.e_y1	equ ebp+18
35
.e_x2	equ ebp+20
36
.e_y2	equ ebp+22
37
.e_x3	equ ebp+24
38
.e_y3	equ ebp+26
39
.z1	equ word[ebp+28]
40
.z2	equ word[ebp+30]
41
.z3	equ word[ebp+32]
42
.z_buff equ dword[ebp+34]	; pointer to Z-buffer
43
.tex_ptr equ dword[ebp+38]	; ptr to texture
44
.t_x1	equ ebp+42		; texture coords
45
.t_y1	equ ebp+44
46
.t_x2	equ ebp+46
47
.t_y2	equ ebp+48
48
.t_x3	equ ebp+50
49
.t_y3	equ ebp+52
50
51
 
52
 
53
 
54
.t_emap equ dword[ebp-8]	; pointer to env map
55
.x1	equ word[ebp-10]
56
.y1	equ word[ebp-12]
57
.x2	equ word[ebp-14]
58
.y2	equ word[ebp-16]
59
.x3	equ word[ebp-18]
60
.y3	equ word[ebp-20]
61
62
 
2881 leency 63
64
 
65
.dz12	equ	 [edi-8]
66
.dbx12	equ dword[edi-12]
67
.dby12	equ	 [edi-16]
68
.dex12	equ dword[edi-20]
69
.dey12	equ	 [edi-24]
70
.dtx12	equ dword[edi-28]
71
.dty12	equ	 [edi-32]
72
73
 
74
.dz13  equ	[ebp-52-4*2]
75
.dbx13 equ dword[ebp-52-4*3]
76
.dby13 equ	[ebp-52-4*4]
77
.dex13 equ dword[ebp-52-4*5]
78
.dey13 equ	[ebp-52-4*6]
79
.dtx13 equ dword[ebp-52-4*7]
80
.dty13 equ	[ebp-52-4*8]
81
82
 
83
 
84
.dz23  equ	[ebp-(52+4*10)]
85
.dbx23 equ dword[ebp-(52+4*11)]
86
.dby23 equ	[ebp-(52+4*12)]
87
.dex23 equ dword[ebp-(52+4*13)]
88
.dey23 equ	[ebp-(52+4*14)]
89
.dtx23 equ dword[ebp-(52+4*15)]
90
.dty23 equ	[ebp-(52+4*16)]
91
92
 
93
94
 
1245 hidnplayr 95
.dz12	equ	 [ebp-28]
96
.dbx12	equ dword[ebp-32]
97
.dby12	equ	 [ebp-36]
98
.dex12	equ dword[ebp-40]
99
.dey12	equ	 [ebp-44]
100
.dtx12	equ dword[ebp-48]
101
.dty12	equ	 [ebp-52]
102
103
 
104
.dz13  equ	[ebp-52-4*2]
105
.dbx13 equ dword[ebp-52-4*3]
106
.dby13 equ	[ebp-52-4*4]
107
.dex13 equ dword[ebp-52-4*5]
108
.dey13 equ	[ebp-52-4*6]
109
.dtx13 equ dword[ebp-52-4*7]
110
.dty13 equ	[ebp-52-4*8]
111
112
 
113
 
114
.dz23  equ	[ebp-(52+4*10)]
115
.dbx23 equ dword[ebp-(52+4*11)]
116
.dby23 equ	[ebp-(52+4*12)]
117
.dex23 equ dword[ebp-(52+4*13)]
118
.dey23 equ	[ebp-(52+4*14)]
119
.dtx23 equ dword[ebp-(52+4*15)]
120
.dty23 equ	[ebp-(52+4*16)]
121
122
 
2881 leency 123
124
 
1819 yogev_ezra 125
126
 
1245 hidnplayr 127
.cz1   equ	[ebp-(52+4*18)]
128
.cx2   equ dword[ebp-(52+4*19)]
129
.cz2   equ	[ebp-(52+4*20)]
130
.cbx1  equ dword[ebp-(52+4*21)]
131
.cby1  equ	[ebp-(52+4*22)]
132
.cbx2  equ dword[ebp-(52+4*23)]
133
.cby2  equ	[ebp-(52+4*24)]
134
.cex1  equ dword[ebp-(52+4*25)]
135
.cey1  equ	[ebp-(52+4*26)]
136
.cex2  equ dword[ebp-(52+4*27)]
137
.cey2  equ	[ebp-(52+4*28)]
138
139
 
140
.cty1  equ	[ebp-(52+4*30)]
141
.ctx2  equ dword[ebp-(52+4*31)]
142
.cty2  equ	[ebp-(52+4*32)]
143
144
 
1819 yogev_ezra 145
146
 
147
.cz1   equ	[ebp-(52+4*18)]
148
.cbx1  equ dword[ebp-(52+4*19)]
149
.cby1  equ	[ebp-(52+4*20)]
150
.cex1  equ dword[ebp-(52+4*21)]
151
.cey1  equ	[ebp-(52+4*22)]
152
.ctx1  equ dword[ebp-(52+4*23)]
153
.cty1  equ	[ebp-(52+4*24)]
154
155
 
156
.cz2   equ	[ebp-(52+4*26)]
157
.cbx2  equ dword[ebp-(52+4*27)]
158
.cby2  equ	[ebp-(52+4*28)]
159
.cex2  equ dword[ebp-(52+4*29)]
160
.cey2  equ	[ebp-(52+4*30)]
161
.ctx2  equ dword[ebp-(52+4*31)]
162
.cty2  equ	[ebp-(52+4*32)]
163
164
 
165
       ; --- Frame setup and vertex sort ---------------------------------
       ; ebp is set to esp at entry, so stack arguments start at [ebp+4].
       ; The vertices are then ordered so that y1 <= y2 <= y3 (signed),
       ; swapping every per-vertex attribute together with the packed
       ; screen coordinates held in eax/ebx/ecx.
       cld
1245 hidnplayr 166
       mov     ebp,esp  ; frame base: [ebp] = return address
167
       push    edx	  ; store bump map pointer at [ebp-4]; edx is free as scratch afterwards
168
       push    esi	  ; store env map pointer at [ebp-8] (.t_emap)
169
     ; sub     esp,120
170
 .sort3:		  ; sort triangle vertices by ascending y (restarted after every 2<->3 swap)
171
       cmp     ax,bx  ; low words hold y1 and y2
172
       jle     .sort1  ; y1 <= y2 (signed): vertices 1 and 2 are already ordered
173
       xchg    eax,ebx  ; swap packed screen coords of vertices 1 and 2
174
       mov     edx,dword[.b_x1]  ; swap bump map coords (x,y word pair moved as one dword)
175
       xchg    edx,dword[.b_x2]
176
       mov     dword[.b_x1],edx
177
       mov     edx,dword[.e_x1]  ; swap env map coords
178
       xchg    edx,dword[.e_x2]
179
       mov     dword[.e_x1],edx
180
       mov     edx,dword[.t_x1]  ; swap texture coords
181
       xchg    edx,dword[.t_x2]
182
       mov     dword[.t_x1],edx
183
       mov     dx,.z1  ; swap Z values (words)
184
       xchg    dx,.z2
185
       mov     .z1,dx
186
 .sort1:
187
       cmp	bx,cx  ; low words hold y2 and y3
188
       jle	.sort2  ; y2 <= y3 (signed): sorting finished
189
       xchg	ebx,ecx  ; swap packed screen coords of vertices 2 and 3
190
       mov	edx,dword[.b_x2]  ; swap bump map coords
191
       xchg	edx,dword[.b_x3]
192
       mov	dword[.b_x2],edx
193
       mov	edx,dword[.e_x2]  ; swap env map coords
194
       xchg	edx,dword[.e_x3]
195
       mov	dword[.e_x2],edx
196
       mov	edx,dword[.t_x2]  ; swap texture coords
197
       xchg	edx,dword[.t_x3]
198
       mov	dword[.t_x2],edx
199
       mov     dx,.z2  ; swap Z values (words)
200
       xchg    dx,.z3
201
       mov     .z2,dx
202
       jmp	.sort3  ; vertex 2 changed, so re-check the 1/2 ordering
203
 .sort2:
204
       push	eax	; store sorted triangle coords: [ebp-12] = y1, [ebp-10] = x1
205
       push	ebx  ; [ebp-16] = y2, [ebp-14] = x2
206
       push	ecx  ; [ebp-20] = y3, [ebp-18] = x3
207
	 mov	  edx,80008000h  ; eax,ebx,ecx are ANDd together into edx which means that
208
	 and	  edx,ebx	 ; if *all* of them are negative a sign flag is raised
209
	 and	  edx,ecx
210
	 and	  edx,eax
211
	 test	  edx,80008000h  ; Check both X&Y at once
212
	 jne	  .loop23_done
213
    ;   mov     edx,eax         ; eax,ebx,ecx are ORd together into edx which means that
214
    ;   or      edx,ebx         ; if any *one* of them is negative a sign flag is raised
215
    ;   or      edx,ecx
216
    ;   test    edx,80000000h   ; Check only X
217
    ;   jne     .loop23_done
218
219
 
220
    ;   jg      .loop23_done
221
    ;   cmp     .x2,SIZE_X     ; This can be optimized with effort
222
    ;   jg      .loop23_done
223
    ;   cmp     .x3,SIZE_X
224
    ;   jg      .loop23_done    ; {
225
226
 
227
 
228
       sub	bx,.y1
229
       jnz	.bt_dx12_make
230
if 0 ;Ext >= SSE2
2881 leency 231
       pxor	xmm0,xmm0
232
       movups	.dty12,xmm0
233
       movups	.dey12,xmm0
234
       sub	esp,16
235
else
236
       mov	ecx,8
1245 hidnplayr 237
       xor	edx,edx
238
     @@:
239
       push	edx   ;dword 0
240
       loop	@b
241
end if
2881 leency 242
       jmp	.bt_dx12_done
1245 hidnplayr 243
 .bt_dx12_make:
244
       movsx	ebx,bx
1776 yogev_ezra 245
1245 hidnplayr 246
 
247
 
248
       sub	 esp,32
1819 yogev_ezra 249
   ;    mov       eax,256
1245 hidnplayr 250
       cvtsi2ss  xmm4,[i255d]
251
       cvtsi2ss  xmm3,ebx ;rcps
2881 leency 252
if 0 ;Ext >= SSE2
253
       mov	 edi,ebp
254
       sub	 edi,512
255
       or	 edi,0x0000000f
256
end if
257
       divss	 xmm3,xmm4
1245 hidnplayr 258
       shufps	 xmm3,xmm3,0
259
260
 
261
       movd	 mm1,[.b_x2]
262
       movd	 mm2,[.e_x1]
263
       movd	 mm3,[.e_x2]
264
265
 
266
       punpcklwd  mm0,mm4
267
       punpcklwd  mm1,mm4
268
       punpcklwd  mm2,mm4
269
       punpcklwd  mm3,mm4
270
271
 
272
       psubd	  mm3,mm2
273
274
 
275
       movlhps	 xmm1,xmm1
276
       cvtpi2ps  xmm1,mm3
277
278
 
279
280
 
281
			     ;xmm1--> | dbx | dby | dex | dey |
282
;1       movups    .dey12,xmm1
1819 yogev_ezra 283
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
1776 yogev_ezra 284
       movhlps	 xmm1,xmm1
1245 hidnplayr 285
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1776 yogev_ezra 286
       movq	 .dey12,mm0
1245 hidnplayr 287
       movq	 .dby12,mm1
288
;-------------
289
  ;    pxor      mm0,mm0
1776 yogev_ezra 290
  ;    pxor      mm1,mm1
291
   ;/   pinsrw    mm0,.z1,1
292
   ;/   pinsrw    mm0,.x1,0
293
   ;/   pinsrw    mm1,.z2,1
294
   ;/   pinsrw    mm1,.x2,0
295
       mov	 ax,.z2
296
       sub	 ax,.z1
297
       cwde
298
1819 yogev_ezra 299
 
300
       sub	dx,.x1
301
       movsx	edx,dx
302
303
 
1776 yogev_ezra 304
305
 
306
   ;/    punpcklwd  mm1,mm4
307
308
 
309
  ;     cvtpi2ps   xmm2,mm0
310
  ;     subps      xmm1,xmm2
311
312
 
313
314
 
315
       movd	  mm3,[.t_x2]
316
317
 
318
       punpcklwd  mm3,mm4
319
       psubd	  mm3,mm2
320
321
 
322
       cvtsi2ss  xmm1,eax
323
       movlhps	 xmm1,xmm1
324
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 325
   ;    movss     xmm1,xmm4
326
       shufps	 xmm1,xmm1,00101111b
327
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 328
329
 
1819 yogev_ezra 330
1776 yogev_ezra 331
 
332
			     ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 333
;1       movlps    .dty12,xmm1
334
;1       movhps    .dz12,xmm1
335
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
1776 yogev_ezra 336
       movhlps	 xmm1,xmm1
337
       cvtps2pi  mm1,xmm1
1819 yogev_ezra 338
       movq	 .dty12,mm0
1776 yogev_ezra 339
       movq	 .dz12,mm1
1819 yogev_ezra 340
;----
1776 yogev_ezra 341
;       mov       ax,.z2
1245 hidnplayr 342
;       sub       ax,.z1
343
;       cwde
344
;       mov       bx,.x2
345
;       sub       bx,.x1
346
;       movsx     ebx,bx
347
;       movd      mm1,eax
348
;       psllq     mm1,32
349
;       movd      mm1,ebx
350
1776 yogev_ezra 351
 
1245 hidnplayr 352
;;       push      eax
353
;;       movq      mm1,[esp]
354
;;       add       esp,8
355
;;;       mov       ax,.z1
356
;;;       mov       bx,.z2
357
;;;       shl       eax,16
358
;;;       shl       ebx,16
359
;;;       mov       ax,.x1
360
;;;       mov       bx,.x2
361
;       movd       mm2,[.t_x1]
362
;       movd       mm3,[.t_x2]
363
;;       movd      mm0,eax
364
;;       movd      mm1,ebx
365
366
 
367
;;       punpcklwd  mm0,mm4
368
;;       punpcklwd  mm1,mm4
369
;       punpcklwd  mm2,mm4
370
;       punpcklwd  mm3,mm4
371
372
 
373
;       psubd      mm3,mm2
374
375
 
376
 
377
;       movlhps   xmm1,xmm1
378
;       cvtpi2ps  xmm1,mm3
379
380
 
381
382
 
383
			     ; xmm1--> | dx | dz | dtx | dty |
384
;       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
385
;       movhlps   xmm1,xmm1
386
;       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx | dz |
387
;       movq      .dty12,mm0
388
;       movq      .dz12,mm1
389
else
390
       mov	ax,.x2
1819 yogev_ezra 391
       sub	ax,.x1
392
       cwde
393
       shl	eax,ROUND
394
       cdq
395
       idiv	ebx
396
 ;     mov      .dx12,eax
397
       push	 eax
398
399
 
1776 yogev_ezra 400
       sub     ax,.z1
401
       cwde
402
       shl     eax,CATMULL_SHIFT
403
       cdq
404
       idiv    ebx
405
       push    eax
406
1245 hidnplayr 407
 
408
       sub	ax,word[.b_x1]
409
       cwde
410
       shl	eax,ROUND
411
       cdq
412
       idiv	ebx
413
 ;     mov      .dbx12,eax
414
       push	 eax
415
416
 
417
       sub	ax,word[.b_y1]
418
       cwde
419
       shl	eax,ROUND
420
       cdq
421
       idiv	ebx
422
 ;     mov      .dby12,eax
423
       push	 eax
424
425
 
426
       sub	ax,word[.e_x1]
427
       cwde
428
       shl	eax,ROUND
429
       cdq
430
       idiv	ebx
431
 ;     mov      .dex12,eax
432
       push	 eax
433
434
 
435
       sub	ax,word[.e_y1]
436
       cwde
437
       shl	eax,ROUND
438
       cdq
439
       idiv	ebx
440
 ;     mov      .dey12,eax
441
       push	 eax
442
443
 
444
       sub	ax,word[.t_x1]
445
       cwde
446
       shl	eax,ROUND
447
       cdq
448
       idiv	ebx
449
 ;     mov      .dtx12,eax
450
       push	 eax
451
452
 
453
       sub	ax,word[.t_y1]
454
       cwde
455
       shl	eax,ROUND
456
       cdq
457
       idiv	ebx
458
 ;     mov      .dty12,eax
459
       push	 eax
460
end if
1776 yogev_ezra 461
   .bt_dx12_done:
1245 hidnplayr 462
463
 
464
       sub	bx,.y1
465
       jnz	.bt_dx13_make
466
       mov	ecx,8
467
       xor	edx,edx
468
     @@:
469
       push	edx   ;dword 0
470
       loop	@b
471
       jmp	.bt_dx13_done
472
 .bt_dx13_make:
473
       movsx	ebx,bx
474
475
 
476
477
 
1819 yogev_ezra 478
   ;    mov       eax,256
479
       cvtsi2ss  xmm4,[i255d]
1245 hidnplayr 480
       cvtsi2ss  xmm3,ebx	     ;rcps
481
       divss	 xmm3,xmm4
482
       shufps	 xmm3,xmm3,0
483
484
 
485
       movd	 mm1,[.b_x3]
486
       movd	 mm2,[.e_x1]
487
       movd	 mm3,[.e_x3]
488
489
 
490
       punpcklwd  mm0,mm4
491
       punpcklwd  mm1,mm4
492
       punpcklwd  mm2,mm4
493
       punpcklwd  mm3,mm4
494
495
 
496
       psubd	  mm3,mm2
497
498
 
499
       movlhps	 xmm1,xmm1
500
       cvtpi2ps  xmm1,mm3
501
502
 
503
504
 
505
			     ;xmm1--> | dbx | dby | dex | dey |
506
;1       movups    .dey13,xmm1
1819 yogev_ezra 507
508
 
509
       movhlps	 xmm1,xmm1
1245 hidnplayr 510
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1819 yogev_ezra 511
       movq	 .dey13,mm0
1245 hidnplayr 512
       movq	 .dby13,mm1
513
1819 yogev_ezra 514
 
1776 yogev_ezra 515
       sub	 ax,.z1
516
       cwde
517
518
 
1819 yogev_ezra 519
       sub	dx,.x1
520
       movsx	edx,dx
521
522
 
1776 yogev_ezra 523
       movd	  mm3,[.t_x3]
524
525
 
526
       punpcklwd  mm3,mm4
527
       psubd	  mm3,mm2
528
529
 
530
       movlhps	 xmm1,xmm1
531
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 532
       shufps	 xmm1,xmm1,00101111b
533
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 534
535
 
1819 yogev_ezra 536
1776 yogev_ezra 537
 
538
			     ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 539
;1       movlps    .dty13,xmm1
540
;1       movhps    .dz13,xmm1
541
542
 
1776 yogev_ezra 543
       movhlps	 xmm1,xmm1
544
       cvtps2pi  mm1,xmm1
1819 yogev_ezra 545
       movq	 .dty13,mm0
1776 yogev_ezra 546
       movq	 .dz13,mm1
1819 yogev_ezra 547
548
 
1245 hidnplayr 549
1819 yogev_ezra 550
 
551
       sub	ax,.x1
552
       cwde
553
       shl	eax,ROUND
554
       cdq
555
       idiv	ebx
556
 ;     mov      .dx13,eax
557
       push	 eax
558
559
 
1776 yogev_ezra 560
       sub     ax,.z1
561
       cwde
562
       shl     eax,CATMULL_SHIFT
563
       cdq
564
       idiv    ebx
565
  ;    mov    .dz13,eax
566
       push    eax
567
568
 
569
 
1245 hidnplayr 570
       sub	ax,word[.b_x1]
571
       cwde
572
       shl	eax,ROUND
573
       cdq
574
       idiv	ebx
575
 ;     mov      .dbx13,eax
576
       push	 eax
577
578
 
579
       sub	ax,word[.b_y1]
580
       cwde
581
       shl	eax,ROUND
582
       cdq
583
       idiv	ebx
584
 ;     mov      .dby13,eax
585
       push	 eax
586
587
 
588
       sub	ax,word[.e_x1]
589
       cwde
590
       shl	eax,ROUND
591
       cdq
592
       idiv	ebx
593
 ;     mov      .dex13,eax
594
       push	 eax
595
596
 
597
       sub	ax,word[.e_y1]
598
       cwde
599
       shl	eax,ROUND
600
       cdq
601
       idiv	ebx
602
 ;     mov      .dey13,eax
603
       push	 eax
604
605
 
606
       sub	ax,word[.t_x1]
607
       cwde
608
       shl	eax,ROUND
609
       cdq
610
       idiv	ebx
611
 ;     mov      .dtx13,eax
612
       push	 eax
613
614
 
615
       sub	ax,word[.t_y1]
616
       cwde
617
       shl	eax,ROUND
618
       cdq
619
       idiv	ebx
620
 ;     mov      .dty13,eax
621
       push	 eax
622
end if
1776 yogev_ezra 623
   .bt_dx13_done:
1245 hidnplayr 624
625
 
626
       sub	bx,.y2
627
       jnz	.bt_dx23_make
628
       mov	ecx,8
629
       xor	edx,edx
630
     @@:
631
       push	edx   ;dword 0
632
       loop	@b
633
       jmp	.bt_dx23_done
634
 .bt_dx23_make:
635
       movsx	ebx,bx
636
637
 
638
639
 
1819 yogev_ezra 640
   ;    mov       eax,256
641
       cvtsi2ss  xmm4,[i255d]
1245 hidnplayr 642
       cvtsi2ss  xmm3,ebx	     ;rcps
643
       divss	 xmm3,xmm4
644
       shufps	 xmm3,xmm3,0
645
646
 
647
       movd	 mm1,[.b_x3]
648
       movd	 mm2,[.e_x2]
649
       movd	 mm3,[.e_x3]
650
651
 
652
       punpcklwd  mm0,mm4
653
       punpcklwd  mm1,mm4
654
       punpcklwd  mm2,mm4
655
       punpcklwd  mm3,mm4
656
657
 
658
       psubd	  mm3,mm2
659
660
 
661
       movlhps	 xmm1,xmm1
662
       cvtpi2ps  xmm1,mm3
663
664
 
665
666
 
667
			     ;xmm1--> | dbx | dby | dex | dey |
668
;1       movups    .dey23,xmm1
1819 yogev_ezra 669
670
 
671
       movhlps	 xmm1,xmm1
1245 hidnplayr 672
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1819 yogev_ezra 673
       movq	 .dey23,mm0
1245 hidnplayr 674
       movq	 .dby23,mm1
675
1819 yogev_ezra 676
 
1776 yogev_ezra 677
       sub	 ax,.z2
678
       cwde
679
680
 
1819 yogev_ezra 681
       sub	dx,.x2
682
       movsx	edx,dx
683
684
 
1776 yogev_ezra 685
       movd	  mm3,[.t_x3]
686
687
 
688
       punpcklwd  mm3,mm4
689
       psubd	  mm3,mm2
690
691
 
692
       movlhps	 xmm1,xmm1
693
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 694
       shufps	 xmm1,xmm1,00101111b
695
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 696
697
 
1819 yogev_ezra 698
1776 yogev_ezra 699
 
700
			    ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 701
;       movlps    .dty23,xmm1
702
;       movhps    .dz23,xmm1
703
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
1776 yogev_ezra 704
       movhlps	 xmm1,xmm1
705
       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx  |  dz |
1819 yogev_ezra 706
       movq	 .dty23,mm0
1776 yogev_ezra 707
       movq	 .dz23,mm1
1819 yogev_ezra 708
709
 
710
 
1245 hidnplayr 711
       mov	ax,.x3
1819 yogev_ezra 712
       sub	ax,.x2
713
       cwde
714
       shl	eax,ROUND
715
       cdq
716
       idiv	ebx
717
 ;     mov      .dx23,eax
718
       push	 eax
719
1776 yogev_ezra 720
 
721
       sub     ax,.z2
722
       cwde
723
       shl     eax,CATMULL_SHIFT
724
       cdq
725
       idiv    ebx
726
     ; mov     .dz23,eax
727
       push    eax
728
729
 
1245 hidnplayr 730
       sub	ax,word[.b_x2]
731
       cwde
732
       shl	eax,ROUND
733
       cdq
734
       idiv	ebx
735
 ;     mov      .dbx23,eax
736
       push	 eax
737
738
 
739
       sub	ax,word[.b_y2]
740
       cwde
741
       shl	eax,ROUND
742
       cdq
743
       idiv	ebx
744
 ;     mov      .dby23,eax
745
       push	 eax
746
747
 
748
       sub	ax,word[.e_x2]
749
       cwde
750
       shl	eax,ROUND
751
       cdq
752
       idiv	ebx
753
 ;     mov      .dex23,eax
754
       push	 eax
755
756
 
757
       sub	ax,word[.e_y2]
758
       cwde
759
       shl	eax,ROUND
760
       cdq
761
       idiv	ebx
762
 ;     mov      .dey23,eax
763
       push	 eax
764
765
 
1776 yogev_ezra 766
 
1245 hidnplayr 767
       sub	ax,word[.t_x2]
768
       cwde
769
       shl	eax,ROUND
770
       cdq
771
       idiv	ebx
772
 ;     mov      .dtx23,eax
773
       push	 eax
774
775
 
776
       sub	ax,word[.t_y2]
777
       cwde
778
       shl	eax,ROUND
779
       cdq
780
       idiv	ebx
781
 ;     mov      .dty23,eax
782
       push	 eax
783
end if
1776 yogev_ezra 784
      ;  sub     esp,40
1245 hidnplayr 785
   .bt_dx23_done:
786
       sub	 esp,64
787
1819 yogev_ezra 788
 
1245 hidnplayr 789
       shl	eax,ROUND
790
       mov	.cx1,eax
791
       mov	.cx2,eax
792
  ;     push     eax
793
  ;     push     eax
794
795
 
796
       shl	ebx,ROUND
797
       mov	.cbx1,ebx
798
       mov	.cbx2,ebx
799
      ; push     ebx
800
      ; push     ebx
801
802
 
803
       shl	ecx,ROUND
804
       mov	.cby1,ecx
805
       mov	.cby2,ecx
806
      ; push     ecx
807
      ; push     ecx
808
809
 
810
       shl	edx,ROUND
811
       mov	.cex1,edx
812
       mov	.cex2,edx
813
    ;   push     edx
814
    ;   push     edx
815
816
 
817
       shl	eax,ROUND
818
       mov	.cey1,eax
819
       mov	.cey2,eax
820
    ;   push     eax
821
    ;   push     eax
822
823
 
824
       shl	ebx,CATMULL_SHIFT
825
       mov	.cz1,ebx
826
       mov	.cz2,ebx
827
   ;    push     ebx
828
   ;    push     ebx
829
830
 
831
       movsx	ecx,word[.t_x1]
832
       shl	ecx,ROUND
833
       mov	.ctx1,ecx
834
       mov	.ctx2,ecx
835
       ;push     ecx
836
       ;push     ecx
837
838
 
839
       shl	edx,ROUND
840
       mov	.cty1,edx
841
       mov	.cty2,edx
842
      ; push     edx
843
      ; push     edx
844
845
 
2881 leency 846
       movups  xmm0,.cby1
847
       movups  xmm1,.cty1
848
       movups  xmm2,.cby2
849
       movups  xmm3,.cty2
850
       movups  xmm4,.dby13
851
       movups  xmm5,.dty13
852
       movups  xmm6,.dby12
853
       movups  xmm7,.dty12
854
       .scby1  equ [edi]
855
       .scty1  equ [edi+16]
856
       .scby2  equ [edi+32]
857
       .scty2  equ [edi+48]
858
       .sdby13 equ [edi+64]
859
       .sdty13 equ [edi+80]
860
       .sdby12 equ [edi+96]
861
       .sdty12 equ [edi+128]
862
       push    edi
863
       mov     edi,sse_repository
864
       movaps  .scby1,xmm0
865
       movaps  .scty1,xmm1
866
       movaps  .scby2,xmm2
867
       movaps  .scty2,xmm3
868
       movaps  .sdby13,xmm4
869
       movaps  .sdty13,xmm5
870
       movaps  .sdby12,xmm6
871
       movaps  .sdty12,xmm7
872
       pop     edi
873
874
 
875
       movsx	ecx,.y1
2192 leency 876
       cmp	cx,.y2
877
       jge	.loop12_done
878
  .loop12:
879
;if Ext >= SSE2
880
;       fxsave  [sse_repository]
881
;end if
882
       call	.call_line
883
if Ext >= SSE2
884
;       fxrstor [sse_repository]
885
       movups  xmm0,.cby1
886
       movups  xmm1,.cty1
887
       movups  xmm2,.cby2
888
       movups  xmm3,.cty2
889
    ;   movups  xmm4,.dby13
2881 leency 890
    ;   movups  xmm5,.dty13
891
    ;   movups  xmm6,.dby12
892
    ;   movups  xmm7,.dty12
893
    ;   paddd   xmm0,xmm4
894
    ;   paddd   xmm1,xmm5
895
    ;   paddd   xmm2,xmm6
896
    ;   paddd   xmm3,xmm7
897
       push    edi
898
       mov     edi,sse_repository
899
       paddd   xmm0,.sdby13
900
       paddd   xmm1,.sdty13
901
       paddd   xmm2,.sdby12
902
       paddd   xmm3,.sdty12
903
       pop     edi
904
       movups  .cby1,xmm0
2192 leency 905
       movups  .cty1,xmm1
906
       movups  .cby2,xmm2
907
       movups  .cty2,xmm3
908
end if
909
1245 hidnplayr 910
 
2192 leency 911
       movq	mm0,.cby2
1245 hidnplayr 912
       movq	mm1,.cby1
913
       movq	mm2,.cey2
914
       movq	mm3,.cey1
915
       movq	mm4,.cty1
916
       movq	mm5,.cty2
917
       movq	mm6,.cz1
918
       movq	mm7,.cz2
919
       paddd	mm0,.dby12
920
       paddd	mm1,.dby13
921
       paddd	mm2,.dey12
922
       paddd	mm3,.dey13
923
       paddd	mm4,.dty13
924
       paddd	mm5,.dty12
925
       paddd	mm6,.dz13
926
       paddd	mm7,.dz12
927
       movq	.cby2,mm0
928
       movq	.cby1,mm1
929
       movq	.cey1,mm3
930
       movq	.cey2,mm2
931
       movq	.cty1,mm4
932
       movq	.cty2,mm5
933
       movq	.cz1,mm6
934
       movq	.cz2,mm7
935
end if
1819 yogev_ezra 936
if Ext = NON
937
       mov	edx,.dbx13
1245 hidnplayr 938
       add	.cbx1,edx
939
       mov	eax,.dbx12
940
       add	.cbx2,eax
941
       mov	ebx,.dby13
942
       add	.cby1,ebx
943
       mov	edx,.dby12
944
       add	.cby2,edx
945
946
 
947
       add	.cex1,eax
948
       mov	ebx,.dex12
949
       add	.cex2,ebx
950
       mov	edx,.dey13
951
       add	.cey1,edx
952
       mov	eax,.dey12
953
       add	.cey2,eax
954
955
 
956
       add	.ctx1,eax
957
       mov	ebx,.dtx12
958
       add	.ctx2,ebx
959
       mov	edx,.dty13
960
       add	.cty1,edx
961
       mov	eax,.dty12
962
       add	.cty2,eax
963
964
 
965
       add	.cx1,eax
966
       mov	ebx,.dx12
967
       add	.cx2,ebx
968
       mov	ebx,.dz13
969
       add	.cz1,ebx
970
       mov	edx,.dz12
971
       add	.cz2,edx
972
end if
973
       inc	ecx
974
       cmp	cx,.y2
975
       jl	.loop12
976
    .loop12_done:
977
978
 
979
       cmp	cx,.y3
980
       jge	.loop23_done
981
982
 
1819 yogev_ezra 983
 
1245 hidnplayr 984
       shl	eax,CATMULL_SHIFT
985
       mov	.cz2,eax
986
987
 
988
       shl	ebx,ROUND
989
       mov	.cx2,ebx
990
991
 
992
       shl	edx,ROUND
993
       mov	.cbx2,edx
994
995
 
996
       shl	eax,ROUND
997
       mov	.cby2,eax
998
999
 
1000
       shl	ebx,ROUND
1001
       mov	.cex2,ebx
1002
1003
 
1004
       shl	edx,ROUND
1005
       mov	.cey2,edx
1006
1007
 
1008
       shl	eax,ROUND
1009
       mov	.ctx2,eax
1010
1011
 
1012
       shl	ebx,ROUND
1013
       mov	.cty2,ebx
1014
if Ext >= SSE2
2881 leency 1015
       movups  xmm2,.cby2
1016
       movups  xmm3,.cty2
1017
   ;    movups  xmm4,.dby13
1018
   ;    movups  xmm5,.dty13
1019
       movups  xmm6,.dby23
1020
       movups  xmm7,.dty23
1021
;       .scby1  equ [edi]
1022
;       .scty1  equ [edi+16]
1023
;       .scby2  equ [edi+32]
1024
;       .scty2  equ [edi+48]
1025
;       .sdby13 equ [edi+64]
1026
;       .sdty13 equ [edi+80]
1027
       .sdby23 equ [edi+160]
1028
       .sdty23 equ [edi+192]
1029
       push    edi
1030
       mov     edi,sse_repository
1031
;       movaps  .scby1,xmm0
1032
;       movaps  .scty1,xmm1
1033
       movaps  .scby2,xmm2
1034
       movaps  .scty2,xmm3
1035
;       movaps  .sdby13,xmm4
1036
;       movaps  .sdty13,xmm5
1037
       movaps  .sdby23,xmm6
1038
       movaps  .sdty23,xmm7
1039
       pop     edi
1040
1041
 
1042
1043
 
2192 leency 1044
;if Ext >= SSE2
1045
;       fxsave  [sse_repository]
1046
;end if
1047
       call	.call_line
1048
1049
 
1050
2881 leency 1051
 
2192 leency 1052
       movups  xmm1,.cty1
1053
       movups  xmm2,.cby2
1054
       movups  xmm3,.cty2
1055
2881 leency 1056
 
1057
 
1058
       mov     edi,sse_repository
1059
       paddd   xmm0,.sdby13
1060
       paddd   xmm1,.sdty13
1061
       paddd   xmm2,.sdby23
1062
       paddd   xmm3,.sdty23
1063
       pop     edi
1064
       movups  .cby1,xmm0
2192 leency 1065
       movups  .cty1,xmm1
1066
       movups  .cby2,xmm2
1067
       movups  .cty2,xmm3
1068
2881 leency 1069
 
1070
 
1071
 
1072
 
1073
;       movups  xmm0,.cby1
1074
;       movups  xmm1,.cty1
1075
;       movups  xmm2,.cby2
1076
;       movups  xmm3,.cty2
1077
;       movups  xmm4,.dby13
1078
;       movups  xmm5,.dty13
1079
;       movups  xmm6,.dby23
1080
;       movups  xmm7,.dty23
1081
;       paddd   xmm0,xmm4
1082
;       paddd   xmm1,xmm5
1083
;       paddd   xmm2,xmm6
1084
 ;      paddd   xmm3,xmm7
1085
 ;      movups  .cby1,xmm0
1086
 ;      movups  .cty1,xmm1
1087
 ;      movups  .cby2,xmm2
1088
 ;      movups  .cty2,xmm3
1089
;
1819 yogev_ezra 1090
end if
2192 leency 1091
if (Ext = MMX) | (Ext = SSE)
1092
       movq	mm0,.cby2
1245 hidnplayr 1093
       movq	mm1,.cby1
1094
       movq	mm2,.cey2
1095
       movq	mm3,.cey1
1096
       movq	mm4,.cty1
1097
       movq	mm5,.cty2
1098
       movq	mm6,.cz1
1099
       movq	mm7,.cz2
1100
       paddd	mm0,.dby23
1101
       paddd	mm1,.dby13
1102
       paddd	mm2,.dey23
1103
       paddd	mm3,.dey13
1104
       paddd	mm4,.dty13
1105
       paddd	mm5,.dty23
1106
       paddd	mm6,.dz13
1107
       paddd	mm7,.dz23
1108
       movq	.cby2,mm0
1109
       movq	.cby1,mm1
1110
       movq	.cey2,mm2
1111
       movq	.cey1,mm3
1112
       movq	.cty1,mm4
1113
       movq	.cty2,mm5
1114
       movq	.cz1,mm6
1115
       movq	.cz2,mm7
1116
end if
1819 yogev_ezra 1117
If Ext = NON
1118
       mov	edx,.dbx13
1245 hidnplayr 1119
       add	.cbx1,edx
1120
       mov	eax,.dbx23
1121
       add	.cbx2,eax
1122
       mov	ebx,.dby13
1123
       add	.cby1,ebx
1124
       mov	edx,.dby23
1125
       add	.cby2,edx
1126
1127
 
1128
       add	.cex1,eax
1129
       mov	ebx,.dex23
1130
       add	.cex2,ebx
1131
       mov	edx,.dey13
1132
       add	.cey1,edx
1133
       mov	eax,.dey23
1134
       add	.cey2,eax
1135
1136
 
1137
       add	.cx1,eax
1138
       mov	ebx,.dx23
1139
       add	.cx2,ebx
1140
       mov	ebx,.dz13
1141
       add	.cz1,ebx
1142
       mov	edx,.dz23
1143
       add	.cz2,edx
1144
1145
 
1146
       add	.ctx1,eax
1147
       mov	ebx,.dtx23
1148
       add	.ctx2,ebx
1149
       mov	edx,.dty13
1150
       add	.cty1,edx
1151
       mov	eax,.dty23
1152
       add	.cty2,eax
1153
end if
1154
       inc	ecx
1155
       cmp	cx,.y3
1156
       jl	.loop23
1157
    .loop23_done:
1158
1159
 
1160
ret   50
1161
1162
 
1163
1164
 
1165
       ; xmm0= cby1,cbx1,cz1,cx1
2881 leency 1166
       ; xmm1= cty1,ctx1,cey1,cex1
1167
if Ext >= SSE2
1168
       sub	esp,8
1169
       shufps	xmm1,xmm1,10110001b
1170
       shufps	xmm3,xmm3,10110001b
1171
       movlps	[esp],xmm1
1172
else
1173
       push	dword .cty1
1245 hidnplayr 1174
       push	.ctx1
1175
end if
2881 leency 1176
       push	dword .cz1
1245 hidnplayr 1177
if Ext>=SSE2
2881 leency 1178
       sub	esp,8
1179
       movlps	[esp],xmm3
1180
else
1181
       push	dword .cty2
1819 yogev_ezra 1182
       push	.ctx2
1183
end if
2881 leency 1184
       push	dword .cz2
1245 hidnplayr 1185
if Ext>=SSE2
2881 leency 1186
       sub	esp,32
1187
       movhps	[esp+24],xmm3
1188
       shufps	xmm2,xmm2,10110001b
1189
       movlps	[esp+16],xmm2
1190
       movhps	[esp+8],xmm1
1191
       shufps	xmm0,xmm0,10110001b
1192
       movlps	[esp],xmm0 ;================================
1193
1194
 
1195
       push	dword .cey2
1245 hidnplayr 1196
       push	.cex2
1197
       push	dword .cby2
1819 yogev_ezra 1198
       push	.cbx2
1199
       push	dword .cey1
1245 hidnplayr 1200
       push	.cex1
1201
       push	dword .cby1
1202
       push	.cbx1
1203
end if
2881 leency 1204
2192 leency 1205
 
2881 leency 1206
       push	.z_buff
1207
       push	.t_emap
1208
       push	.t_bmap
1209
1210
 
1245 hidnplayr 1211
1212
 
1213
       sar	eax,ROUND
1214
       mov	ebx,.cx2
1215
       sar	ebx,ROUND
1216
1217
 
1218
1219
 
1220
;end if
1819 yogev_ezra 1221
ret
1245 hidnplayr 1222
bump_tex_line_z:
1223
;--------------in: eax - x1
1224
;--------------    ebx - x2
1225
;--------------    edi - pointer to screen buffer
1226
;stack - another parameters :
1227
.y	equ dword [ebp+4]
1228
.bmap	equ dword [ebp+8]	 ; bump map pointer
2881 leency 1229
.emap	equ dword [ebp+12]	 ; env map pointer
1230
.z_buff equ dword [ebp+16]	 ; z buffer
1231
.tex_map equ dword [ebp+20]	 ; texture pointer
1232
1245 hidnplayr 1233
 
2881 leency 1234
.by1	equ  [ebp+28]  ;       |
1235
.ex1	equ  [ebp+32]  ;       |
1236
.ey1	equ  [ebp+36]  ;       |
1237
.bx2	equ  [ebp+40]  ;       |
1238
.by2	equ  [ebp+44]  ;       |>   b. map and e. map coords
1239
.ex2	equ  [ebp+48]  ;       |>   shifted shl ROUND
1240
.ey2	equ  [ebp+52]  ;   ---
1241
.z2	equ  [ebp+56]
1242
.tx2	equ  [ebp+60]
1243
.ty2	equ  [ebp+64]
1244
.z1	equ  [ebp+68]
1245
.tx1	equ  [ebp+72]
1246
.ty1	equ  [ebp+76]
1247
1245 hidnplayr 1248
 
1249
 
2881 leency 1250
 
1819 yogev_ezra 1251
.x2	equ [ebp-8]
1252
.dbx	equ [ebp-12]
1253
.dby	equ [ebp-16]
1254
.dex	equ [ebp-20]
1255
.dey	equ [ebp-24]
1256
.dz	equ [ebp-28]
1257
.dtx	equ [ebp-32]
1258
.dty	equ [ebp-36]
1259
1245 hidnplayr 1260
 
1819 yogev_ezra 1261
.cby	equ [ebp-44]
1262
.cex	equ [ebp-48]
1263
.cey	equ [ebp-52]
1264
.cz	equ [ebp-56]
1265
.czbuff equ [ebp-60]
1266
.ctx	equ [ebp-64]
1267
.cty	equ [ebp-68]
1268
.c_scr	equ [ebp-72]
1269
1245 hidnplayr 1270
 
1271
.temp2	equ	   ebp-88
1272
.temp3	equ	   ebp-76
1273
.temp4	equ	   ebp-84
1274
.temp5	equ	   ebp-92
1275
1276
 
1277
1278
 
1279
	or	ecx,ecx
1280
	jl	.bl_end
1281
	movzx	edx,word[size_y_var]
6619 leency 1282
	cmp	ecx,edx  ;SIZE_Y
1283
	jge	.bl_end
1245 hidnplayr 1284
1285
 
1286
	jl	.bl_ok
1287
	je	.bl_end
1288
1289
 
2881 leency 1290
 
1245 hidnplayr 1291
	mov	edx,.bx1
1292
	xchg	edx,.bx2
1293
	mov	.bx1,edx
1294
	mov	edx,.by1
1295
	xchg	edx,.by2
1296
	mov	.by1,edx
1297
1298
 
1299
	xchg	edx,.ex2
1300
	mov	.ex1,edx
1301
	mov	edx,.ey1
1302
	xchg	edx,.ey2
1303
	mov	.ey1,edx
1304
1305
 
1306
	xchg	edx,.tx2
1307
	mov	.tx1,edx
1308
	mov	edx,.ty1
1309
	xchg	edx,.ty2
1310
	mov	.ty1,edx
1311
end if
1312
if Ext = MMX
1819 yogev_ezra 1313
	movq	mm0,.bx1
1314
	movq	mm1,.bx2
1315
	movq	mm2,.ex1
1316
	movq	mm3,.ex2
1317
	movq	mm4,.tx1
1318
	movq	mm5,.tx2
1319
	movq	.bx2,mm0
1320
	movq	.bx1,mm1
1321
	movq	.ex1,mm3
1322
	movq	.ex2,mm2
1323
	movq	.tx1,mm5
1324
	movq	.tx2,mm4
1325
end if
1326
if Ext>=SSE
1327
	movups xmm0,.bx1
1328
	movups xmm1,.bx2
1329
	movups .bx1,xmm1
1330
	movups .bx2,xmm0
1331
	movq	mm0,.tx1
1332
	movq	mm1,.tx2
1333
	movq	.tx1,mm1
1334
	movq	.tx2,mm0
1335
end if
1336
;if Ext>=SSE2
2881 leency 1337
;        movaps  xmm4,xmm0
1338
;        movaps  xmm0,xmm2
1339
;        movaps  xmm2,xmm4
1340
;        movaps  xmm5,xmm1
1341
;        movaps  xmm1,xmm3
1342
;        movaps  xmm3,xmm5
1343
;else
1344
1245 hidnplayr 1345
 
2881 leency 1346
	mov	edx,.z1
1245 hidnplayr 1347
	xchg	edx,.z2
1348
	mov	.z1,edx
1349
;end if
2881 leency 1350
  .bl_ok:
1245 hidnplayr 1351
;if Ext >= SSE2
2881 leency 1352
;        shufps  xmm0,xmm0,11100001b
1353
;        shufps  xmm2,xmm2,11100001b
1354
;        movlps  .bx1,xmm0
1355
;        movlps  .bx2,xmm2
1356
1357
 
1358
 
1359
;        shufps  xmm2,xmm2,00011011b
1360
;        movd    eax,xmm0
1361
;        movd    ebx,xmm2
1362
;        shufps  xmm0,xmm0,11000110b
1363
;        shufps  xmm2,xmm2,11000110b
1364
;        movd    .z1,xmm0
1365
;        movd    .z2,xmm2
1366
;        shufps  xmm1,xmm1,10110001b
1367
;        shufps  xmm3,xmm3,10110001b
1368
;        movlps  .ex1,xmm1
1369
;        movlps  .ex2,xmm2
1370
;        movhps  .tx1,xmm1
1371
;        movhps  .tx2,xmm2
1372
1373
 
1374
;        mov     edx,.z1
1375
;        xchg    edx,.z2
1376
;        mov     .z1,edx
1377
1378
 
1379
 
1380
1381
 
1245 hidnplayr 1382
	push	ebx	      ;store x1, x2
1383
	movzx	ebx,word[size_x_var]
6619 leency 1384
    ;    mov     eax,.x1
1385
	cmp	dword .x1,ebx  ;dword .x1,SIZE_X
1386
	jge	.bl_end
1245 hidnplayr 1387
	cmp	dword .x2,0
1819 yogev_ezra 1388
	jle	.bl_end
1245 hidnplayr 1389
1390
 
1391
	sub	ebx,.x1
1392
1393
 
1394
1395
 
1396
       cvtsi2ss  xmm3,ebx	     ;rcps
1397
       shufps	 xmm3,xmm3,0
1398
; float using SSE variant  ::-->
1819 yogev_ezra 1399
;       movups    xmm0,.bx1  ; new
1400
;       movups    xmm1,.bx2  ; new
1401
1245 hidnplayr 1402
 
1819 yogev_ezra 1403
       movlhps	 xmm0,xmm0
1245 hidnplayr 1404
       cvtpi2ps  xmm0,.ex1 ;mm2
1819 yogev_ezra 1405
       cvtpi2ps  xmm1,.bx2 ;mm1
1406
       movlhps	 xmm1,xmm1
1245 hidnplayr 1407
       cvtpi2ps  xmm1,.ex2 ;mm3
1819 yogev_ezra 1408
       subps	 xmm1,xmm0
1245 hidnplayr 1409
1410
 
1411
1412
 
1413
;       movups    .dey,xmm1  ; new
1819 yogev_ezra 1414
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1245 hidnplayr 1415
       movhlps	 xmm1,xmm1
1416
       cvtps2pi  mm1,xmm1
1417
       movq	 .dey,mm0
1819 yogev_ezra 1418
       movq	 .dby,mm1
1419
1245 hidnplayr 1420
 
1421
       movd	 mm3,.z2
1422
1423
 
1819 yogev_ezra 1424
       movlhps	 xmm0,xmm0
1245 hidnplayr 1425
       cvtpi2ps  xmm0,mm2
1426
       cvtpi2ps  xmm1,.tx2 ;mm1
1819 yogev_ezra 1427
       movlhps	 xmm1,xmm1
1245 hidnplayr 1428
       cvtpi2ps  xmm1,mm3
1429
;       movups    xmm0,,z1  ; new
1819 yogev_ezra 1430
;       movups    xmm1,.z2  ; new
1431
       subps	 xmm1,xmm0
1245 hidnplayr 1432
1433
 
1434
1435
 
1819 yogev_ezra 1436
1437
 
1245 hidnplayr 1438
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1439
       movhlps	 xmm1,xmm1
1440
       cvtps2pi  mm1,xmm1
1441
       movd	 .dz,mm0
1442
       movq	 .dty,mm1
1819 yogev_ezra 1443
1245 hidnplayr 1444
 
1445
1446
 
1447
	sub	eax,.bx1
1448
	cdq
1449
	idiv	ebx
1450
	push	eax
1451
1452
 
1453
	sub	eax,.by1
1454
	cdq
1455
	idiv	ebx
1456
	push	eax
1457
1458
 
1459
	sub	eax,.ex1
1460
	cdq
1461
	idiv	ebx
1462
	push	eax
1463
1464
 
1465
	sub	eax,.ey1
1466
	cdq
1467
	idiv	ebx
1468
	push	eax
1469
1470
 
1471
 
1472
	sub	eax,.z1
1473
	cdq
1474
	idiv	ebx
1475
	push	eax
1476
1477
 
1478
	sub	eax,.tx1
1479
	cdq
1480
	idiv	ebx
1481
	push	eax
1482
1483
 
1484
	sub	eax,.ty1
1485
	cdq
1486
	idiv	ebx
1487
	push	eax
1488
1489
 
1490
	cmp	dword .x1,0	    ; set correctly begin variable
1819 yogev_ezra 1491
	jge	@f	      ; CLIPPING ON FUNCTION
1245 hidnplayr 1492
			      ; clip the part of the triangle that exceeds the screen
1493
	mov	ebx,.x1
1494
	neg	ebx
1495
1819 yogev_ezra 1496
 
1497
1498
 
1499
;        shufps   xmm0,xmm0,0
1500
;        movups   xmm1,.dey
1501
;        mulps    xmm1,xmm0
1502
;        shufps   xmm1,xmm1,00011011b
1503
;        movups   xmm2,.bx1
1504
;        addps    xmm2,xmm1
1505
;        movups   .bx1,xmm2
1506
1507
 
1508
	imul	ebx	      ; eax = .dz * abs(.x1)
1245 hidnplayr 1509
	add	.z1,eax
1510
	mov	dword .x1,0
1819 yogev_ezra 1511
1245 hidnplayr 1512
 
1513
	imul	ebx
1514
	add    .bx1,eax
1515
1516
 
1517
	imul	ebx
1518
	add	.by1,eax
1519
1520
 
1521
	imul	ebx
1522
	add	.ex1,eax
1523
1524
 
1525
	imul	ebx
1526
	add	.ey1,eax
1527
1528
 
1529
	imul	ebx
1530
	add	.tx1,eax
1531
1532
 
1533
	imul	ebx
1534
	add	.ty1,eax
1535
1536
 
1537
   ;     mov     ebx,.x2
6619 leency 1538
	movzx	eax,word[size_x_var]
1539
       ; cmp     dword .x2,SIZE_X
1540
	cmp	dword .x2,eax  ; eax,ebx
1541
	jl	@f
1245 hidnplayr 1542
	mov	dword .x2,eax  ;SIZE_X
6619 leency 1543
      @@:
1245 hidnplayr 1544
	movzx	eax,word[size_x_var]  ;SIZE_X       ;calc memory begin in buffers
6619 leency 1545
	mul	.y
1245 hidnplayr 1546
	add	eax,.x1
1547
	lea	esi,[4*eax]
1548
	add	esi,.z_buff	  ; z-buffer filled with dd variables
1549
	lea	eax,[eax*3]
1550
	add	edi,eax
1551
1552
 
1553
 
1554
	sub	ecx,.x1
1555
	; init current variables
1556
	push	dword .bx1   ; current b, e and t shifted shl ROUND   .cbx
1819 yogev_ezra 1557
	push	dword .by1					   ;  .cby
1558
	push	dword .ex1					   ;  .cex
1559
	push	dword .ey1					   ;  .cey
1560
1245 hidnplayr 1561
 
1819 yogev_ezra 1562
	push	esi					     ; .czbuff
1245 hidnplayr 1563
1564
 
1819 yogev_ezra 1565
	push	dword .ty1	;         .cty
1566
	push	edi	  ;         .c_scr
1245 hidnplayr 1567
if Ext = SSE2
2984 leency 1568
	mov    eax,TEXTURE_SIZE
1569
	movd   xmm1,eax
1570
	shufps xmm1,xmm1,0
1571
	push   dword  TEX_X
1572
	push   dword  -TEX_X
1573
	push   dword  1
1574
	push   dword  -1
1575
	movups xmm2,[esp]
1576
	movd   xmm3,.bmap
1577
	shufps xmm3,xmm3,0
1578
end if
1579
1979 yogev_ezra 1580
 
1245 hidnplayr 1581
	movq	mm7,.cty
1819 yogev_ezra 1582
	movq	mm6,.cby
1583
	movq	mm5,.cey
1584
;        movq    mm4,.dtyq
1245 hidnplayr 1585
;        movq    mm3,.dbyq
1586
end if
1587
1588
 
1589
    ; if TEX = SHIFTING   ;bump drawing only in shifting mode
1590
	mov	esi,.czbuff	 ; .czbuff current address in buffer
1591
	mov	ebx,.cz 	 ; .cz - cur z position
1592
	cmp	ebx,dword[esi]
1593
	jge	.skip
1594
if Ext=NON
1595
	mov	eax,.cby
1596
	shr	eax,ROUND
1597
	mov	esi,.cbx
1598
	shr	esi,ROUND
1599
else
1600
	movq	mm1,mm6
1601
	psrld	mm1,ROUND
1602
	movd	eax,mm1
1603
	psrlq	mm1,32
1604
	movd	esi,mm1
1605
end if
1606
1607
 
1608
	add	esi,eax 	;-  ; esi - current bump map index
1609
1610
 
2984 leency 1611
1612
 
1613
	shufps	xmm0,xmm0,0
1614
	paddd	xmm0,xmm2
1615
	pand	xmm0,xmm1
1616
	paddd	xmm0,xmm3
1617
1618
 
1619
	movzx	eax,byte[ebx]
1620
;
1979 yogev_ezra 1621
;        shufps  xmm0,xmm0,11100001b
1622
	psrldq	xmm0,4
2984 leency 1623
	movd	ebx,xmm0
1624
	movzx	ebx,byte[ebx]
1625
	sub	eax,ebx
1626
;
1979 yogev_ezra 1627
;        shufps  xmm0,xmm0,11111110b
1628
	psrldq	xmm0,4
2984 leency 1629
	movd	ebx,xmm0
1630
	movzx	edx, byte [ebx]
1631
;
1979 yogev_ezra 1632
;        shufps  xmm0,xmm0,11111111b
1633
	psrldq	xmm0,4
2984 leency 1634
	movd	ebx,xmm0
1635
	movzx	ebx, byte [ebx]
1636
	sub	edx,ebx
1637
;
1979 yogev_ezra 1638
else
2984 leency 1639
;        mov     ebx,esi
1640
;        dec     ebx
1641
	lea	ebx,[esi-1]
1642
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1643
	add	ebx,.bmap
1644
	movzx	eax,byte [ebx]
1645
1646
 
2984 leency 1647
;        inc     ebx
1648
	lea	ebx,[esi+1]
1649
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1650
	add	ebx,.bmap
1651
	movzx	ebx,byte [ebx]
1652
	sub	eax,ebx
1653
1654
 
2984 leency 1655
;        sub     ebx,TEX_X
1656
	lea	ebx,[esi-TEX_X]
1657
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1658
	add	ebx,.bmap
1659
	movzx	edx,byte [ebx]
1660
1661
 
2984 leency 1662
;        add     ebx,TEX_X
1663
	lea	ebx,[esi+TEX_X]
1664
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1665
	add	ebx,.bmap
1666
	movzx	ebx,byte [ebx]
1667
	sub	edx,ebx
1668
end if
2984 leency 1669
1245 hidnplayr 1670
 
1671
     ;  edx - vertical   sub    modified y coord
1672
if Ext=NON
1673
	mov	ebx,.cex       ;.cex - current env map X
1674
	shr	ebx,ROUND
1675
	add	eax,ebx
1676
1677
 
1678
 
1679
	shr	ebx,ROUND
1680
	add	edx,ebx
1681
1682
 
1683
	movq	mm1,mm5        ; mm5 - copy of cur env coords
1684
	psrld	mm1,ROUND
1685
	movd	ebx,mm1
1686
	psrlq	mm1,32
1687
	add	eax,ebx
1688
	movd	ebx,mm1
1689
	add	edx,ebx
1690
;        movq    qword[.temp1],mm3
1691
;        add     eax,dword [.temp1]
1692
;        add     edx,dword [.temp1+4]
1693
end if
1694
1695
 
1696
	jl	.black
1697
	cmp	eax,TEX_X
1698
	jg	.black
1699
	or	edx,edx
1700
	jl	.black
1701
	cmp	edx,TEX_Y
1702
	jg	.black
1703
1704
 
1705
	add	edx,eax 	; proponuje nie stawiac czarnego pixela tylko
1706
	lea	esi,[edx*3]	; niezaburzony.
1707
	add	esi,.emap	;
1708
	lodsd
1709
1710
 
1711
	mov	edx,.cty
1712
	shr	edx,ROUND  ; sar
1713
1714
 
1715
	shr	edi,ROUND  ; sar
1716
else
1717
	movq	mm1,mm7
1718
	psrld	mm1,ROUND
1719
	movd	edx,mm1
1720
	psrlq	mm1,32
1721
	movd	edi,mm1
1722
1723
 
1724
1725
 
1726
	add	edi,edx
1727
	and	edi,TEXTURE_SIZE
1728
	lea	esi,[edi*3]
1729
	add	esi,.tex_map
1730
1731
 
1732
	mov	edx,eax
1733
	lodsd
1734
	push	ax
1735
	mul	dl
1736
	mov	dl,ah
1737
	pop	ax
1738
	shr	ax,8
1739
	mul	dh
1740
	mov	al,dl
1741
	mov	edi,.c_scr
1742
	stosw
1743
	shr	edx,16
1744
	shr	eax,16
1745
	mul	dl
1746
	shr	ax,8
1747
	stosb
1748
else
1749
	movd	   mm0,eax
1750
	pxor	   mm1,mm1
1751
	punpcklbw  mm0,mm1
1752
	movd	   mm2,[esi]
1753
	punpcklbw  mm2,mm1
1754
	pmullw	   mm0,mm2
1755
	psrlw	   mm0,8
1756
	packuswb   mm0,mm1
1757
	mov	   edi,.c_scr
1758
	movd	   [edi],mm0
1759
1760
 
1761
1762
 
1763
     @@:
1764
     .black:
1765
	xor	eax,eax
1766
	mov	edi,.c_scr
1767
	stosd
1768
     .actual_zbuff:
1769
	mov	eax,.cz
1770
	mov	edi,.czbuff
1771
	stosd
1772
1773
 
1774
	add	dword .czbuff,4
1819 yogev_ezra 1775
	add	dword .c_scr,3
1776
1245 hidnplayr 1777
 
1778
	mov	eax,.dbx
1779
	add	.cbx,eax
1780
	mov	ebx,.dby
1781
	add	.cby,ebx
1782
1783
 
1784
	add	.cex,edx
1785
	mov	eax,.dey
1786
	add	.cey,eax
1787
1788
 
1789
	add	.ctx,ebx
1790
	mov	edx,.dty
1791
	add	.cty,edx
1792
1793
 
1794
	paddd	mm7,.dty
1819 yogev_ezra 1795
	paddd	mm6,.dby
1796
	paddd	mm5,.dey
1797
end if
1245 hidnplayr 1798
	mov	eax,.dz
1799
	add	.cz,eax
1800
1801
 
1802
	jnz	.draw
1803
1804
 
1805
	mov	esp,ebp
1806
ret 76
1807
;Ext = MMX
1808
1809
 
1810
;        movq    mm5, qword[.temp1]  ;-
1811
;        paddd   mm5, qword[.temp5]  ; .temp5 == low dword = TEX_X, high dword = -TEX_X
1812
;        pand    mm5, qword[.temp3]  ; .temp3 == low = high dword = TEX_SIZE
1813
;        paddd   mm5, qword[.temp4]  ; .temp4 == low = high dword = offset .bmap
1814
;        movd    ebx,mm5
1815
;        psrlq   mm5,32
1816
;     end if
1817