Subversion Repositories Kolibri OS

Rev

Rev 2881 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1245 hidnplayr 1
 
2
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
3
;ROUND equ 8
4
;Ext = NON
5
;MMX = 1
6
;NON = 0
7
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
8
;------- DOS 13h mode demos --------------------------------------------
9
;------- Procedure draws a bump-mapped, textured triangle using the ----
10
;------- Catmull Z-buffer algorithm (Z coordinate interpolation). ------
11
;------- Each texture pixel is computed as col1*col2/256. --------------
12
bump_tex_triangle_z:
13
;------------------------------------------------------------------------
; Draws one bump-mapped, textured, Z-buffered triangle.
; In:  eax - (x1 shl 16) + y1     ; first vertex
14
;      ebx - (x2 shl 16) + y2     ; second vertex
15
;      ecx - (x3 shl 16) + y3     ; third vertex
16
;      edx - pointer to bump map
17
;      esi - pointer to environment map
18
;      edi - pointer to screen buffer
19
;      stack (lowest address first, see the equ map that follows):
;              bump map coordinates
20
;              environment map coordinates
21
;              Z position coordinates
22
;              pointer to Z-buffer
23
;              pointer to texture
24
;              texture coordinates
25
; Z-buffer entries are dwords holding
26
; (Z coordinate as word) shl CATMULL_SHIFT.
27
; Stack-argument map. The entry code copies esp into ebp before any push,
; so [ebp] is the return address and the arguments start at ebp+4.
; The x/y members are bare address expressions (no size, no brackets): the
; code wraps them as word[...] for a single coordinate or dword[...] to
; move an x,y pair at once.
.b_x1	equ ebp+4   ; NOTE: the procedure does not preserve registers!
28
.b_y1	equ ebp+6   ; each coordinate is one word
29
.b_x2	equ ebp+8
30
.b_y2	equ ebp+10	 ; b_* - bump map coordinates
31
.b_x3	equ ebp+12	 ; e_* - environment map coordinates
32
.b_y3	equ ebp+14
33
.e_x1	equ ebp+16
34
.e_y1	equ ebp+18
35
.e_x2	equ ebp+20
36
.e_y2	equ ebp+22
37
.e_x3	equ ebp+24
38
.e_y3	equ ebp+26
39
; Per-vertex Z values, one word each.
.z1	equ word[ebp+28]
40
.z2	equ word[ebp+30]
41
.z3	equ word[ebp+32]
42
.z_buff equ dword[ebp+34]	; pointer to Z-buffer
43
.tex_ptr equ dword[ebp+38]	; pointer to texture
44
.t_x1	equ ebp+42		; t_* - texture coordinates
45
.t_y1	equ ebp+44
46
.t_x2	equ ebp+46
47
.t_y2	equ ebp+48
48
.t_x3	equ ebp+50
49
.t_y3	equ ebp+52
; The last argument word ends at ebp+53, i.e. 50 bytes of stack arguments.
50
51
 
52
 
53
 
54
; Locals below ebp. They are created by pushes in the entry code:
; edx -> [ebp-4], esi -> [ebp-8], then eax, ebx, ecx after the vertex sort.
; NOTE(review): no definition for the [ebp-4] slot (bump map pointer) is
; visible in this listing - confirm against the full file.
.t_emap equ dword[ebp-8]	; pointer to env map (the pushed esi)
55
; Each pushed vertex register holds x in its high word and y in its low
; word, hence x sits 2 bytes above y.
.x1	equ word[ebp-10]
56
.y1	equ word[ebp-12]
57
.x2	equ word[ebp-14]
58
.y2	equ word[ebp-16]
59
.x3	equ word[ebp-18]
60
.y3	equ word[ebp-20]
61
62
 
2881 leency 63
64
 
65
.dz12	equ	 [edi-8]
66
.dbx12	equ dword[edi-12]
67
.dby12	equ	 [edi-16]
68
.dex12	equ dword[edi-20]
69
.dey12	equ	 [edi-24]
70
.dtx12	equ dword[edi-28]
71
.dty12	equ	 [edi-32]
72
73
 
74
.dz13  equ	[ebp-52-4*2]
75
.dbx13 equ dword[ebp-52-4*3]
76
.dby13 equ	[ebp-52-4*4]
77
.dex13 equ dword[ebp-52-4*5]
78
.dey13 equ	[ebp-52-4*6]
79
.dtx13 equ dword[ebp-52-4*7]
80
.dty13 equ	[ebp-52-4*8]
81
82
 
83
 
84
.dz23  equ	[ebp-(52+4*10)]
85
.dbx23 equ dword[ebp-(52+4*11)]
86
.dby23 equ	[ebp-(52+4*12)]
87
.dex23 equ dword[ebp-(52+4*13)]
88
.dey23 equ	[ebp-(52+4*14)]
89
.dtx23 equ dword[ebp-(52+4*15)]
90
.dty23 equ	[ebp-(52+4*16)]
91
92
 
93
94
 
1245 hidnplayr 95
; Per-scanline edge deltas, stored as dwords below the five entry pushes
; (which end at ebp-20). Each edge section of the procedure fills eight
; consecutive dword slots, either with eight zero pushes (flat edge) or
; with eight computed values; the first slot of each group is the x delta.
; x, bump, env and texture deltas are scaled by shl ROUND, the z delta by
; shl CATMULL_SHIFT.
; Each y slot lies 4 bytes below its x slot, so an 8-byte access on a
; y name (e.g. movq on .dey12) covers the y,x pair.
; NOTE(review): this listing also contains an earlier, edi-relative set of
; .d*12 names; the conditional directives selecting between the two sets
; are not visible here - confirm against the full file.
;
; Edge 1 -> 2 (the x-delta slot at ebp-24 has no name in this group).
.dz12	equ	 [ebp-28]
96
.dbx12	equ dword[ebp-32]
97
.dby12	equ	 [ebp-36]
98
.dex12	equ dword[ebp-40]
99
.dey12	equ	 [ebp-44]
100
.dtx12	equ dword[ebp-48]
101
.dty12	equ	 [ebp-52]
102
103
 
104
; Edge 1 -> 3: second group of eight dwords; index 1 (ebp-52-4*1) is the
; x-delta slot and is not named in this group.
.dz13  equ	[ebp-52-4*2]
105
.dbx13 equ dword[ebp-52-4*3]
106
.dby13 equ	[ebp-52-4*4]
107
.dex13 equ dword[ebp-52-4*5]
108
.dey13 equ	[ebp-52-4*6]
109
.dtx13 equ dword[ebp-52-4*7]
110
.dty13 equ	[ebp-52-4*8]
111
112
 
113
 
114
; Edge 2 -> 3: third group of eight dwords; index 9 is the x-delta slot
; and is not named in this group.
.dz23  equ	[ebp-(52+4*10)]
115
.dbx23 equ dword[ebp-(52+4*11)]
116
.dby23 equ	[ebp-(52+4*12)]
117
.dex23 equ dword[ebp-(52+4*13)]
118
.dey23 equ	[ebp-(52+4*14)]
119
.dtx23 equ dword[ebp-(52+4*15)]
120
.dty23 equ	[ebp-(52+4*16)]
121
122
 
2881 leency 123
124
 
1819 yogev_ezra 125
126
 
1245 hidnplayr 127
.cz1   equ	[ebp-(52+4*18)]
128
.cx2   equ dword[ebp-(52+4*19)]
129
.cz2   equ	[ebp-(52+4*20)]
130
.cbx1  equ dword[ebp-(52+4*21)]
131
.cby1  equ	[ebp-(52+4*22)]
132
.cbx2  equ dword[ebp-(52+4*23)]
133
.cby2  equ	[ebp-(52+4*24)]
134
.cex1  equ dword[ebp-(52+4*25)]
135
.cey1  equ	[ebp-(52+4*26)]
136
.cex2  equ dword[ebp-(52+4*27)]
137
.cey2  equ	[ebp-(52+4*28)]
138
139
 
140
.cty1  equ	[ebp-(52+4*30)]
141
.ctx2  equ dword[ebp-(52+4*31)]
142
.cty2  equ	[ebp-(52+4*32)]
143
144
 
1819 yogev_ezra 145
146
 
147
.cz1   equ	[ebp-(52+4*18)]
148
.cbx1  equ dword[ebp-(52+4*19)]
149
.cby1  equ	[ebp-(52+4*20)]
150
.cex1  equ dword[ebp-(52+4*21)]
151
.cey1  equ	[ebp-(52+4*22)]
152
.ctx1  equ dword[ebp-(52+4*23)]
153
.cty1  equ	[ebp-(52+4*24)]
154
155
 
156
.cz2   equ	[ebp-(52+4*26)]
157
.cbx2  equ dword[ebp-(52+4*27)]
158
.cby2  equ	[ebp-(52+4*28)]
159
.cex2  equ dword[ebp-(52+4*29)]
160
.cey2  equ	[ebp-(52+4*30)]
161
.ctx2  equ dword[ebp-(52+4*31)]
162
.cty2  equ	[ebp-(52+4*32)]
163
164
 
165
       ; Entry code and vertex sort.
       ; ebp takes the entry value of esp (nothing is pushed first), then the
       ; bump map and env map pointers are saved at [ebp-4] and [ebp-8].
       ; The three vertices are then ordered by ascending y (y is the low
       ; word of eax / ebx / ecx, compared as signed). Every register
       ; exchange is mirrored on the matching stack arguments: the bump, env
       ; and texture coordinate pairs (one dword per x,y pair) and the z word.
       ; edx is used as scratch from here on.
       cld
1245 hidnplayr 166
       mov     ebp,esp
167
       push    edx	  ; store bump map pointer
168
       push    esi	  ; store env map pointer
169
     ; sub     esp,120
170
 .sort3:		  ; sort triangle vertices by y
171
       ; Step 1: make sure y1 <= y2.
       cmp     ax,bx
172
       jle     .sort1
173
       xchg    eax,ebx
174
       ; swap bump coordinates of vertex 1 and 2 (x and y together)
       mov     edx,dword[.b_x1]
175
       xchg    edx,dword[.b_x2]
176
       mov     dword[.b_x1],edx
177
       ; swap env coordinates of vertex 1 and 2
       mov     edx,dword[.e_x1]
178
       xchg    edx,dword[.e_x2]
179
       mov     dword[.e_x1],edx
180
       ; swap texture coordinates of vertex 1 and 2
       mov     edx,dword[.t_x1]
181
       xchg    edx,dword[.t_x2]
182
       mov     dword[.t_x1],edx
183
       ; swap z of vertex 1 and 2
       mov     dx,.z1
184
       xchg    dx,.z2
185
       mov     .z1,dx
186
 .sort1:
187
       ; Step 2: make sure y2 <= y3; after a swap the new y2 may be smaller
       ; than y1, so the whole check is restarted from .sort3.
       cmp	bx,cx
188
       jle	.sort2
189
       xchg	ebx,ecx
190
       ; swap bump coordinates of vertex 2 and 3
       mov	edx,dword[.b_x2]
191
       xchg	edx,dword[.b_x3]
192
       mov	dword[.b_x2],edx
193
       ; swap env coordinates of vertex 2 and 3
       mov	edx,dword[.e_x2]
194
       xchg	edx,dword[.e_x3]
195
       mov	dword[.e_x2],edx
196
       ; swap texture coordinates of vertex 2 and 3
       mov	edx,dword[.t_x2]
197
       xchg	edx,dword[.t_x3]
198
       mov	dword[.t_x2],edx
199
       ; swap z of vertex 2 and 3
       mov     dx,.z2
200
       xchg    dx,.z3
201
       mov     .z2,dx
202
       jmp	.sort3
203
 .sort2:
204
       ; Vertices are now sorted by y. Store them as locals: the three
       ; pushes land at [ebp-12], [ebp-16] and [ebp-20], which are the
       ; slots read through .x1/.y1, .x2/.y2 and .x3/.y3.
       push	eax	; store triangle coords in variables
205
       push	ebx
206
       push	ecx
207
       ; Trivial reject. Bit 31 is the sign of the packed x (high word) and
       ; bit 15 the sign of the packed y (low word). ANDing the mask with
       ; all three vertices leaves a bit set only if that coordinate is
       ; negative in every vertex.
	 mov	  edx,80008000h  ; edx = sign-bit mask for x (bit 31) and y (bit 15)
208
	 and	  edx,ebx	 ; keep only the sign bits set in all three vertices
209
	 and	  edx,ecx
210
	 and	  edx,eax
211
	 test	  edx,80008000h  ; non-zero: all x negative and/or all y negative
212
       ; NOTE(review): this exit is taken with the locals still on the stack;
       ; the code that unwinds them is not visible in this listing - confirm.
	 jne	  .loop23_done
213
    ;   mov     edx,eax         ; eax,ebx,ecx are ORd together into edx which means that
214
    ;   or      edx,ebx         ; if any *one* of them is negative a sign flag is raised
215
    ;   or      edx,ecx
216
    ;   test    edx,80000000h   ; Check only X
217
    ;   jne     .loop23_done
218
219
 
220
    ;   jg      .loop23_done
221
    ;   cmp     .x2,SIZE_X     ; This can be optimized with effort
222
    ;   jg      .loop23_done
223
    ;   cmp     .x3,SIZE_X
224
    ;   jg      .loop23_done    ; {
225
226
 
227
 
228
       sub	bx,.y1
229
       jnz	.bt_dx12_make
230
if 0 ;Ext >= SSE2
2881 leency 231
       pxor	xmm0,xmm0
232
       movups	.dty12,xmm0
233
       movups	.dey12,xmm0
234
       sub	esp,16
235
else
236
       mov	ecx,8
1245 hidnplayr 237
       xor	edx,edx
238
     @@:
239
       push	edx   ;dword 0
240
       loop	@b
241
end if
2881 leency 242
       jmp	.bt_dx12_done
1245 hidnplayr 243
 .bt_dx12_make:
244
       movsx	ebx,bx
1776 yogev_ezra 245
1245 hidnplayr 246
 
247
 
248
       sub	 esp,32
1819 yogev_ezra 249
   ;    mov       eax,256
1245 hidnplayr 250
       cvtsi2ss  xmm4,[i255d]
251
       cvtsi2ss  xmm3,ebx ;rcps
2881 leency 252
if 0 ;Ext >= SSE2
253
       mov	 edi,ebp
254
       sub	 edi,512
255
       or	 edi,0x0000000f
256
end if
257
       divss	 xmm3,xmm4
1245 hidnplayr 258
       shufps	 xmm3,xmm3,0
259
260
 
261
       movd	 mm1,[.b_x2]
262
       movd	 mm2,[.e_x1]
263
       movd	 mm3,[.e_x2]
264
265
 
266
       punpcklwd  mm0,mm4
267
       punpcklwd  mm1,mm4
268
       punpcklwd  mm2,mm4
269
       punpcklwd  mm3,mm4
270
271
 
272
       psubd	  mm3,mm2
273
274
 
275
       movlhps	 xmm1,xmm1
276
       cvtpi2ps  xmm1,mm3
277
278
 
279
280
 
281
			     ;xmm1--> | dbx | dby | dex | dey |
282
;1       movups    .dey12,xmm1
1819 yogev_ezra 283
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
1776 yogev_ezra 284
       movhlps	 xmm1,xmm1
1245 hidnplayr 285
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1776 yogev_ezra 286
       movq	 .dey12,mm0
1245 hidnplayr 287
       movq	 .dby12,mm1
288
;-------------
289
  ;    pxor      mm0,mm0
1776 yogev_ezra 290
  ;    pxor      mm1,mm1
291
   ;/   pinsrw    mm0,.z1,1
292
   ;/   pinsrw    mm0,.x1,0
293
   ;/   pinsrw    mm1,.z2,1
294
   ;/   pinsrw    mm1,.x2,0
295
       mov	 ax,.z2
296
       sub	 ax,.z1
297
       cwde
298
1819 yogev_ezra 299
 
300
       sub	dx,.x1
301
       movsx	edx,dx
302
303
 
1776 yogev_ezra 304
305
 
306
   ;/    punpcklwd  mm1,mm4
307
308
 
309
  ;     cvtpi2ps   xmm2,mm0
310
  ;     subps      xmm1,xmm2
311
312
 
313
314
 
315
       movd	  mm3,[.t_x2]
316
317
 
318
       punpcklwd  mm3,mm4
319
       psubd	  mm3,mm2
320
321
 
322
       cvtsi2ss  xmm1,eax
323
       movlhps	 xmm1,xmm1
324
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 325
   ;    movss     xmm1,xmm4
326
       shufps	 xmm1,xmm1,00101111b
327
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 328
329
 
1819 yogev_ezra 330
1776 yogev_ezra 331
 
332
			     ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 333
;1       movlps    .dty12,xmm1
334
;1       movhps    .dz12,xmm1
335
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
1776 yogev_ezra 336
       movhlps	 xmm1,xmm1
337
       cvtps2pi  mm1,xmm1
1819 yogev_ezra 338
       movq	 .dty12,mm0
1776 yogev_ezra 339
       movq	 .dz12,mm1
1819 yogev_ezra 340
;----
1776 yogev_ezra 341
;       mov       ax,.z2
1245 hidnplayr 342
;       sub       ax,.z1
343
;       cwde
344
;       mov       bx,.x2
345
;       sub       bx,.x1
346
;       movsx     ebx,bx
347
;       movd      mm1,eax
348
;       psllq     mm1,32
349
;       movd      mm1,ebx
350
1776 yogev_ezra 351
 
1245 hidnplayr 352
;;       push      eax
353
;;       movq      mm1,[esp]
354
;;       add       esp,8
355
;;;       mov       ax,.z1
356
;;;       mov       bx,.z2
357
;;;       shl       eax,16
358
;;;       shl       ebx,16
359
;;;       mov       ax,.x1
360
;;;       mov       bx,.x2
361
;       movd       mm2,[.t_x1]
362
;       movd       mm3,[.t_x2]
363
;;       movd      mm0,eax
364
;;       movd      mm1,ebx
365
366
 
367
;;       punpcklwd  mm0,mm4
368
;;       punpcklwd  mm1,mm4
369
;       punpcklwd  mm2,mm4
370
;       punpcklwd  mm3,mm4
371
372
 
373
;       psubd      mm3,mm2
374
375
 
376
 
377
;       movlhps   xmm1,xmm1
378
;       cvtpi2ps  xmm1,mm3
379
380
 
381
382
 
383
			     ; xmm1--> | dx | dz | dtx | dty |
384
;       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
385
;       movhlps   xmm1,xmm1
386
;       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx | dz |
387
;       movq      .dty12,mm0
388
;       movq      .dz12,mm1
389
else
390
       mov	ax,.x2
1819 yogev_ezra 391
       sub	ax,.x1
392
       cwde
393
       shl	eax,ROUND
394
       cdq
395
       idiv	ebx
396
 ;     mov      .dx12,eax
397
       push	 eax
398
399
 
1776 yogev_ezra 400
       sub     ax,.z1
401
       cwde
402
       shl     eax,CATMULL_SHIFT
403
       cdq
404
       idiv    ebx
405
       push    eax
406
1245 hidnplayr 407
 
408
       sub	ax,word[.b_x1]
409
       cwde
410
       shl	eax,ROUND
411
       cdq
412
       idiv	ebx
413
 ;     mov      .dbx12,eax
414
       push	 eax
415
416
 
417
       sub	ax,word[.b_y1]
418
       cwde
419
       shl	eax,ROUND
420
       cdq
421
       idiv	ebx
422
 ;     mov      .dby12,eax
423
       push	 eax
424
425
 
426
       sub	ax,word[.e_x1]
427
       cwde
428
       shl	eax,ROUND
429
       cdq
430
       idiv	ebx
431
 ;     mov      .dex12,eax
432
       push	 eax
433
434
 
435
       sub	ax,word[.e_y1]
436
       cwde
437
       shl	eax,ROUND
438
       cdq
439
       idiv	ebx
440
 ;     mov      .dey12,eax
441
       push	 eax
442
443
 
444
       sub	ax,word[.t_x1]
445
       cwde
446
       shl	eax,ROUND
447
       cdq
448
       idiv	ebx
449
 ;     mov      .dtx12,eax
450
       push	 eax
451
452
 
453
       sub	ax,word[.t_y1]
454
       cwde
455
       shl	eax,ROUND
456
       cdq
457
       idiv	ebx
458
 ;     mov      .dty12,eax
459
       push	 eax
460
end if
1776 yogev_ezra 461
   .bt_dx12_done:
1245 hidnplayr 462
463
 
464
       sub	bx,.y1
465
       jnz	.bt_dx13_make
466
       mov	ecx,8
467
       xor	edx,edx
468
     @@:
469
       push	edx   ;dword 0
470
       loop	@b
471
       jmp	.bt_dx13_done
472
 .bt_dx13_make:
473
       movsx	ebx,bx
474
475
 
476
477
 
1819 yogev_ezra 478
   ;    mov       eax,256
479
       cvtsi2ss  xmm4,[i255d]
1245 hidnplayr 480
       cvtsi2ss  xmm3,ebx	     ;rcps
481
       divss	 xmm3,xmm4
482
       shufps	 xmm3,xmm3,0
483
484
 
485
       movd	 mm1,[.b_x3]
486
       movd	 mm2,[.e_x1]
487
       movd	 mm3,[.e_x3]
488
489
 
490
       punpcklwd  mm0,mm4
491
       punpcklwd  mm1,mm4
492
       punpcklwd  mm2,mm4
493
       punpcklwd  mm3,mm4
494
495
 
496
       psubd	  mm3,mm2
497
498
 
499
       movlhps	 xmm1,xmm1
500
       cvtpi2ps  xmm1,mm3
501
502
 
503
504
 
505
			     ;xmm1--> | dbx | dby | dex | dey |
506
;1       movups    .dey13,xmm1
1819 yogev_ezra 507
508
 
509
       movhlps	 xmm1,xmm1
1245 hidnplayr 510
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1819 yogev_ezra 511
       movq	 .dey13,mm0
1245 hidnplayr 512
       movq	 .dby13,mm1
513
1819 yogev_ezra 514
 
1776 yogev_ezra 515
       sub	 ax,.z1
516
       cwde
517
518
 
1819 yogev_ezra 519
       sub	dx,.x1
520
       movsx	edx,dx
521
522
 
1776 yogev_ezra 523
       movd	  mm3,[.t_x3]
524
525
 
526
       punpcklwd  mm3,mm4
527
       psubd	  mm3,mm2
528
529
 
530
       movlhps	 xmm1,xmm1
531
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 532
       shufps	 xmm1,xmm1,00101111b
533
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 534
535
 
1819 yogev_ezra 536
1776 yogev_ezra 537
 
538
			     ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 539
;1       movlps    .dty13,xmm1
540
;1       movhps    .dz13,xmm1
541
542
 
1776 yogev_ezra 543
       movhlps	 xmm1,xmm1
544
       cvtps2pi  mm1,xmm1
1819 yogev_ezra 545
       movq	 .dty13,mm0
1776 yogev_ezra 546
       movq	 .dz13,mm1
1819 yogev_ezra 547
548
 
1245 hidnplayr 549
1819 yogev_ezra 550
 
551
       sub	ax,.x1
552
       cwde
553
       shl	eax,ROUND
554
       cdq
555
       idiv	ebx
556
 ;     mov      .dx13,eax
557
       push	 eax
558
559
 
1776 yogev_ezra 560
       sub     ax,.z1
561
       cwde
562
       shl     eax,CATMULL_SHIFT
563
       cdq
564
       idiv    ebx
565
  ;    mov    .dz13,eax
566
       push    eax
567
568
 
569
 
1245 hidnplayr 570
       sub	ax,word[.b_x1]
571
       cwde
572
       shl	eax,ROUND
573
       cdq
574
       idiv	ebx
575
 ;     mov      .dbx13,eax
576
       push	 eax
577
578
 
579
       sub	ax,word[.b_y1]
580
       cwde
581
       shl	eax,ROUND
582
       cdq
583
       idiv	ebx
584
 ;     mov      .dby13,eax
585
       push	 eax
586
587
 
588
       sub	ax,word[.e_x1]
589
       cwde
590
       shl	eax,ROUND
591
       cdq
592
       idiv	ebx
593
 ;     mov      .dex13,eax
594
       push	 eax
595
596
 
597
       sub	ax,word[.e_y1]
598
       cwde
599
       shl	eax,ROUND
600
       cdq
601
       idiv	ebx
602
 ;     mov      .dey13,eax
603
       push	 eax
604
605
 
606
       sub	ax,word[.t_x1]
607
       cwde
608
       shl	eax,ROUND
609
       cdq
610
       idiv	ebx
611
 ;     mov      .dtx13,eax
612
       push	 eax
613
614
 
615
       sub	ax,word[.t_y1]
616
       cwde
617
       shl	eax,ROUND
618
       cdq
619
       idiv	ebx
620
 ;     mov      .dty13,eax
621
       push	 eax
622
end if
1776 yogev_ezra 623
   .bt_dx13_done:
1245 hidnplayr 624
625
 
626
       sub	bx,.y2
627
       jnz	.bt_dx23_make
628
       mov	ecx,8
629
       xor	edx,edx
630
     @@:
631
       push	edx   ;dword 0
632
       loop	@b
633
       jmp	.bt_dx23_done
634
 .bt_dx23_make:
635
       movsx	ebx,bx
636
637
 
638
639
 
1819 yogev_ezra 640
   ;    mov       eax,256
641
       cvtsi2ss  xmm4,[i255d]
1245 hidnplayr 642
       cvtsi2ss  xmm3,ebx	     ;rcps
643
       divss	 xmm3,xmm4
644
       shufps	 xmm3,xmm3,0
645
646
 
647
       movd	 mm1,[.b_x3]
648
       movd	 mm2,[.e_x2]
649
       movd	 mm3,[.e_x3]
650
651
 
652
       punpcklwd  mm0,mm4
653
       punpcklwd  mm1,mm4
654
       punpcklwd  mm2,mm4
655
       punpcklwd  mm3,mm4
656
657
 
658
       psubd	  mm3,mm2
659
660
 
661
       movlhps	 xmm1,xmm1
662
       cvtpi2ps  xmm1,mm3
663
664
 
665
666
 
667
			     ;xmm1--> | dbx | dby | dex | dey |
668
;1       movups    .dey23,xmm1
1819 yogev_ezra 669
670
 
671
       movhlps	 xmm1,xmm1
1245 hidnplayr 672
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
1819 yogev_ezra 673
       movq	 .dey23,mm0
1245 hidnplayr 674
       movq	 .dby23,mm1
675
1819 yogev_ezra 676
 
1776 yogev_ezra 677
       sub	 ax,.z2
678
       cwde
679
680
 
1819 yogev_ezra 681
       sub	dx,.x2
682
       movsx	edx,dx
683
684
 
1776 yogev_ezra 685
       movd	  mm3,[.t_x3]
686
687
 
688
       punpcklwd  mm3,mm4
689
       psubd	  mm3,mm2
690
691
 
692
       movlhps	 xmm1,xmm1
693
       cvtsi2ss  xmm1,edx
1819 yogev_ezra 694
       shufps	 xmm1,xmm1,00101111b
695
       cvtpi2ps  xmm1,mm3
1776 yogev_ezra 696
697
 
1819 yogev_ezra 698
1776 yogev_ezra 699
 
700
			    ; xmm1--> | dx | dz | dtx | dty |
1819 yogev_ezra 701
;       movlps    .dty23,xmm1
702
;       movhps    .dz23,xmm1
703
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
1776 yogev_ezra 704
       movhlps	 xmm1,xmm1
705
       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx  |  dz |
1819 yogev_ezra 706
       movq	 .dty23,mm0
1776 yogev_ezra 707
       movq	 .dz23,mm1
1819 yogev_ezra 708
709
 
710
 
1245 hidnplayr 711
       mov	ax,.x3
1819 yogev_ezra 712
       sub	ax,.x2
713
       cwde
714
       shl	eax,ROUND
715
       cdq
716
       idiv	ebx
717
 ;     mov      .dx23,eax
718
       push	 eax
719
1776 yogev_ezra 720
 
721
       sub     ax,.z2
722
       cwde
723
       shl     eax,CATMULL_SHIFT
724
       cdq
725
       idiv    ebx
726
     ; mov     .dz23,eax
727
       push    eax
728
729
 
1245 hidnplayr 730
       sub	ax,word[.b_x2]
731
       cwde
732
       shl	eax,ROUND
733
       cdq
734
       idiv	ebx
735
 ;     mov      .dbx23,eax
736
       push	 eax
737
738
 
739
       sub	ax,word[.b_y2]
740
       cwde
741
       shl	eax,ROUND
742
       cdq
743
       idiv	ebx
744
 ;     mov      .dby23,eax
745
       push	 eax
746
747
 
748
       sub	ax,word[.e_x2]
749
       cwde
750
       shl	eax,ROUND
751
       cdq
752
       idiv	ebx
753
 ;     mov      .dex23,eax
754
       push	 eax
755
756
 
757
       sub	ax,word[.e_y2]
758
       cwde
759
       shl	eax,ROUND
760
       cdq
761
       idiv	ebx
762
 ;     mov      .dey23,eax
763
       push	 eax
764
765
 
1776 yogev_ezra 766
 
1245 hidnplayr 767
       sub	ax,word[.t_x2]
768
       cwde
769
       shl	eax,ROUND
770
       cdq
771
       idiv	ebx
772
 ;     mov      .dtx23,eax
773
       push	 eax
774
775
 
776
       sub	ax,word[.t_y2]
777
       cwde
778
       shl	eax,ROUND
779
       cdq
780
       idiv	ebx
781
 ;     mov      .dty23,eax
782
       push	 eax
783
end if
1776 yogev_ezra 784
      ;  sub     esp,40
1245 hidnplayr 785
   .bt_dx23_done:
786
       sub	 esp,64
787
1819 yogev_ezra 788
 
1245 hidnplayr 789
       shl	eax,ROUND
790
       mov	.cx1,eax
791
       mov	.cx2,eax
792
  ;     push     eax
793
  ;     push     eax
794
795
 
796
       shl	ebx,ROUND
797
       mov	.cbx1,ebx
798
       mov	.cbx2,ebx
799
      ; push     ebx
800
      ; push     ebx
801
802
 
803
       shl	ecx,ROUND
804
       mov	.cby1,ecx
805
       mov	.cby2,ecx
806
      ; push     ecx
807
      ; push     ecx
808
809
 
810
       shl	edx,ROUND
811
       mov	.cex1,edx
812
       mov	.cex2,edx
813
    ;   push     edx
814
    ;   push     edx
815
816
 
817
       shl	eax,ROUND
818
       mov	.cey1,eax
819
       mov	.cey2,eax
820
    ;   push     eax
821
    ;   push     eax
822
823
 
824
       shl	ebx,CATMULL_SHIFT
825
       mov	.cz1,ebx
826
       mov	.cz2,ebx
827
   ;    push     ebx
828
   ;    push     ebx
829
830
 
831
       movsx	ecx,word[.t_x1]
832
       shl	ecx,ROUND
833
       mov	.ctx1,ecx
834
       mov	.ctx2,ecx
835
       ;push     ecx
836
       ;push     ecx
837
838
 
839
       shl	edx,ROUND
840
       mov	.cty1,edx
841
       mov	.cty2,edx
842
      ; push     edx
843
      ; push     edx
844
845
 
2881 leency 846
       movups  xmm0,.cby1
847
       movups  xmm1,.cty1
848
       movups  xmm2,.cby2
849
       movups  xmm3,.cty2
850
       movups  xmm4,.dby13
851
       movups  xmm5,.dty13
852
       movups  xmm6,.dby12
853
       movups  xmm7,.dty12
854
       .scby1  equ [edi]
855
       .scty1  equ [edi+16]
856
       .scby2  equ [edi+32]
857
       .scty2  equ [edi+48]
858
       .sdby13 equ [edi+64]
859
       .sdty13 equ [edi+80]
860
       .sdby12 equ [edi+96]
861
       .sdty12 equ [edi+128]
862
       push    edi
863
       mov     edi,sse_repository
864
       movaps  .scby1,xmm0
865
       movaps  .scty1,xmm1
866
       movaps  .scby2,xmm2
867
       movaps  .scty2,xmm3
868
       movaps  .sdby13,xmm4
869
       movaps  .sdty13,xmm5
870
       movaps  .sdby12,xmm6
871
       movaps  .sdty12,xmm7
872
       pop     edi
873
874
 
875
       movsx	ecx,.y1
2192 leency 876
       cmp	cx,.y2
877
       jge	.loop12_done
878
  .loop12:
879
;if Ext >= SSE2
880
;       fxsave  [sse_repository]
881
;end if
882
       call	.call_line
883
if Ext >= SSE2
884
;       fxrstor [sse_repository]
885
       movups  xmm0,.cby1
886
       movups  xmm1,.cty1
887
       movups  xmm2,.cby2
888
       movups  xmm3,.cty2
889
    ;   movups  xmm4,.dby13
2881 leency 890
    ;   movups  xmm5,.dty13
891
    ;   movups  xmm6,.dby12
892
    ;   movups  xmm7,.dty12
893
    ;   paddd   xmm0,xmm4
894
    ;   paddd   xmm1,xmm5
895
    ;   paddd   xmm2,xmm6
896
    ;   paddd   xmm3,xmm7
897
       push    edi
898
       mov     edi,sse_repository
899
       paddd   xmm0,.sdby13
900
       paddd   xmm1,.sdty13
901
       paddd   xmm2,.sdby12
902
       paddd   xmm3,.sdty12
903
       pop     edi
904
       movups  .cby1,xmm0
2192 leency 905
       movups  .cty1,xmm1
906
       movups  .cby2,xmm2
907
       movups  .cty2,xmm3
908
end if
909
1245 hidnplayr 910
 
2192 leency 911
       movq	mm0,.cby2
1245 hidnplayr 912
       movq	mm1,.cby1
913
       movq	mm2,.cey2
914
       movq	mm3,.cey1
915
       movq	mm4,.cty1
916
       movq	mm5,.cty2
917
       movq	mm6,.cz1
918
       movq	mm7,.cz2
919
       paddd	mm0,.dby12
920
       paddd	mm1,.dby13
921
       paddd	mm2,.dey12
922
       paddd	mm3,.dey13
923
       paddd	mm4,.dty13
924
       paddd	mm5,.dty12
925
       paddd	mm6,.dz13
926
       paddd	mm7,.dz12
927
       movq	.cby2,mm0
928
       movq	.cby1,mm1
929
       movq	.cey1,mm3
930
       movq	.cey2,mm2
931
       movq	.cty1,mm4
932
       movq	.cty2,mm5
933
       movq	.cz1,mm6
934
       movq	.cz2,mm7
935
end if
1819 yogev_ezra 936
if Ext = NON
937
       mov	edx,.dbx13
1245 hidnplayr 938
       add	.cbx1,edx
939
       mov	eax,.dbx12
940
       add	.cbx2,eax
941
       mov	ebx,.dby13
942
       add	.cby1,ebx
943
       mov	edx,.dby12
944
       add	.cby2,edx
945
946
 
947
       add	.cex1,eax
948
       mov	ebx,.dex12
949
       add	.cex2,ebx
950
       mov	edx,.dey13
951
       add	.cey1,edx
952
       mov	eax,.dey12
953
       add	.cey2,eax
954
955
 
956
       add	.ctx1,eax
957
       mov	ebx,.dtx12
958
       add	.ctx2,ebx
959
       mov	edx,.dty13
960
       add	.cty1,edx
961
       mov	eax,.dty12
962
       add	.cty2,eax
963
964
 
965
       add	.cx1,eax
966
       mov	ebx,.dx12
967
       add	.cx2,ebx
968
       mov	ebx,.dz13
969
       add	.cz1,ebx
970
       mov	edx,.dz12
971
       add	.cz2,edx
972
end if
973
       inc	ecx
974
       cmp	cx,.y2
975
       jl	.loop12
976
    .loop12_done:
977
978
 
979
       cmp	cx,.y3
980
       jge	.loop23_done
981
982
 
1819 yogev_ezra 983
 
1245 hidnplayr 984
       shl	eax,CATMULL_SHIFT
985
       mov	.cz2,eax
986
987
 
988
       shl	ebx,ROUND
989
       mov	.cx2,ebx
990
991
 
992
       shl	edx,ROUND
993
       mov	.cbx2,edx
994
995
 
996
       shl	eax,ROUND
997
       mov	.cby2,eax
998
999
 
1000
       shl	ebx,ROUND
1001
       mov	.cex2,ebx
1002
1003
 
1004
       shl	edx,ROUND
1005
       mov	.cey2,edx
1006
1007
 
1008
       shl	eax,ROUND
1009
       mov	.ctx2,eax
1010
1011
 
1012
       shl	ebx,ROUND
1013
       mov	.cty2,ebx
1014
if Ext >= SSE2
2881 leency 1015
       movups  xmm2,.cby2
1016
       movups  xmm3,.cty2
1017
   ;    movups  xmm4,.dby13
1018
   ;    movups  xmm5,.dty13
1019
       movups  xmm6,.dby23
1020
       movups  xmm7,.dty23
1021
;       .scby1  equ [edi]
1022
;       .scty1  equ [edi+16]
1023
;       .scby2  equ [edi+32]
1024
;       .scty2  equ [edi+48]
1025
;       .sdby13 equ [edi+64]
1026
;       .sdty13 equ [edi+80]
1027
       .sdby23 equ [edi+160]
1028
       .sdty23 equ [edi+192]
1029
       push    edi
1030
       mov     edi,sse_repository
1031
;       movaps  .scby1,xmm0
1032
;       movaps  .scty1,xmm1
1033
       movaps  .scby2,xmm2
1034
       movaps  .scty2,xmm3
1035
;       movaps  .sdby13,xmm4
1036
;       movaps  .sdty13,xmm5
1037
       movaps  .sdby23,xmm6
1038
       movaps  .sdty23,xmm7
1039
       pop     edi
1040
1041
 
1042
1043
 
2192 leency 1044
;if Ext >= SSE2
1045
;       fxsave  [sse_repository]
1046
;end if
1047
       call	.call_line
1048
1049
 
1050
2881 leency 1051
 
2192 leency 1052
       movups  xmm1,.cty1
1053
       movups  xmm2,.cby2
1054
       movups  xmm3,.cty2
1055
2881 leency 1056
 
1057
 
1058
       mov     edi,sse_repository
1059
       paddd   xmm0,.sdby13
1060
       paddd   xmm1,.sdty13
1061
       paddd   xmm2,.sdby23
1062
       paddd   xmm3,.sdty23
1063
       pop     edi
1064
       movups  .cby1,xmm0
2192 leency 1065
       movups  .cty1,xmm1
1066
       movups  .cby2,xmm2
1067
       movups  .cty2,xmm3
1068
2881 leency 1069
 
1070
 
1071
 
1072
 
1073
;       movups  xmm0,.cby1
1074
;       movups  xmm1,.cty1
1075
;       movups  xmm2,.cby2
1076
;       movups  xmm3,.cty2
1077
;       movups  xmm4,.dby13
1078
;       movups  xmm5,.dty13
1079
;       movups  xmm6,.dby23
1080
;       movups  xmm7,.dty23
1081
;       paddd   xmm0,xmm4
1082
;       paddd   xmm1,xmm5
1083
;       paddd   xmm2,xmm6
1084
 ;      paddd   xmm3,xmm7
1085
 ;      movups  .cby1,xmm0
1086
 ;      movups  .cty1,xmm1
1087
 ;      movups  .cby2,xmm2
1088
 ;      movups  .cty2,xmm3
1089
;
1819 yogev_ezra 1090
end if
2192 leency 1091
if (Ext = MMX) | (Ext = SSE)
1092
       movq	mm0,.cby2
1245 hidnplayr 1093
       movq	mm1,.cby1
1094
       movq	mm2,.cey2
1095
       movq	mm3,.cey1
1096
       movq	mm4,.cty1
1097
       movq	mm5,.cty2
1098
       movq	mm6,.cz1
1099
       movq	mm7,.cz2
1100
       paddd	mm0,.dby23
1101
       paddd	mm1,.dby13
1102
       paddd	mm2,.dey23
1103
       paddd	mm3,.dey13
1104
       paddd	mm4,.dty13
1105
       paddd	mm5,.dty23
1106
       paddd	mm6,.dz13
1107
       paddd	mm7,.dz23
1108
       movq	.cby2,mm0
1109
       movq	.cby1,mm1
1110
       movq	.cey2,mm2
1111
       movq	.cey1,mm3
1112
       movq	.cty1,mm4
1113
       movq	.cty2,mm5
1114
       movq	.cz1,mm6
1115
       movq	.cz2,mm7
1116
end if
1819 yogev_ezra 1117
If Ext = NON
1118
       mov	edx,.dbx13
1245 hidnplayr 1119
       add	.cbx1,edx
1120
       mov	eax,.dbx23
1121
       add	.cbx2,eax
1122
       mov	ebx,.dby13
1123
       add	.cby1,ebx
1124
       mov	edx,.dby23
1125
       add	.cby2,edx
1126
1127
 
1128
       add	.cex1,eax
1129
       mov	ebx,.dex23
1130
       add	.cex2,ebx
1131
       mov	edx,.dey13
1132
       add	.cey1,edx
1133
       mov	eax,.dey23
1134
       add	.cey2,eax
1135
1136
 
1137
       add	.cx1,eax
1138
       mov	ebx,.dx23
1139
       add	.cx2,ebx
1140
       mov	ebx,.dz13
1141
       add	.cz1,ebx
1142
       mov	edx,.dz23
1143
       add	.cz2,edx
1144
1145
 
1146
       add	.ctx1,eax
1147
       mov	ebx,.dtx23
1148
       add	.ctx2,ebx
1149
       mov	edx,.dty13
1150
       add	.cty1,edx
1151
       mov	eax,.dty23
1152
       add	.cty2,eax
1153
end if
1154
       inc	ecx
1155
       cmp	cx,.y3
1156
       jl	.loop23
1157
    .loop23_done:
1158
1159
 
1160
ret   50
1161
1162
 
1163
1164
 
1165
       ; xmm0= cby1,cbx1,cz1,cx1
2881 leency 1166
       ; xmm1= cty1,ctx1,cey1,cex1
1167
if Ext >= SSE2
1168
       sub	esp,8
1169
       shufps	xmm1,xmm1,10110001b
1170
       shufps	xmm3,xmm3,10110001b
1171
       movlps	[esp],xmm1
1172
else
1173
       push	dword .cty1
1245 hidnplayr 1174
       push	.ctx1
1175
end if
2881 leency 1176
       push	dword .cz1
1245 hidnplayr 1177
if Ext>=SSE2
2881 leency 1178
       sub	esp,8
1179
       movlps	[esp],xmm3
1180
else
1181
       push	dword .cty2
1819 yogev_ezra 1182
       push	.ctx2
1183
end if
2881 leency 1184
       push	dword .cz2
1245 hidnplayr 1185
if Ext>=SSE2
2881 leency 1186
       sub	esp,32
1187
       movhps	[esp+24],xmm3
1188
       shufps	xmm2,xmm2,10110001b
1189
       movlps	[esp+16],xmm2
1190
       movhps	[esp+8],xmm1
1191
       shufps	xmm0,xmm0,10110001b
1192
       movlps	[esp],xmm0 ;================================
1193
1194
 
1195
       push	dword .cey2
1245 hidnplayr 1196
       push	.cex2
1197
       push	dword .cby2
1819 yogev_ezra 1198
       push	.cbx2
1199
       push	dword .cey1
1245 hidnplayr 1200
       push	.cex1
1201
       push	dword .cby1
1202
       push	.cbx1
1203
end if
2881 leency 1204
2192 leency 1205
 
2881 leency 1206
       push	.z_buff
1207
       push	.t_emap
1208
       push	.t_bmap
1209
1210
 
1245 hidnplayr 1211
1212
 
1213
       sar	eax,ROUND
1214
       mov	ebx,.cx2
1215
       sar	ebx,ROUND
1216
1217
 
1218
1219
 
1220
;end if
1819 yogev_ezra 1221
ret
1245 hidnplayr 1222
bump_tex_line_z:
1223
;--------------in: eax - x1
1224
;--------------    ebx - x2
1225
;--------------    edi - pointer to screen buffer
1226
;stack - another parameters :
1227
.y	equ dword [ebp+4]
1228
.bmap	equ dword [ebp+8]	 ; bump map pointer
2881 leency 1229
.emap	equ dword [ebp+12]	 ; env map pointer
1230
.z_buff equ dword [ebp+16]	 ; z buffer
1231
.tex_map equ dword [ebp+20]	 ; texture pointer
1232
1245 hidnplayr 1233
 
2881 leency 1234
.by1	equ  [ebp+28]  ;       |
1235
.ex1	equ  [ebp+32]  ;       |
1236
.ey1	equ  [ebp+36]  ;       |
1237
.bx2	equ  [ebp+40]  ;       |
1238
.by2	equ  [ebp+44]  ;       |>   b. map and e. map coords
1239
.ex2	equ  [ebp+48]  ;       |>   shifted shl ROUND
1240
.ey2	equ  [ebp+52]  ;   ---
1241
.z2	equ  [ebp+56]
1242
.tx2	equ  [ebp+60]
1243
.ty2	equ  [ebp+64]
1244
.z1	equ  [ebp+68]
1245
.tx1	equ  [ebp+72]
1246
.ty1	equ  [ebp+76]
1247
1245 hidnplayr 1248
 
1249
 
2881 leency 1250
 
1819 yogev_ezra 1251
.x2	equ [ebp-8]
1252
.dbx	equ [ebp-12]
1253
.dby	equ [ebp-16]
1254
.dex	equ [ebp-20]
1255
.dey	equ [ebp-24]
1256
.dz	equ [ebp-28]
1257
.dtx	equ [ebp-32]
1258
.dty	equ [ebp-36]
1259
1245 hidnplayr 1260
 
1819 yogev_ezra 1261
.cby	equ [ebp-44]
1262
.cex	equ [ebp-48]
1263
.cey	equ [ebp-52]
1264
.cz	equ [ebp-56]
1265
.czbuff equ [ebp-60]
1266
.ctx	equ [ebp-64]
1267
.cty	equ [ebp-68]
1268
.c_scr	equ [ebp-72]
1269
1245 hidnplayr 1270
 
1271
.temp2	equ	   ebp-88
1272
.temp3	equ	   ebp-76
1273
.temp4	equ	   ebp-84
1274
.temp5	equ	   ebp-92
1275
1276
 
1277
1278
 
1279
	or	ecx,ecx
1280
	jl	.bl_end
1281
	cmp	ecx,SIZE_Y
1282
	jge	.bl_end
1283
1284
 
1285
	jl	.bl_ok
1286
	je	.bl_end
1287
1288
 
2881 leency 1289
 
1245 hidnplayr 1290
	mov	edx,.bx1
1291
	xchg	edx,.bx2
1292
	mov	.bx1,edx
1293
	mov	edx,.by1
1294
	xchg	edx,.by2
1295
	mov	.by1,edx
1296
1297
 
1298
	xchg	edx,.ex2
1299
	mov	.ex1,edx
1300
	mov	edx,.ey1
1301
	xchg	edx,.ey2
1302
	mov	.ey1,edx
1303
1304
 
1305
	xchg	edx,.tx2
1306
	mov	.tx1,edx
1307
	mov	edx,.ty1
1308
	xchg	edx,.ty2
1309
	mov	.ty1,edx
1310
end if
1311
if Ext = MMX
1819 yogev_ezra 1312
	movq	mm0,.bx1
1313
	movq	mm1,.bx2
1314
	movq	mm2,.ex1
1315
	movq	mm3,.ex2
1316
	movq	mm4,.tx1
1317
	movq	mm5,.tx2
1318
	movq	.bx2,mm0
1319
	movq	.bx1,mm1
1320
	movq	.ex1,mm3
1321
	movq	.ex2,mm2
1322
	movq	.tx1,mm5
1323
	movq	.tx2,mm4
1324
end if
1325
if Ext>=SSE
1326
	movups xmm0,.bx1
1327
	movups xmm1,.bx2
1328
	movups .bx1,xmm1
1329
	movups .bx2,xmm0
1330
	movq	mm0,.tx1
1331
	movq	mm1,.tx2
1332
	movq	.tx1,mm1
1333
	movq	.tx2,mm0
1334
end if
1335
;if Ext>=SSE2
2881 leency 1336
;        movaps  xmm4,xmm0
1337
;        movaps  xmm0,xmm2
1338
;        movaps  xmm2,xmm4
1339
;        movaps  xmm5,xmm1
1340
;        movaps  xmm1,xmm3
1341
;        movaps  xmm3,xmm5
1342
;else
1343
1245 hidnplayr 1344
 
2881 leency 1345
	mov	edx,.z1
1245 hidnplayr 1346
	xchg	edx,.z2
1347
	mov	.z1,edx
1348
;end if
2881 leency 1349
  .bl_ok:
1245 hidnplayr 1350
;if Ext >= SSE2
2881 leency 1351
;        shufps  xmm0,xmm0,11100001b
1352
;        shufps  xmm2,xmm2,11100001b
1353
;        movlps  .bx1,xmm0
1354
;        movlps  .bx2,xmm2
1355
1356
 
1357
 
1358
;        shufps  xmm2,xmm2,00011011b
1359
;        movd    eax,xmm0
1360
;        movd    ebx,xmm2
1361
;        shufps  xmm0,xmm0,11000110b
1362
;        shufps  xmm2,xmm2,11000110b
1363
;        movd    .z1,xmm0
1364
;        movd    .z2,xmm2
1365
;        shufps  xmm1,xmm1,10110001b
1366
;        shufps  xmm3,xmm3,10110001b
1367
;        movlps  .ex1,xmm1
1368
;        movlps  .ex2,xmm2
1369
;        movhps  .tx1,xmm1
1370
;        movhps  .tx2,xmm2
1371
1372
 
1373
;        mov     edx,.z1
1374
;        xchg    edx,.z2
1375
;        mov     .z1,edx
1376
1377
 
1378
 
1379
1380
 
1245 hidnplayr 1381
	push	ebx	      ;store x1, x2
1382
	cmp	dword .x1,SIZE_X
1819 yogev_ezra 1383
	jge	.bl_end
1245 hidnplayr 1384
	cmp	dword .x2,0
1819 yogev_ezra 1385
	jle	.bl_end
1245 hidnplayr 1386
1387
 
1388
	sub	ebx,.x1
1389
1390
 
1391
1392
 
1393
       cvtsi2ss  xmm3,ebx	     ;rcps
1394
       shufps	 xmm3,xmm3,0
1395
; float using SSE variant  ::-->
1819 yogev_ezra 1396
;       movups    xmm0,.bx1  ; new
1397
;       movups    xmm1,.bx2  ; new
1398
1245 hidnplayr 1399
 
1819 yogev_ezra 1400
       movlhps	 xmm0,xmm0
1245 hidnplayr 1401
       cvtpi2ps  xmm0,.ex1 ;mm2
1819 yogev_ezra 1402
       cvtpi2ps  xmm1,.bx2 ;mm1
1403
       movlhps	 xmm1,xmm1
1245 hidnplayr 1404
       cvtpi2ps  xmm1,.ex2 ;mm3
1819 yogev_ezra 1405
       subps	 xmm1,xmm0
1245 hidnplayr 1406
1407
 
1408
1409
 
1410
;       movups    .dey,xmm1  ; new
1819 yogev_ezra 1411
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1245 hidnplayr 1412
       movhlps	 xmm1,xmm1
1413
       cvtps2pi  mm1,xmm1
1414
       movq	 .dey,mm0
1819 yogev_ezra 1415
       movq	 .dby,mm1
1416
1245 hidnplayr 1417
 
1418
       movd	 mm3,.z2
1419
1420
 
1819 yogev_ezra 1421
       movlhps	 xmm0,xmm0
1245 hidnplayr 1422
       cvtpi2ps  xmm0,mm2
1423
       cvtpi2ps  xmm1,.tx2 ;mm1
1819 yogev_ezra 1424
       movlhps	 xmm1,xmm1
1245 hidnplayr 1425
       cvtpi2ps  xmm1,mm3
1426
;       movups    xmm0,.z1  ; new
1819 yogev_ezra 1427
;       movups    xmm1,.z2  ; new
1428
       subps	 xmm1,xmm0
1245 hidnplayr 1429
1430
 
1431
1432
 
1819 yogev_ezra 1433
1434
 
1245 hidnplayr 1435
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1436
       movhlps	 xmm1,xmm1
1437
       cvtps2pi  mm1,xmm1
1438
       movd	 .dz,mm0
1439
       movq	 .dty,mm1
1819 yogev_ezra 1440
1245 hidnplayr 1441
 
1442
1443
 
1444
	sub	eax,.bx1
1445
	cdq
1446
	idiv	ebx
1447
	push	eax
1448
1449
 
1450
	sub	eax,.by1
1451
	cdq
1452
	idiv	ebx
1453
	push	eax
1454
1455
 
1456
	sub	eax,.ex1
1457
	cdq
1458
	idiv	ebx
1459
	push	eax
1460
1461
 
1462
	sub	eax,.ey1
1463
	cdq
1464
	idiv	ebx
1465
	push	eax
1466
1467
 
1468
 
1469
	sub	eax,.z1
1470
	cdq
1471
	idiv	ebx
1472
	push	eax
1473
1474
 
1475
	sub	eax,.tx1
1476
	cdq
1477
	idiv	ebx
1478
	push	eax
1479
1480
 
1481
	sub	eax,.ty1
1482
	cdq
1483
	idiv	ebx
1484
	push	eax
1485
1486
 
1487
	cmp	dword .x1,0	    ; set correctly begin variable
1819 yogev_ezra 1488
	jge	@f	      ; CLIPPING ON FUNCTION
1245 hidnplayr 1489
			      ; cut off the part of the triangle that exceeds the screen
1490
	mov	ebx,.x1
1491
	neg	ebx
1492
1819 yogev_ezra 1493
 
1494
1495
 
1496
;        shufps   xmm0,xmm0,0
1497
;        movups   xmm1,.dey
1498
;        mulps    xmm1,xmm0
1499
;        shufps   xmm1,xmm1,00011011b
1500
;        movups   xmm2,.bx1
1501
;        addps    xmm2,xmm1
1502
;        movups   .bx1,xmm2
1503
1504
 
1505
	imul	ebx	      ; eax = .dz * abs(.x1)
1245 hidnplayr 1506
	add	.z1,eax
1507
	mov	dword .x1,0
1819 yogev_ezra 1508
1245 hidnplayr 1509
 
1510
	imul	ebx
1511
	add    .bx1,eax
1512
1513
 
1514
	imul	ebx
1515
	add	.by1,eax
1516
1517
 
1518
	imul	ebx
1519
	add	.ex1,eax
1520
1521
 
1522
	imul	ebx
1523
	add	.ey1,eax
1524
1525
 
1526
	imul	ebx
1527
	add	.tx1,eax
1528
1529
 
1530
	imul	ebx
1531
	add	.ty1,eax
1532
1533
 
1534
	cmp	dword .x2,SIZE_X
1819 yogev_ezra 1535
	jl	@f
1245 hidnplayr 1536
	mov	dword .x2,SIZE_X
1819 yogev_ezra 1537
      @@:
1245 hidnplayr 1538
	mov	eax,SIZE_X	 ;calc memory begin in buffers
1539
	mul	.y
1540
	add	eax,.x1
1541
	lea	esi,[4*eax]
1542
	add	esi,.z_buff	  ; z-buffer filled with dd variables
1543
	lea	eax,[eax*3]
1544
	add	edi,eax
1545
1546
 
1547
 
1548
	sub	ecx,.x1
1549
	; init current variables
1550
	push	dword .bx1   ; current b, e and t shifted shl ROUND   .cbx
1819 yogev_ezra 1551
	push	dword .by1					   ;  .cby
1552
	push	dword .ex1					   ;  .cex
1553
	push	dword .ey1					   ;  .cey
1554
1245 hidnplayr 1555
 
1819 yogev_ezra 1556
	push	esi					     ; .czbuff
1245 hidnplayr 1557
1558
 
1819 yogev_ezra 1559
	push	dword .ty1	;         .cty
1560
	push	edi	  ;         .c_scr
1245 hidnplayr 1561
if Ext = SSE2
2984 leency 1562
	mov    eax,TEXTURE_SIZE
1563
	movd   xmm1,eax
1564
	shufps xmm1,xmm1,0
1565
	push   dword  TEX_X
1566
	push   dword  -TEX_X
1567
	push   dword  1
1568
	push   dword  -1
1569
	movups xmm2,[esp]
1570
	movd   xmm3,.bmap
1571
	shufps xmm3,xmm3,0
1572
end if
1573
1979 yogev_ezra 1574
 
1245 hidnplayr 1575
	movq	mm7,.cty
1819 yogev_ezra 1576
	movq	mm6,.cby
1577
	movq	mm5,.cey
1578
;        movq    mm4,.dtyq
1245 hidnplayr 1579
;        movq    mm3,.dbyq
1580
end if
1581
1582
 
1583
    ; if TEX = SHIFTING   ;bump drawing only in shifting mode
1584
	mov	esi,.czbuff	 ; .czbuff current address in buffer
1585
	mov	ebx,.cz 	 ; .cz - cur z position
1586
	cmp	ebx,dword[esi]
1587
	jge	.skip
1588
if Ext=NON
1589
	mov	eax,.cby
1590
	shr	eax,ROUND
1591
	mov	esi,.cbx
1592
	shr	esi,ROUND
1593
else
1594
	movq	mm1,mm6
1595
	psrld	mm1,ROUND
1596
	movd	eax,mm1
1597
	psrlq	mm1,32
1598
	movd	esi,mm1
1599
end if
1600
1601
 
1602
	add	esi,eax 	;-  ; esi - current bump map index
1603
1604
 
2984 leency 1605
1606
 
1607
	shufps	xmm0,xmm0,0
1608
	paddd	xmm0,xmm2
1609
	pand	xmm0,xmm1
1610
	paddd	xmm0,xmm3
1611
1612
 
1613
	movzx	eax,byte[ebx]
1614
;
1979 yogev_ezra 1615
;        shufps  xmm0,xmm0,11100001b
1616
	psrldq	xmm0,4
2984 leency 1617
	movd	ebx,xmm0
1618
	movzx	ebx,byte[ebx]
1619
	sub	eax,ebx
1620
;
1979 yogev_ezra 1621
;        shufps  xmm0,xmm0,11111110b
1622
	psrldq	xmm0,4
2984 leency 1623
	movd	ebx,xmm0
1624
	movzx	edx, byte [ebx]
1625
;
1979 yogev_ezra 1626
;        shufps  xmm0,xmm0,11111111b
1627
	psrldq	xmm0,4
2984 leency 1628
	movd	ebx,xmm0
1629
	movzx	ebx, byte [ebx]
1630
	sub	edx,ebx
1631
;
1979 yogev_ezra 1632
else
2984 leency 1633
;        mov     ebx,esi
1634
;        dec     ebx
1635
	lea	ebx,[esi-1]
1636
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1637
	add	ebx,.bmap
1638
	movzx	eax,byte [ebx]
1639
1640
 
2984 leency 1641
;        inc     ebx
1642
	lea	ebx,[esi+1]
1643
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1644
	add	ebx,.bmap
1645
	movzx	ebx,byte [ebx]
1646
	sub	eax,ebx
1647
1648
 
2984 leency 1649
;        sub     ebx,TEX_X
1650
	lea	ebx,[esi-TEX_X]
1651
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1652
	add	ebx,.bmap
1653
	movzx	edx,byte [ebx]
1654
1655
 
2984 leency 1656
;        add     ebx,TEX_X
1657
	lea	ebx,[esi+TEX_X]
1658
	and	ebx,TEXTURE_SIZE
1245 hidnplayr 1659
	add	ebx,.bmap
1660
	movzx	ebx,byte [ebx]
1661
	sub	edx,ebx
1662
end if
2984 leency 1663
1245 hidnplayr 1664
 
1665
     ;  edx - vertical   sub    modified y coord
1666
if Ext=NON
1667
	mov	ebx,.cex       ;.cex - current env map X
1668
	shr	ebx,ROUND
1669
	add	eax,ebx
1670
1671
 
1672
 
1673
	shr	ebx,ROUND
1674
	add	edx,ebx
1675
1676
 
1677
	movq	mm1,mm5        ; mm5 - copy of cur env coords
1678
	psrld	mm1,ROUND
1679
	movd	ebx,mm1
1680
	psrlq	mm1,32
1681
	add	eax,ebx
1682
	movd	ebx,mm1
1683
	add	edx,ebx
1684
;        movq    qword[.temp1],mm3
1685
;        add     eax,dword [.temp1]
1686
;        add     edx,dword [.temp1+4]
1687
end if
1688
1689
 
1690
	jl	.black
1691
	cmp	eax,TEX_X
1692
	jg	.black
1693
	or	edx,edx
1694
	jl	.black
1695
	cmp	edx,TEX_Y
1696
	jg	.black
1697
1698
 
1699
	add	edx,eax 	; proponuje nie stawiac czarnego pixela tylko
1700
	lea	esi,[edx*3]	; niezaburzony.
1701
	add	esi,.emap	;
1702
	lodsd
1703
1704
 
1705
	mov	edx,.cty
1706
	shr	edx,ROUND  ; sar
1707
1708
 
1709
	shr	edi,ROUND  ; sar
1710
else
1711
	movq	mm1,mm7
1712
	psrld	mm1,ROUND
1713
	movd	edx,mm1
1714
	psrlq	mm1,32
1715
	movd	edi,mm1
1716
1717
 
1718
1719
 
1720
	add	edi,edx
1721
	and	edi,TEXTURE_SIZE
1722
	lea	esi,[edi*3]
1723
	add	esi,.tex_map
1724
1725
 
1726
	mov	edx,eax
1727
	lodsd
1728
	push	ax
1729
	mul	dl
1730
	mov	dl,ah
1731
	pop	ax
1732
	shr	ax,8
1733
	mul	dh
1734
	mov	al,dl
1735
	mov	edi,.c_scr
1736
	stosw
1737
	shr	edx,16
1738
	shr	eax,16
1739
	mul	dl
1740
	shr	ax,8
1741
	stosb
1742
else
1743
	movd	   mm0,eax
1744
	pxor	   mm1,mm1
1745
	punpcklbw  mm0,mm1
1746
	movd	   mm2,[esi]
1747
	punpcklbw  mm2,mm1
1748
	pmullw	   mm0,mm2
1749
	psrlw	   mm0,8
1750
	packuswb   mm0,mm1
1751
	mov	   edi,.c_scr
1752
	movd	   [edi],mm0
1753
1754
 
1755
1756
 
1757
     @@:
1758
     .black:
1759
	xor	eax,eax
1760
	mov	edi,.c_scr
1761
	stosd
1762
     .actual_zbuff:
1763
	mov	eax,.cz
1764
	mov	edi,.czbuff
1765
	stosd
1766
1767
 
1768
	add	dword .czbuff,4
1819 yogev_ezra 1769
	add	dword .c_scr,3
1770
1245 hidnplayr 1771
 
1772
	mov	eax,.dbx
1773
	add	.cbx,eax
1774
	mov	ebx,.dby
1775
	add	.cby,ebx
1776
1777
 
1778
	add	.cex,edx
1779
	mov	eax,.dey
1780
	add	.cey,eax
1781
1782
 
1783
	add	.ctx,ebx
1784
	mov	edx,.dty
1785
	add	.cty,edx
1786
1787
 
1788
	paddd	mm7,.dty
1819 yogev_ezra 1789
	paddd	mm6,.dby
1790
	paddd	mm5,.dey
1791
end if
1245 hidnplayr 1792
	mov	eax,.dz
1793
	add	.cz,eax
1794
1795
 
1796
	jnz	.draw
1797
1798
 
1799
	mov	esp,ebp
1800
ret 76
1801
;Ext = MMX
1802
1803
 
1804
;        movq    mm5, qword[.temp1]  ;-
1805
;        paddd   mm5, qword[.temp5]  ; .temp5 == low dword = TEX_X, high dword = -TEX_X
1806
;        pand    mm5, qword[.temp3]  ; .temp3 == low = high dword = TEX_SIZE
1807
;        paddd   mm5, qword[.temp4]  ; .temp4 == low = high dword = offset .bmap
1808
;        movd    ebx,mm5
1809
;        psrlq   mm5,32
1810
;     end if
1811