Subversion Repositories Kolibri OS

Rev

Rev 2881 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2881 Rev 2984
1
;CATMULL_SHIFT equ 8
1
;CATMULL_SHIFT equ 8
2
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
2
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
3
;ROUND equ 8
3
;ROUND equ 8
4
;Ext = NON
4
;Ext = NON
5
;MMX = 1
5
;MMX = 1
6
;NON = 0
6
;NON = 0
7
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
7
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
8
;------- DOS 13h mode demos --------------------------------------------
8
;------- DOS 13h mode demos --------------------------------------------
9
;------- Procedure draws bump triangle with texture, I use -------------
9
;------- Procedure draws bump triangle with texture, I use -------------
10
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)-------
10
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)-------
11
;--------Texture pixel is computed as: col1*col2/256 -------------------
11
;--------Texture pixel is computed as: col1*col2/256 -------------------
12
bump_tex_triangle_z:
12
bump_tex_triangle_z:
13
;------------------in - eax - x1 shl 16 + y1 -----------
13
;------------------in - eax - x1 shl 16 + y1 -----------
14
;---------------------- ebx - x2 shl 16 + y2 -----------
14
;---------------------- ebx - x2 shl 16 + y2 -----------
15
;---------------------- ecx - x3 shl 16 + y3 -----------
15
;---------------------- ecx - x3 shl 16 + y3 -----------
16
;---------------------- edx - pointer to bump map-------
16
;---------------------- edx - pointer to bump map-------
17
;---------------------- esi - pointer to env map--------
17
;---------------------- esi - pointer to env map--------
18
;---------------------- edi - pointer to screen buffer--
18
;---------------------- edi - pointer to screen buffer--
19
;---------------------- stack : bump coordinates--------
19
;---------------------- stack : bump coordinates--------
20
;----------------------         environment coordinates-
20
;----------------------         environment coordinates-
21
;----------------------         Z position coordinates--
21
;----------------------         Z position coordinates--
22
;----------------------         pointer to Z buffer-----
22
;----------------------         pointer to Z buffer-----
23
;----------------------         pointer to texture------
23
;----------------------         pointer to texture------
24
;----------------------         texture coordinates-----
24
;----------------------         texture coordinates-----
25
;-- Z-buffer - filled with coordinates as dword --------
25
;-- Z-buffer - filled with coordinates as dword --------
26
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
26
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
27
.b_x1	equ ebp+4   ; procedure don't save registers !!!
27
.b_x1	equ ebp+4   ; procedure don't save registers !!!
28
.b_y1	equ ebp+6   ; each coordinate as word
28
.b_y1	equ ebp+6   ; each coordinate as word
29
.b_x2	equ ebp+8
29
.b_x2	equ ebp+8
30
.b_y2	equ ebp+10	 ; b - bump map coords
30
.b_y2	equ ebp+10	 ; b - bump map coords
31
.b_x3	equ ebp+12	 ; e - env map coords
31
.b_x3	equ ebp+12	 ; e - env map coords
32
.b_y3	equ ebp+14
32
.b_y3	equ ebp+14
33
.e_x1	equ ebp+16
33
.e_x1	equ ebp+16
34
.e_y1	equ ebp+18
34
.e_y1	equ ebp+18
35
.e_x2	equ ebp+20
35
.e_x2	equ ebp+20
36
.e_y2	equ ebp+22
36
.e_y2	equ ebp+22
37
.e_x3	equ ebp+24
37
.e_x3	equ ebp+24
38
.e_y3	equ ebp+26
38
.e_y3	equ ebp+26
39
.z1	equ word[ebp+28]
39
.z1	equ word[ebp+28]
40
.z2	equ word[ebp+30]
40
.z2	equ word[ebp+30]
41
.z3	equ word[ebp+32]
41
.z3	equ word[ebp+32]
42
.z_buff equ dword[ebp+34]	; pointer to Z-buffer
42
.z_buff equ dword[ebp+34]	; pointer to Z-buffer
43
.tex_ptr equ dword[ebp+38]	; ptr to texture
43
.tex_ptr equ dword[ebp+38]	; ptr to texture
44
.t_x1	equ ebp+42		; texture coords
44
.t_x1	equ ebp+42		; texture coords
45
.t_y1	equ ebp+44
45
.t_y1	equ ebp+44
46
.t_x2	equ ebp+46
46
.t_x2	equ ebp+46
47
.t_y2	equ ebp+48
47
.t_y2	equ ebp+48
48
.t_x3	equ ebp+50
48
.t_x3	equ ebp+50
49
.t_y3	equ ebp+52
49
.t_y3	equ ebp+52
50
 
50
 
51
 
51
 
52
 
52
 
53
.t_bmap equ dword[ebp-4]	; pointer to bump map
53
.t_bmap equ dword[ebp-4]	; pointer to bump map
54
.t_emap equ dword[ebp-8]	; pointer to env map
54
.t_emap equ dword[ebp-8]	; pointer to env map
55
.x1	equ word[ebp-10]
55
.x1	equ word[ebp-10]
56
.y1	equ word[ebp-12]
56
.y1	equ word[ebp-12]
57
.x2	equ word[ebp-14]
57
.x2	equ word[ebp-14]
58
.y2	equ word[ebp-16]
58
.y2	equ word[ebp-16]
59
.x3	equ word[ebp-18]
59
.x3	equ word[ebp-18]
60
.y3	equ word[ebp-20]
60
.y3	equ word[ebp-20]
61
 
61
 
62
if 0 ;Ext <= SSE2
62
if 0 ;Ext <= SSE2
63
 
63
 
64
.dx12	equ dword[edi-4]
64
.dx12	equ dword[edi-4]
65
.dz12	equ	 [edi-8]
65
.dz12	equ	 [edi-8]
66
.dbx12	equ dword[edi-12]
66
.dbx12	equ dword[edi-12]
67
.dby12	equ	 [edi-16]
67
.dby12	equ	 [edi-16]
68
.dex12	equ dword[edi-20]
68
.dex12	equ dword[edi-20]
69
.dey12	equ	 [edi-24]
69
.dey12	equ	 [edi-24]
70
.dtx12	equ dword[edi-28]
70
.dtx12	equ dword[edi-28]
71
.dty12	equ	 [edi-32]
71
.dty12	equ	 [edi-32]
72
 
72
 
73
.dx13  equ dword[ebp-52-4*1]
73
.dx13  equ dword[ebp-52-4*1]
74
.dz13  equ	[ebp-52-4*2]
74
.dz13  equ	[ebp-52-4*2]
75
.dbx13 equ dword[ebp-52-4*3]
75
.dbx13 equ dword[ebp-52-4*3]
76
.dby13 equ	[ebp-52-4*4]
76
.dby13 equ	[ebp-52-4*4]
77
.dex13 equ dword[ebp-52-4*5]
77
.dex13 equ dword[ebp-52-4*5]
78
.dey13 equ	[ebp-52-4*6]
78
.dey13 equ	[ebp-52-4*6]
79
.dtx13 equ dword[ebp-52-4*7]
79
.dtx13 equ dword[ebp-52-4*7]
80
.dty13 equ	[ebp-52-4*8]
80
.dty13 equ	[ebp-52-4*8]
81
 
81
 
82
 
82
 
83
.dx23  equ dword[ebp-(52+4*9)]
83
.dx23  equ dword[ebp-(52+4*9)]
84
.dz23  equ	[ebp-(52+4*10)]
84
.dz23  equ	[ebp-(52+4*10)]
85
.dbx23 equ dword[ebp-(52+4*11)]
85
.dbx23 equ dword[ebp-(52+4*11)]
86
.dby23 equ	[ebp-(52+4*12)]
86
.dby23 equ	[ebp-(52+4*12)]
87
.dex23 equ dword[ebp-(52+4*13)]
87
.dex23 equ dword[ebp-(52+4*13)]
88
.dey23 equ	[ebp-(52+4*14)]
88
.dey23 equ	[ebp-(52+4*14)]
89
.dtx23 equ dword[ebp-(52+4*15)]
89
.dtx23 equ dword[ebp-(52+4*15)]
90
.dty23 equ	[ebp-(52+4*16)]
90
.dty23 equ	[ebp-(52+4*16)]
91
 
91
 
92
else
92
else
93
 
93
 
94
.dx12	equ dword[ebp-24]
94
.dx12	equ dword[ebp-24]
95
.dz12	equ	 [ebp-28]
95
.dz12	equ	 [ebp-28]
96
.dbx12	equ dword[ebp-32]
96
.dbx12	equ dword[ebp-32]
97
.dby12	equ	 [ebp-36]
97
.dby12	equ	 [ebp-36]
98
.dex12	equ dword[ebp-40]
98
.dex12	equ dword[ebp-40]
99
.dey12	equ	 [ebp-44]
99
.dey12	equ	 [ebp-44]
100
.dtx12	equ dword[ebp-48]
100
.dtx12	equ dword[ebp-48]
101
.dty12	equ	 [ebp-52]
101
.dty12	equ	 [ebp-52]
102
 
102
 
103
.dx13  equ dword[ebp-52-4*1]
103
.dx13  equ dword[ebp-52-4*1]
104
.dz13  equ	[ebp-52-4*2]
104
.dz13  equ	[ebp-52-4*2]
105
.dbx13 equ dword[ebp-52-4*3]
105
.dbx13 equ dword[ebp-52-4*3]
106
.dby13 equ	[ebp-52-4*4]
106
.dby13 equ	[ebp-52-4*4]
107
.dex13 equ dword[ebp-52-4*5]
107
.dex13 equ dword[ebp-52-4*5]
108
.dey13 equ	[ebp-52-4*6]
108
.dey13 equ	[ebp-52-4*6]
109
.dtx13 equ dword[ebp-52-4*7]
109
.dtx13 equ dword[ebp-52-4*7]
110
.dty13 equ	[ebp-52-4*8]
110
.dty13 equ	[ebp-52-4*8]
111
 
111
 
112
 
112
 
113
.dx23  equ dword[ebp-(52+4*9)]
113
.dx23  equ dword[ebp-(52+4*9)]
114
.dz23  equ	[ebp-(52+4*10)]
114
.dz23  equ	[ebp-(52+4*10)]
115
.dbx23 equ dword[ebp-(52+4*11)]
115
.dbx23 equ dword[ebp-(52+4*11)]
116
.dby23 equ	[ebp-(52+4*12)]
116
.dby23 equ	[ebp-(52+4*12)]
117
.dex23 equ dword[ebp-(52+4*13)]
117
.dex23 equ dword[ebp-(52+4*13)]
118
.dey23 equ	[ebp-(52+4*14)]
118
.dey23 equ	[ebp-(52+4*14)]
119
.dtx23 equ dword[ebp-(52+4*15)]
119
.dtx23 equ dword[ebp-(52+4*15)]
120
.dty23 equ	[ebp-(52+4*16)]
120
.dty23 equ	[ebp-(52+4*16)]
121
 
121
 
122
end if
122
end if
123
 
123
 
124
if Ext < SSE
124
if Ext < SSE
125
 
125
 
126
.cx1   equ dword[ebp-(52+4*17)] 	; current variables
126
.cx1   equ dword[ebp-(52+4*17)] 	; current variables
127
.cz1   equ	[ebp-(52+4*18)]
127
.cz1   equ	[ebp-(52+4*18)]
128
.cx2   equ dword[ebp-(52+4*19)]
128
.cx2   equ dword[ebp-(52+4*19)]
129
.cz2   equ	[ebp-(52+4*20)]
129
.cz2   equ	[ebp-(52+4*20)]
130
.cbx1  equ dword[ebp-(52+4*21)]
130
.cbx1  equ dword[ebp-(52+4*21)]
131
.cby1  equ	[ebp-(52+4*22)]
131
.cby1  equ	[ebp-(52+4*22)]
132
.cbx2  equ dword[ebp-(52+4*23)]
132
.cbx2  equ dword[ebp-(52+4*23)]
133
.cby2  equ	[ebp-(52+4*24)]
133
.cby2  equ	[ebp-(52+4*24)]
134
.cex1  equ dword[ebp-(52+4*25)]
134
.cex1  equ dword[ebp-(52+4*25)]
135
.cey1  equ	[ebp-(52+4*26)]
135
.cey1  equ	[ebp-(52+4*26)]
136
.cex2  equ dword[ebp-(52+4*27)]
136
.cex2  equ dword[ebp-(52+4*27)]
137
.cey2  equ	[ebp-(52+4*28)]
137
.cey2  equ	[ebp-(52+4*28)]
138
 
138
 
139
.ctx1  equ dword[ebp-(52+4*29)]
139
.ctx1  equ dword[ebp-(52+4*29)]
140
.cty1  equ	[ebp-(52+4*30)]
140
.cty1  equ	[ebp-(52+4*30)]
141
.ctx2  equ dword[ebp-(52+4*31)]
141
.ctx2  equ dword[ebp-(52+4*31)]
142
.cty2  equ	[ebp-(52+4*32)]
142
.cty2  equ	[ebp-(52+4*32)]
143
 
143
 
144
else
144
else
145
 
145
 
146
.cx1   equ dword[ebp-(52+4*17)] 	; current variables
146
.cx1   equ dword[ebp-(52+4*17)] 	; current variables
147
.cz1   equ	[ebp-(52+4*18)]
147
.cz1   equ	[ebp-(52+4*18)]
148
.cbx1  equ dword[ebp-(52+4*19)]
148
.cbx1  equ dword[ebp-(52+4*19)]
149
.cby1  equ	[ebp-(52+4*20)]
149
.cby1  equ	[ebp-(52+4*20)]
150
.cex1  equ dword[ebp-(52+4*21)]
150
.cex1  equ dword[ebp-(52+4*21)]
151
.cey1  equ	[ebp-(52+4*22)]
151
.cey1  equ	[ebp-(52+4*22)]
152
.ctx1  equ dword[ebp-(52+4*23)]
152
.ctx1  equ dword[ebp-(52+4*23)]
153
.cty1  equ	[ebp-(52+4*24)]
153
.cty1  equ	[ebp-(52+4*24)]
154
 
154
 
155
.cx2   equ dword[ebp-(52+4*25)]
155
.cx2   equ dword[ebp-(52+4*25)]
156
.cz2   equ	[ebp-(52+4*26)]
156
.cz2   equ	[ebp-(52+4*26)]
157
.cbx2  equ dword[ebp-(52+4*27)]
157
.cbx2  equ dword[ebp-(52+4*27)]
158
.cby2  equ	[ebp-(52+4*28)]
158
.cby2  equ	[ebp-(52+4*28)]
159
.cex2  equ dword[ebp-(52+4*29)]
159
.cex2  equ dword[ebp-(52+4*29)]
160
.cey2  equ	[ebp-(52+4*30)]
160
.cey2  equ	[ebp-(52+4*30)]
161
.ctx2  equ dword[ebp-(52+4*31)]
161
.ctx2  equ dword[ebp-(52+4*31)]
162
.cty2  equ	[ebp-(52+4*32)]
162
.cty2  equ	[ebp-(52+4*32)]
163
 
163
 
164
end if
164
end if
165
       cld
165
       cld
166
       mov     ebp,esp
166
       mov     ebp,esp
167
       push    edx	  ; store bump map
167
       push    edx	  ; store bump map
168
       push    esi	  ; store e. map
168
       push    esi	  ; store e. map
169
     ; sub     esp,120
169
     ; sub     esp,120
170
 .sort3:		  ; sort triangle coordinates...
170
 .sort3:		  ; sort triangle coordinates...
171
       cmp     ax,bx
171
       cmp     ax,bx
172
       jle     .sort1
172
       jle     .sort1
173
       xchg    eax,ebx
173
       xchg    eax,ebx
174
       mov     edx,dword[.b_x1]
174
       mov     edx,dword[.b_x1]
175
       xchg    edx,dword[.b_x2]
175
       xchg    edx,dword[.b_x2]
176
       mov     dword[.b_x1],edx
176
       mov     dword[.b_x1],edx
177
       mov     edx,dword[.e_x1]
177
       mov     edx,dword[.e_x1]
178
       xchg    edx,dword[.e_x2]
178
       xchg    edx,dword[.e_x2]
179
       mov     dword[.e_x1],edx
179
       mov     dword[.e_x1],edx
180
       mov     edx,dword[.t_x1]
180
       mov     edx,dword[.t_x1]
181
       xchg    edx,dword[.t_x2]
181
       xchg    edx,dword[.t_x2]
182
       mov     dword[.t_x1],edx
182
       mov     dword[.t_x1],edx
183
       mov     dx,.z1
183
       mov     dx,.z1
184
       xchg    dx,.z2
184
       xchg    dx,.z2
185
       mov     .z1,dx
185
       mov     .z1,dx
186
 .sort1:
186
 .sort1:
187
       cmp	bx,cx
187
       cmp	bx,cx
188
       jle	.sort2
188
       jle	.sort2
189
       xchg	ebx,ecx
189
       xchg	ebx,ecx
190
       mov	edx,dword[.b_x2]
190
       mov	edx,dword[.b_x2]
191
       xchg	edx,dword[.b_x3]
191
       xchg	edx,dword[.b_x3]
192
       mov	dword[.b_x2],edx
192
       mov	dword[.b_x2],edx
193
       mov	edx,dword[.e_x2]
193
       mov	edx,dword[.e_x2]
194
       xchg	edx,dword[.e_x3]
194
       xchg	edx,dword[.e_x3]
195
       mov	dword[.e_x2],edx
195
       mov	dword[.e_x2],edx
196
       mov	edx,dword[.t_x2]
196
       mov	edx,dword[.t_x2]
197
       xchg	edx,dword[.t_x3]
197
       xchg	edx,dword[.t_x3]
198
       mov	dword[.t_x2],edx
198
       mov	dword[.t_x2],edx
199
       mov     dx,.z2
199
       mov     dx,.z2
200
       xchg    dx,.z3
200
       xchg    dx,.z3
201
       mov     .z2,dx
201
       mov     .z2,dx
202
       jmp	.sort3
202
       jmp	.sort3
203
 .sort2:
203
 .sort2:
204
       push	eax	; store triangle coords in variables
204
       push	eax	; store triangle coords in variables
205
       push	ebx
205
       push	ebx
206
       push	ecx
206
       push	ecx
207
	 mov	  edx,80008000h  ; eax,ebx,ecx are ANDd together into edx which means that
207
	 mov	  edx,80008000h  ; eax,ebx,ecx are ANDd together into edx which means that
208
	 and	  edx,ebx	 ; if *all* of them are negative a sign flag is raised
208
	 and	  edx,ebx	 ; if *all* of them are negative a sign flag is raised
209
	 and	  edx,ecx
209
	 and	  edx,ecx
210
	 and	  edx,eax
210
	 and	  edx,eax
211
	 test	  edx,80008000h  ; Check both X&Y at once
211
	 test	  edx,80008000h  ; Check both X&Y at once
212
	 jne	  .loop23_done
212
	 jne	  .loop23_done
213
    ;   mov     edx,eax         ; eax,ebx,ecx are ORd together into edx which means that
213
    ;   mov     edx,eax         ; eax,ebx,ecx are ORd together into edx which means that
214
    ;   or      edx,ebx         ; if any *one* of them is negative a sign flag is raised
214
    ;   or      edx,ebx         ; if any *one* of them is negative a sign flag is raised
215
    ;   or      edx,ecx
215
    ;   or      edx,ecx
216
    ;   test    edx,80000000h   ; Check only X
216
    ;   test    edx,80000000h   ; Check only X
217
    ;   jne     .loop23_done
217
    ;   jne     .loop23_done
218
 
218
 
219
    ;   cmp     .x1,SIZE_X    ; {
219
    ;   cmp     .x1,SIZE_X    ; {
220
    ;   jg      .loop23_done
220
    ;   jg      .loop23_done
221
    ;   cmp     .x2,SIZE_X     ; This can be optimized with effort
221
    ;   cmp     .x2,SIZE_X     ; This can be optimized with effort
222
    ;   jg      .loop23_done
222
    ;   jg      .loop23_done
223
    ;   cmp     .x3,SIZE_X
223
    ;   cmp     .x3,SIZE_X
224
    ;   jg      .loop23_done    ; {
224
    ;   jg      .loop23_done    ; {
225
 
225
 
226
 
226
 
227
       mov	bx,.y2	     ; calc delta 12
227
       mov	bx,.y2	     ; calc delta 12
228
       sub	bx,.y1
228
       sub	bx,.y1
229
       jnz	.bt_dx12_make
229
       jnz	.bt_dx12_make
230
if 0 ;Ext >= SSE2
230
if 0 ;Ext >= SSE2
231
       pxor	xmm0,xmm0
231
       pxor	xmm0,xmm0
232
       movups	.dty12,xmm0
232
       movups	.dty12,xmm0
233
       movups	.dey12,xmm0
233
       movups	.dey12,xmm0
234
       sub	esp,16
234
       sub	esp,16
235
else
235
else
236
       mov	ecx,8
236
       mov	ecx,8
237
       xor	edx,edx
237
       xor	edx,edx
238
     @@:
238
     @@:
239
       push	edx   ;dword 0
239
       push	edx   ;dword 0
240
       loop	@b
240
       loop	@b
241
end if
241
end if
242
       jmp	.bt_dx12_done
242
       jmp	.bt_dx12_done
243
 .bt_dx12_make:
243
 .bt_dx12_make:
244
       movsx	ebx,bx
244
       movsx	ebx,bx
245
 
245
 
246
 
246
 
247
if Ext>=SSE
247
if Ext>=SSE
248
       sub	 esp,32
248
       sub	 esp,32
249
   ;    mov       eax,256
249
   ;    mov       eax,256
250
       cvtsi2ss  xmm4,[i255d]
250
       cvtsi2ss  xmm4,[i255d]
251
       cvtsi2ss  xmm3,ebx ;rcps
251
       cvtsi2ss  xmm3,ebx ;rcps
252
if 0 ;Ext >= SSE2
252
if 0 ;Ext >= SSE2
253
       mov	 edi,ebp
253
       mov	 edi,ebp
254
       sub	 edi,512
254
       sub	 edi,512
255
       or	 edi,0x0000000f
255
       or	 edi,0x0000000f
256
end if
256
end if
257
       divss	 xmm3,xmm4
257
       divss	 xmm3,xmm4
258
       shufps	 xmm3,xmm3,0
258
       shufps	 xmm3,xmm3,0
259
 
259
 
260
       movd	 mm0,[.b_x1]
260
       movd	 mm0,[.b_x1]
261
       movd	 mm1,[.b_x2]
261
       movd	 mm1,[.b_x2]
262
       movd	 mm2,[.e_x1]
262
       movd	 mm2,[.e_x1]
263
       movd	 mm3,[.e_x2]
263
       movd	 mm3,[.e_x2]
264
 
264
 
265
       pxor	  mm4,mm4
265
       pxor	  mm4,mm4
266
       punpcklwd  mm0,mm4
266
       punpcklwd  mm0,mm4
267
       punpcklwd  mm1,mm4
267
       punpcklwd  mm1,mm4
268
       punpcklwd  mm2,mm4
268
       punpcklwd  mm2,mm4
269
       punpcklwd  mm3,mm4
269
       punpcklwd  mm3,mm4
270
 
270
 
271
       psubd	  mm1,mm0
271
       psubd	  mm1,mm0
272
       psubd	  mm3,mm2
272
       psubd	  mm3,mm2
273
 
273
 
274
       cvtpi2ps  xmm1,mm1
274
       cvtpi2ps  xmm1,mm1
275
       movlhps	 xmm1,xmm1
275
       movlhps	 xmm1,xmm1
276
       cvtpi2ps  xmm1,mm3
276
       cvtpi2ps  xmm1,mm3
277
 
277
 
278
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
278
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
279
 
279
 
280
       shufps	 xmm1,xmm1,10110001b
280
       shufps	 xmm1,xmm1,10110001b
281
			     ;xmm1--> | dbx | dby | dex | dey |
281
			     ;xmm1--> | dbx | dby | dex | dey |
282
;1       movups    .dey12,xmm1
282
;1       movups    .dey12,xmm1
283
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
283
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
284
       movhlps	 xmm1,xmm1
284
       movhlps	 xmm1,xmm1
285
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
285
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
286
       movq	 .dey12,mm0
286
       movq	 .dey12,mm0
287
       movq	 .dby12,mm1
287
       movq	 .dby12,mm1
288
;-------------
288
;-------------
289
  ;    pxor      mm0,mm0
289
  ;    pxor      mm0,mm0
290
  ;    pxor      mm1,mm1
290
  ;    pxor      mm1,mm1
291
   ;/   pinsrw    mm0,.z1,1
291
   ;/   pinsrw    mm0,.z1,1
292
   ;/   pinsrw    mm0,.x1,0
292
   ;/   pinsrw    mm0,.x1,0
293
   ;/   pinsrw    mm1,.z2,1
293
   ;/   pinsrw    mm1,.z2,1
294
   ;/   pinsrw    mm1,.x2,0
294
   ;/   pinsrw    mm1,.x2,0
295
       mov	 ax,.z2
295
       mov	 ax,.z2
296
       sub	 ax,.z1
296
       sub	 ax,.z1
297
       cwde
297
       cwde
298
 
298
 
299
       mov	dx,.x2
299
       mov	dx,.x2
300
       sub	dx,.x1
300
       sub	dx,.x1
301
       movsx	edx,dx
301
       movsx	edx,dx
302
 
302
 
303
   ;/    movd      mm1,eax
303
   ;/    movd      mm1,eax
304
 
304
 
305
   ;/    punpcklwd  mm0,mm4
305
   ;/    punpcklwd  mm0,mm4
306
   ;/    punpcklwd  mm1,mm4
306
   ;/    punpcklwd  mm1,mm4
307
 
307
 
308
  ;     cvtpi2ps   xmm1,mm1
308
  ;     cvtpi2ps   xmm1,mm1
309
  ;     cvtpi2ps   xmm2,mm0
309
  ;     cvtpi2ps   xmm2,mm0
310
  ;     subps      xmm1,xmm2
310
  ;     subps      xmm1,xmm2
311
 
311
 
312
   ;/   psubd      mm1,mm0
312
   ;/   psubd      mm1,mm0
313
 
313
 
314
       movd	  mm2,[.t_x1]
314
       movd	  mm2,[.t_x1]
315
       movd	  mm3,[.t_x2]
315
       movd	  mm3,[.t_x2]
316
 
316
 
317
       punpcklwd  mm2,mm4
317
       punpcklwd  mm2,mm4
318
       punpcklwd  mm3,mm4
318
       punpcklwd  mm3,mm4
319
       psubd	  mm3,mm2
319
       psubd	  mm3,mm2
320
 
320
 
321
   ;/  cvtpi2ps  xmm1,mm1
321
   ;/  cvtpi2ps  xmm1,mm1
322
       cvtsi2ss  xmm1,eax
322
       cvtsi2ss  xmm1,eax
323
       movlhps	 xmm1,xmm1
323
       movlhps	 xmm1,xmm1
324
       cvtsi2ss  xmm1,edx
324
       cvtsi2ss  xmm1,edx
325
   ;    movss     xmm1,xmm4
325
   ;    movss     xmm1,xmm4
326
       shufps	 xmm1,xmm1,00101111b
326
       shufps	 xmm1,xmm1,00101111b
327
       cvtpi2ps  xmm1,mm3
327
       cvtpi2ps  xmm1,mm3
328
 
328
 
329
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
329
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
330
 
330
 
331
       shufps	 xmm1,xmm1,11100001b
331
       shufps	 xmm1,xmm1,11100001b
332
			     ; xmm1--> | dx | dz | dtx | dty |
332
			     ; xmm1--> | dx | dz | dtx | dty |
333
;1       movlps    .dty12,xmm1
333
;1       movlps    .dty12,xmm1
334
;1       movhps    .dz12,xmm1
334
;1       movhps    .dz12,xmm1
335
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
335
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
336
       movhlps	 xmm1,xmm1
336
       movhlps	 xmm1,xmm1
337
       cvtps2pi  mm1,xmm1
337
       cvtps2pi  mm1,xmm1
338
       movq	 .dty12,mm0
338
       movq	 .dty12,mm0
339
       movq	 .dz12,mm1
339
       movq	 .dz12,mm1
340
;----
340
;----
341
;       mov       ax,.z2
341
;       mov       ax,.z2
342
;       sub       ax,.z1
342
;       sub       ax,.z1
343
;       cwde
343
;       cwde
344
;       mov       bx,.x2
344
;       mov       bx,.x2
345
;       sub       bx,.x1
345
;       sub       bx,.x1
346
;       movsx     ebx,bx
346
;       movsx     ebx,bx
347
;       movd      mm1,eax
347
;       movd      mm1,eax
348
;       psllq     mm1,32
348
;       psllq     mm1,32
349
;       movd      mm1,ebx
349
;       movd      mm1,ebx
350
 
350
 
351
;;       push      ebx
351
;;       push      ebx
352
;;       push      eax
352
;;       push      eax
353
;;       movq      mm1,[esp]
353
;;       movq      mm1,[esp]
354
;;       add       esp,8
354
;;       add       esp,8
355
;;;       mov       ax,.z1
355
;;;       mov       ax,.z1
356
;;;       mov       bx,.z2
356
;;;       mov       bx,.z2
357
;;;       shl       eax,16
357
;;;       shl       eax,16
358
;;;       shl       ebx,16
358
;;;       shl       ebx,16
359
;;;       mov       ax,.x1
359
;;;       mov       ax,.x1
360
;;;       mov       bx,.x2
360
;;;       mov       bx,.x2
361
;       movd       mm2,[.t_x1]
361
;       movd       mm2,[.t_x1]
362
;       movd       mm3,[.t_x2]
362
;       movd       mm3,[.t_x2]
363
;;       movd      mm0,eax
363
;;       movd      mm0,eax
364
;;       movd      mm1,ebx
364
;;       movd      mm1,ebx
365
 
365
 
366
;       pxor       mm4,mm4
366
;       pxor       mm4,mm4
367
;;       punpcklwd  mm0,mm4
367
;;       punpcklwd  mm0,mm4
368
;;       punpcklwd  mm1,mm4
368
;;       punpcklwd  mm1,mm4
369
;       punpcklwd  mm2,mm4
369
;       punpcklwd  mm2,mm4
370
;       punpcklwd  mm3,mm4
370
;       punpcklwd  mm3,mm4
371
 
371
 
372
;;       psubd    mm1,mm0
372
;;       psubd    mm1,mm0
373
;       psubd      mm3,mm2
373
;       psubd      mm3,mm2
374
 
374
 
375
 
375
 
376
;       cvtpi2ps  xmm1,mm1
376
;       cvtpi2ps  xmm1,mm1
377
;       movlhps   xmm1,xmm1
377
;       movlhps   xmm1,xmm1
378
;       cvtpi2ps  xmm1,mm3
378
;       cvtpi2ps  xmm1,mm3
379
 
379
 
380
;       divps     xmm1,xmm3   ; xmm1--> | dz | dx | dty | dtx |
380
;       divps     xmm1,xmm3   ; xmm1--> | dz | dx | dty | dtx |
381
 
381
 
382
;       shufps    xmm1,xmm1,10110001b
382
;       shufps    xmm1,xmm1,10110001b
383
			     ; xmm1--> | dx | dz | dtx | dty |
383
			     ; xmm1--> | dx | dz | dtx | dty |
384
;       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
384
;       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
385
;       movhlps   xmm1,xmm1
385
;       movhlps   xmm1,xmm1
386
;       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx | dz |
386
;       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx | dz |
387
;       movq      .dty12,mm0
387
;       movq      .dty12,mm0
388
;       movq      .dz12,mm1
388
;       movq      .dz12,mm1
389
else
389
else
390
       mov	ax,.x2
390
       mov	ax,.x2
391
       sub	ax,.x1
391
       sub	ax,.x1
392
       cwde
392
       cwde
393
       shl	eax,ROUND
393
       shl	eax,ROUND
394
       cdq
394
       cdq
395
       idiv	ebx
395
       idiv	ebx
396
 ;     mov      .dx12,eax
396
 ;     mov      .dx12,eax
397
       push	 eax
397
       push	 eax
398
 
398
 
399
       mov     ax,.z2
399
       mov     ax,.z2
400
       sub     ax,.z1
400
       sub     ax,.z1
401
       cwde
401
       cwde
402
       shl     eax,CATMULL_SHIFT
402
       shl     eax,CATMULL_SHIFT
403
       cdq
403
       cdq
404
       idiv    ebx
404
       idiv    ebx
405
       push    eax
405
       push    eax
406
 
406
 
407
       mov	ax,word[.b_x2]
407
       mov	ax,word[.b_x2]
408
       sub	ax,word[.b_x1]
408
       sub	ax,word[.b_x1]
409
       cwde
409
       cwde
410
       shl	eax,ROUND
410
       shl	eax,ROUND
411
       cdq
411
       cdq
412
       idiv	ebx
412
       idiv	ebx
413
 ;     mov      .dbx12,eax
413
 ;     mov      .dbx12,eax
414
       push	 eax
414
       push	 eax
415
 
415
 
416
       mov	ax,word[.b_y2]
416
       mov	ax,word[.b_y2]
417
       sub	ax,word[.b_y1]
417
       sub	ax,word[.b_y1]
418
       cwde
418
       cwde
419
       shl	eax,ROUND
419
       shl	eax,ROUND
420
       cdq
420
       cdq
421
       idiv	ebx
421
       idiv	ebx
422
 ;     mov      .dby12,eax
422
 ;     mov      .dby12,eax
423
       push	 eax
423
       push	 eax
424
 
424
 
425
       mov	ax,word[.e_x2]
425
       mov	ax,word[.e_x2]
426
       sub	ax,word[.e_x1]
426
       sub	ax,word[.e_x1]
427
       cwde
427
       cwde
428
       shl	eax,ROUND
428
       shl	eax,ROUND
429
       cdq
429
       cdq
430
       idiv	ebx
430
       idiv	ebx
431
 ;     mov      .dex12,eax
431
 ;     mov      .dex12,eax
432
       push	 eax
432
       push	 eax
433
 
433
 
434
       mov	ax,word[.e_y2]
434
       mov	ax,word[.e_y2]
435
       sub	ax,word[.e_y1]
435
       sub	ax,word[.e_y1]
436
       cwde
436
       cwde
437
       shl	eax,ROUND
437
       shl	eax,ROUND
438
       cdq
438
       cdq
439
       idiv	ebx
439
       idiv	ebx
440
 ;     mov      .dey12,eax
440
 ;     mov      .dey12,eax
441
       push	 eax
441
       push	 eax
442
 
442
 
443
       mov	ax,word[.t_x2]
443
       mov	ax,word[.t_x2]
444
       sub	ax,word[.t_x1]
444
       sub	ax,word[.t_x1]
445
       cwde
445
       cwde
446
       shl	eax,ROUND
446
       shl	eax,ROUND
447
       cdq
447
       cdq
448
       idiv	ebx
448
       idiv	ebx
449
 ;     mov      .dtx12,eax
449
 ;     mov      .dtx12,eax
450
       push	 eax
450
       push	 eax
451
 
451
 
452
       mov	ax,word[.t_y2]
452
       mov	ax,word[.t_y2]
453
       sub	ax,word[.t_y1]
453
       sub	ax,word[.t_y1]
454
       cwde
454
       cwde
455
       shl	eax,ROUND
455
       shl	eax,ROUND
456
       cdq
456
       cdq
457
       idiv	ebx
457
       idiv	ebx
458
 ;     mov      .dty12,eax
458
 ;     mov      .dty12,eax
459
       push	 eax
459
       push	 eax
460
end if
460
end if
461
   .bt_dx12_done:
461
   .bt_dx12_done:
462
 
462
 
463
       mov	bx,.y3	     ; calc delta13
463
       mov	bx,.y3	     ; calc delta13
464
       sub	bx,.y1
464
       sub	bx,.y1
465
       jnz	.bt_dx13_make
465
       jnz	.bt_dx13_make
466
       mov	ecx,8
466
       mov	ecx,8
467
       xor	edx,edx
467
       xor	edx,edx
468
     @@:
468
     @@:
469
       push	edx   ;dword 0
469
       push	edx   ;dword 0
470
       loop	@b
470
       loop	@b
471
       jmp	.bt_dx13_done
471
       jmp	.bt_dx13_done
472
 .bt_dx13_make:
472
 .bt_dx13_make:
473
       movsx	ebx,bx
473
       movsx	ebx,bx
474
 
474
 
475
if Ext>=SSE
475
if Ext>=SSE
476
 
476
 
477
       sub	 esp,32
477
       sub	 esp,32
478
   ;    mov       eax,256
478
   ;    mov       eax,256
479
       cvtsi2ss  xmm4,[i255d]
479
       cvtsi2ss  xmm4,[i255d]
480
       cvtsi2ss  xmm3,ebx	     ;rcps
480
       cvtsi2ss  xmm3,ebx	     ;rcps
481
       divss	 xmm3,xmm4
481
       divss	 xmm3,xmm4
482
       shufps	 xmm3,xmm3,0
482
       shufps	 xmm3,xmm3,0
483
 
483
 
484
       movd	 mm0,[.b_x1]
484
       movd	 mm0,[.b_x1]
485
       movd	 mm1,[.b_x3]
485
       movd	 mm1,[.b_x3]
486
       movd	 mm2,[.e_x1]
486
       movd	 mm2,[.e_x1]
487
       movd	 mm3,[.e_x3]
487
       movd	 mm3,[.e_x3]
488
 
488
 
489
       pxor	  mm4,mm4
489
       pxor	  mm4,mm4
490
       punpcklwd  mm0,mm4
490
       punpcklwd  mm0,mm4
491
       punpcklwd  mm1,mm4
491
       punpcklwd  mm1,mm4
492
       punpcklwd  mm2,mm4
492
       punpcklwd  mm2,mm4
493
       punpcklwd  mm3,mm4
493
       punpcklwd  mm3,mm4
494
 
494
 
495
       psubd	  mm1,mm0
495
       psubd	  mm1,mm0
496
       psubd	  mm3,mm2
496
       psubd	  mm3,mm2
497
 
497
 
498
       cvtpi2ps  xmm1,mm1
498
       cvtpi2ps  xmm1,mm1
499
       movlhps	 xmm1,xmm1
499
       movlhps	 xmm1,xmm1
500
       cvtpi2ps  xmm1,mm3
500
       cvtpi2ps  xmm1,mm3
501
 
501
 
502
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
502
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
503
 
503
 
504
       shufps	 xmm1,xmm1,10110001b
504
       shufps	 xmm1,xmm1,10110001b
505
			     ;xmm1--> | dbx | dby | dex | dey |
505
			     ;xmm1--> | dbx | dby | dex | dey |
506
;1       movups    .dey13,xmm1
506
;1       movups    .dey13,xmm1
507
 
507
 
508
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
508
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
509
       movhlps	 xmm1,xmm1
509
       movhlps	 xmm1,xmm1
510
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
510
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
511
       movq	 .dey13,mm0
511
       movq	 .dey13,mm0
512
       movq	 .dby13,mm1
512
       movq	 .dby13,mm1
513
 
513
 
514
       mov	 ax,.z3
514
       mov	 ax,.z3
515
       sub	 ax,.z1
515
       sub	 ax,.z1
516
       cwde
516
       cwde
517
 
517
 
518
       mov	dx,.x3
518
       mov	dx,.x3
519
       sub	dx,.x1
519
       sub	dx,.x1
520
       movsx	edx,dx
520
       movsx	edx,dx
521
 
521
 
522
       movd	  mm2,[.t_x1]
522
       movd	  mm2,[.t_x1]
523
       movd	  mm3,[.t_x3]
523
       movd	  mm3,[.t_x3]
524
 
524
 
525
       punpcklwd  mm2,mm4
525
       punpcklwd  mm2,mm4
526
       punpcklwd  mm3,mm4
526
       punpcklwd  mm3,mm4
527
       psubd	  mm3,mm2
527
       psubd	  mm3,mm2
528
 
528
 
529
       cvtsi2ss  xmm1,eax
529
       cvtsi2ss  xmm1,eax
530
       movlhps	 xmm1,xmm1
530
       movlhps	 xmm1,xmm1
531
       cvtsi2ss  xmm1,edx
531
       cvtsi2ss  xmm1,edx
532
       shufps	 xmm1,xmm1,00101111b
532
       shufps	 xmm1,xmm1,00101111b
533
       cvtpi2ps  xmm1,mm3
533
       cvtpi2ps  xmm1,mm3
534
 
534
 
535
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
535
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
536
 
536
 
537
       shufps	 xmm1,xmm1,11100001b
537
       shufps	 xmm1,xmm1,11100001b
538
			     ; xmm1--> | dx | dz | dtx | dty |
538
			     ; xmm1--> | dx | dz | dtx | dty |
539
;1       movlps    .dty13,xmm1
539
;1       movlps    .dty13,xmm1
540
;1       movhps    .dz13,xmm1
540
;1       movhps    .dz13,xmm1
541
 
541
 
542
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
542
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
543
       movhlps	 xmm1,xmm1
543
       movhlps	 xmm1,xmm1
544
       cvtps2pi  mm1,xmm1
544
       cvtps2pi  mm1,xmm1
545
       movq	 .dty13,mm0
545
       movq	 .dty13,mm0
546
       movq	 .dz13,mm1
546
       movq	 .dz13,mm1
547
 
547
 
548
else
548
else
549
 
549
 
550
       mov	ax,.x3
550
       mov	ax,.x3
551
       sub	ax,.x1
551
       sub	ax,.x1
552
       cwde
552
       cwde
553
       shl	eax,ROUND
553
       shl	eax,ROUND
554
       cdq
554
       cdq
555
       idiv	ebx
555
       idiv	ebx
556
 ;     mov      .dx13,eax
556
 ;     mov      .dx13,eax
557
       push	 eax
557
       push	 eax
558
 
558
 
559
       mov     ax,.z3
559
       mov     ax,.z3
560
       sub     ax,.z1
560
       sub     ax,.z1
561
       cwde
561
       cwde
562
       shl     eax,CATMULL_SHIFT
562
       shl     eax,CATMULL_SHIFT
563
       cdq
563
       cdq
564
       idiv    ebx
564
       idiv    ebx
565
  ;    mov    .dz13,eax
565
  ;    mov    .dz13,eax
566
       push    eax
566
       push    eax
567
 
567
 
568
 
568
 
569
       mov	ax,word[.b_x3]
569
       mov	ax,word[.b_x3]
570
       sub	ax,word[.b_x1]
570
       sub	ax,word[.b_x1]
571
       cwde
571
       cwde
572
       shl	eax,ROUND
572
       shl	eax,ROUND
573
       cdq
573
       cdq
574
       idiv	ebx
574
       idiv	ebx
575
 ;     mov      .dbx13,eax
575
 ;     mov      .dbx13,eax
576
       push	 eax
576
       push	 eax
577
 
577
 
578
       mov	ax,word[.b_y3]
578
       mov	ax,word[.b_y3]
579
       sub	ax,word[.b_y1]
579
       sub	ax,word[.b_y1]
580
       cwde
580
       cwde
581
       shl	eax,ROUND
581
       shl	eax,ROUND
582
       cdq
582
       cdq
583
       idiv	ebx
583
       idiv	ebx
584
 ;     mov      .dby13,eax
584
 ;     mov      .dby13,eax
585
       push	 eax
585
       push	 eax
586
 
586
 
587
       mov	ax,word[.e_x3]
587
       mov	ax,word[.e_x3]
588
       sub	ax,word[.e_x1]
588
       sub	ax,word[.e_x1]
589
       cwde
589
       cwde
590
       shl	eax,ROUND
590
       shl	eax,ROUND
591
       cdq
591
       cdq
592
       idiv	ebx
592
       idiv	ebx
593
 ;     mov      .dex13,eax
593
 ;     mov      .dex13,eax
594
       push	 eax
594
       push	 eax
595
 
595
 
596
       mov	ax,word[.e_y3]
596
       mov	ax,word[.e_y3]
597
       sub	ax,word[.e_y1]
597
       sub	ax,word[.e_y1]
598
       cwde
598
       cwde
599
       shl	eax,ROUND
599
       shl	eax,ROUND
600
       cdq
600
       cdq
601
       idiv	ebx
601
       idiv	ebx
602
 ;     mov      .dey13,eax
602
 ;     mov      .dey13,eax
603
       push	 eax
603
       push	 eax
604
 
604
 
605
       mov	ax,word[.t_x3]
605
       mov	ax,word[.t_x3]
606
       sub	ax,word[.t_x1]
606
       sub	ax,word[.t_x1]
607
       cwde
607
       cwde
608
       shl	eax,ROUND
608
       shl	eax,ROUND
609
       cdq
609
       cdq
610
       idiv	ebx
610
       idiv	ebx
611
 ;     mov      .dtx13,eax
611
 ;     mov      .dtx13,eax
612
       push	 eax
612
       push	 eax
613
 
613
 
614
       mov	ax,word[.t_y3]
614
       mov	ax,word[.t_y3]
615
       sub	ax,word[.t_y1]
615
       sub	ax,word[.t_y1]
616
       cwde
616
       cwde
617
       shl	eax,ROUND
617
       shl	eax,ROUND
618
       cdq
618
       cdq
619
       idiv	ebx
619
       idiv	ebx
620
 ;     mov      .dty13,eax
620
 ;     mov      .dty13,eax
621
       push	 eax
621
       push	 eax
622
end if
622
end if
623
   .bt_dx13_done:
623
   .bt_dx13_done:
624
 
624
 
625
       mov	bx,.y3	     ; calc delta23
625
       mov	bx,.y3	     ; calc delta23
626
       sub	bx,.y2
626
       sub	bx,.y2
627
       jnz	.bt_dx23_make
627
       jnz	.bt_dx23_make
628
       mov	ecx,8
628
       mov	ecx,8
629
       xor	edx,edx
629
       xor	edx,edx
630
     @@:
630
     @@:
631
       push	edx   ;dword 0
631
       push	edx   ;dword 0
632
       loop	@b
632
       loop	@b
633
       jmp	.bt_dx23_done
633
       jmp	.bt_dx23_done
634
 .bt_dx23_make:
634
 .bt_dx23_make:
635
       movsx	ebx,bx
635
       movsx	ebx,bx
636
 
636
 
637
if Ext>=SSE
637
if Ext>=SSE
638
 
638
 
639
       sub	 esp,32
639
       sub	 esp,32
640
   ;    mov       eax,256
640
   ;    mov       eax,256
641
       cvtsi2ss  xmm4,[i255d]
641
       cvtsi2ss  xmm4,[i255d]
642
       cvtsi2ss  xmm3,ebx	     ;rcps
642
       cvtsi2ss  xmm3,ebx	     ;rcps
643
       divss	 xmm3,xmm4
643
       divss	 xmm3,xmm4
644
       shufps	 xmm3,xmm3,0
644
       shufps	 xmm3,xmm3,0
645
 
645
 
646
       movd	 mm0,[.b_x2]
646
       movd	 mm0,[.b_x2]
647
       movd	 mm1,[.b_x3]
647
       movd	 mm1,[.b_x3]
648
       movd	 mm2,[.e_x2]
648
       movd	 mm2,[.e_x2]
649
       movd	 mm3,[.e_x3]
649
       movd	 mm3,[.e_x3]
650
 
650
 
651
       pxor	  mm4,mm4
651
       pxor	  mm4,mm4
652
       punpcklwd  mm0,mm4
652
       punpcklwd  mm0,mm4
653
       punpcklwd  mm1,mm4
653
       punpcklwd  mm1,mm4
654
       punpcklwd  mm2,mm4
654
       punpcklwd  mm2,mm4
655
       punpcklwd  mm3,mm4
655
       punpcklwd  mm3,mm4
656
 
656
 
657
       psubd	  mm1,mm0
657
       psubd	  mm1,mm0
658
       psubd	  mm3,mm2
658
       psubd	  mm3,mm2
659
 
659
 
660
       cvtpi2ps  xmm1,mm1
660
       cvtpi2ps  xmm1,mm1
661
       movlhps	 xmm1,xmm1
661
       movlhps	 xmm1,xmm1
662
       cvtpi2ps  xmm1,mm3
662
       cvtpi2ps  xmm1,mm3
663
 
663
 
664
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
664
       divps	 xmm1,xmm3   ;xmm1--> | dby | dbx | dey | dex |
665
 
665
 
666
       shufps	 xmm1,xmm1,10110001b
666
       shufps	 xmm1,xmm1,10110001b
667
			     ;xmm1--> | dbx | dby | dex | dey |
667
			     ;xmm1--> | dbx | dby | dex | dey |
668
;1       movups    .dey23,xmm1
668
;1       movups    .dey23,xmm1
669
 
669
 
670
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
670
       cvtps2pi  mm0,xmm1 ;mm0,xmm1          ; mm0 -> 2 delta dwords
671
       movhlps	 xmm1,xmm1
671
       movhlps	 xmm1,xmm1
672
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
672
       cvtps2pi  mm1,xmm1 ;mm1,xmm1
673
       movq	 .dey23,mm0
673
       movq	 .dey23,mm0
674
       movq	 .dby23,mm1
674
       movq	 .dby23,mm1
675
 
675
 
676
       mov	 ax,.z3
676
       mov	 ax,.z3
677
       sub	 ax,.z2
677
       sub	 ax,.z2
678
       cwde
678
       cwde
679
 
679
 
680
       mov	dx,.x3
680
       mov	dx,.x3
681
       sub	dx,.x2
681
       sub	dx,.x2
682
       movsx	edx,dx
682
       movsx	edx,dx
683
 
683
 
684
       movd	  mm2,[.t_x2]
684
       movd	  mm2,[.t_x2]
685
       movd	  mm3,[.t_x3]
685
       movd	  mm3,[.t_x3]
686
 
686
 
687
       punpcklwd  mm2,mm4
687
       punpcklwd  mm2,mm4
688
       punpcklwd  mm3,mm4
688
       punpcklwd  mm3,mm4
689
       psubd	  mm3,mm2
689
       psubd	  mm3,mm2
690
 
690
 
691
       cvtsi2ss  xmm1,eax
691
       cvtsi2ss  xmm1,eax
692
       movlhps	 xmm1,xmm1
692
       movlhps	 xmm1,xmm1
693
       cvtsi2ss  xmm1,edx
693
       cvtsi2ss  xmm1,edx
694
       shufps	 xmm1,xmm1,00101111b
694
       shufps	 xmm1,xmm1,00101111b
695
       cvtpi2ps  xmm1,mm3
695
       cvtpi2ps  xmm1,mm3
696
 
696
 
697
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
697
       divps	 xmm1,xmm3   ; xmm1--> | dx | dz | dty | dtx |
698
 
698
 
699
       shufps	 xmm1,xmm1,11100001b
699
       shufps	 xmm1,xmm1,11100001b
700
			    ; xmm1--> | dx | dz | dtx | dty |
700
			    ; xmm1--> | dx | dz | dtx | dty |
701
;       movlps    .dty23,xmm1
701
;       movlps    .dty23,xmm1
702
;       movhps    .dz23,xmm1
702
;       movhps    .dz23,xmm1
703
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
703
       cvtps2pi  mm0,xmm1    ; mm0 -> 2 delta dwords  | dtx | dty |
704
       movhlps	 xmm1,xmm1
704
       movhlps	 xmm1,xmm1
705
       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx  |  dz |
705
       cvtps2pi  mm1,xmm1    ; mm1 --> 2 delta dwords | dx  |  dz |
706
       movq	 .dty23,mm0
706
       movq	 .dty23,mm0
707
       movq	 .dz23,mm1
707
       movq	 .dz23,mm1
708
 
708
 
709
 
709
 
710
else
710
else
711
       mov	ax,.x3
711
       mov	ax,.x3
712
       sub	ax,.x2
712
       sub	ax,.x2
713
       cwde
713
       cwde
714
       shl	eax,ROUND
714
       shl	eax,ROUND
715
       cdq
715
       cdq
716
       idiv	ebx
716
       idiv	ebx
717
 ;     mov      .dx23,eax
717
 ;     mov      .dx23,eax
718
       push	 eax
718
       push	 eax
719
 
719
 
720
       mov     ax,.z3
720
       mov     ax,.z3
721
       sub     ax,.z2
721
       sub     ax,.z2
722
       cwde
722
       cwde
723
       shl     eax,CATMULL_SHIFT
723
       shl     eax,CATMULL_SHIFT
724
       cdq
724
       cdq
725
       idiv    ebx
725
       idiv    ebx
726
     ; mov     .dz23,eax
726
     ; mov     .dz23,eax
727
       push    eax
727
       push    eax
728
 
728
 
729
       mov	ax,word[.b_x3]
729
       mov	ax,word[.b_x3]
730
       sub	ax,word[.b_x2]
730
       sub	ax,word[.b_x2]
731
       cwde
731
       cwde
732
       shl	eax,ROUND
732
       shl	eax,ROUND
733
       cdq
733
       cdq
734
       idiv	ebx
734
       idiv	ebx
735
 ;     mov      .dbx23,eax
735
 ;     mov      .dbx23,eax
736
       push	 eax
736
       push	 eax
737
 
737
 
738
       mov	ax,word[.b_y3]
738
       mov	ax,word[.b_y3]
739
       sub	ax,word[.b_y2]
739
       sub	ax,word[.b_y2]
740
       cwde
740
       cwde
741
       shl	eax,ROUND
741
       shl	eax,ROUND
742
       cdq
742
       cdq
743
       idiv	ebx
743
       idiv	ebx
744
 ;     mov      .dby23,eax
744
 ;     mov      .dby23,eax
745
       push	 eax
745
       push	 eax
746
 
746
 
747
       mov	ax,word[.e_x3]
747
       mov	ax,word[.e_x3]
748
       sub	ax,word[.e_x2]
748
       sub	ax,word[.e_x2]
749
       cwde
749
       cwde
750
       shl	eax,ROUND
750
       shl	eax,ROUND
751
       cdq
751
       cdq
752
       idiv	ebx
752
       idiv	ebx
753
 ;     mov      .dex23,eax
753
 ;     mov      .dex23,eax
754
       push	 eax
754
       push	 eax
755
 
755
 
756
       mov	ax,word[.e_y3]
756
       mov	ax,word[.e_y3]
757
       sub	ax,word[.e_y2]
757
       sub	ax,word[.e_y2]
758
       cwde
758
       cwde
759
       shl	eax,ROUND
759
       shl	eax,ROUND
760
       cdq
760
       cdq
761
       idiv	ebx
761
       idiv	ebx
762
 ;     mov      .dey23,eax
762
 ;     mov      .dey23,eax
763
       push	 eax
763
       push	 eax
764
 
764
 
765
 
765
 
766
       mov	ax,word[.t_x3]
766
       mov	ax,word[.t_x3]
767
       sub	ax,word[.t_x2]
767
       sub	ax,word[.t_x2]
768
       cwde
768
       cwde
769
       shl	eax,ROUND
769
       shl	eax,ROUND
770
       cdq
770
       cdq
771
       idiv	ebx
771
       idiv	ebx
772
 ;     mov      .dtx23,eax
772
 ;     mov      .dtx23,eax
773
       push	 eax
773
       push	 eax
774
 
774
 
775
       mov	ax,word[.t_y3]
775
       mov	ax,word[.t_y3]
776
       sub	ax,word[.t_y2]
776
       sub	ax,word[.t_y2]
777
       cwde
777
       cwde
778
       shl	eax,ROUND
778
       shl	eax,ROUND
779
       cdq
779
       cdq
780
       idiv	ebx
780
       idiv	ebx
781
 ;     mov      .dty23,eax
781
 ;     mov      .dty23,eax
782
       push	 eax
782
       push	 eax
783
end if
783
end if
784
      ;  sub     esp,40
784
      ;  sub     esp,40
785
   .bt_dx23_done:
785
   .bt_dx23_done:
786
       sub	 esp,64
786
       sub	 esp,64
787
 
787
 
788
       movsx	eax,.x1
788
       movsx	eax,.x1
789
       shl	eax,ROUND
789
       shl	eax,ROUND
790
       mov	.cx1,eax
790
       mov	.cx1,eax
791
       mov	.cx2,eax
791
       mov	.cx2,eax
792
  ;     push     eax
792
  ;     push     eax
793
  ;     push     eax
793
  ;     push     eax
794
 
794
 
795
       movsx	ebx,word[.b_x1]
795
       movsx	ebx,word[.b_x1]
796
       shl	ebx,ROUND
796
       shl	ebx,ROUND
797
       mov	.cbx1,ebx
797
       mov	.cbx1,ebx
798
       mov	.cbx2,ebx
798
       mov	.cbx2,ebx
799
      ; push     ebx
799
      ; push     ebx
800
      ; push     ebx
800
      ; push     ebx
801
 
801
 
802
       movsx	ecx,word[.b_y1]
802
       movsx	ecx,word[.b_y1]
803
       shl	ecx,ROUND
803
       shl	ecx,ROUND
804
       mov	.cby1,ecx
804
       mov	.cby1,ecx
805
       mov	.cby2,ecx
805
       mov	.cby2,ecx
806
      ; push     ecx
806
      ; push     ecx
807
      ; push     ecx
807
      ; push     ecx
808
 
808
 
809
       movsx	edx,word[.e_x1]
809
       movsx	edx,word[.e_x1]
810
       shl	edx,ROUND
810
       shl	edx,ROUND
811
       mov	.cex1,edx
811
       mov	.cex1,edx
812
       mov	.cex2,edx
812
       mov	.cex2,edx
813
    ;   push     edx
813
    ;   push     edx
814
    ;   push     edx
814
    ;   push     edx
815
 
815
 
816
       movsx	eax,word[.e_y1]
816
       movsx	eax,word[.e_y1]
817
       shl	eax,ROUND
817
       shl	eax,ROUND
818
       mov	.cey1,eax
818
       mov	.cey1,eax
819
       mov	.cey2,eax
819
       mov	.cey2,eax
820
    ;   push     eax
820
    ;   push     eax
821
    ;   push     eax
821
    ;   push     eax
822
 
822
 
823
       movsx	ebx,.z1
823
       movsx	ebx,.z1
824
       shl	ebx,CATMULL_SHIFT
824
       shl	ebx,CATMULL_SHIFT
825
       mov	.cz1,ebx
825
       mov	.cz1,ebx
826
       mov	.cz2,ebx
826
       mov	.cz2,ebx
827
   ;    push     ebx
827
   ;    push     ebx
828
   ;    push     ebx
828
   ;    push     ebx
829
 
829
 
830
      ; sub      esp,16
830
      ; sub      esp,16
831
       movsx	ecx,word[.t_x1]
831
       movsx	ecx,word[.t_x1]
832
       shl	ecx,ROUND
832
       shl	ecx,ROUND
833
       mov	.ctx1,ecx
833
       mov	.ctx1,ecx
834
       mov	.ctx2,ecx
834
       mov	.ctx2,ecx
835
       ;push     ecx
835
       ;push     ecx
836
       ;push     ecx
836
       ;push     ecx
837
 
837
 
838
       movsx	edx,word[.t_y1]
838
       movsx	edx,word[.t_y1]
839
       shl	edx,ROUND
839
       shl	edx,ROUND
840
       mov	.cty1,edx
840
       mov	.cty1,edx
841
       mov	.cty2,edx
841
       mov	.cty2,edx
842
      ; push     edx
842
      ; push     edx
843
      ; push     edx
843
      ; push     edx
844
 
844
 
845
if Ext >= SSE2
845
if Ext >= SSE2
846
       movups  xmm0,.cby1
846
       movups  xmm0,.cby1
847
       movups  xmm1,.cty1
847
       movups  xmm1,.cty1
848
       movups  xmm2,.cby2
848
       movups  xmm2,.cby2
849
       movups  xmm3,.cty2
849
       movups  xmm3,.cty2
850
       movups  xmm4,.dby13
850
       movups  xmm4,.dby13
851
       movups  xmm5,.dty13
851
       movups  xmm5,.dty13
852
       movups  xmm6,.dby12
852
       movups  xmm6,.dby12
853
       movups  xmm7,.dty12
853
       movups  xmm7,.dty12
854
       .scby1  equ [edi]
854
       .scby1  equ [edi]
855
       .scty1  equ [edi+16]
855
       .scty1  equ [edi+16]
856
       .scby2  equ [edi+32]
856
       .scby2  equ [edi+32]
857
       .scty2  equ [edi+48]
857
       .scty2  equ [edi+48]
858
       .sdby13 equ [edi+64]
858
       .sdby13 equ [edi+64]
859
       .sdty13 equ [edi+80]
859
       .sdty13 equ [edi+80]
860
       .sdby12 equ [edi+96]
860
       .sdby12 equ [edi+96]
861
       .sdty12 equ [edi+128]
861
       .sdty12 equ [edi+128]
862
       push    edi
862
       push    edi
863
       mov     edi,sse_repository
863
       mov     edi,sse_repository
864
       movaps  .scby1,xmm0
864
       movaps  .scby1,xmm0
865
       movaps  .scty1,xmm1
865
       movaps  .scty1,xmm1
866
       movaps  .scby2,xmm2
866
       movaps  .scby2,xmm2
867
       movaps  .scty2,xmm3
867
       movaps  .scty2,xmm3
868
       movaps  .sdby13,xmm4
868
       movaps  .sdby13,xmm4
869
       movaps  .sdty13,xmm5
869
       movaps  .sdty13,xmm5
870
       movaps  .sdby12,xmm6
870
       movaps  .sdby12,xmm6
871
       movaps  .sdty12,xmm7
871
       movaps  .sdty12,xmm7
872
       pop     edi
872
       pop     edi
873
 
873
 
874
end if
874
end if
875
       movsx	ecx,.y1
875
       movsx	ecx,.y1
876
       cmp	cx,.y2
876
       cmp	cx,.y2
877
       jge	.loop12_done
877
       jge	.loop12_done
878
  .loop12:
878
  .loop12:
879
;if Ext >= SSE2
879
;if Ext >= SSE2
880
;       fxsave  [sse_repository]
880
;       fxsave  [sse_repository]
881
;end if
881
;end if
882
       call	.call_line
882
       call	.call_line
883
if Ext >= SSE2
883
if Ext >= SSE2
884
;       fxrstor [sse_repository]
884
;       fxrstor [sse_repository]
885
       movups  xmm0,.cby1
885
       movups  xmm0,.cby1
886
       movups  xmm1,.cty1
886
       movups  xmm1,.cty1
887
       movups  xmm2,.cby2
887
       movups  xmm2,.cby2
888
       movups  xmm3,.cty2
888
       movups  xmm3,.cty2
889
    ;   movups  xmm4,.dby13
889
    ;   movups  xmm4,.dby13
890
    ;   movups  xmm5,.dty13
890
    ;   movups  xmm5,.dty13
891
    ;   movups  xmm6,.dby12
891
    ;   movups  xmm6,.dby12
892
    ;   movups  xmm7,.dty12
892
    ;   movups  xmm7,.dty12
893
    ;   paddd   xmm0,xmm4
893
    ;   paddd   xmm0,xmm4
894
    ;   paddd   xmm1,xmm5
894
    ;   paddd   xmm1,xmm5
895
    ;   paddd   xmm2,xmm6
895
    ;   paddd   xmm2,xmm6
896
    ;   paddd   xmm3,xmm7
896
    ;   paddd   xmm3,xmm7
897
       push    edi
897
       push    edi
898
       mov     edi,sse_repository
898
       mov     edi,sse_repository
899
       paddd   xmm0,.sdby13
899
       paddd   xmm0,.sdby13
900
       paddd   xmm1,.sdty13
900
       paddd   xmm1,.sdty13
901
       paddd   xmm2,.sdby12
901
       paddd   xmm2,.sdby12
902
       paddd   xmm3,.sdty12
902
       paddd   xmm3,.sdty12
903
       pop     edi
903
       pop     edi
904
       movups  .cby1,xmm0
904
       movups  .cby1,xmm0
905
       movups  .cty1,xmm1
905
       movups  .cty1,xmm1
906
       movups  .cby2,xmm2
906
       movups  .cby2,xmm2
907
       movups  .cty2,xmm3
907
       movups  .cty2,xmm3
908
end if
908
end if
909
 
909
 
910
if (Ext = MMX) | (Ext = SSE)
910
if (Ext = MMX) | (Ext = SSE)
911
       movq	mm0,.cby2
911
       movq	mm0,.cby2
912
       movq	mm1,.cby1
912
       movq	mm1,.cby1
913
       movq	mm2,.cey2
913
       movq	mm2,.cey2
914
       movq	mm3,.cey1
914
       movq	mm3,.cey1
915
       movq	mm4,.cty1
915
       movq	mm4,.cty1
916
       movq	mm5,.cty2
916
       movq	mm5,.cty2
917
       movq	mm6,.cz1
917
       movq	mm6,.cz1
918
       movq	mm7,.cz2
918
       movq	mm7,.cz2
919
       paddd	mm0,.dby12
919
       paddd	mm0,.dby12
920
       paddd	mm1,.dby13
920
       paddd	mm1,.dby13
921
       paddd	mm2,.dey12
921
       paddd	mm2,.dey12
922
       paddd	mm3,.dey13
922
       paddd	mm3,.dey13
923
       paddd	mm4,.dty13
923
       paddd	mm4,.dty13
924
       paddd	mm5,.dty12
924
       paddd	mm5,.dty12
925
       paddd	mm6,.dz13
925
       paddd	mm6,.dz13
926
       paddd	mm7,.dz12
926
       paddd	mm7,.dz12
927
       movq	.cby2,mm0
927
       movq	.cby2,mm0
928
       movq	.cby1,mm1
928
       movq	.cby1,mm1
929
       movq	.cey1,mm3
929
       movq	.cey1,mm3
930
       movq	.cey2,mm2
930
       movq	.cey2,mm2
931
       movq	.cty1,mm4
931
       movq	.cty1,mm4
932
       movq	.cty2,mm5
932
       movq	.cty2,mm5
933
       movq	.cz1,mm6
933
       movq	.cz1,mm6
934
       movq	.cz2,mm7
934
       movq	.cz2,mm7
935
end if
935
end if
936
if Ext = NON
936
if Ext = NON
937
       mov	edx,.dbx13
937
       mov	edx,.dbx13
938
       add	.cbx1,edx
938
       add	.cbx1,edx
939
       mov	eax,.dbx12
939
       mov	eax,.dbx12
940
       add	.cbx2,eax
940
       add	.cbx2,eax
941
       mov	ebx,.dby13
941
       mov	ebx,.dby13
942
       add	.cby1,ebx
942
       add	.cby1,ebx
943
       mov	edx,.dby12
943
       mov	edx,.dby12
944
       add	.cby2,edx
944
       add	.cby2,edx
945
 
945
 
946
       mov	eax,.dex13
946
       mov	eax,.dex13
947
       add	.cex1,eax
947
       add	.cex1,eax
948
       mov	ebx,.dex12
948
       mov	ebx,.dex12
949
       add	.cex2,ebx
949
       add	.cex2,ebx
950
       mov	edx,.dey13
950
       mov	edx,.dey13
951
       add	.cey1,edx
951
       add	.cey1,edx
952
       mov	eax,.dey12
952
       mov	eax,.dey12
953
       add	.cey2,eax
953
       add	.cey2,eax
954
 
954
 
955
       mov	eax,.dtx13
955
       mov	eax,.dtx13
956
       add	.ctx1,eax
956
       add	.ctx1,eax
957
       mov	ebx,.dtx12
957
       mov	ebx,.dtx12
958
       add	.ctx2,ebx
958
       add	.ctx2,ebx
959
       mov	edx,.dty13
959
       mov	edx,.dty13
960
       add	.cty1,edx
960
       add	.cty1,edx
961
       mov	eax,.dty12
961
       mov	eax,.dty12
962
       add	.cty2,eax
962
       add	.cty2,eax
963
 
963
 
964
       mov	eax,.dx13
964
       mov	eax,.dx13
965
       add	.cx1,eax
965
       add	.cx1,eax
966
       mov	ebx,.dx12
966
       mov	ebx,.dx12
967
       add	.cx2,ebx
967
       add	.cx2,ebx
968
       mov	ebx,.dz13
968
       mov	ebx,.dz13
969
       add	.cz1,ebx
969
       add	.cz1,ebx
970
       mov	edx,.dz12
970
       mov	edx,.dz12
971
       add	.cz2,edx
971
       add	.cz2,edx
972
end if
972
end if
973
       inc	ecx
973
       inc	ecx
974
       cmp	cx,.y2
974
       cmp	cx,.y2
975
       jl	.loop12
975
       jl	.loop12
976
    .loop12_done:
976
    .loop12_done:
977
 
977
 
978
       movsx	ecx,.y2
978
       movsx	ecx,.y2
979
       cmp	cx,.y3
979
       cmp	cx,.y3
980
       jge	.loop23_done
980
       jge	.loop23_done
981
 
981
 
982
 
982
 
983
       movsx	eax,.z2
983
       movsx	eax,.z2
984
       shl	eax,CATMULL_SHIFT
984
       shl	eax,CATMULL_SHIFT
985
       mov	.cz2,eax
985
       mov	.cz2,eax
986
 
986
 
987
       movsx	ebx,.x2
987
       movsx	ebx,.x2
988
       shl	ebx,ROUND
988
       shl	ebx,ROUND
989
       mov	.cx2,ebx
989
       mov	.cx2,ebx
990
 
990
 
991
       movzx	edx,word[.b_x2]
991
       movzx	edx,word[.b_x2]
992
       shl	edx,ROUND
992
       shl	edx,ROUND
993
       mov	.cbx2,edx
993
       mov	.cbx2,edx
994
 
994
 
995
       movzx	eax,word[.b_y2]
995
       movzx	eax,word[.b_y2]
996
       shl	eax,ROUND
996
       shl	eax,ROUND
997
       mov	.cby2,eax
997
       mov	.cby2,eax
998
 
998
 
999
       movzx	ebx,word[.e_x2]
999
       movzx	ebx,word[.e_x2]
1000
       shl	ebx,ROUND
1000
       shl	ebx,ROUND
1001
       mov	.cex2,ebx
1001
       mov	.cex2,ebx
1002
 
1002
 
1003
       movzx	edx,word[.e_y2]
1003
       movzx	edx,word[.e_y2]
1004
       shl	edx,ROUND
1004
       shl	edx,ROUND
1005
       mov	.cey2,edx
1005
       mov	.cey2,edx
1006
 
1006
 
1007
       movzx	eax,word[.t_x2]
1007
       movzx	eax,word[.t_x2]
1008
       shl	eax,ROUND
1008
       shl	eax,ROUND
1009
       mov	.ctx2,eax
1009
       mov	.ctx2,eax
1010
 
1010
 
1011
       movzx	ebx,word[.t_y2]
1011
       movzx	ebx,word[.t_y2]
1012
       shl	ebx,ROUND
1012
       shl	ebx,ROUND
1013
       mov	.cty2,ebx
1013
       mov	.cty2,ebx
1014
if Ext >= SSE2
1014
if Ext >= SSE2
1015
       movups  xmm2,.cby2
1015
       movups  xmm2,.cby2
1016
       movups  xmm3,.cty2
1016
       movups  xmm3,.cty2
1017
   ;    movups  xmm4,.dby13
1017
   ;    movups  xmm4,.dby13
1018
   ;    movups  xmm5,.dty13
1018
   ;    movups  xmm5,.dty13
1019
       movups  xmm6,.dby23
1019
       movups  xmm6,.dby23
1020
       movups  xmm7,.dty23
1020
       movups  xmm7,.dty23
1021
;       .scby1  equ [edi]
1021
;       .scby1  equ [edi]
1022
;       .scty1  equ [edi+16]
1022
;       .scty1  equ [edi+16]
1023
;       .scby2  equ [edi+32]
1023
;       .scby2  equ [edi+32]
1024
;       .scty2  equ [edi+48]
1024
;       .scty2  equ [edi+48]
1025
;       .sdby13 equ [edi+64]
1025
;       .sdby13 equ [edi+64]
1026
;       .sdty13 equ [edi+80]
1026
;       .sdty13 equ [edi+80]
1027
       .sdby23 equ [edi+160]
1027
       .sdby23 equ [edi+160]
1028
       .sdty23 equ [edi+192]
1028
       .sdty23 equ [edi+192]
1029
       push    edi
1029
       push    edi
1030
       mov     edi,sse_repository
1030
       mov     edi,sse_repository
1031
;       movaps  .scby1,xmm0
1031
;       movaps  .scby1,xmm0
1032
;       movaps  .scty1,xmm1
1032
;       movaps  .scty1,xmm1
1033
       movaps  .scby2,xmm2
1033
       movaps  .scby2,xmm2
1034
       movaps  .scty2,xmm3
1034
       movaps  .scty2,xmm3
1035
;       movaps  .sdby13,xmm4
1035
;       movaps  .sdby13,xmm4
1036
;       movaps  .sdty13,xmm5
1036
;       movaps  .sdty13,xmm5
1037
       movaps  .sdby23,xmm6
1037
       movaps  .sdby23,xmm6
1038
       movaps  .sdty23,xmm7
1038
       movaps  .sdty23,xmm7
1039
       pop     edi
1039
       pop     edi
1040
 
1040
 
1041
end if
1041
end if
1042
 
1042
 
1043
     .loop23:
1043
     .loop23:
1044
;if Ext >= SSE2
1044
;if Ext >= SSE2
1045
;       fxsave  [sse_repository]
1045
;       fxsave  [sse_repository]
1046
;end if
1046
;end if
1047
       call	.call_line
1047
       call	.call_line
1048
 
1048
 
1049
if Ext >= SSE2
1049
if Ext >= SSE2
1050
 
1050
 
1051
       movups  xmm0,.cby1
1051
       movups  xmm0,.cby1
1052
       movups  xmm1,.cty1
1052
       movups  xmm1,.cty1
1053
       movups  xmm2,.cby2
1053
       movups  xmm2,.cby2
1054
       movups  xmm3,.cty2
1054
       movups  xmm3,.cty2
1055
 
1055
 
1056
 
1056
 
1057
       push    edi
1057
       push    edi
1058
       mov     edi,sse_repository
1058
       mov     edi,sse_repository
1059
       paddd   xmm0,.sdby13
1059
       paddd   xmm0,.sdby13
1060
       paddd   xmm1,.sdty13
1060
       paddd   xmm1,.sdty13
1061
       paddd   xmm2,.sdby23
1061
       paddd   xmm2,.sdby23
1062
       paddd   xmm3,.sdty23
1062
       paddd   xmm3,.sdty23
1063
       pop     edi
1063
       pop     edi
1064
       movups  .cby1,xmm0
1064
       movups  .cby1,xmm0
1065
       movups  .cty1,xmm1
1065
       movups  .cty1,xmm1
1066
       movups  .cby2,xmm2
1066
       movups  .cby2,xmm2
1067
       movups  .cty2,xmm3
1067
       movups  .cty2,xmm3
1068
 
1068
 
1069
 
1069
 
1070
 
1070
 
1071
 
1071
 
1072
;       fxrstor [sse_repository]
1072
;       fxrstor [sse_repository]
1073
;       movups  xmm0,.cby1
1073
;       movups  xmm0,.cby1
1074
;       movups  xmm1,.cty1
1074
;       movups  xmm1,.cty1
1075
;       movups  xmm2,.cby2
1075
;       movups  xmm2,.cby2
1076
;       movups  xmm3,.cty2
1076
;       movups  xmm3,.cty2
1077
;       movups  xmm4,.dby13
1077
;       movups  xmm4,.dby13
1078
;       movups  xmm5,.dty13
1078
;       movups  xmm5,.dty13
1079
;       movups  xmm6,.dby23
1079
;       movups  xmm6,.dby23
1080
;       movups  xmm7,.dty23
1080
;       movups  xmm7,.dty23
1081
;       paddd   xmm0,xmm4
1081
;       paddd   xmm0,xmm4
1082
;       paddd   xmm1,xmm5
1082
;       paddd   xmm1,xmm5
1083
;       paddd   xmm2,xmm6
1083
;       paddd   xmm2,xmm6
1084
 ;      paddd   xmm3,xmm7
1084
 ;      paddd   xmm3,xmm7
1085
 ;      movups  .cby1,xmm0
1085
 ;      movups  .cby1,xmm0
1086
 ;      movups  .cty1,xmm1
1086
 ;      movups  .cty1,xmm1
1087
 ;      movups  .cby2,xmm2
1087
 ;      movups  .cby2,xmm2
1088
 ;      movups  .cty2,xmm3
1088
 ;      movups  .cty2,xmm3
1089
;
1089
;
1090
end if
1090
end if
1091
if (Ext = MMX) | (Ext = SSE)
1091
if (Ext = MMX) | (Ext = SSE)
1092
       movq	mm0,.cby2
1092
       movq	mm0,.cby2
1093
       movq	mm1,.cby1
1093
       movq	mm1,.cby1
1094
       movq	mm2,.cey2
1094
       movq	mm2,.cey2
1095
       movq	mm3,.cey1
1095
       movq	mm3,.cey1
1096
       movq	mm4,.cty1
1096
       movq	mm4,.cty1
1097
       movq	mm5,.cty2
1097
       movq	mm5,.cty2
1098
       movq	mm6,.cz1
1098
       movq	mm6,.cz1
1099
       movq	mm7,.cz2
1099
       movq	mm7,.cz2
1100
       paddd	mm0,.dby23
1100
       paddd	mm0,.dby23
1101
       paddd	mm1,.dby13
1101
       paddd	mm1,.dby13
1102
       paddd	mm2,.dey23
1102
       paddd	mm2,.dey23
1103
       paddd	mm3,.dey13
1103
       paddd	mm3,.dey13
1104
       paddd	mm4,.dty13
1104
       paddd	mm4,.dty13
1105
       paddd	mm5,.dty23
1105
       paddd	mm5,.dty23
1106
       paddd	mm6,.dz13
1106
       paddd	mm6,.dz13
1107
       paddd	mm7,.dz23
1107
       paddd	mm7,.dz23
1108
       movq	.cby2,mm0
1108
       movq	.cby2,mm0
1109
       movq	.cby1,mm1
1109
       movq	.cby1,mm1
1110
       movq	.cey2,mm2
1110
       movq	.cey2,mm2
1111
       movq	.cey1,mm3
1111
       movq	.cey1,mm3
1112
       movq	.cty1,mm4
1112
       movq	.cty1,mm4
1113
       movq	.cty2,mm5
1113
       movq	.cty2,mm5
1114
       movq	.cz1,mm6
1114
       movq	.cz1,mm6
1115
       movq	.cz2,mm7
1115
       movq	.cz2,mm7
1116
end if
1116
end if
1117
If Ext = NON
1117
If Ext = NON
1118
       mov	edx,.dbx13
1118
       mov	edx,.dbx13
1119
       add	.cbx1,edx
1119
       add	.cbx1,edx
1120
       mov	eax,.dbx23
1120
       mov	eax,.dbx23
1121
       add	.cbx2,eax
1121
       add	.cbx2,eax
1122
       mov	ebx,.dby13
1122
       mov	ebx,.dby13
1123
       add	.cby1,ebx
1123
       add	.cby1,ebx
1124
       mov	edx,.dby23
1124
       mov	edx,.dby23
1125
       add	.cby2,edx
1125
       add	.cby2,edx
1126
 
1126
 
1127
       mov	eax,.dex13
1127
       mov	eax,.dex13
1128
       add	.cex1,eax
1128
       add	.cex1,eax
1129
       mov	ebx,.dex23
1129
       mov	ebx,.dex23
1130
       add	.cex2,ebx
1130
       add	.cex2,ebx
1131
       mov	edx,.dey13
1131
       mov	edx,.dey13
1132
       add	.cey1,edx
1132
       add	.cey1,edx
1133
       mov	eax,.dey23
1133
       mov	eax,.dey23
1134
       add	.cey2,eax
1134
       add	.cey2,eax
1135
 
1135
 
1136
       mov	eax,.dx13
1136
       mov	eax,.dx13
1137
       add	.cx1,eax
1137
       add	.cx1,eax
1138
       mov	ebx,.dx23
1138
       mov	ebx,.dx23
1139
       add	.cx2,ebx
1139
       add	.cx2,ebx
1140
       mov	ebx,.dz13
1140
       mov	ebx,.dz13
1141
       add	.cz1,ebx
1141
       add	.cz1,ebx
1142
       mov	edx,.dz23
1142
       mov	edx,.dz23
1143
       add	.cz2,edx
1143
       add	.cz2,edx
1144
 
1144
 
1145
       mov	eax,.dtx13
1145
       mov	eax,.dtx13
1146
       add	.ctx1,eax
1146
       add	.ctx1,eax
1147
       mov	ebx,.dtx23
1147
       mov	ebx,.dtx23
1148
       add	.ctx2,ebx
1148
       add	.ctx2,ebx
1149
       mov	edx,.dty13
1149
       mov	edx,.dty13
1150
       add	.cty1,edx
1150
       add	.cty1,edx
1151
       mov	eax,.dty23
1151
       mov	eax,.dty23
1152
       add	.cty2,eax
1152
       add	.cty2,eax
1153
end if
1153
end if
1154
       inc	ecx
1154
       inc	ecx
1155
       cmp	cx,.y3
1155
       cmp	cx,.y3
1156
       jl	.loop23
1156
       jl	.loop23
1157
    .loop23_done:
1157
    .loop23_done:
1158
 
1158
 
1159
       mov	esp,ebp
1159
       mov	esp,ebp
1160
ret   50
1160
ret   50
1161
 
1161
 
1162
;------- .call_line: hand one scanline over to bump_tex_line_z ----------
; Local helper, entered with `call .call_line` from .loop12 and .loop23.
; It saves all general registers (pushad), pushes the current edge values
; as stack arguments, loads eax/ebx with .cx1/.cx2 shifted right by ROUND,
; calls bump_tex_line_z and restores the registers (popad) before `ret`.
; in:  ecx - pushed last; the loops keep the current scanline (y) in it
;      SSE2 build only: xmm0..xmm3 as loaded by the loops from
;      .cby1, .cty1, .cby2 and .cty2
; NOTE(review): the push order mirrors the [ebp+4]..[ebp+76] equates listed
;      under bump_tex_line_z - keep both in sync when changing either.
; NOTE(review): popad follows the call directly, so bump_tex_line_z is
;      presumably expected to remove its own stack arguments - confirm.
; NOTE(review): the SSE2 path leaves xmm0..xmm3 shuffled; the loops reload
;      them from memory after every call.
.call_line:
1162
.call_line:
1163
 
1163
 
1164
       pushad
1164
       pushad
1165
       ; xmm0= cby1,cbx1,cz1,cx1
1165
       ; xmm0= cby1,cbx1,cz1,cx1
1166
       ; xmm1= cty1,ctx1,cey1,cex1
1166
       ; xmm1= cty1,ctx1,cey1,cex1
1167
       ; xmm2/xmm3 - presumably the same layouts for the second edge
       ; (loaded from .cby2 / .cty2 by the loops) - TODO confirm
if Ext >= SSE2
1167
if Ext >= SSE2
1168
       ; SSE2 replacement for the `push .cty1 / push .ctx1` pair of the
       ; else branch: reserve 8 bytes, then store the low half of xmm1.
       ; shufps with 10110001b swaps the two dwords inside each 64-bit half,
       ; so the x value lands at the lower address, as with the pushes.
       sub	esp,8
1168
       sub	esp,8
1169
       shufps	xmm1,xmm1,10110001b
1169
       shufps	xmm1,xmm1,10110001b
1170
       shufps	xmm3,xmm3,10110001b
1170
       shufps	xmm3,xmm3,10110001b
1171
       movlps	[esp],xmm1
1171
       movlps	[esp],xmm1
1172
else
1172
else
1173
       push	dword .cty1
1173
       push	dword .cty1
1174
       push	.ctx1
1174
       push	.ctx1
1175
end if
1175
end if
1176
       push	dword .cz1
1176
       push	dword .cz1
1177
if Ext>=SSE2
1177
if Ext>=SSE2
1178
       ; SSE2 replacement for `push .cty2 / push .ctx2`: low half of the
       ; already shuffled xmm3.
       sub	esp,8
1178
       sub	esp,8
1179
       movlps	[esp],xmm3
1179
       movlps	[esp],xmm3
1180
else
1180
else
1181
       push	dword .cty2
1181
       push	dword .cty2
1182
       push	.ctx2
1182
       push	.ctx2
1183
end if
1183
end if
1184
       push	dword .cz2
1184
       push	dword .cz2
1185
if Ext>=SSE2
1185
if Ext>=SSE2
1186
       ; SSE2 replacement for the eight pushes of the else branch below.
       ; 32 bytes are reserved and filled from the highest slot down:
       ;   [esp+24] <- high half of xmm3   (cex2/cey2 slots)
       ;   [esp+16] <- low  half of xmm2   (cbx2/cby2 slots)
       ;   [esp+8]  <- high half of xmm1   (cex1/cey1 slots)
       ;   [esp]    <- low  half of xmm0   (cbx1/cby1 slots)
       sub	esp,32
1186
       sub	esp,32
1187
       movhps	[esp+24],xmm3
1187
       movhps	[esp+24],xmm3
1188
       shufps	xmm2,xmm2,10110001b
1188
       shufps	xmm2,xmm2,10110001b
1189
       movlps	[esp+16],xmm2
1189
       movlps	[esp+16],xmm2
1190
       movhps	[esp+8],xmm1
1190
       movhps	[esp+8],xmm1
1191
       shufps	xmm0,xmm0,10110001b
1191
       shufps	xmm0,xmm0,10110001b
1192
       movlps	[esp],xmm0 ;================================
1192
       movlps	[esp],xmm0 ;================================
1193
 
1193
 
1194
else
1194
else
1195
       push	dword .cey2
1195
       push	dword .cey2
1196
       push	.cex2
1196
       push	.cex2
1197
       push	dword .cby2
1197
       push	dword .cby2
1198
       push	.cbx2
1198
       push	.cbx2
1199
       push	dword .cey1
1199
       push	dword .cey1
1200
       push	.cex1
1200
       push	.cex1
1201
       push	dword .cby1
1201
       push	dword .cby1
1202
       push	.cbx1
1202
       push	.cbx1
1203
end if
1203
end if
1204
 
1204
 
1205
       ; map / buffer pointers, pushed for every Ext setting
       push	.tex_ptr
1205
       push	.tex_ptr
1206
       push	.z_buff
1206
       push	.z_buff
1207
       push	.t_emap
1207
       push	.t_emap
1208
       push	.t_bmap
1208
       push	.t_bmap
1209
 
1209
 
1210
       ; last argument: the scanline counter kept in ecx by the loops
       push	ecx
1210
       push	ecx
1211
 
1211
 
1212
       ; register arguments: eax = .cx1 sar ROUND, ebx = .cx2 sar ROUND
       ; (bump_tex_line_z documents eax as x1 and ebx as x2)
       mov	eax,.cx1
1212
       mov	eax,.cx1
1213
       sar	eax,ROUND
1213
       sar	eax,ROUND
1214
       mov	ebx,.cx2
1214
       mov	ebx,.cx2
1215
       sar	ebx,ROUND
1215
       sar	ebx,ROUND
1216
 
1216
 
1217
       call	bump_tex_line_z
1217
       call	bump_tex_line_z
1218
 
1218
 
1219
       ; restore the registers saved by pushad at entry
       popad
1219
       popad
1220
;end if
1220
;end if
1221
ret
1221
ret
1222
bump_tex_line_z:
1222
bump_tex_line_z:
1223
;--------------in: eax - x1
1223
;--------------in: eax - x1
1224
;--------------    ebx - x2
1224
;--------------    ebx - x2
1225
;--------------    edi - pointer to screen buffer
1225
;--------------    edi - pointer to screen buffer
1226
;stack - another parameters :
1226
;stack - another parameters :
1227
.y	equ dword [ebp+4]
1227
.y	equ dword [ebp+4]
1228
.bmap	equ dword [ebp+8]	 ; bump map pointer
1228
.bmap	equ dword [ebp+8]	 ; bump map pointer
1229
.emap	equ dword [ebp+12]	 ; env map pointer
1229
.emap	equ dword [ebp+12]	 ; env map pointer
1230
.z_buff equ dword [ebp+16]	 ; z buffer
1230
.z_buff equ dword [ebp+16]	 ; z buffer
1231
.tex_map equ dword [ebp+20]	 ; texture pointer
1231
.tex_map equ dword [ebp+20]	 ; texture pointer
1232
 
1232
 
1233
.bx1	equ  [ebp+24]	;   ---
1233
.bx1	equ  [ebp+24]	;   ---
1234
.by1	equ  [ebp+28]  ;       |
1234
.by1	equ  [ebp+28]  ;       |
1235
.ex1	equ  [ebp+32]  ;       |
1235
.ex1	equ  [ebp+32]  ;       |
1236
.ey1	equ  [ebp+36]  ;       |
1236
.ey1	equ  [ebp+36]  ;       |
1237
.bx2	equ  [ebp+40]  ;       |
1237
.bx2	equ  [ebp+40]  ;       |
1238
.by2	equ  [ebp+44]  ;       |>   b. map and e. map coords
1238
.by2	equ  [ebp+44]  ;       |>   b. map and e. map coords
1239
.ex2	equ  [ebp+48]  ;       |>   shifted shl ROUND
1239
.ex2	equ  [ebp+48]  ;       |>   shifted shl ROUND
1240
.ey2	equ  [ebp+52]  ;   ---
1240
.ey2	equ  [ebp+52]  ;   ---
1241
.z2	equ  [ebp+56]
1241
.z2	equ  [ebp+56]
1242
.tx2	equ  [ebp+60]
1242
.tx2	equ  [ebp+60]
1243
.ty2	equ  [ebp+64]
1243
.ty2	equ  [ebp+64]
1244
.z1	equ  [ebp+68]
1244
.z1	equ  [ebp+68]
1245
.tx1	equ  [ebp+72]
1245
.tx1	equ  [ebp+72]
1246
.ty1	equ  [ebp+76]
1246
.ty1	equ  [ebp+76]
1247
 
1247
 
1248
 
1248
 
1249
 
1249
 
1250
.x1	equ [ebp-4]
1250
.x1	equ [ebp-4]
1251
.x2	equ [ebp-8]
1251
.x2	equ [ebp-8]
1252
.dbx	equ [ebp-12]
1252
.dbx	equ [ebp-12]
1253
.dby	equ [ebp-16]
1253
.dby	equ [ebp-16]
1254
.dex	equ [ebp-20]
1254
.dex	equ [ebp-20]
1255
.dey	equ [ebp-24]
1255
.dey	equ [ebp-24]
1256
.dz	equ [ebp-28]
1256
.dz	equ [ebp-28]
1257
.dtx	equ [ebp-32]
1257
.dtx	equ [ebp-32]
1258
.dty	equ [ebp-36]
1258
.dty	equ [ebp-36]
1259
 
1259
 
1260
.cbx	equ [ebp-40]
1260
.cbx	equ [ebp-40]
1261
.cby	equ [ebp-44]
1261
.cby	equ [ebp-44]
1262
.cex	equ [ebp-48]
1262
.cex	equ [ebp-48]
1263
.cey	equ [ebp-52]
1263
.cey	equ [ebp-52]
1264
.cz	equ [ebp-56]
1264
.cz	equ [ebp-56]
1265
.czbuff equ [ebp-60]
1265
.czbuff equ [ebp-60]
1266
.ctx	equ [ebp-64]
1266
.ctx	equ [ebp-64]
1267
.cty	equ [ebp-68]
1267
.cty	equ [ebp-68]
1268
.c_scr	equ [ebp-72]
1268
.c_scr	equ [ebp-72]
1269
 
1269
 
1270
.temp1	equ	   ebp-80
1270
.temp1	equ	   ebp-80
1271
.temp2	equ	   ebp-88
1271
.temp2	equ	   ebp-88
1272
.temp3	equ	   ebp-76
1272
.temp3	equ	   ebp-76
1273
.temp4	equ	   ebp-84
1273
.temp4	equ	   ebp-84
1274
.temp5	equ	   ebp-92
1274
.temp5	equ	   ebp-92
1275
 
1275
 
1276
	mov	ebp,esp
1276
	mov	ebp,esp
1277
 
1277
 
1278
	mov	ecx,.y
1278
	mov	ecx,.y
1279
	or	ecx,ecx
1279
	or	ecx,ecx
1280
	jl	.bl_end
1280
	jl	.bl_end
1281
	cmp	ecx,SIZE_Y
1281
	cmp	ecx,SIZE_Y
1282
	jge	.bl_end
1282
	jge	.bl_end
1283
 
1283
 
1284
	cmp	eax,ebx
1284
	cmp	eax,ebx
1285
	jl	.bl_ok
1285
	jl	.bl_ok
1286
	je	.bl_end
1286
	je	.bl_end
1287
 
1287
 
1288
 
1288
 
1289
if Ext=NON
1289
if Ext=NON
1290
	mov	edx,.bx1
1290
	mov	edx,.bx1
1291
	xchg	edx,.bx2
1291
	xchg	edx,.bx2
1292
	mov	.bx1,edx
1292
	mov	.bx1,edx
1293
	mov	edx,.by1
1293
	mov	edx,.by1
1294
	xchg	edx,.by2
1294
	xchg	edx,.by2
1295
	mov	.by1,edx
1295
	mov	.by1,edx
1296
 
1296
 
1297
	mov	edx,.ex1
1297
	mov	edx,.ex1
1298
	xchg	edx,.ex2
1298
	xchg	edx,.ex2
1299
	mov	.ex1,edx
1299
	mov	.ex1,edx
1300
	mov	edx,.ey1
1300
	mov	edx,.ey1
1301
	xchg	edx,.ey2
1301
	xchg	edx,.ey2
1302
	mov	.ey1,edx
1302
	mov	.ey1,edx
1303
 
1303
 
1304
	mov	edx,.tx1
1304
	mov	edx,.tx1
1305
	xchg	edx,.tx2
1305
	xchg	edx,.tx2
1306
	mov	.tx1,edx
1306
	mov	.tx1,edx
1307
	mov	edx,.ty1
1307
	mov	edx,.ty1
1308
	xchg	edx,.ty2
1308
	xchg	edx,.ty2
1309
	mov	.ty1,edx
1309
	mov	.ty1,edx
1310
end if
1310
end if
1311
if Ext = MMX
1311
if Ext = MMX
1312
	movq	mm0,.bx1
1312
	movq	mm0,.bx1
1313
	movq	mm1,.bx2
1313
	movq	mm1,.bx2
1314
	movq	mm2,.ex1
1314
	movq	mm2,.ex1
1315
	movq	mm3,.ex2
1315
	movq	mm3,.ex2
1316
	movq	mm4,.tx1
1316
	movq	mm4,.tx1
1317
	movq	mm5,.tx2
1317
	movq	mm5,.tx2
1318
	movq	.bx2,mm0
1318
	movq	.bx2,mm0
1319
	movq	.bx1,mm1
1319
	movq	.bx1,mm1
1320
	movq	.ex1,mm3
1320
	movq	.ex1,mm3
1321
	movq	.ex2,mm2
1321
	movq	.ex2,mm2
1322
	movq	.tx1,mm5
1322
	movq	.tx1,mm5
1323
	movq	.tx2,mm4
1323
	movq	.tx2,mm4
1324
end if
1324
end if
1325
if Ext>=SSE
1325
if Ext>=SSE
1326
	movups xmm0,.bx1
1326
	movups xmm0,.bx1
1327
	movups xmm1,.bx2
1327
	movups xmm1,.bx2
1328
	movups .bx1,xmm1
1328
	movups .bx1,xmm1
1329
	movups .bx2,xmm0
1329
	movups .bx2,xmm0
1330
	movq	mm0,.tx1
1330
	movq	mm0,.tx1
1331
	movq	mm1,.tx2
1331
	movq	mm1,.tx2
1332
	movq	.tx1,mm1
1332
	movq	.tx1,mm1
1333
	movq	.tx2,mm0
1333
	movq	.tx2,mm0
1334
end if
1334
end if
1335
;if Ext>=SSE2
1335
;if Ext>=SSE2
1336
;        movaps  xmm4,xmm0
1336
;        movaps  xmm4,xmm0
1337
;        movaps  xmm0,xmm2
1337
;        movaps  xmm0,xmm2
1338
;        movaps  xmm2,xmm4
1338
;        movaps  xmm2,xmm4
1339
;        movaps  xmm5,xmm1
1339
;        movaps  xmm5,xmm1
1340
;        movaps  xmm1,xmm3
1340
;        movaps  xmm1,xmm3
1341
;        movaps  xmm3,xmm5
1341
;        movaps  xmm3,xmm5
1342
;else
1342
;else
1343
 
1343
 
1344
	xchg	eax,ebx
1344
	xchg	eax,ebx
1345
	mov	edx,.z1
1345
	mov	edx,.z1
1346
	xchg	edx,.z2
1346
	xchg	edx,.z2
1347
	mov	.z1,edx
1347
	mov	.z1,edx
1348
;end if
1348
;end if
1349
  .bl_ok:
1349
  .bl_ok:
1350
;if Ext >= SSE2
1350
;if Ext >= SSE2
1351
;        shufps  xmm0,xmm0,11100001b
1351
;        shufps  xmm0,xmm0,11100001b
1352
;        shufps  xmm2,xmm2,11100001b
1352
;        shufps  xmm2,xmm2,11100001b
1353
;        movlps  .bx1,xmm0
1353
;        movlps  .bx1,xmm0
1354
;        movlps  .bx2,xmm2
1354
;        movlps  .bx2,xmm2
1355
 
1355
 
1356
 
1356
 
1357
;        shufps  xmm0,xmm0,00011011b
1357
;        shufps  xmm0,xmm0,00011011b
1358
;        shufps  xmm2,xmm2,00011011b
1358
;        shufps  xmm2,xmm2,00011011b
1359
;        movd    eax,xmm0
1359
;        movd    eax,xmm0
1360
;        movd    ebx,xmm2
1360
;        movd    ebx,xmm2
1361
;        shufps  xmm0,xmm0,11000110b
1361
;        shufps  xmm0,xmm0,11000110b
1362
;        shufps  xmm2,xmm2,11000110b
1362
;        shufps  xmm2,xmm2,11000110b
1363
;        movd    .z1,xmm0
1363
;        movd    .z1,xmm0
1364
;        movd    .z2,xmm2
1364
;        movd    .z2,xmm2
1365
;        shufps  xmm1,xmm1,10110001b
1365
;        shufps  xmm1,xmm1,10110001b
1366
;        shufps  xmm3,xmm3,10110001b
1366
;        shufps  xmm3,xmm3,10110001b
1367
;        movlps  .ex1,xmm1
1367
;        movlps  .ex1,xmm1
1368
;        movlps  .ex2,xmm2
1368
;        movlps  .ex2,xmm2
1369
;        movhps  .tx1,xmm1
1369
;        movhps  .tx1,xmm1
1370
;        movhps  .tx2,xmm2
1370
;        movhps  .tx2,xmm2
1371
 
1371
 
1372
;        xchg    eax,ebx
1372
;        xchg    eax,ebx
1373
;        mov     edx,.z1
1373
;        mov     edx,.z1
1374
;        xchg    edx,.z2
1374
;        xchg    edx,.z2
1375
;        mov     .z1,edx
1375
;        mov     .z1,edx
1376
 
1376
 
1377
 
1377
 
1378
;end if
1378
;end if
1379
 
1379
 
1380
	push	eax
1380
	push	eax
1381
	push	ebx	      ;store x1, x2
1381
	push	ebx	      ;store x1, x2
1382
	cmp	dword .x1,SIZE_X
1382
	cmp	dword .x1,SIZE_X
1383
	jge	.bl_end
1383
	jge	.bl_end
1384
	cmp	dword .x2,0
1384
	cmp	dword .x2,0
1385
	jle	.bl_end
1385
	jle	.bl_end
1386
 
1386
 
1387
	mov	ebx,.x2
1387
	mov	ebx,.x2
1388
	sub	ebx,.x1
1388
	sub	ebx,.x1
1389
 
1389
 
1390
if Ext>=SSE
1390
if Ext>=SSE
1391
 
1391
 
1392
       sub	 esp,28
1392
       sub	 esp,28
1393
       cvtsi2ss  xmm3,ebx	     ;rcps
1393
       cvtsi2ss  xmm3,ebx	     ;rcps
1394
       shufps	 xmm3,xmm3,0
1394
       shufps	 xmm3,xmm3,0
1395
; float using SSE variant  ::-->
1395
; float using SSE variant  ::-->
1396
;       movups    xmm0,.bx1  ; new
1396
;       movups    xmm0,.bx1  ; new
1397
;       movups    xmm1,.bx2  ; new
1397
;       movups    xmm1,.bx2  ; new
1398
 
1398
 
1399
       cvtpi2ps  xmm0,.bx1 ;mm0    ; variant fixed point
1399
       cvtpi2ps  xmm0,.bx1 ;mm0    ; variant fixed point
1400
       movlhps	 xmm0,xmm0
1400
       movlhps	 xmm0,xmm0
1401
       cvtpi2ps  xmm0,.ex1 ;mm2
1401
       cvtpi2ps  xmm0,.ex1 ;mm2
1402
       cvtpi2ps  xmm1,.bx2 ;mm1
1402
       cvtpi2ps  xmm1,.bx2 ;mm1
1403
       movlhps	 xmm1,xmm1
1403
       movlhps	 xmm1,xmm1
1404
       cvtpi2ps  xmm1,.ex2 ;mm3
1404
       cvtpi2ps  xmm1,.ex2 ;mm3
1405
       subps	 xmm1,xmm0
1405
       subps	 xmm1,xmm0
1406
 
1406
 
1407
       divps	 xmm1,xmm3
1407
       divps	 xmm1,xmm3
1408
 
1408
 
1409
       shufps	 xmm1,xmm1,10110001b
1409
       shufps	 xmm1,xmm1,10110001b
1410
;       movups    .dey,xmm1  ; new
1410
;       movups    .dey,xmm1  ; new
1411
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1411
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1412
       movhlps	 xmm1,xmm1
1412
       movhlps	 xmm1,xmm1
1413
       cvtps2pi  mm1,xmm1
1413
       cvtps2pi  mm1,xmm1
1414
       movq	 .dey,mm0
1414
       movq	 .dey,mm0
1415
       movq	 .dby,mm1
1415
       movq	 .dby,mm1
1416
 
1416
 
1417
       movd	 mm2,.z1
1417
       movd	 mm2,.z1
1418
       movd	 mm3,.z2
1418
       movd	 mm3,.z2
1419
 
1419
 
1420
       cvtpi2ps  xmm0,.tx1 ;mm0
1420
       cvtpi2ps  xmm0,.tx1 ;mm0
1421
       movlhps	 xmm0,xmm0
1421
       movlhps	 xmm0,xmm0
1422
       cvtpi2ps  xmm0,mm2
1422
       cvtpi2ps  xmm0,mm2
1423
       cvtpi2ps  xmm1,.tx2 ;mm1
1423
       cvtpi2ps  xmm1,.tx2 ;mm1
1424
       movlhps	 xmm1,xmm1
1424
       movlhps	 xmm1,xmm1
1425
       cvtpi2ps  xmm1,mm3
1425
       cvtpi2ps  xmm1,mm3
1426
;       movups    xmm0,.z1  ; new
1426
;       movups    xmm0,.z1  ; new
1427
;       movups    xmm1,.z2  ; new
1427
;       movups    xmm1,.z2  ; new
1428
       subps	 xmm1,xmm0
1428
       subps	 xmm1,xmm0
1429
 
1429
 
1430
       divps	 xmm1,xmm3
1430
       divps	 xmm1,xmm3
1431
 
1431
 
1432
;       movups    .dz,xmm1  ;new
1432
;       movups    .dz,xmm1  ;new
1433
 
1433
 
1434
       shufps	 xmm1,xmm1,10110100b
1434
       shufps	 xmm1,xmm1,10110100b
1435
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1435
       cvtps2pi  mm0,xmm1	   ; mm0 -> 2 delta dwords
1436
       movhlps	 xmm1,xmm1
1436
       movhlps	 xmm1,xmm1
1437
       cvtps2pi  mm1,xmm1
1437
       cvtps2pi  mm1,xmm1
1438
       movd	 .dz,mm0
1438
       movd	 .dz,mm0
1439
       movq	 .dty,mm1
1439
       movq	 .dty,mm1
1440
 
1440
 
1441
else
1441
else
1442
 
1442
 
1443
	mov	eax,.bx2       ; calc .dbx
1443
	mov	eax,.bx2       ; calc .dbx
1444
	sub	eax,.bx1
1444
	sub	eax,.bx1
1445
	cdq
1445
	cdq
1446
	idiv	ebx
1446
	idiv	ebx
1447
	push	eax
1447
	push	eax
1448
 
1448
 
1449
	mov	eax,.by2       ; calc .dby
1449
	mov	eax,.by2       ; calc .dby
1450
	sub	eax,.by1
1450
	sub	eax,.by1
1451
	cdq
1451
	cdq
1452
	idiv	ebx
1452
	idiv	ebx
1453
	push	eax
1453
	push	eax
1454
 
1454
 
1455
	mov	eax,.ex2       ; calc .dex
1455
	mov	eax,.ex2       ; calc .dex
1456
	sub	eax,.ex1
1456
	sub	eax,.ex1
1457
	cdq
1457
	cdq
1458
	idiv	ebx
1458
	idiv	ebx
1459
	push	eax
1459
	push	eax
1460
 
1460
 
1461
	mov	eax,.ey2       ; calc .dey
1461
	mov	eax,.ey2       ; calc .dey
1462
	sub	eax,.ey1
1462
	sub	eax,.ey1
1463
	cdq
1463
	cdq
1464
	idiv	ebx
1464
	idiv	ebx
1465
	push	eax
1465
	push	eax
1466
 
1466
 
1467
 
1467
 
1468
	mov	eax,.z2        ; calc .dz
1468
	mov	eax,.z2        ; calc .dz
1469
	sub	eax,.z1
1469
	sub	eax,.z1
1470
	cdq
1470
	cdq
1471
	idiv	ebx
1471
	idiv	ebx
1472
	push	eax
1472
	push	eax
1473
 
1473
 
1474
	mov	eax,.tx2       ; calc .dtx
1474
	mov	eax,.tx2       ; calc .dtx
1475
	sub	eax,.tx1
1475
	sub	eax,.tx1
1476
	cdq
1476
	cdq
1477
	idiv	ebx
1477
	idiv	ebx
1478
	push	eax
1478
	push	eax
1479
 
1479
 
1480
	mov	eax,.ty2       ; calc .dty
1480
	mov	eax,.ty2       ; calc .dty
1481
	sub	eax,.ty1
1481
	sub	eax,.ty1
1482
	cdq
1482
	cdq
1483
	idiv	ebx
1483
	idiv	ebx
1484
	push	eax
1484
	push	eax
1485
 
1485
 
1486
end if
1486
end if
1487
	cmp	dword .x1,0	    ; set correctly begin variable
1487
	cmp	dword .x1,0	    ; set correctly begin variable
1488
	jge	@f	      ; CLIPPING ON FUNCTION
1488
	jge	@f	      ; CLIPPING ON FUNCTION
1489
			      ; cut off the part of the span that exceeds the screen
1489
			      ; cut off the part of the span that exceeds the screen
1490
	mov	ebx,.x1
1490
	mov	ebx,.x1
1491
	neg	ebx
1491
	neg	ebx
1492
 
1492
 
1493
;if Ext >= SSE
1493
;if Ext >= SSE
1494
 
1494
 
1495
;        cvtsi2ss xmm0,ebx
1495
;        cvtsi2ss xmm0,ebx
1496
;        shufps   xmm0,xmm0,0
1496
;        shufps   xmm0,xmm0,0
1497
;        movups   xmm1,.dey
1497
;        movups   xmm1,.dey
1498
;        mulps    xmm1,xmm0
1498
;        mulps    xmm1,xmm0
1499
;        shufps   xmm1,xmm1,00011011b
1499
;        shufps   xmm1,xmm1,00011011b
1500
;        movups   xmm2,.bx1
1500
;        movups   xmm2,.bx1
1501
;        addps    xmm2,xmm1
1501
;        addps    xmm2,xmm1
1502
;        movups   .bx1,xmm2
1502
;        movups   .bx1,xmm2
1503
 
1503
 
1504
	mov	eax,.dz
1504
	mov	eax,.dz
1505
	imul	ebx	      ; eax = .dz * abs(.x1)
1505
	imul	ebx	      ; eax = .dz * abs(.x1)
1506
	add	.z1,eax
1506
	add	.z1,eax
1507
	mov	dword .x1,0
1507
	mov	dword .x1,0
1508
 
1508
 
1509
	mov	eax,.dbx
1509
	mov	eax,.dbx
1510
	imul	ebx
1510
	imul	ebx
1511
	add    .bx1,eax
1511
	add    .bx1,eax
1512
 
1512
 
1513
	mov	eax,.dby
1513
	mov	eax,.dby
1514
	imul	ebx
1514
	imul	ebx
1515
	add	.by1,eax
1515
	add	.by1,eax
1516
 
1516
 
1517
	mov	eax,.dex
1517
	mov	eax,.dex
1518
	imul	ebx
1518
	imul	ebx
1519
	add	.ex1,eax
1519
	add	.ex1,eax
1520
 
1520
 
1521
	mov	eax,.dey
1521
	mov	eax,.dey
1522
	imul	ebx
1522
	imul	ebx
1523
	add	.ey1,eax
1523
	add	.ey1,eax
1524
 
1524
 
1525
	mov	eax,.dtx
1525
	mov	eax,.dtx
1526
	imul	ebx
1526
	imul	ebx
1527
	add	.tx1,eax
1527
	add	.tx1,eax
1528
 
1528
 
1529
	mov	eax,.dty
1529
	mov	eax,.dty
1530
	imul	ebx
1530
	imul	ebx
1531
	add	.ty1,eax
1531
	add	.ty1,eax
1532
 
1532
 
1533
      @@:
1533
      @@:
1534
	cmp	dword .x2,SIZE_X
1534
	cmp	dword .x2,SIZE_X
1535
	jl	@f
1535
	jl	@f
1536
	mov	dword .x2,SIZE_X
1536
	mov	dword .x2,SIZE_X
1537
      @@:
1537
      @@:
1538
	mov	eax,SIZE_X	 ;calc memory begin in buffers
1538
	mov	eax,SIZE_X	 ;calc memory begin in buffers
1539
	mul	.y
1539
	mul	.y
1540
	add	eax,.x1
1540
	add	eax,.x1
1541
	lea	esi,[4*eax]
1541
	lea	esi,[4*eax]
1542
	add	esi,.z_buff	  ; z-buffer filled with dd variables
1542
	add	esi,.z_buff	  ; z-buffer filled with dd variables
1543
	lea	eax,[eax*3]
1543
	lea	eax,[eax*3]
1544
	add	edi,eax
1544
	add	edi,eax
1545
 
1545
 
1546
 
1546
 
1547
	mov	ecx,.x2
1547
	mov	ecx,.x2
1548
	sub	ecx,.x1
1548
	sub	ecx,.x1
1549
	; init current variables
1549
	; init current variables
1550
	push	dword .bx1   ; current b, e and t shifted shl ROUND   .cbx
1550
	push	dword .bx1   ; current b, e and t shifted shl ROUND   .cbx
1551
	push	dword .by1					   ;  .cby
1551
	push	dword .by1					   ;  .cby
1552
	push	dword .ex1					   ;  .cex
1552
	push	dword .ex1					   ;  .cex
1553
	push	dword .ey1					   ;  .cey
1553
	push	dword .ey1					   ;  .cey
1554
 
1554
 
1555
	push	dword .z1    ; current z shl CATMULL_SHIFT         ; .cz
1555
	push	dword .z1    ; current z shl CATMULL_SHIFT         ; .cz
1556
	push	esi					     ; .czbuff
1556
	push	esi					     ; .czbuff
1557
 
1557
 
1558
	push	dword .tx1	;         .ctx
1558
	push	dword .tx1	;         .ctx
1559
	push	dword .ty1	;         .cty
1559
	push	dword .ty1	;         .cty
1560
	push	edi	  ;         .c_scr
1560
	push	edi	  ;         .c_scr
1561
;if Ext = SSE2
1561
if Ext = SSE2
1562
;        mov    eax,TEXTURE_SIZE
1562
	mov    eax,TEXTURE_SIZE
1563
;        movd   xmm1,eax
1563
	movd   xmm1,eax
1564
;        shufps xmm1,xmm1,0
1564
	shufps xmm1,xmm1,0
1565
;        push   dword  TEX_X
1565
	push   dword  TEX_X
1566
;        push   dword  -TEX_X
1566
	push   dword  -TEX_X
1567
;        push   dword  1
1567
	push   dword  1
1568
;        push   dword  -1
1568
	push   dword  -1
1569
;        movups xmm2,[esp]
1569
	movups xmm2,[esp]
1570
;        movd   xmm3,.bmap
1570
	movd   xmm3,.bmap
1571
;        shufps xmm3,xmm3,0
1571
	shufps xmm3,xmm3,0
1572
;end if
1572
end if
1573
 
1573
 
1574
if Ext>=MMX
1574
if Ext>=MMX
1575
	movq	mm7,.cty
1575
	movq	mm7,.cty
1576
	movq	mm6,.cby
1576
	movq	mm6,.cby
1577
	movq	mm5,.cey
1577
	movq	mm5,.cey
1578
;        movq    mm4,.dtyq
1578
;        movq    mm4,.dtyq
1579
;        movq    mm3,.dbyq
1579
;        movq    mm3,.dbyq
1580
end if
1580
end if
1581
 
1581
 
1582
     .draw:
1582
     .draw:
1583
    ; if TEX = SHIFTING   ;bump drawing only in shifting mode
1583
    ; if TEX = SHIFTING   ;bump drawing only in shifting mode
1584
	mov	esi,.czbuff	 ; .czbuff current address in buffer
1584
	mov	esi,.czbuff	 ; .czbuff current address in buffer
1585
	mov	ebx,.cz 	 ; .cz - cur z position
1585
	mov	ebx,.cz 	 ; .cz - cur z position
1586
	cmp	ebx,dword[esi]
1586
	cmp	ebx,dword[esi]
1587
	jge	.skip
1587
	jge	.skip
1588
if Ext=NON
1588
if Ext=NON
1589
	mov	eax,.cby
1589
	mov	eax,.cby
1590
	shr	eax,ROUND
1590
	shr	eax,ROUND
1591
	mov	esi,.cbx
1591
	mov	esi,.cbx
1592
	shr	esi,ROUND
1592
	shr	esi,ROUND
1593
else
1593
else
1594
	movq	mm1,mm6
1594
	movq	mm1,mm6
1595
	psrld	mm1,ROUND
1595
	psrld	mm1,ROUND
1596
	movd	eax,mm1
1596
	movd	eax,mm1
1597
	psrlq	mm1,32
1597
	psrlq	mm1,32
1598
	movd	esi,mm1
1598
	movd	esi,mm1
1599
end if
1599
end if
1600
 
1600
 
1601
	shl	eax,TEX_SHIFT
1601
	shl	eax,TEX_SHIFT
1602
	add	esi,eax 	;-  ; esi - current bump map index
1602
	add	esi,eax 	;-  ; esi - current bump map index
1603
 
1603
 
1604
;if Ext = SSE2
1604
if Ext = SSE2
1605
;
1605
 
1606
;        movd    xmm0,esi
1606
	movd	xmm0,esi
1607
;        shufps  xmm0,xmm0,0
1607
	shufps	xmm0,xmm0,0
1608
;        paddd   xmm0,xmm2
1608
	paddd	xmm0,xmm2
1609
;        pand    xmm0,xmm1
1609
	pand	xmm0,xmm1
1610
;        paddd   xmm0,xmm3
1610
	paddd	xmm0,xmm3
1611
;
1611
 
1612
;        movd    ebx,xmm0
1612
	movd	ebx,xmm0
1613
;        movzx   eax,byte[ebx]
1613
	movzx	eax,byte[ebx]
1614
;
1614
;
1615
;        shufps  xmm0,xmm0,11100001b
1615
;        shufps  xmm0,xmm0,11100001b
-
 
1616
	psrldq	xmm0,4
1616
;        movd    ebx,xmm0
1617
	movd	ebx,xmm0
1617
;        movzx   ebx,byte[ebx]
1618
	movzx	ebx,byte[ebx]
1618
;        sub     eax,ebx
1619
	sub	eax,ebx
1619
;
1620
;
1620
;        shufps  xmm0,xmm0,11111110b
1621
;        shufps  xmm0,xmm0,11111110b
-
 
1622
	psrldq	xmm0,4
1621
;        movd    ebx,xmm0
1623
	movd	ebx,xmm0
1622
;        movzx   edx, byte [ebx]
1624
	movzx	edx, byte [ebx]
1623
;
1625
;
1624
;        shufps  xmm0,xmm0,11111111b
1626
;        shufps  xmm0,xmm0,11111111b
-
 
1627
	psrldq	xmm0,4
1625
;        movd    ebx,xmm0
1628
	movd	ebx,xmm0
1626
;        movzx   ebx, byte [ebx]
1629
	movzx	ebx, byte [ebx]
1627
;        sub     edx,ebx
1630
	sub	edx,ebx
1628
;
1631
;
1629
;else
1632
else
1630
	mov	ebx,esi
1633
;        mov     ebx,esi
-
 
1634
;        dec     ebx
1631
	dec	ebx
1635
	lea	ebx,[esi-1]
1632
	and	ebx,TEXTURE_SIZE
1636
	and	ebx,TEXTURE_SIZE
1633
	add	ebx,.bmap
1637
	add	ebx,.bmap
1634
	movzx	eax,byte [ebx]
1638
	movzx	eax,byte [ebx]
1635
 
1639
 
1636
	mov	ebx,esi
1640
;        mov     ebx,esi
-
 
1641
;        inc     ebx
1637
	inc	ebx
1642
	lea	ebx,[esi+1]
1638
	and	ebx,TEXTURE_SIZE
1643
	and	ebx,TEXTURE_SIZE
1639
	add	ebx,.bmap
1644
	add	ebx,.bmap
1640
	movzx	ebx,byte [ebx]
1645
	movzx	ebx,byte [ebx]
1641
	sub	eax,ebx
1646
	sub	eax,ebx
1642
 
1647
 
1643
	mov	ebx,esi
1648
;        mov     ebx,esi
-
 
1649
;        sub     ebx,TEX_X
1644
	sub	ebx,TEX_X
1650
	lea	ebx,[esi-TEX_X]
1645
	and	ebx,TEXTURE_SIZE
1651
	and	ebx,TEXTURE_SIZE
1646
	add	ebx,.bmap
1652
	add	ebx,.bmap
1647
	movzx	edx,byte [ebx]
1653
	movzx	edx,byte [ebx]
1648
 
1654
 
1649
	mov	ebx,esi
1655
;        mov     ebx,esi
-
 
1656
;        add     ebx,TEX_X
1650
	add	ebx,TEX_X
1657
	lea	ebx,[esi+TEX_X]
1651
	and	ebx,TEXTURE_SIZE
1658
	and	ebx,TEXTURE_SIZE
1652
	add	ebx,.bmap
1659
	add	ebx,.bmap
1653
	movzx	ebx,byte [ebx]
1660
	movzx	ebx,byte [ebx]
1654
	sub	edx,ebx
1661
	sub	edx,ebx
1655
;end if
1662
end if
1656
 
1663
 
1657
     ;  eax - horizontal bump difference, used to modify the x coord
1664
     ;  eax - horizontal bump difference, used to modify the x coord
1658
     ;  edx - vertical   bump difference, used to modify the y coord
1665
     ;  edx - vertical   bump difference, used to modify the y coord
1659
if Ext=NON
1666
if Ext=NON
1660
	mov	ebx,.cex       ;.cex - current env map X
1667
	mov	ebx,.cex       ;.cex - current env map X
1661
	shr	ebx,ROUND
1668
	shr	ebx,ROUND
1662
	add	eax,ebx
1669
	add	eax,ebx
1663
 
1670
 
1664
 
1671
 
1665
	mov	ebx,.cey       ;.cey - current  env map y
1672
	mov	ebx,.cey       ;.cey - current  env map y
1666
	shr	ebx,ROUND
1673
	shr	ebx,ROUND
1667
	add	edx,ebx
1674
	add	edx,ebx
1668
 
1675
 
1669
else
1676
else
1670
	movq	mm1,mm5        ; mm5 - copy of cur env coords
1677
	movq	mm1,mm5        ; mm5 - copy of cur env coords
1671
	psrld	mm1,ROUND
1678
	psrld	mm1,ROUND
1672
	movd	ebx,mm1
1679
	movd	ebx,mm1
1673
	psrlq	mm1,32
1680
	psrlq	mm1,32
1674
	add	eax,ebx
1681
	add	eax,ebx
1675
	movd	ebx,mm1
1682
	movd	ebx,mm1
1676
	add	edx,ebx
1683
	add	edx,ebx
1677
;        movq    qword[.temp1],mm3
1684
;        movq    qword[.temp1],mm3
1678
;        add     eax,dword [.temp1]
1685
;        add     eax,dword [.temp1]
1679
;        add     edx,dword [.temp1+4]
1686
;        add     edx,dword [.temp1+4]
1680
end if
1687
end if
1681
 
1688
 
1682
	or	eax,eax
1689
	or	eax,eax
1683
	jl	.black
1690
	jl	.black
1684
	cmp	eax,TEX_X
1691
	cmp	eax,TEX_X
1685
	jg	.black
1692
	jg	.black
1686
	or	edx,edx
1693
	or	edx,edx
1687
	jl	.black
1694
	jl	.black
1688
	cmp	edx,TEX_Y
1695
	cmp	edx,TEX_Y
1689
	jg	.black
1696
	jg	.black
1690
 
1697
 
1691
	shl	edx,TEX_SHIFT	; perturbation in the env map = perturbation in the texture
1698
	shl	edx,TEX_SHIFT	; perturbation in the env map = perturbation in the texture
1692
	add	edx,eax 	; suggestion: instead of drawing a black pixel, draw
1699
	add	edx,eax 	; suggestion: instead of drawing a black pixel, draw
1693
	lea	esi,[edx*3]	; an unperturbed one.
1700
	lea	esi,[edx*3]	; an unperturbed one.
1694
	add	esi,.emap	;
1701
	add	esi,.emap	;
1695
	lodsd
1702
	lodsd
1696
 
1703
 
1697
if Ext=NON
1704
if Ext=NON
1698
	mov	edx,.cty
1705
	mov	edx,.cty
1699
	shr	edx,ROUND  ; sar
1706
	shr	edx,ROUND  ; sar
1700
 
1707
 
1701
	mov	edi,.ctx
1708
	mov	edi,.ctx
1702
	shr	edi,ROUND  ; sar
1709
	shr	edi,ROUND  ; sar
1703
else
1710
else
1704
	movq	mm1,mm7
1711
	movq	mm1,mm7
1705
	psrld	mm1,ROUND
1712
	psrld	mm1,ROUND
1706
	movd	edx,mm1
1713
	movd	edx,mm1
1707
	psrlq	mm1,32
1714
	psrlq	mm1,32
1708
	movd	edi,mm1
1715
	movd	edi,mm1
1709
 
1716
 
1710
end if
1717
end if
1711
 
1718
 
1712
	shl	edx,TEX_SHIFT
1719
	shl	edx,TEX_SHIFT
1713
	add	edi,edx
1720
	add	edi,edx
1714
	and	edi,TEXTURE_SIZE
1721
	and	edi,TEXTURE_SIZE
1715
	lea	esi,[edi*3]
1722
	lea	esi,[edi*3]
1716
	add	esi,.tex_map
1723
	add	esi,.tex_map
1717
 
1724
 
1718
if Ext=NON
1725
if Ext=NON
1719
	mov	edx,eax
1726
	mov	edx,eax
1720
	lodsd
1727
	lodsd
1721
	push	ax
1728
	push	ax
1722
	mul	dl
1729
	mul	dl
1723
	mov	dl,ah
1730
	mov	dl,ah
1724
	pop	ax
1731
	pop	ax
1725
	shr	ax,8
1732
	shr	ax,8
1726
	mul	dh
1733
	mul	dh
1727
	mov	al,dl
1734
	mov	al,dl
1728
	mov	edi,.c_scr
1735
	mov	edi,.c_scr
1729
	stosw
1736
	stosw
1730
	shr	edx,16
1737
	shr	edx,16
1731
	shr	eax,16
1738
	shr	eax,16
1732
	mul	dl
1739
	mul	dl
1733
	shr	ax,8
1740
	shr	ax,8
1734
	stosb
1741
	stosb
1735
else
1742
else
1736
	movd	   mm0,eax
1743
	movd	   mm0,eax
1737
	pxor	   mm1,mm1
1744
	pxor	   mm1,mm1
1738
	punpcklbw  mm0,mm1
1745
	punpcklbw  mm0,mm1
1739
	movd	   mm2,[esi]
1746
	movd	   mm2,[esi]
1740
	punpcklbw  mm2,mm1
1747
	punpcklbw  mm2,mm1
1741
	pmullw	   mm0,mm2
1748
	pmullw	   mm0,mm2
1742
	psrlw	   mm0,8
1749
	psrlw	   mm0,8
1743
	packuswb   mm0,mm1
1750
	packuswb   mm0,mm1
1744
	mov	   edi,.c_scr
1751
	mov	   edi,.c_scr
1745
	movd	   [edi],mm0
1752
	movd	   [edi],mm0
1746
 
1753
 
1747
end if
1754
end if
1748
 
1755
 
1749
	jmp	.actual_zbuff	; actualize z buffer
1756
	jmp	.actual_zbuff	; actualize z buffer
1750
     @@:
1757
     @@:
1751
     .black:
1758
     .black:
1752
	xor	eax,eax
1759
	xor	eax,eax
1753
	mov	edi,.c_scr
1760
	mov	edi,.c_scr
1754
	stosd
1761
	stosd
1755
     .actual_zbuff:
1762
     .actual_zbuff:
1756
	mov	eax,.cz
1763
	mov	eax,.cz
1757
	mov	edi,.czbuff
1764
	mov	edi,.czbuff
1758
	stosd
1765
	stosd
1759
 
1766
 
1760
      .skip:
1767
      .skip:
1761
	add	dword .czbuff,4
1768
	add	dword .czbuff,4
1762
	add	dword .c_scr,3
1769
	add	dword .c_scr,3
1763
 
1770
 
1764
if Ext=NON
1771
if Ext=NON
1765
	mov	eax,.dbx
1772
	mov	eax,.dbx
1766
	add	.cbx,eax
1773
	add	.cbx,eax
1767
	mov	ebx,.dby
1774
	mov	ebx,.dby
1768
	add	.cby,ebx
1775
	add	.cby,ebx
1769
 
1776
 
1770
	mov	edx,.dex
1777
	mov	edx,.dex
1771
	add	.cex,edx
1778
	add	.cex,edx
1772
	mov	eax,.dey
1779
	mov	eax,.dey
1773
	add	.cey,eax
1780
	add	.cey,eax
1774
 
1781
 
1775
	mov	ebx,.dtx
1782
	mov	ebx,.dtx
1776
	add	.ctx,ebx
1783
	add	.ctx,ebx
1777
	mov	edx,.dty
1784
	mov	edx,.dty
1778
	add	.cty,edx
1785
	add	.cty,edx
1779
 
1786
 
1780
else
1787
else
1781
	paddd	mm7,.dty
1788
	paddd	mm7,.dty
1782
	paddd	mm6,.dby
1789
	paddd	mm6,.dby
1783
	paddd	mm5,.dey
1790
	paddd	mm5,.dey
1784
end if
1791
end if
1785
	mov	eax,.dz
1792
	mov	eax,.dz
1786
	add	.cz,eax
1793
	add	.cz,eax
1787
 
1794
 
1788
	dec	ecx
1795
	dec	ecx
1789
	jnz	.draw
1796
	jnz	.draw
1790
 
1797
 
1791
  .bl_end:
1798
  .bl_end:
1792
	mov	esp,ebp
1799
	mov	esp,ebp
1793
ret 76
1800
ret 76
1794
;Ext = MMX
1801
;Ext = MMX
1795
 
1802
 
1796
;     else
1803
;     else
1797
;        movq    mm5, qword[.temp1]  ;-
1804
;        movq    mm5, qword[.temp1]  ;-
1798
;        paddd   mm5, qword[.temp5]  ; .temp5 == low dword = TEX_X, high dword = -TEX_X
1805
;        paddd   mm5, qword[.temp5]  ; .temp5 == low dword = TEX_X, high dword = -TEX_X
1799
;        pand    mm5, qword[.temp3]  ; .temp3 == low = high dword = TEX_SIZE
1806
;        pand    mm5, qword[.temp3]  ; .temp3 == low = high dword = TEX_SIZE
1800
;        paddd   mm5, qword[.temp4]  ; .temp4 == low = high dword = offset .bmap
1807
;        paddd   mm5, qword[.temp4]  ; .temp4 == low = high dword = offset .bmap
1801
;        movd    ebx,mm5
1808
;        movd    ebx,mm5
1802
;        psrlq   mm5,32
1809
;        psrlq   mm5,32
1803
;     end if
1810
;     end if