Subversion Repositories Kolibri OS

Rev

Rev 9512 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8719 leency 1
; Byte offsets of the coordinates inside a 3d point (three words)
x3d   equ 0
y3d   equ 2
z3d   equ 4
; Byte offsets of the coordinates inside a vector (three dwords)
vec_x equ 0
vec_y equ 4
vec_z equ 8
9237 leency 7
 
8
if 0  ; Ext >= SSE3
;---------------------------------------------------------------
; NOTE: this whole routine is excluded from assembly by `if 0`.
; Ray versus axis aligned bounding box test for the 8 boxes
; stored from aabb1 on (16 bytes per box).
; in:
;      xmm0  -  normal vector of ray
;      xmm1  -  light origin
; out:
;      eax - axis aligned bounding boxes bit mask
;            (bit n is set when tmax >= tmin for box n)
; Modifies ebx, ecx, esi, edi and xmm registers; calls
; reverse_mx_3x3 and rotary (both reinitialise the FPU).
;---------------------------------------------------------------
calc_bounding_box:

       ; locals, addressed relative to the 16 byte aligned ebp
       .rmx     equ [ebp-36]
       .nray    equ [ebp-64]
       .origin  equ [ebp-80]
       .dirfrac equ [ebp-96]
       .nrayr   equ [ebp-112]
       .originr equ [ebp-128]
       .tmin    equ [ebp-132]
       .tmax    equ [ebp-136]

       push    ebp
       mov     ebp,esp
       and     ebp,-16            ; aligned frame base, needed by movaps on locals
       sub     esp,160

       movss     xmm5,[rsscale]
       shufps    xmm5,xmm5,0      ; broadcast rsscale into all four lanes
                                  ; (was `shufps xmm5,xmm1,0`, which put the
                                  ;  light origin x into lanes 2 and 3)
       movd      xmm2,[vect_x]
       punpcklwd xmm2,[the_zero]  ; words at vect_x -> dwords (the_zero is
                                  ; presumably all zero bits)
       cvtdq2ps  xmm2,xmm2
       subps     xmm1,xmm2        ; light origin minus the vect_x/vect_y shift
       movaps    .origin,xmm1
       mulps     xmm0,xmm5        ; ray scaled by rsscale
       movaps    .nray,xmm0

       ; .rmx = reversed (inverse) copy of `matrix`
       mov     esi,matrix
       lea     edi,.rmx
       call    reverse_mx_3x3

    ; reverse transform of the ray and of the origin; rotary takes:
    ;       esi - ptr to points (normals), each coefficient as dword
    ;       edi - ptr to rotated points (normals)
    ;       ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
    ;       ecx - number of points (normals)
       lea    esi,.nray
       lea    edi,.nrayr
       lea    ebx,.rmx
       mov    ecx,1
       call   rotary

       lea    esi,.origin
       lea    edi,.originr
       lea    ebx,.rmx
       mov    ecx,1
       call   rotary

       xor      ecx,ecx           ; ecx = index of the current box
       mov      ebx,aabb1         ; ebx = pointer to the current box
       xor      eax,eax           ; eax = result mask
       rcpps    xmm7,.nrayr       ; approximate 1/direction
       movaps   .dirfrac,xmm7

    .nx_aabb:
       movaps   xmm5,[ebx]
       movaps   xmm6,[ebx]
       minps    xmm5,[the_zero]
       maxps    xmm6,[the_zero]
 ;  xmm5 - lb corner of AABB with minimal coordinates
 ;  xmm6 - rt corner of AABB with maximum coordinates
       subps    xmm5,.originr
       subps    xmm6,.originr
       mulps    xmm5,.dirfrac     ; xmm5 - tx1, ty1
       mulps    xmm6,.dirfrac     ; xmm6 - tx2, ty2
       movaps   xmm1,xmm6
       movaps   xmm2,xmm6

       minps    xmm1,xmm5         ; per axis entry distance
       maxps    xmm2,xmm5         ; per axis exit distance

       movaps   xmm5,xmm1
       movaps   xmm6,xmm2
       shufps   xmm5,xmm5,11100001b  ; swap lanes 0 and 1
       shufps   xmm6,xmm6,11100001b
       ; only lanes 0 and 1 (x and y) take part in the test below
       maxss    xmm1,xmm5  ;t min
       minss    xmm2,xmm6  ;t max
       comiss   xmm2,xmm1
       jb       .no_inter         ; tmax < tmin -> ray misses this box
    .yes:
       bts      eax,ecx
     .no_inter:
       add      ebx,16
       inc      ecx
       cmp      ecx,8
       jne      .nx_aabb

; out: eax - bit mask
       add      esp,160
       pop      ebp
ret
end if
108
 
8719 leency 109
reverse_mx_3x3:
;---------------------------------------------------------------
; Inverse of a 3x3 float matrix: every output element is a 2x2
; cofactor divided by the determinant.
; in:  esi - source matrix           (9 dwords, m[n] at esi+4*n)
;      edi - desired reversed matrix (9 dwords)
; The FPU is reinitialised (fninit). Executes int3 when the stored
; determinant has the bit pattern of +0.0 (integer compare, so
; -0.0 is not caught).
;---------------------------------------------------------------

  push  ebp
  mov   ebp,esp
  sub   esp,4
  .det  equ  ebp-4

  fninit

  ; --- determinant (rule of Sarrus) ---
  fld   dword[esi]
  fmul  dword[esi+16]
  fmul  dword[esi+32]          ;   m0*m4*m8
  fld   dword[esi+12]
  fmul  dword[esi+28]
  fmul  dword[esi+8]
  faddp                        ; + m3*m7*m2
  fld   dword[esi+24]
  fmul  dword[esi+4]
  fmul  dword[esi+20]
  faddp                        ; + m6*m1*m5
  fld   dword[esi]
  fmul  dword[esi+28]
  fmul  dword[esi+20]
  fsubp                        ; - m0*m7*m5
  fld   dword[esi+24]
  fmul  dword[esi+16]
  fmul  dword[esi+8]
  fsubp                        ; - m6*m4*m2
  fld   dword[esi+12]
  fmul  dword[esi+4]
  fmul  dword[esi+32]
  fsubp                        ; - m3*m1*m8
  fstp  dword[.det]
  cmp   dword[.det],0
  jne   @f
  int3                         ; singular matrix: trap into the debugger
 @@:
 ; fld1
 ; fdiv dword[.det]
 ; fstp dword[.det]

  ; --- first row of the result ---
  fld   dword[esi+16]
  fmul  dword[esi+32]
  fld   dword[esi+20]
  fmul  dword[esi+28]
  fsubp                        ; m4*m8 - m5*m7
  fdiv  dword[.det]
  fstp  dword[edi]

  fld   dword[esi+8]
  fmul  dword[esi+28]
  fld   dword[esi+4]
  fmul  dword[esi+32]
  fsubp                        ; m2*m7 - m1*m8
  fdiv  dword[.det]
  fstp  dword[edi+4]

  fld   dword[esi+4]
  fmul  dword[esi+20]
  fld   dword[esi+8]
  fmul  dword[esi+16]
  fsubp                        ; m1*m5 - m2*m4
  fdiv  dword[.det]
  fstp  dword[edi+8]

  ; --- second row ---
  fld   dword[esi+20]
  fmul  dword[esi+24]
  fld   dword[esi+12]
  fmul  dword[esi+32]
  fsubp                        ; m5*m6 - m3*m8
  fdiv  dword[.det]
  fstp  dword[edi+12]

  fld   dword[esi]
  fmul  dword[esi+32]
  fld   dword[esi+8]
  fmul  dword[esi+24]
  fsubp                        ; m0*m8 - m2*m6
  fdiv  dword[.det]
  fstp  dword[edi+16]

  fld   dword[esi+8]
  fmul  dword[esi+12]
  fld   dword[esi]
  fmul  dword[esi+20]
  fsubp                        ; m2*m3 - m0*m5
  fdiv  dword[.det]
  fstp  dword[edi+20]

  ; --- third row ---
  fld   dword[esi+12]
  fmul  dword[esi+28]
  fld   dword[esi+16]
  fmul  dword[esi+24]
  fsubp                        ; m3*m7 - m4*m6
  fdiv  dword[.det]
  fstp  dword[edi+24]

  fld   dword[esi+4]
  fmul  dword[esi+24]
  fld   dword[esi]
  fmul  dword[esi+28]
  fsubp                        ; m1*m6 - m0*m7
  fdiv  dword[.det]
  fstp  dword[edi+28]

  fld   dword[esi]
  fmul  dword[esi+16]
  fld   dword[esi+4]
  fmul  dword[esi+12]
  fsubp                        ; m0*m4 - m1*m3
  fdiv  dword[.det]
  fstp  dword[edi+32]

  leave                        ; mov esp,ebp / pop ebp
ret
9237 leency 239
; vector   - triple float dword coordinate
; (both input points are read here as three float dwords as well)
;----------------------in: --------------------------------
;------------------------ esi - pointer to 1st 3d point ---
;------------------------ edi - pointer to 2nd 3d point ---
;------------------------ ebx - pointer to result vector --
;---------------------- out : none ------------------------
; result = 2nd point - 1st point, component by component

make_vector_r:
   if Ext < SSE
        ; x87 variant, one component at a time
        fninit
        fld     dword[edi+vec_x]
        fsub    dword[esi+vec_x]
        fstp    dword[ebx+vec_x]

        fld     dword[edi+vec_y]
        fsub    dword[esi+vec_y]
        fstp    dword[ebx+vec_y]

        fld     dword[edi+vec_z]
        fsub    dword[esi+vec_z]
        fstp    dword[ebx+vec_z]
    else
        ; SSE variant: loads 16 bytes from each source, stores 12
        movups  xmm0,[esi]
        movups  xmm1,[edi]
        subps   xmm1,xmm0
        movlps  [ebx+vec_x],xmm1     ; x and y
        movhlps xmm1,xmm1            ; bring z down to lane 0
        movss   [ebx+vec_z],xmm1
     end if

ret
271
;---------------------- in: -------------------------------
;--------------------------- esi - pointer to 1st vector --
;--------------------------- edi - pointer to 2nd vector --
;--------------------------- ebx - pointer to result vector
;---------------------- out : none
; result = 1st x 2nd, computed on the x87 (reinitialised by fninit).
; Components are stored as soon as they are ready, x first.
cross_product:
        fninit

        ; x = y1*z2 - z1*y2
        fld     dword [esi+vec_y]
        fmul    dword [edi+vec_z]
        fld     dword [esi+vec_z]
        fmul    dword [edi+vec_y]
        fsubp
        fstp    dword [ebx+vec_x]

        ; y = z1*x2 - x1*z2
        fld     dword [esi+vec_z]
        fmul    dword [edi+vec_x]
        fld     dword [esi+vec_x]
        fmul    dword [edi+vec_z]
        fsubp
        fstp    dword [ebx+vec_y]

        ; z = x1*y2 - y1*x2
        fld     dword [esi+vec_x]
        fmul    dword [edi+vec_y]
        fld     dword [esi+vec_y]
        fmul    dword [edi+vec_x]
        fsubp
        fstp    dword [ebx+vec_z]
ret
9237 leency 299
cross_aligned:
; SSE cross product, params as for cross_product:
;   esi - pointer to 1st vector, edi - pointer to 2nd vector,
;   ebx - pointer to result vector
; All three are accessed with movaps as full 16 byte operands, so
; they have to be 16 byte aligned. Uses xmm0-xmm3.
      movaps  xmm0,[esi]
      movaps  xmm2,[edi]
      movaps  xmm1,xmm0
      movaps  xmm3,xmm2
      shufps  xmm0,xmm0,00001001b     ; (y1, z1, x1, x1)
      shufps  xmm1,xmm1,00010010b     ; (z1, x1, y1, x1)
      shufps  xmm2,xmm2,00010010b     ; (z2, x2, y2, x2)
      shufps  xmm3,xmm3,00001001b     ; (y2, z2, x2, x2)
      mulps   xmm0,xmm2               ; (y1*z2, z1*x2, x1*y2, x1*x2)
      mulps   xmm1,xmm3               ; (z1*y2, x1*z2, y1*x2, x1*x2)
      subps   xmm0,xmm1               ; cross product, 4th lane = x1*x2 - x1*x2
      movaps  [ebx],xmm0
ret
8719 leency 314
;----------------------- in: ------------------------------
;---------------------------- edi - pointer to vector -----
;----------------------- out : none
; Scales the vector at edi (three float dwords) to unit length in
; place. A zero length vector is left as a zero vector in both
; variants.
normalize_vector:
if Ext >= SSE2
        ; SSE variant; uses xmm0-xmm2. rsqrtps is an approximation.
        movups  xmm0,[edi]
        andps   xmm0,[zero_hgst_dd]  ; presumably clears the unused 4th lane
        movaps  xmm1,xmm0            ; xmm1 = vector being normalized
        mulps   xmm0,xmm0            ; squares
        movhlps xmm2,xmm0
        addps   xmm0,xmm2            ; lane0 = x*x+z*z, lane1 = y*y+w*w
        movaps  xmm2,xmm0
        shufps  xmm2,xmm2,11100101b  ; copy lane 1 into lane 0
        addps   xmm0,xmm2            ; lane0 = sum of all squares
        shufps  xmm0,xmm0,0          ; broadcast the squared length
;        haddps  xmm0,xmm0
;        haddps  xmm0,xmm0
        xorps   xmm2,xmm2
        cmpneqps xmm2,xmm0           ; all ones where squared length <> 0
        rsqrtps xmm0,xmm0            ; ~1/length (infinity for length 0)
        andps   xmm0,xmm2            ; length 0 -> factor 0 instead of
                                     ; infinity, so 0*inf = NaN cannot occur
        mulps   xmm0,xmm1
        movlps  [edi],xmm0           ; x and y
        movhlps xmm0,xmm0
        movss   [edi+8],xmm0         ; z
else
        ; x87 variant; changes ax
        fninit
        fld     dword [edi+vec_x]
        fmul    st, st
        fld     dword [edi+vec_y]
        fmul    st, st
        fld     dword [edi+vec_z]
        fmul    st, st
        faddp   st1, st
        faddp   st1, st
        fsqrt                        ; st0 = length

        ftst                         ; compare the length with 0.0
        fstsw ax
        sahf
        jnz     @f

        ; zero length: store it (0.0) into every component
        fst     dword [edi+vec_x]
        fst     dword [edi+vec_y]
        fstp    dword [edi+vec_z]
        ret
      @@:
        fld st                       ; three copies of the length,
        fld st                       ; one per division
        fdivr dword [edi+vec_x]      ; st0 = component / length
        fstp  dword [edi+vec_x]
        fdivr dword [edi+vec_y]
        fstp  dword [edi+vec_y]
        fdivr dword [edi+vec_z]
        fstp  dword [edi+vec_z]
end if
ret
369
;------------------in: -------------------------
;------------------ esi - pointer to 1st vector
;------------------ edi - pointer to 2nd vector
;------------------out: ------------------------
;------------------ st0 - dot-product
; The FPU is reinitialised first, so st0 is the only occupied
; register on return.
dot_product:
        fninit
; Disabled SSE3 variant, kept for reference:
;if Ext >=SSE3
;        movups  xmm0,[esi]
;        movups  xmm1,[edi]
;        andps   xmm0,[zero_hgst_dd]
;        mulps   xmm0,xmm1
;        haddps  xmm0,xmm0
;        haddps  xmm0,xmm0
;        movss   [esp-4],xmm0
;        fld     dword[esp-4]
;else
        fld     dword [esi+vec_x]
        fmul    dword [edi+vec_x]       ; x1*x2
        fld     dword [esi+vec_y]
        fmul    dword [edi+vec_y]       ; y1*y2
        fld     dword [esi+vec_z]
        fmul    dword [edi+vec_z]       ; z1*z2
        faddp                           ; y1*y2 + z1*z2
        faddp                           ; x1*x2 + (y1*y2 + z1*z2)
;end if
ret
396
 
397
; DOS version Coded by Mikolaj Felix aka Majuma
; mfelix@polbox.com
; www.majuma.xt.pl
; into FASM translation by Macgub
;
; Fills sin_tab and cos_tab with 256 float dwords each. The angle
; starts at 0 and grows by [piD128] after every entry.
; in/out: none. Changes eax, ecx, esi, edi and the FPU state.
init_sincos_tab:
.counter   equ  dword [ebp-4]  ; cur angle

     push       ebp
     mov        ebp,esp

     xor        eax,eax
     push       eax            ; init .counter (bit pattern of 0.0)
     mov        esi,sin_tab
     mov        edi,cos_tab
     mov        ecx,256        ; number of table entries
     fninit

     fld        .counter       ; st0 = running angle
  @@:
     fld        st             ; working copy of the angle
     fsincos                   ; st0 = cosine, st1 = sine
     fstp       dword [edi]    ; cosine -> cos_tab entry
     fstp       dword [esi]    ; sine   -> sin_tab entry
;     fadd       [piD180]
     fadd       [piD128]       ; next angle
     add        esi,4
     add        edi,4
     loop       @b
     ffree      st             ; mark the angle register as empty

     leave                     ; mov esp,ebp / pop ebp
ret
430
;------
; esi - offset (pointer) to angles, edi offset to 3x3 matrix
; The three angles are words used directly as indexes into sin_tab
; and cos_tab (no range check is done here). The matrix is written
; as 9 float dwords. Changes eax, ebx, edx and reinitialises the FPU.
make_rotation_matrix:
   .sinx   equ dword[ebp-4]
   .cosx   equ dword[ebp-8]
   .siny   equ dword[ebp-12]
   .cosy   equ dword[ebp-16]
   .sinz   equ dword[ebp-20]
   .cosz   equ dword[ebp-24]
     push      ebp
     mov       ebp,esp
     sub       esp,24

     ; fetch sine and cosine of the x angle
     movzx     ebx,word[esi]
     shl       ebx,2                   ; table entries are dwords
     mov       eax,dword[sin_tab+ebx]
     mov       edx,dword[cos_tab+ebx]
     mov       .sinx,eax
     mov       .cosx,edx

     ; ... of the y angle
     movzx     ebx,word[esi+2]
     shl       ebx,2
     mov       eax,dword[sin_tab+ebx]
     mov       edx,dword[cos_tab+ebx]
     mov       .siny,eax
     mov       .cosy,edx

     ; ... of the z angle
     movzx     ebx,word[esi+4]
     shl       ebx,2
     mov       eax,dword[sin_tab+ebx]
     mov       edx,dword[cos_tab+ebx]
     mov       .sinz,eax
     mov       .cosz,edx

     fninit

     ; ---- dwords 0..2 ----
     fld       .cosy
     fmul      .cosz
     fstp      dword[edi]              ; cosy*cosz

     fld       .cosy
     fmul      .sinz
     fstp      dword[edi+4]            ; cosy*sinz

     fld       .siny
     fchs
     fstp      dword[edi+8]            ; -siny

     ; ---- dwords 3..5 ----
     fld       .sinx
     fmul      .siny
     fmul      .cosz
     fld       .cosx
     fmul      .sinz
     fsubp
     fstp      dword[edi+12]           ; sinx*siny*cosz - cosx*sinz

     fld       .sinx
     fmul      .siny
     fmul      .sinz
     fld       .cosx
     fmul      .cosz
     faddp
     fstp      dword[edi+16]           ; sinx*siny*sinz + cosx*cosz

     fld       .cosy
     fmul      .sinx
     fstp      dword[edi+20]           ; cosy*sinx

     ; ---- dwords 6..8 ----
     fld       .cosx
     fmul      .siny
     fmul      .cosz
     fld       .sinx
     fmul      .sinz
     faddp
     fstp      dword[edi+24]           ; cosx*siny*cosz + sinx*sinz

     fld       .cosx
     fmul      .siny
     fmul      .sinz
     fld       .sinx
     fmul      .cosz
     fsubp
     fstp      dword[edi+28]           ; cosx*siny*sinz - sinx*cosz

     fld       .cosx
     fmul      .cosy
     fstp      dword[edi+32]           ; cosx*cosy

     leave                             ; mov esp,ebp / pop ebp
ret
522
;---------------------
;  in:  esi - ptr to points (normals), each point (normal) coefficient as dword
;       edi - ptr to rotated points (normals)
;       ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
;       ecx - number of points (normals), must not be 0
;  out: every point multiplied by the matrix is stored at edi
;       (12 bytes per point), followed by one dword -1.
;       esi and edi are advanced past the processed points, ecx = 0.
rotary:
; The x87 variant is for builds without SSE, as in the other routines
; of this file (the condition used to be a bare `if Ext`, which picked
; the x87 code for every Ext <> 0 and the SSE code for Ext = 0).
if Ext < SSE
    fninit
 .again:

    ; x' = x*m[0] + y*m[3] + z*m[6]
    fld     dword[esi]
    fmul    dword[ebx]
    fld     dword[esi+4]
    fmul    dword[ebx+12]
    faddp
    fld     dword[esi+8]
    fmul    dword[ebx+24]
    faddp
    fstp    dword[edi]

    ; y' = y*m[4] + x*m[1] + z*m[7]
    fld     dword[esi+4]
    fmul    dword[ebx+16]
    fld     dword[esi]
    fmul    dword[ebx+4]
    faddp
    fld     dword[esi+8]
    fmul    dword[ebx+28]
    faddp
    fstp    dword[edi+4]

    ; z' = z*m[8] + (x*m[2] + y*m[5])
    fld     dword[esi+8]
    fmul    dword[ebx+32]
    fld     dword[esi]
    fmul    dword[ebx+8]
    fld     dword[esi+4]
    fmul    dword[ebx+20]
    faddp
    faddp
    fstp    dword[edi+8]

    add     esi,12
    add     edi,12
    loop    .again
    mov     [edi],dword -1          ; terminator after the last point
else
;   Copyright (C) 1999-2001  Brian Paul
;   Copyright (C)            Maciej Guba
; SSE variant, uses xmm0-xmm2 and xmm4-xmm6. Each matrix row is loaded
; as 16 bytes, so the load at ebx+24 reads 4 bytes past the 36 byte
; matrix. Each result is stored as 16 bytes; the extra dword is
; overwritten by the next point or by the -1 terminator.
;align 32
    movups   xmm4,[ebx]
 ;   lddqu    xmm4,[ebx]   ; I tried sse3 :D
    movups   xmm5,[ebx+12]
    movups   xmm6,[ebx+24]
;align 32
  .again:
    movss    xmm0,dword[esi]
    shufps   xmm0,xmm0,0            ; x in every lane
    mulps    xmm0,xmm4

    movss    xmm1,dword[esi+4]
    shufps   xmm1,xmm1,0            ; y in every lane
    mulps    xmm1,xmm5

    movss    xmm2,dword[esi+8]
    shufps   xmm2,xmm2,0            ; z in every lane
    mulps    xmm2,xmm6

    addps    xmm0,xmm1
    addps    xmm0,xmm2

    movups   [edi],xmm0

    add      esi,12
    add      edi,12
;    dec      ecx
;    jne      .again
   loop     .again
    mov      [edi],dword -1         ; terminator after the last point
end if
ret
609
;----------------------------------------------
;  esi - pointer to 3x3 matrix
; Multiplies all 9 float dwords of the matrix by [rsscale], in place.
add_scale_to_matrix:
  if Ext>SSE
     ; SSE variant, uses xmm0-xmm3: 4 + 4 + 1 elements
     movss   xmm0,[rsscale]
     shufps  xmm0,xmm0,0           ; scale factor in every lane
     movups  xmm1,[esi]
     movups  xmm2,[esi+16]
     movss   xmm3,[esi+32]
     mulps   xmm1,xmm0
     mulps   xmm2,xmm0
     mulss   xmm3,xmm0
     movups  [esi],xmm1
     movups  [esi+16],xmm2
     movss   [esi+32],xmm3
  else
     ; x87 variant: the factor stays in st1 while elements pass through st0
     fninit
     fld     [rsscale]
     repeat 8                      ; elements at esi+0 .. esi+28
       fld     dword[esi+(%-1)*4]
       fmul    st,st1
       fstp    dword[esi+(%-1)*4]
     end repeat
     fld     dword[esi+32]         ; last element: multiply and pop the factor
     fmulp   st1,st
     fstp    dword[esi+32]
   end if
ret
659
 
660
;in   esi - offset to 3d points  (point as 3 dwords float)
;     edi - offset to 2d points  ( as 3 words integer)
;     ecx - number of points (must not be 0)
; Converts every point to integer words and adds the vect_x / vect_y
; shift to x and y.
translate_points:  ; just convert into integer; z coord still needed
  ; The x87 conversion branch below is assembled whenever Ext < SSE2,
  ; so the FPU is initialised under the same condition (this used to be
  ; `Ext < SSE`, leaving the Ext = SSE build without fninit).
  if Ext < SSE2
    fninit
  else
 ;   movaps  xmm1,[vect_x]
  end if

  .again:
  if   0
    ; disabled: perspective projection relative to xobs/yobs/zobs
    fld    dword[esi+8]
 ;   fmul   [rsscale]
    fist   word[edi+4]

    fisub  [zobs]
    fchs

    fld    dword[esi]
;    fmul   [rsscale]
    fisub  [xobs]
    fimul  [zobs]
    fdiv   st0,st1

    fiadd  [xobs]
    fiadd  [vect_x]
    fistp  word[edi]

    fld    dword[esi+4]
;    fmul   [rsscale]
    fisub  [yobs]
    fimul  [zobs]
    fdivrp  ;   st0,st1

    fiadd  [yobs]
    fiadd  [vect_y]
    fistp  word[edi+2]
   end if
  if Ext>=SSE2
    ; uses xmm0 and eax; paddw takes a 16 byte memory operand at vect_x
    movups   xmm0,[esi]
    cvtps2dq xmm0,xmm0              ; floats -> dwords
    packssdw xmm0,xmm0              ; -> words x,y,z,w repeated twice
    paddw    xmm0,[vect_x]          ; adds the 8 words stored from vect_x on
    movd     [edi],xmm0             ; x and y
  ;  psrldq   xmm0,4
  ;  movd     eax,xmm0
    pextrw   eax,xmm0,6             ; second copy of z, i.e. z plus the word
                                    ; at vect_x+12 (presumably 0 - confirm)
    mov      [edi+4],ax
   else

   ; cvtps2dq xmm0,xmm0
   ; packsdw xmm0,xmm0
   ; movq     [edi]
    fld    dword[esi]
    fiadd  word[vect_x]
    fistp  word[edi]                ; x + vect_x
    fld    dword[esi+4]
    fiadd  [vect_y]
    fistp  word[edi+2]              ; y + vect_y
    fld    dword[esi+8]
    fistp  word[edi+4]              ; z, no shift
  end if

    add    esi,12                   ; next source point (3 dwords)
    add    edi,6                    ; next destination point (3 words)
    loop    .again

ret