Subversion Repositories Kolibri OS

Rev

Rev 8719 | Rev 9512 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8719 leency 1
; Byte offsets of the components of a 3d point (word per coordinate)
x3d     equ 0
y3d     equ 2
z3d     equ 4
; Byte offsets of the components of a vector (dword per coordinate)
vec_x   equ 0
vec_y   equ 4
vec_z   equ 8
9237 leency 7
 
8
if 0  ; Ext >= SSE3
;-----------------------------------------------------------------------
; calc_bounding_box - tests a ray against eight boxes stored at aabb1.
; (Excluded from the build by the enclosing "if 0".)
; in:
;      xmm0  -  normal vector of ray
;      xmm1  -  light origin
; out:
;      eax - axis aligned bounding boxes bit mask
;            (bit n set when box n, n = 0..7, passes the slab test)
; uses:     matrix, rsscale, vect_x, the_zero, aabb1 (defined elsewhere),
;           reverse_mx_3x3, rotary
; clobbers: ebx, ecx, esi, edi, xmm0-xmm2, xmm5-xmm7, plus whatever
;           reverse_mx_3x3 and rotary clobber
;-----------------------------------------------------------------------
calc_bounding_box:

       .rmx     equ [ebp-36]     ; inverted 3x3 matrix (36 bytes)
       .nray    equ [ebp-64]     ; scaled ray direction
       .origin  equ [ebp-80]     ; ray origin relative to vect_x/vect_y
       .dirfrac equ [ebp-96]     ; approximate 1/direction (rcpps)
       .nrayr   equ [ebp-112]    ; ray direction after reverse transform
       .originr equ [ebp-128]    ; ray origin after reverse transform
       .tmin    equ [ebp-132]
       .tmax    equ [ebp-136]


       push    ebp
       mov     ebp,esp
       and     ebp,-16           ; align frame base so the movaps locals are legal
       sub     esp,160

       movss     xmm5,[rsscale]
       shufps    xmm5,xmm5,0     ; broadcast rsscale to every lane
                                 ; (was "xmm5,xmm1": lanes 2,3 received origin.x)
       movd      xmm2,[vect_x]   ; two consecutive words starting at vect_x
       punpcklwd xmm2,[the_zero] ; zero-extend the words to dwords
       cvtdq2ps  xmm2,xmm2
       subps     xmm1,xmm2
       movaps    .origin,xmm1
       mulps     xmm0,xmm5
       movaps    .nray,xmm0

       mov     esi,matrix
       lea     edi,.rmx
       call    reverse_mx_3x3

; rotary interface:
;  in:  esi - ptr to points (normals), each coefficient as dword
;       edi - ptr to rotated points (normals)
;       ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
;       ecx - number of points (normals)

    ; reverse transform of the ray direction
       lea    esi,.nray
       lea    edi,.nrayr
       lea    ebx,.rmx
       mov    ecx,1
       call   rotary

    ; reverse transform of the ray origin
       lea    esi,.origin
       lea    edi,.originr
       lea    ebx,.rmx
       mov    ecx,1
       call   rotary

       xor      ecx,ecx          ; ecx - index of the current box
       mov      ebx,aabb1        ; ebx - ptr to the current box (16 bytes each)
       xor      eax,eax          ; eax - result mask
       rcpps    xmm7,.nrayr
       movaps   .dirfrac,xmm7

    .nx_aabb:
       movaps   xmm5,[ebx]
       movaps   xmm6,[ebx]
       minps    xmm5,[the_zero]
       maxps    xmm6,[the_zero]
 ;  xmm5 - lb corner of AABB with minimal coordinates
 ;  xmm6 - rt corner of AABB with maximum coordinates
       subps    xmm5,.originr
       subps    xmm6,.originr
       mulps    xmm5,.dirfrac   ; xmm5 - tx1, ty1
       mulps    xmm6,.dirfrac   ; xmm6 - tx2, ty2
       movaps   xmm1,xmm6
       movaps   xmm2,xmm6

       minps    xmm1,xmm5       ; per-axis entry distance
       maxps    xmm2,xmm5       ; per-axis exit distance

       movaps   xmm5,xmm1
       movaps   xmm6,xmm2
       shufps   xmm5,xmm5,11100001b   ; swap lanes 0 and 1
       shufps   xmm6,xmm6,11100001b
 ; NOTE(review): only lanes 0 and 1 (x, y) take part in the comparison
 ; below; the z slab is not tested - confirm this is intended.
       maxss    xmm1,xmm5  ;t min
       minss    xmm2,xmm6  ;t max
       comiss   xmm2,xmm1
       jb       .no_inter       ; t max < t min -> ray misses this box
    .yes:
       bts      eax,ecx
     .no_inter:
       add      ebx,16
       inc      ecx
       cmp      ecx,8
       jne      .nx_aabb

; out: eax - bit mask
       add      esp,160
       pop      ebp
ret
end if
108
 
8719 leency 109
reverse_mx_3x3:
;-----------------------------------------------------------------------
; Inverts a 3x3 matrix of dword floats: every output element is the
; matching cofactor (adjugate entry) divided by the determinant.
; in:  esi - source matrix      (9 dwords, row after row)
;      edi - desired reversed (inverted) matrix (9 dwords)
;            source is re-read after each store, so esi must not equal edi
; out: none (registers esi/edi unchanged, FPU reinitialised by fninit)
; A zero determinant (+0.0 or -0.0) raises int3.
;-----------------------------------------------------------------------

  push  ebp
  mov   ebp,esp
  sub   esp,4
  .det  equ  ebp-4              ; local: determinant as dword float

  fninit
  ; det =  m0*m4*m8 + m3*m7*m2 + m6*m1*m5
  ;      - m0*m7*m5 - m6*m4*m2 - m3*m1*m8
  fld  dword[esi]
  fmul dword[esi+16]
  fmul dword[esi+32]
  fld  dword[esi+12]
  fmul dword[esi+28]
  fmul dword[esi+8]
  faddp
  fld  dword[esi+24]
  fmul dword[esi+4]
  fmul dword[esi+20]
  faddp
  fld  dword[esi]
  fmul dword[esi+28]
  fmul dword[esi+20]
  fchs
  faddp
  fld  dword[esi+24]
  fmul dword[esi+16]
  fmul dword[esi+8]
  fchs
  faddp
  fld  dword[esi+12]
  fmul dword[esi+4]
  fmul dword[esi+32]
  fchs
  faddp
  fstp dword[.det]
  ; Singular matrix guard. Only the magnitude bits are tested so that a
  ; determinant of -0.0 (bit pattern 80000000h) is trapped as well.
  test dword[.det],0x7FFFFFFF
  jnz  .nonsingular
  int3
 .nonsingular:
 ; fld1
 ; fdiv dword[.det]
 ; fstp dword[.det]

  ; out[0] = (m4*m8 - m5*m7) / det
  fld  dword[esi+16]
  fmul dword[esi+32]
  fld  dword[esi+20]
  fmul dword[esi+28]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi]

  ; out[1] = (m2*m7 - m1*m8) / det
  fld  dword[esi+8]
  fmul dword[esi+28]
  fld  dword[esi+4]
  fmul dword[esi+32]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+4]

  ; out[2] = (m1*m5 - m2*m4) / det
  fld  dword[esi+4]
  fmul dword[esi+20]
  fld  dword[esi+8]
  fmul dword[esi+16]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+8]

  ; out[3] = (m5*m6 - m3*m8) / det
  fld  dword[esi+20]
  fmul dword[esi+24]
  fld  dword[esi+12]
  fmul dword[esi+32]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+12]

  ; out[4] = (m0*m8 - m2*m6) / det
  fld  dword[esi]
  fmul dword[esi+32]
  fld  dword[esi+8]
  fmul dword[esi+24]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+16]

  ; out[5] = (m2*m3 - m0*m5) / det
  fld  dword[esi+8]
  fmul dword[esi+12]
  fld  dword[esi]
  fmul dword[esi+20]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+20]

  ; out[6] = (m3*m7 - m4*m6) / det
  fld  dword[esi+12]
  fmul dword[esi+28]
  fld  dword[esi+16]
  fmul dword[esi+24]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+24]

  ; out[7] = (m1*m6 - m0*m7) / det
  fld  dword[esi+4]
  fmul dword[esi+24]
  fld  dword[esi]
  fmul dword[esi+28]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+28]

  ; out[8] = (m0*m4 - m1*m3) / det
  fld  dword[esi]
  fmul dword[esi+16]
  fld  dword[esi+4]
  fmul dword[esi+12]
  fchs
  faddp
  fdiv dword[.det]
  fstp dword[edi+32]


  mov  esp,ebp
  pop  ebp
ret
9237 leency 239
; 3d point - triple integer word coordinate
; vector   - triple float dword coordinate
;----------------------------------------------------------
; make_vector_r: result = second - first, component by component.
; NOTE: despite the "3d point" wording, this routine reads each
;       operand as three dword floats.
; in:   esi - pointer to 1st operand (subtrahend)
;       edi - pointer to 2nd operand (minuend)
;       ebx - pointer to result vector
; out:  none
; The SSE branch loads 16 bytes from [esi] and [edi] (4 bytes past the
; third component) and leaves its work in xmm0/xmm1.
;----------------------------------------------------------

make_vector_r:
   if Ext < SSE2
        fninit
        fld     dword[esi]                ; first.x
        fsubr   dword[edi]                ; st0 = second.x - first.x
        fstp    dword[ebx+vec_x]

        fld     dword[esi+4]
        fsubr   dword[edi+4]
        fstp    dword[ebx+vec_y]

        fld     dword[esi+8]
        fsubr   dword[edi+8]
        fstp    dword[ebx+vec_z]
    else
        movups  xmm1,[edi]
        movups  xmm0,[esi]
        subps   xmm1,xmm0                 ; xmm1 = second - first
        movlps  [ebx],xmm1                ; store x, y
        movhlps xmm1,xmm1                 ; bring z down to lane 0
        movss   [ebx+8],xmm1              ; store z
     end if

ret
271
;----------------------------------------------------------
; cross_product: result = first x second (x87 implementation)
; in:   esi - pointer to 1st vector
;       edi - pointer to 2nd vector
;       ebx - pointer to result vector
; out:  none
; Components are stored one by one (x, then y, then z) while the
; inputs are still being read.
;----------------------------------------------------------
cross_product:
        fninit
        ; x = a.y*b.z - a.z*b.y
        fld     dword [esi+vec_z]
        fmul    dword [edi+vec_y]
        fld     dword [esi+vec_y]
        fmul    dword [edi+vec_z]
        fsubrp                      ; st1 = st0 - st1, pop
        fstp    dword [ebx+vec_x]

        ; y = a.z*b.x - a.x*b.z
        fld     dword [esi+vec_x]
        fmul    dword [edi+vec_z]
        fld     dword [esi+vec_z]
        fmul    dword [edi+vec_x]
        fsubrp
        fstp    dword [ebx+vec_y]

        ; z = a.x*b.y - a.y*b.x
        fld     dword [esi+vec_y]
        fmul    dword [edi+vec_x]
        fld     dword [esi+vec_x]
        fmul    dword [edi+vec_y]
        fsubrp
        fstp    dword [ebx+vec_z]
ret
9237 leency 299
;----------------------------------------------------------
; cross_aligned: SSE cross product, result = first x second.
; in:   esi - pointer to 1st vector (read with movaps)
;       edi - pointer to 2nd vector (read with movaps)
;       ebx - pointer to result     (written with movaps, 16 bytes)
; out:  none; uses xmm0-xmm3
; Lane 3 of the result is a.x*b.x - a.x*b.x.
;----------------------------------------------------------
cross_aligned:
      movaps  xmm0,[esi]
      movaps  xmm2,[edi]
      movaps  xmm1,xmm0
      movaps  xmm3,xmm2
      shufps  xmm0,xmm0,0x09        ; a: (y, z, x, x)
      shufps  xmm2,xmm2,0x12        ; b: (z, x, y, x)
      shufps  xmm1,xmm1,0x12        ; a: (z, x, y, x)
      shufps  xmm3,xmm3,0x09        ; b: (y, z, x, x)
      mulps   xmm0,xmm2
      mulps   xmm1,xmm3
      subps   xmm0,xmm1
      movaps  [ebx],xmm0
ret
8719 leency 313
;----------------------------------------------------------
; normalize_vector: scales a vector of three dword floats in place
; to unit length. A zero-length vector is stored back as zeros in
; both build variants.
; in:   edi - pointer to vector
; out:  none
; SSE variant: uses xmm0-xmm2, loads 16 bytes from [edi] and relies on
;              the approximate rsqrtps instruction.
; FPU variant: uses ax (fstsw) and the x87 stack.
;----------------------------------------------------------
normalize_vector:
if Ext >= SSE2
        movups  xmm0,[edi]
        andps   xmm0,[zero_hgst_dd]   ; presumably clears the 4th lane - mask is defined elsewhere
        movaps  xmm1,xmm0             ; xmm1 keeps the masked vector
        mulps   xmm0,xmm0             ; squares
        movhlps xmm2,xmm0
        addps   xmm0,xmm2             ; lane0 = x^2 + z^2, lane1 = y^2 + w^2
        movaps  xmm2,xmm0
        shufps  xmm2,xmm2,11100101b   ; lane0 <- lane1
        addps   xmm0,xmm2             ; lane0 = x^2 + y^2 + z^2 (+ w^2)
        shufps  xmm0,xmm0,0           ; broadcast squared length
;        haddps  xmm0,xmm0
;        haddps  xmm0,xmm0
        xorps   xmm2,xmm2
        cmpps   xmm2,xmm0,4           ; cmpneqps: all ones where length^2 <> 0
        rsqrtps xmm0,xmm0             ; ~ 1/length (infinity for length 0)
        andps   xmm0,xmm2             ; zero length -> scale 0 instead of inf*0 = NaN
        mulps   xmm0,xmm1
        movlps  [edi],xmm0
        movhlps xmm0,xmm0
        movss   [edi+8],xmm0
else

        fninit
        fld     dword [edi+vec_x]
        fmul    st, st
        fld     dword [edi+vec_y]
        fmul    st, st
        fld     dword [edi+vec_z]
        fmul    st, st
        faddp   st1, st
        faddp   st1, st
        fsqrt                         ; st0 = length

        ftst                          ; compare length with 0.0
        fstsw ax
        sahf                          ; C3 -> ZF
        jnz     .divide

        ; length is zero: store it (0.0) into all three components
        fst     dword [edi+vec_x]
        fst     dword [edi+vec_y]
        fstp    dword [edi+vec_z]
        ret
      .divide:
        fld st                        ; three copies of length, one per component
        fld st
        fdivr dword [edi+vec_x]       ; st0 = x / length
        fstp  dword [edi+vec_x]
        fdivr dword [edi+vec_y]
        fstp  dword [edi+vec_y]
        fdivr dword [edi+vec_z]
        fstp  dword [edi+vec_z]
end if
ret
368
;-----------------------------------------------
; dot_product: scalar product of two vectors of dword floats
; in:   esi - pointer to 1st vector
;       edi - pointer to 2nd vector
; out:  st0 - dot-product, computed as x*x' + (y*y' + z*z')
;-----------------------------------------------
dot_product:
        fninit
;if Ext >=SSE3
;        movups  xmm0,[esi]
;        movups  xmm1,[edi]
;        andps   xmm0,[zero_hgst_dd]
;        mulps   xmm0,xmm1
;        haddps  xmm0,xmm0
;        haddps  xmm0,xmm0
;        movss   [esp-4],xmm0
;        fld     dword[esp-4]
;else
        fld     dword [edi+vec_x]
        fmul    dword [esi+vec_x]
        fld     dword [edi+vec_y]
        fmul    dword [esi+vec_y]
        fld     dword [edi+vec_z]
        fmul    dword [esi+vec_z]
        faddp                       ; y*y' + z*z'
        faddp                       ; + x*x'
;end if
ret
395
 
396
; DOS version Coded by Mikolaj Felix aka Majuma
397
; mfelix@polbox.com
398
; www.majuma.xt.pl
399
; into FASM translation by Macgub
400
;-----------------------------------------------
; init_sincos_tab: fills sin_tab and cos_tab with 256 dword floats.
; Entry n holds sin / cos of the angle reached by starting at 0.0 and
; adding [piD128] n times.
; in:   none
; out:  none; uses eax, ecx, esi, edi and the x87 stack
;-----------------------------------------------
init_sincos_tab:
.counter   equ  dword [ebp-4]  ; start angle (0.0)

     push       ebp
     mov        ebp,esp

     xor        eax,eax
     push       eax            ; .counter = 0.0
     mov        esi,sin_tab
     mov        edi,cos_tab
     mov        ecx,256        ; number of table entries
     fninit

     fld        .counter       ; st0 = running angle
  .next_entry:
     fld        st             ; work on a copy of the angle
     fsincos                   ; st0 = cos, st1 = sin
     fstp       dword [edi]    ; cos_tab entry
     fstp       dword [esi]    ; sin_tab entry
;     fadd       [piD180]
     fadd       [piD128]       ; advance the angle
     add        esi,4
     add        edi,4
     loop       .next_entry
     ffree      st

     leave                     ; mov esp,ebp / pop ebp
ret
429
;-----------------------------------------------
; make_rotation_matrix: builds a 3x3 rotation matrix from three angles.
; in:   esi - offset (pointer) to angles: three words (x, y, z), each
;             used as an index into sin_tab / cos_tab
;       edi - offset to 3x3 matrix (9 dwords) to fill
; out:  none; uses eax, ebx, edx and the x87 stack
;
; Resulting matrix (s = sin, c = cos):
;   [edi+0 ] = cy*cz             [edi+4 ] = cy*sz             [edi+8 ] = -sy
;   [edi+12] = sx*sy*cz - cx*sz  [edi+16] = sx*sy*sz + cx*cz  [edi+20] = cy*sx
;   [edi+24] = cx*sy*cz + sx*sz  [edi+28] = cx*sy*sz - sx*cz  [edi+32] = cx*cy
;-----------------------------------------------
make_rotation_matrix:
   .sinx   equ dword[ebp-4]
   .cosx   equ dword[ebp-8]
   .siny   equ dword[ebp-12]
   .cosy   equ dword[ebp-16]
   .sinz   equ dword[ebp-20]
   .cosz   equ dword[ebp-24]
     push      ebp
     mov       ebp,esp
     sub       esp,24

     ; look up sin/cos of the x angle
     movzx     ebx,word[esi]
     shl       ebx,2                    ; table entries are dwords
     mov       eax,dword[sin_tab+ebx]
     mov       .sinx,eax
     mov       edx,dword[cos_tab+ebx]
     mov       .cosx,edx

     ; ... of the y angle
     movzx     ebx,word[esi+2]
     shl       ebx,2
     mov       eax,dword[sin_tab+ebx]
     mov       .siny,eax
     mov       edx,dword[cos_tab+ebx]
     mov       .cosy,edx

     ; ... of the z angle
     movzx     ebx,word[esi+4]
     shl       ebx,2
     mov       eax,dword[sin_tab+ebx]
     mov       .sinz,eax
     mov       edx,dword[cos_tab+ebx]
     mov       .cosz,edx

     fninit

     ; ---- first three dwords ----
     fld       .cosy
     fmul      .cosz
     fstp      dword[edi]

     fld       .cosy
     fmul      .sinz
     fstp      dword[edi+4]

     fld       .siny
     fchs
     fstp      dword[edi+8]

     ; ---- next three dwords ----
     fld       .sinx
     fmul      .siny
     fmul      .cosz
     fld       .cosx
     fmul      .sinz
     fsubp                              ; sx*sy*cz - cx*sz
     fstp      dword[edi+12]

     fld       .sinx
     fmul      .siny
     fmul      .sinz
     fld       .cosx
     fmul      .cosz
     faddp
     fstp      dword[edi+16]

     fld       .cosy
     fmul      .sinx
     fstp      dword[edi+20]

     ; ---- last three dwords ----
     fld       .cosx
     fmul      .siny
     fmul      .cosz
     fld       .sinx
     fmul      .sinz
     faddp
     fstp      dword[edi+24]

     fld       .cosx
     fmul      .siny
     fmul      .sinz
     fld       .sinx
     fmul      .cosz
     fsubp                              ; cx*sy*sz - sx*cz
     fstp      dword[edi+28]

     fld       .cosx
     fmul      .cosy
     fstp      dword[edi+32]

     mov       esp,ebp
     pop       ebp
ret
521
;---------------------
; rotary: multiplies points (normals) by a 3x3 matrix:
;         out = x*row0 + y*row1 + z*row2
;  in:  esi - ptr to points (normals), each point (normal) coefficient as dword
;       edi - ptr to rotated points (normals)
;       ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
;       ecx - number of points (normals); 0 is accepted and only the
;             terminator is written
;  out: a dword -1 is stored right after the last rotated point, so the
;       destination needs 4 bytes beyond ecx*12.
;       esi/edi are advanced past the processed points, ecx = 0.
;  The SSE variant reads 16 bytes at [ebx+24] (4 bytes past the matrix),
;  stores 16 bytes per point and uses xmm0-xmm2, xmm4-xmm6.
;
;  The SSE variant is selected with the same "Ext < SSE2" test as the
;  sibling routines; the previous bare "if Ext" assembled the SSE code
;  only when Ext was 0, i.e. when no extension was declared.
rotary:
if Ext < SSE2
    fninit
    test    ecx,ecx
    jz      .done                   ; nothing to rotate
 .again:

    ; out.x = x*m0 + y*m3 + z*m6
    fld     dword[esi]
    fmul    dword[ebx]
    fld     dword[esi+4]
    fmul    dword[ebx+12]
    faddp
    fld     dword[esi+8]
    fmul    dword[ebx+24]
    faddp
    fstp    dword[edi]


    ; out.y = y*m4 + x*m1 + z*m7
    fld     dword[esi+4]
    fmul    dword[ebx+16]
    fld     dword[esi]
    fmul    dword[ebx+4]
    faddp
    fld     dword[esi+8]
    fmul    dword[ebx+28]
    faddp
    fstp    dword[edi+4]


    ; out.z = z*m8 + (x*m2 + y*m5)
    fld     dword[esi+8]
    fmul    dword[ebx+32]
    fld     dword[esi]
    fmul    dword[ebx+8]
    fld     dword[esi+4]
    fmul    dword[ebx+20]
    faddp
    faddp
    fstp    dword[edi+8]


    add     esi,12
    add     edi,12
    dec     ecx
    jnz     .again
 .done:
    mov     [edi],dword -1          ; terminator after the last point
else
;   Copyright (C) 1999-2001  Brian Paul
;   Copyright (C)            Maciej Guba
;align 32
    movups   xmm4,[ebx]             ; row 0
 ;   lddqu    xmm4,[ebx]   ; I tried sse3 :D
    movups   xmm5,[ebx+12]          ; row 1
    movups   xmm6,[ebx+24]          ; row 2
    test     ecx,ecx
    jz       .done                  ; nothing to rotate
;align 32
  .again:
    movss    xmm0,dword[esi]
    shufps   xmm0,xmm0,0            ; broadcast x
    mulps    xmm0,xmm4

    movss    xmm1,dword[esi+4]
    shufps   xmm1,xmm1,0            ; broadcast y
    mulps    xmm1,xmm5

    movss    xmm2,dword[esi+8]
    shufps   xmm2,xmm2,0            ; broadcast z
    mulps    xmm2,xmm6

    addps    xmm0,xmm1
    addps    xmm0,xmm2

    movups   [edi],xmm0             ; 4th lane is overwritten by the next
                                    ; point or by the terminator below

    add      esi,12
    add      edi,12
    dec      ecx
    jne      .again
  .done:
    mov      [edi],dword -1         ; terminator after the last point
end if
ret
607
;----------------------------------------------
; add_scale_to_matrix: multiplies all nine dword floats of a 3x3 matrix
; in place by [rsscale].
;  in:  esi - pointer to 3x3 matrix
;  out: none; x87 stack is empty on return
; Offsets 0/12/24 form the x scale group, 4/16/28 the y scale group and
; 8/20/32 the z scale group.
add_scale_to_matrix:
     fninit
     fld     [rsscale]             ; st0 = scale, kept until the last element

     fld     dword[esi]            ; x
     fmul    st,st1
     fstp    dword[esi]
     fld     dword[esi+4]          ; y
     fmul    st,st1
     fstp    dword[esi+4]
     fld     dword[esi+8]          ; z
     fmul    st,st1
     fstp    dword[esi+8]

     fld     dword[esi+12]         ; x
     fmul    st,st1
     fstp    dword[esi+12]
     fld     dword[esi+16]         ; y
     fmul    st,st1
     fstp    dword[esi+16]
     fld     dword[esi+20]         ; z
     fmul    st,st1
     fstp    dword[esi+20]

     fld     dword[esi+24]         ; x
     fmul    st,st1
     fstp    dword[esi+24]
     fld     dword[esi+28]         ; y
     fmul    st,st1
     fstp    dword[esi+28]
     fld     dword[esi+32]         ; z
     fmulp    st1,st               ; last element: multiply and drop the scale
     fstp    dword[esi+32]

ret
644
 
645
;----------------------------------------------
; translate_points: converts float points to integer points.
;   x' = x + [vect_x], y' = y + [vect_y], z' = z, each stored as a word
;   (just convert into integer; z coord still needed)
;in   esi - offset to 3d points  (point as 3 dwords float)
;     edi - offset to 2d points  ( as 3 words integer)
;     ecx - number of points; 0 is accepted and converts nothing
;out  none; esi/edi are advanced past the processed points, ecx = 0
translate_points:
    fninit
    test   ecx,ecx
    jz     .done               ; empty input: the dec/jnz loop would wrap around
  .again:
  if 0
    ; disabled perspective projection variant
    fld    dword[esi+8]
 ;   fmul   [rsscale]
    fist   word[edi+4]

    fisub  [zobs]
    fchs

    fld    dword[esi]
;    fmul   [rsscale]
    fisub  [xobs]
    fimul  [zobs]
    fdiv   st0,st1

    fiadd  [xobs]
    fiadd  [vect_x]
    fistp  word[edi]

    fld    dword[esi+4]
;    fmul   [rsscale]
    fisub  [yobs]
    fimul  [zobs]
    fdivrp  ;   st0,st1

    fiadd  [yobs]
    fiadd  [vect_y]
    fistp  word[edi+2]
   end if
   ; movups   xmm0,[esi]
   ; cvtps2dq xmm0,xmm0
   ; packsdw xmm0,xmm0
   ; movq     [edi]
    fld    dword[esi]
    fiadd  word[vect_x]        ; shift x by vect_x
    fistp  word[edi]
    fld    dword[esi+4]
    fiadd  [vect_y]            ; shift y by vect_y
    fistp  word[edi+2]
    fld    dword[esi+8]
    fistp  word[edi+4]         ; z is only converted
    add    esi,12
    add    edi,6
    dec    ecx
    jnz    .again
  .done:

ret