WebSVN – Kolibri OS – Path Comparison – / – /kernel/ Rev 1625 and /kernel/ Rev 1626

Regard whitespace Rev 1625 → Rev 1626

 /kernel/branches/Kolibri-A/trunk/core/syscall.inc
 ,7 → 76,7
 syscall_entry:
 ;       push    ecx
           sti
-        xor   eax, 3
+        and   eax, 3
         call    dword [servetable3 + eax * 4]
 ;       pop     ecx
 ,8 → 187,8
 align 4
 servetable3:
-        dd      paleholder      ; 0
-        dd      paleholder      ; 1
+        dd      FFT4    ; 0
+        dd      FFT4    ; 1
         dd      paleholder      ; 2
         dd      sys_end ; last

/kernel/branches/Kolibri-A/trunk/kernel32.inc
244,6 → 244,7
; sound

include "sound/playnote.inc" ; player Note for Speaker PC
include "sound/FFT.inc" ; fast Fourier transform routines

; display

 /kernel/branches/Kolibri-A/trunk/sound/FFT.inc
 ,0 → 1,760
+;           Fast Hartley Transform routine
+;           Copyright (C) 1999, 2004, 2010
+;          Artem Jerdev  artem@jerdev.co.uk
+;
+; free KolibriOS version - not to be ported to other OSes
+; ==========================================================
+Power_of_4      equ     5
+NumPoints       equ     1024
+N_2             equ     NumPoints / 2
+N_4             equ     NumPoints / 4
+;=================================================================
+; global constants
+align 8
+_root           dq      1.41421356237309504880169       ; = sqrt(2)
+_root2  dq      0.70710678118654752440084       ; = sqrt(2)/2
+_c1             dq      0.92387953251128675612818       ; = cos(pi/8)
+_s1             dq      0.38268343236508977172846       ; = sin(pi/8)
+_dx             dq      0.00613592315154296875          ; pi/512
+;[_CosTable]    dd      0 ; N_2 elements
+;[_SinTable]    dd      0 ; N_2 elements
+;; ==========================
+align 4
+MakeSinCosTable:
+        mov     ebx, [_Sines]
+        mov     ecx, [_Cosins]
+        xor     eax,  eax
+        fld     [_dx]                   ; st : dx
+        fldz                            ; st : 0, dx
+.loop:
+        fld     st0                     ; st : x, x, dx
+        FSINCOS                         ; st : cos, sin, x, dx
+        fstp    qword [ecx+eax*8]       ; st : sin, x, dx
+        fstp    qword [ebx+eax*8]       ; st : x, dx
+        fadd    st0, st1                ; st : x+dx, dx
+        inc     eax
+        cmp     eax, N_2
+        jne     .loop
+        fstp    st0                     ; st : dx
+        fstp    st0                     ; st : <empty>
+        ret
+; ================================================================
+align 4
+BitInvert:
+        mov     esi, [x]        ; array of qwords
+        xor     ecx, ecx        ; index term
+.newterm:
+        inc     ecx
+        cmp     ecx, NumPoints
+        jge     .done
+        xor     eax, eax
+        mov     edx, ecx
+        xor     bl, bl
+.do_invert:
+        inc     bl
+        cmp     bl, Power_of_4
+        jg      .switch
+        mov     bh, dl
+        and     bh,  3
+        shl     eax, 2
+        or      al, bh
+        shr     edx, 2
+        jmp     .do_invert
+.switch:
+        cmp     eax, ecx
+        jle     .newterm
+        fld     qword [esi+eax*8]
+        fld     qword [esi+ecx*8]
+        fstp    qword [esi+eax*8]
+        fstp    qword [esi+ecx*8]
+        jmp     .newterm
+.done:
+        ret
+;=================================================================
+align 4
+step1:
+        mov     esi, [x]
+        mov     ebx, esi
+        add     esi, NumPoints*8
+.loop:
+        fld     qword[ebx]
+        fld     qword[ebx+8]
+        fld     st1
+        fsub    st0, st1        ; st : t2, f[i+1], f[i]
+        fxch    st1             ; st : f[i+1], t2, f[i]
+        faddp   st2, st0        ; st : t2, t1
+        fld     qword[ebx+16]
+        fld     qword[ebx+24]
+        fld     st1             ; st : f[i+2], f[i+3], f[i+2], t2, t1
+        fadd    st0, st1        ; st : t3, f[i+3], f[i+2], t2, t1
+        fxch    st2             ; st : f[i+2], f[i+3], t3, t2, t1
+        fsub    st0, st1        ; st : t4, f[i+3], t3, t2, t1
+        fstp    st1             ; st : t4, t3, t2, t1
+        fld     st2             ; st : t2, t4, t3, t2, t1
+        fadd    st0, st1        ; st : t2+t4, t4, t3, t2, t1
+        fstp    qword[ebx+16]   ; st : t4, t3, t2, t1
+        fsubp   st2, st0        ; st : t3, t2-t4, t1
+        fld     st2             ; st : t1, t3, t2-t4, t1
+        fadd    st0, st1        ; st : t1+t3, t3, t2-t4, t1
+        fstp    qword[ebx]      ; st : t3, t2-t4, t1
+        fsubp   st2, st0        ; st : t2-t4, t1-t3
+        fstp    qword[ebx+24]   ; st : t1-t3
+        fstp    qword[ebx+8]    ; st : <empty>
+        add     ebx, 32
+        cmp     ebx, esi
+        jnz     .loop
+        ret
+;
+;===========================================================================
+step2:                          ; Step2
+;===========================================================================
+        mov     eax, [_f]
+        mov     ebx, eax
+        add     eax, NumPoints*8
+.loop_i:
+; -- quad subelements  +0, +4, +8 and +12 (simpliest operations)
+        fld     qword[ebx]
+        fld     qword[ebx+8*4]
+        fld     st0
+        fadd    st0, st2        ; st : t1, f_4, f_0
+        fxch    st1
+        fsubp   st2, st0        ; st : t1, t2
+        fld     qword[ebx+8*8]
+        fld     qword[ebx+8*12]
+        fld     st0
+        fadd    st0, st2        ; st : t3, f_12, t1, t2
+        fxch    st1
+        fsubp   st2, st0        ; st : t3, t4, t1, t2
+        ; ------
+        fld     st2             ; st : t1, t3, t4, t1, t2
+        fadd    st0, st1
+        fstp    qword[ebx]      ; st : t3, t4, t1, t2
+        fsub    st0, st2        ; st : t3-t1, t4, t1, t2
+        fchs                    ; st : t1-t3, t4, t1, t2
+        fstp    qword[ebx+8*4]  ; st : t4, t1, t2
+        fst     st1             ; st : t4, t4, t2
+        fadd    st0, st2        ; st : t2+t4, t4, t2
+        fstp    qword[ebx+8*8]  ; st : t4, t2
+        fsubp   st1, st0        ; st : t2-t4
+        fstp    qword[ebx+8*12] ; st : <empty>
+; -- even subelements  +2, +6, +10 and +14 (2 multiplications needed)
+        fld     qword[ebx+8*2]
+        fld     qword[ebx+8*6]
+        fld     [_root]
+        fmul    st1, st0        ; st : r, t2, t1
+        fld     qword[ebx+8*10]
+        fxch    st1             ; st : r, t3, t2, t1
+        fmul    qword[ebx+8*14] ; st : t4, t3, t2, t1
+        ; ------
+        fld     st3             ; st : t1, t4, t3, t2, t1
+        fadd    st0, st3        ;
+        fadd    st0, st2        ;
+        fst     qword[ebx+8*2]  ; store f[i+8] = t1+t2+t3
+        fsub    st0, st3        ;
+        fsub    st0, st3        ;
+        fstp    qword[ebx+8*10] ; store f[i+10]= t1-t2+t3
+        fld     st3             ; st : t1, t4, t3, t2, t1
+        fsub    st0, st2        ;
+        fsub    st0, st1        ;
+        fst     qword[ebx+8*14] ; store f[i+14]= t1-t3-t4
+        fadd    st0, st1        ;
+        faddp   st1, st0        ; st : t1-t3+t4, t3, t2, t1
+        fstp    qword[ebx+8*6]  ; store f[i+6]
+        fstp    st0             ; st : t2, t1
+        fstp    st0             ; st : t1
+        fstp    st0             ; st : <empty>
+; -- odd subelements
+        fld     qword[ebx+8*9]
+        fld     qword[ebx+8*11]
+        fld     st1
+        fsub    st0, st1
+        fxch    st1
+        faddp   st2, st0        ; st : (f[l3]-f[l7]), (f[l3]+f[l7])
+        fld     [_root2]
+        fmul    st2, st0
+        fmulp   st1, st0        ; st : t9, t6
+        fld     qword[ebx+8*3]
+        fld     st0
+        fadd    st0, st2        ; st : t1, f[l5], t9, t6
+        fstp    [_t1]
+        fsub    st0, st1
+        fstp    [_t2]
+        fstp    [_t9]   ; (t9 never used)
+        fstp    [_t6]           ; st : <empty>
+        fld     [_c1]
+        fld     [_s1]
+        fld     qword[ebx+8*5]
+        fld     qword[ebx+8*7]
+        fld     st3             ; st: c1, f[l6], f[l2], s1, c1
+        fmul    st0, st2        ; st: f_2*c, f_6, f_2, s, c
+        fld     st1             ; st: f_6, f_2*c, f_6, f_2, s, c
+        fmul    st0, st4        ; st: f_6*s, f_2*c, f_6, f_2, s, c
+        faddp   st1, st0        ; st: t5, f_6, f_2, s, c
+        fstp    [_t5]           ; st: f_6, f_2, s, c
+        fld     st3             ; st: c, f_6, f_2, s, c
+        fmul    st0, st1
+        fld     st3
+        fmul    st0, st3        ; st: f_2*s, f_6*c, f_6, f_2, s, c
+        fsubp   st1, st0        ; st: t8, f_6, f_2, s, c
+        fstp    [_t8]           ; st: f_6, f_2, s, c
+        fstp    st0             ; st: f_2, s, c
+        fstp    st0             ; st: s, c
+        fld     qword[ebx+8*13]
+        fld     qword[ebx+8*15]
+        fld     st3             ; st: c1, f[l8], f[l4], s1, c1
+        fmul    st0, st1
+        fld     st3
+        fmul    st0, st3        ; st: f_4*s, f_8*c, f_8, f_4, s, c
+        faddp   st1, st0        ; st: t7, f_8, f_4, s, c
+        fld     [_t5]           ; st: t5, t7, f_8, f_4, s, c
+        fsub    st0, st1        ; st: t4, t7, f_8, f_4, s, c
+        fstp    [_t4]
+        fstp    [_t7]           ; st: f_8, f_4, s, c
+        fld     st3             ; st: c, f_8, f_4, s, c
+        fmul    st0, st2
+        fld     st3
+        fmul    st0, st2        ; st: f_8*s, f_4*c, f_8, f_4, s, c
+        fsubp   st1, st0        ; st:-t0, f_8, f_4, s, c
+        fchs
+        fld     [_t8]
+        fchs                    ; st:-t8, t0, f_8, f_4, s, c
+        fsub    st0, st1        ; st: t3, t0, f_8, f_4, s, c
+        fstp    [_t3]
+        fstp    [_t0]           ; st: f_8, f_4, s, c
+        fstp    st0             ; st: f_4, s, c
+        fstp    st0             ; st: s, c
+        fstp    st0             ; st: c
+        fstp    st0             ; st: <empty>
+        fld     [_t1]
+        fld     [_t4]
+        fld     st1
+        fsub    st0, st1
+        fstp    qword[ebx+8*11] ; f[l7] = t1-t4
+        faddp   st1, st0
+        fstp    qword[ebx+8*3]  ; f[l5] = t1+t4
+        fld     [_t2]
+        fld     [_t3]
+        fld     st1
+        fsub    st0, st1
+        fstp    qword[ebx+8*15] ; f[l8]
+        faddp   st1, st0
+        fstp    qword[ebx+8*7]  ; f[l6]
+        fld     [_t6]
+        fld     qword[ebx+8]
+        fld     st1
+        fsub    st0, st1
+        fxch    st1
+        faddp   st2, st0        ; st : t2, t1
+        fld     [_t8]
+        fsub    [_t0]
+        fld     [_t5]
+        fadd    [_t7]           ; st : t4, t3, t2, t1
+        fld     st3
+        fsub    st0, st1
+        fstp    qword[ebx+8*9]  ; f[l3] = t1-t4
+        fadd    st0, st3
+        fstp    qword[ebx+8]    ; f[l1] = t1+t4
+        fld     st1             ; st : t2, t3, t2, t1
+        fsub    st0, st1        ; f[l4] = t2-t3
+        fstp    qword[ebx+8*13] ; st : t3, t2, t1
+        faddp   st1, st0        ; st : t2+t3, t1
+        fstp    qword[ebx+8*5]  ; f[l2] = t2+t3
+        fstp    st0             ; st : <empty>
+        add     ebx, 16*8
+        cmp     ebx, eax
+        jb      .loop_i
+        ret
+align 8         ; shared local vars
+_t0     dq      0
+_t1     dq      0
+_t2     dq      0
+_t3     dq      0
+_t4     dq      0
+_t5     dq      0
+_t6     dq      0
+_t7     dq      0
+_t8     dq      0
+_t9     dq      0
+;===================================================================
+step3:
+;===================================================================
+; 283  : {
+; 293  :   for (l=3; l<=p; l++)
+        mov     cx, 0x0200
+.newstep:
+        inc     ch
+        cmp     ch, Power_of_4
+        jg      .done
+        mov     [.step], cx
+; 294  :   {
+; 295  :     d1 = 1 << (l + l - 3);
+        mov     cl, ch
+        add     cl, cl
+        sub     cl, 3
+        mov     edx, 1
+        shl     edx, cl
+        mov     [.d1], edx
+; 296  :     d2 = d1 << 1;
+        shl     edx, 1
+        mov     [.d2], edx
+        mov     eax, edx
+; 297  :     d3 = d2 << 1;
+        shl     edx, 1
+        mov     [.d3], edx
+; 298  :     d4 = d2 + d3;
+        add     eax, edx
+        mov     [.d4], eax
+; 299  :     d5 = d3 << 1;
+        shl     edx, 1
+        mov     [.d5], edx
+        shl     edx, 3
+        mov     [.d6], edx      ; d6 = d5*8 to simplify index operations
+; 339  :         j5 = N / d5;   ; moved out of internal loop
+        mov     cl, Power_of_4
+        sub     cl, ch
+        add     cl, cl
+        mov     edx, 1
+        shl     edx, cl
+        mov     [.j5], edx
+; 300  :
+; 301  :     for (j=0; j<N; j+=d5)
+        mov     esi, [_f]
+        mov     ebx, esi
+        add     esi, NumPoints*8
+        mov     [.end_of_array], esi
+.next_j:
+; {
+; t1 = f[j] + f[j+d2];
+        mov     eax, [.d2]
+        fld     qword[ebx]
+        fld     qword[ebx+eax*8]
+        fld     st1
+        fadd    st0, st1
+        fstp    [_t1]
+; t2 = f[j] - f[j+d2];
+        fsubp   st1, st0
+        fstp    [_t2]
+; t3 = f[j+d3] + f[j+d4];
+        mov     edi, [.d3]
+        fld     qword[ebx+edi*8]
+        mov     edx, [.d4]
+        fld     qword[ebx+edx*8]
+        fld     st1
+        fsub    st0, st1                ; st : t4, f4, f3
+        fxch    st1                     ; st : f4, t4, f3
+; t4 = f[j+d3] - f[j+d4];
+        faddp   st2, st0                ; st : t4, t3
+; f[j+d4] = t2 - t4;
+; f[j+d3] = t2 + t4;
+        fld     [_t2]
+        fld     st0
+        fsub    st0, st2                ; st : f4, t2, t4, t3
+        fstp    qword[ebx+edx*8]        ; st : t2, t4, t3
+        fadd    st0, st1                ; st : f3, t4, t3
+        fstp    qword[ebx+edi*8]        ; st : t4, t3
+; f[j+d2] = t1 - t3;
+; f[j]    = t1 + t3;
+        fld     [_t1]
+        fst     st1
+        fsub    st0, st2                ; st : f2, t1, t3
+        fstp    qword[ebx+eax*8]        ; st : t1, t3
+        fadd    st0, st1                ; st : f0, t3
+        fstp    qword[ebx]              ; st : t3
+        fstp    st0
+; jj = j + d1;     / ??
+        mov     edi, [.d1]
+        shl     edi, 3          ; = d1*8
+        mov     edx, edi
+        mov     eax, edi
+        add     eax, eax        ; eax = d2*8
+        shl     edx, 2          ; = d3*8
+        add     edi, ebx        ; now [edi] points to f[jj]
+        add     edx, edi        ; and [edx] points to f[jj+d3]
+; t1 = f[jj];
+        fld     qword [edi]     ; st : t1
+; t3 = f[jj+d3];
+        fld     qword [edx]     ; st : t3, t1
+; t2 = f[jj+d2] * r;
+        fld     qword [edi+eax]
+        fld     [_root]
+        fmul    st1, st0        ; st : r,  t2, t3, t1
+; t4 = f[jj+d4] * r
+        fmul    qword [edx+eax] ; st : t4, t2, t3, t1
+; f[jj]    = t1 + t2 + t3;
+        fld     st3             ; st : t1, t4, t2, t3, t1
+        fadd    st0, st3
+        fadd    st0, st2
+        fstp    qword [edi]
+; f[jj+d2] = t1 - t3 + t4;
+        fld     st3
+        fsub    st0, st3        ; st : (t1-t3), t4, t2, t3, t1
+        fld     st0
+        fadd    st0, st2        ; st : f2, (t1-t3), t4, t2, t3, t1
+        fstp    qword [edi+eax]
+; f[jj+d4] = t1 - t3 - t4;
+        fsub    st0, st1        ; st : f4, t4, t2, t3, t1
+        fstp    qword [edx+eax]
+; f[jj+d3] = t1 - t2 + t3;
+        fstp    st0             ; st : t2, t3,  t1
+        fsubp   st1, st0        ; st : (t3-t2), t1
+        faddp   st1, st0        ; st : f3
+        fstp    qword [edx]
+; for (k=1; k<d1; k++)
+        xor     ecx, ecx        ; ecx = k
+        mov     [.jj], ecx
+.next_k:
+        inc     ecx
+        cmp     ecx, [.d1]
+        jge     .done_k
+; {
+        mov     eax, [.d2]      ; the sector increment
+; l1 = j  + k;
+        mov     edx, ecx
+        mov     [.l1], edx      ; [ebx+edx*8] --> f[j+k]
+; l2 = l1 + d2;
+        add     edx, eax
+        mov     [.l2], edx
+; l3 = l1 + d3;
+        add     edx, eax
+        mov     [.l3], edx
+; l4 = l1 + d4;
+        add     edx, eax
+        mov     [.l4], edx
+; l5 = j  + d2 - k;
+        mov     edx, eax
+        sub     edx, ecx
+        mov     [.l5], edx
+; l6 = l5 + d2;
+        add     edx, eax
+        mov     [.l6], edx
+; l7 = l5 + d3;
+        add     edx, eax
+        mov     [.l7], edx
+; l8 = l5 + d4;
+        add     edx, eax
+        mov     [.l8], edx
+; 340  :         j5 *= k;       // add-substituted multiplication
+        mov     eax, [.jj]
+        add     eax, [.j5]
+        mov     [.jj], eax
+; c1 = C[jj];
+; s1 = S[jj];
+        mov     edi, [_Cosins]
+        fld     qword[edi+eax*8]
+        mov     esi, [_Sines]
+        fld     qword[esi+eax*8]        ; st : s1, c1
+; t5 = f[l2] * c1 + f[l6] * s1;
+; t8 = f[l6] * c1 - f[l2] * s1;
+        mov     edx, [.l6]
+        fld     qword[ebx+edx*8]
+        mov     edx, [.l2]
+        fld     st0
+        fmul    st0, st2
+        fxch    st1
+        fmul    st0, st3
+        fld     qword[ebx+edx*8]        ; st : f[l2], f[l6]*c, f[l6]*s, s, c
+        fmul    st4, st0
+        fmulp   st3, st0                ; st : f[l6]*c, f[l6]*s, f[l2]*s, f[l2]*c
+        fsub    st0, st2                ; st :   t8,    f[l6]*s, f[l2]*s, f[l2]*c
+        fstp    [_t8]
+        faddp   st2, st0                ; st :  f[l2]*s, t5
+        fstp    st0                     ; st :  t5
+        fstp    [_t5]                   ; st :  <empty>
+; c2 = C[2*jj];
+; s2 = S[2*jj];
+        shl     eax, 1
+        fld     qword[edi+eax*8]
+        fld     qword[esi+eax*8]        ; st : s2, c2
+; t6 = f[l3] * c2 + f[l7] * s2;
+; t9 = f[l7] * c2 - f[l3] * s2;
+        mov     edx, [.l7]
+        fld     qword[ebx+edx*8]
+        mov     edx, [.l3]
+        fld     st0
+        fmul    st0, st2
+        fxch    st1
+        fmul    st0, st3
+        fld     qword[ebx+edx*8]        ; st : f[l3], f[l7]*c, f[l7]*s, s, c
+        fmul    st4, st0
+        fmulp   st3, st0                ; st : f[l7]*c, f[l7]*s, f[l3]*s, f[l3]*c
+        fsub    st0, st2                ; st :   t9,    f[l7]*s, f[l3]*s, f[l3]*c
+        fstp    [_t9]
+        faddp   st2, st0                ; st :  f[l2]*s, t6
+        fstp    st0                     ; st :  t6
+        fstp    [_t6]                   ; st :  <empty>
+; c3 = C[3*jj];
+; s3 = S[3*jj];
+        add     eax, [.jj]
+        fld     qword[edi+eax*8]
+        fld     qword[esi+eax*8]        ; st : s3, c3
+; t7 = f[l4] * c3 + f[l8] * s3;
+; t0 = f[l8] * c3 - f[l4] * s3;
+        mov     edx, [.l8]
+        fld     qword[ebx+edx*8]
+        mov     edx, [.l4]
+        fld     st0
+        fmul    st0, st2
+        fxch    st1
+        fmul    st0, st3
+        fld     qword[ebx+edx*8]        ; st : f[l4], f[l8]*c, f[l8]*s, s, c
+        fmul    st4, st0
+        fmulp   st3, st0                ; st : f[l8]*c, f[l8]*s, f[l4]*s, f[l4]*c
+        fsub    st0, st2                ; st :   t9,    f[l8]*s, f[l4]*s, f[l4]*c
+        fstp    [_t0]
+        faddp   st2, st0                ; st : f[l2]*s, t7
+        fstp    st0                     ; st :  t7
+        fstp    [_t7]                   ; st :  <empty>
+; t1 = f[l5] - t9;
+; t2 = f[l5] + t9;
+        mov     eax, [.l5]
+        fld     qword [ebx+eax*8]
+        fld     [_t9]
+        fld     st0
+        fadd    st0, st2
+        fstp    [_t2]
+        fsubp   st1, st0
+        fstp    [_t1]
+; t3 = - t8  - t0;
+        fld     [_t8]
+        fadd    [_t0]
+        fchs
+        fstp    [_t3]
+; t4 =   t5  - t7;
+        fld     [_t5]
+        fsub    [_t7]
+        fstp    [_t4]
+; f[l5] = t1 + t4;
+        fld     [_t1]
+        fld     [_t4]
+        fld     st0
+        fadd    st0, st2
+        fstp    qword [ebx+eax*8]
+; f[l7] = t1 - t4;
+        mov     eax, [.l7]
+        fsubp   st1, st0
+        fstp    qword [ebx+eax*8]
+; f[l6] = t2 + t3;
+        mov     eax, [.l6]
+        fld     [_t2]
+        fld     [_t3]
+        fld     st0
+        fadd    st0, st2
+        fstp    qword [ebx+eax*8]
+; f[l8] = t2 - t3;
+        mov     eax, [.l8]
+        fsubp   st1, st0
+        fstp    qword [ebx+eax*8]
+; t1 = f[l1] + t6;
+        mov     eax, [.l1]
+        fld     qword [ebx+eax*8]
+        fld     [_t6]
+        fld     st0
+        fadd    st0, st2
+        fstp    [_t1]
+; t2 = f[l1] - t6;
+        fsubp   st1, st0
+        fstp    [_t2]
+; t3 =    t8 - t0;
+        fld     [_t8]
+        fsub    [_t0]
+        fstp    [_t3]
+; t4 =    t5 + t7;
+        fld     [_t5]
+        fadd    [_t7]
+        fstp    [_t4]
+; f[l1] = t1 + t4;
+        mov     eax, [.l1]
+        fld     [_t1]
+        fld     [_t4]
+      fld     st0
+        fadd    st0, st2
+        fstp    qword [ebx+eax*8]
+; f[l3] = t1 - t4;
+        mov     eax, [.l3]
+        fsubp   st1, st0
+        fstp    qword [ebx+eax*8]
+; f[l2] = t2 + t3;
+        mov     eax, [.l2]
+        fld     [_t2]
+        fld     [_t3]
+        fld     st0
+        fadd    st0, st2
+        fstp    qword [ebx+eax*8]
+; f[l4] = t2 - t3;
+        mov     eax, [.l4]
+        fsubp   st1, st0
+        fstp    qword [ebx+eax*8]
+; 374  :       }
+        jmp     .next_k
+.done_k:
+; 375  :     }
+        add     ebx, [.d6]      ; d6 = d5*8
+        cmp     ebx, [.end_of_array]
+        jb      .next_j
+; 376  :   }
+        mov     cx, [.step]
+        jmp     .newstep
+.done:
+; 377  : }
+        ret
+align 4
+.l1   dd        0
+.l2   dd        0
+.l3   dd        0
+.l4   dd        0
+.l5   dd        0
+.l6   dd        0
+.l7   dd        0
+.l8   dd        0
+.l9   dd        0
+.l0   dd        0
+.d1   dd        0
+.d2   dd        0
+.d3   dd        0
+.d4   dd        0
+.d5   dd        0
+.d6   dd        0
+.j5   dd        0
+.jj   dd        0
+.end_of_array   dd        0
+.step dw        0
+align 8
+                ;=========== Step3 ends here ===========
+; =================================================================
+;       syscall entry
+;
+_f  dd ?
+_N  dd 1024     ; number of points
+_a              dd ?            ; initial   data array
+x       dd 0            ; tranformed (float) data array
+_Cosins         dd 0
+_Sines  dd 0
+FFT4:
+        or      al, al
+        jnz     .trans
+        mov     cl, Power_of_4
+        mov     eax, 1
+        shl     eax, cl
+        shl     eax, cl
+        mov     [_N], eax
+        shl     eax, 2  ; size of Sine table in bytes
+        add     eax, ebx
+        mov     [_Sines], ebx
+        mov     [_Cosins], eax
+        cpuid
+        rdtsc
+        mov     [.time], eax
+        call    MakeSinCosTable
+        cpuid
+        rdtsc
+        sub     eax, [.time]
+        ret
+.trans:
+        mov     [x],    ebx
+        mov     [_f], ebx
+        cli     ;-----
+        cpuid
+        rdtsc
+        mov     [.time], eax
+        call BitInvert
+        call    step1
+        call    step2
+        call    step3
+        cpuid
+        rdtsc
+        sti     ;----
+        sub     eax, [.time]
+        ret
+.time   dd      0

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 1625 → Rev 1626