WebSVN – Kolibri OS – Path Comparison – / – /programs/fs/kfar/trunk/zlib/adler32.asm Rev 6616 and /programs/fs/kfar/trunk/zlib/adler32.asm Rev 6617

Regard whitespace Rev 6616 → Rev 6617

 /programs/fs/kfar/trunk/zlib/adler32.asm
 ,0 → 1,309
+; adler32.asm -- compute the Adler-32 checksum of a data stream
+; Copyright (C) 1995-2011 Mark Adler
+; For conditions of distribution and use, see copyright notice in zlib.h
+BASE equ 65521 ;largest prime smaller than 65536
+NMAX equ 5552
+; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+; DO1 buf,i -- fold the byte at address buf+i into the two running sums.
+;   buf : register (or 32-bit operand) holding the data pointer
+;   i   : constant byte offset
+; Works on the dword variables named adler and sum2 at the call site:
+;   adler += byte, then sum2 += adler.
+; Clobbers eax and the flags.
+macro DO1 buf,i
+{
+        mov eax,buf
+        movzx eax,byte[eax+(i)] ;eax = next data byte
+        add eax,[adler]         ;eax = updated byte sum
+        mov [adler],eax
+        add [sum2],eax
+}
+; DO2 buf,i -- apply DO1 to the two consecutive bytes at offsets i and i+1.
+macro DO2 buf,i
+{
+        irp ofs, 0,1 \{
+                DO1 buf,i+ofs
+        \}
+}
+; DO4 buf,i -- apply DO2 twice, covering the four bytes at offsets i..i+3.
+macro DO4 buf,i
+{
+        irp ofs, 0,2 \{
+                DO2 buf,i+ofs
+        \}
+}
+; DO8 buf,i -- apply DO4 twice, covering the eight bytes at offsets i..i+7.
+macro DO8 buf,i
+{
+        irp ofs, 0,4 \{
+                DO4 buf,i+ofs
+        \}
+}
+; DO16 buf -- fully unrolled update for the sixteen bytes at buf+0..buf+15.
+; The pointer itself is not advanced; the caller adds 16 afterwards.
+macro DO16 buf
+{
+        irp ofs, 0,8 \{
+                DO8 buf,ofs
+        \}
+}
+; use NO_DIVIDE if your processor does not do division in hardware --
+; try it both ways to see which is faster
+; note that this assumes BASE is 65521, where 65536 % 65521 == 15
+; (thank you to John Reiser for pointing this out)
+; CHOP a -- partial reduction of a 32-bit value without a division:
+;   a = (a & 0xffff) + 15*(a >> 16)
+; which keeps a congruent modulo BASE because 65536 % 65521 == 15.
+; a must be a 32-bit memory operand (or a register other than eax).
+; Expands to nothing unless NO_DIVIDE is 1. Clobbers eax and the flags.
+macro CHOP a
+{
+if NO_DIVIDE eq 1
+        mov eax,a
+        and a,0xffff    ;a = low half
+        shr eax,16      ;eax = high half
+        shl eax,4
+        add a,eax       ;a += 16*high
+        shr eax,4
+        sub a,eax       ;a -= high  ->  low + 15*high
+end if
+}
+; MOD28 a -- reduce a modulo BASE; a is a 32-bit memory operand.
+;   NO_DIVIDE = 1 : one CHOP followed by a single conditional subtraction
+;                   (clobbers eax).
+;   otherwise     : hardware division; eax, ecx and edx are saved and restored.
+macro MOD28 a
+{
+if NO_DIVIDE eq 1
+local .below_base
+        CHOP a
+        ; after CHOP the value is at most 0xffff + 15*0xffff, so an unsigned
+        ; comparison decides the same way as a signed one
+        cmp a,BASE
+        jb .below_base ;if (..>=..)
+                sub a,BASE
+        .below_base:
+else
+        push eax ecx edx
+        mov ecx,BASE
+        mov eax,a
+        xor edx,edx     ;edx:eax = zero-extended dividend
+        div ecx
+        mov a,edx       ;keep the remainder
+        pop edx ecx eax
+end if
+}
+; MOD a -- reduce a full 32-bit value modulo BASE; a is a 32-bit memory operand.
+;   NO_DIVIDE = 1 : two CHOP passes (the second one inside MOD28) plus a
+;                   conditional subtraction; clobbers eax.
+;   otherwise     : the plain division emitted by MOD28, which preserves
+;                   eax, ecx and edx.
+macro MOD a
+{
+if NO_DIVIDE eq 1
+        CHOP a
+end if
+        MOD28 a
+}
+; MOD63 a -- reduce a non-negative length modulo BASE.
+; In this port a is a 32-bit memory operand, so the first step of the C
+; macro (folding in a >> 32) contributes nothing and is omitted:
+;        z_off64_t tmp = a >> 32;                  -- always 0 for 32 bits
+;        a &= 0xffffffff;
+;        a += (tmp << 8) - (tmp << 5) + tmp;
+; The remaining steps are two CHOP passes and one conditional subtraction.
+;   NO_DIVIDE = 1 : CHOP + MOD28 (MOD28 performs the second CHOP and the
+;                   final "if (a >= BASE) a -= BASE"); clobbers eax.
+;   otherwise     : hardware division; eax, ecx and edx are saved and restored.
+macro MOD63 a
+{
+if NO_DIVIDE eq 1
+;this assumes a is not negative
+        CHOP a
+        MOD28 a
+else
+push eax ecx edx
+        mov eax,a
+        xor edx,edx     ;edx:eax = zero-extended dividend
+        mov ecx,BASE
+        div ecx
+        mov a,edx       ;keep the remainder
+pop edx ecx eax
+end if
+}
+; =========================================================================
+; uLong adler32(uLong adler, const Bytef *buf, uInt len)
+;
+; Updates a running Adler-32 checksum with len bytes starting at buf.
+; In:  adler = current checksum (low 16 bits: byte sum, high 16 bits: sum
+;              of the running byte sums)
+;      buf   = pointer to the data; Z_NULL requests the initial value
+;      len   = number of bytes, treated as unsigned
+; Out: eax   = updated checksum, or 1 when buf is Z_NULL
+; ebx (data pointer) and edx (block counter) are saved by the proc frame.
+; Note: as in the C original, the Z_NULL test is deferred until after the
+; len == 1 fast path, so buf is dereferenced unconditionally when len == 1.
+align 4
+proc adler32 uses ebx edx, adler:dword, buf:dword, len:dword
+locals
+        sum2 dd ? ;uLong, high half of the checksum
+endl
+;zlib_debug 'adler32 adler = %d',[adler]
+        ; split Adler-32 into component sums
+        mov eax,[adler]
+        shr eax,16
+        mov [sum2],eax
+        and dword[adler],0xffff
+        mov ebx,[buf]
+
+        ; in case user likes doing a byte at a time, keep it fast
+        cmp dword[len],1
+        jne .not_single ;if (..==..)
+                movzx eax,byte[ebx]
+                add [adler],eax
+                cmp dword[adler],BASE
+                jb .single_sum2 ;if (..>=..)
+                        sub dword[adler],BASE
+        .single_sum2:
+                mov eax,[adler]
+                add [sum2],eax
+                cmp dword[sum2],BASE
+                jb .combine ;if (..>=..)
+                        sub dword[sum2],BASE
+                jmp .combine
+align 4
+        .not_single:
+        ; initial Adler-32 value (deferred check for len == 1 speed)
+        cmp ebx,Z_NULL
+        jne .have_buf ;if (..==0)
+                mov eax,1
+                jmp .end_f
+align 4
+        .have_buf:
+        ; in case short lengths are provided, keep it somewhat fast
+        cmp dword[len],16
+        jae .nmax_loop ;if (..<..), len is unsigned
+        .short_loop:
+                cmp dword[len],0
+                je .short_done ;while (len--): leave once nothing remains
+                movzx eax,byte[ebx]
+                inc ebx
+                add [adler],eax
+                mov eax,[adler]
+                add [sum2],eax
+                dec dword[len]
+                jmp .short_loop
+align 4
+        .short_done:
+                cmp dword[adler],BASE
+                jb .short_sum2 ;if (..>=..)
+                        sub dword[adler],BASE
+        .short_sum2:
+                MOD28 dword[sum2] ;only added so many BASE's
+                jmp .combine
+align 4
+        ; do length NMAX blocks -- requires just one modulo operation
+        .nmax_loop:
+        cmp dword[len],NMAX
+        jb .tail ;while (..>=..), len is unsigned
+                sub dword[len],NMAX
+                mov edx,NMAX/16 ;NMAX is divisible by 16
+        .nmax_inner: ;do
+                        DO16 ebx ;16 sums unrolled, clobbers eax
+                        add ebx,16
+                        dec edx
+                        jnz .nmax_inner ;while (--n)
+                MOD [adler]
+                MOD [sum2]
+                jmp .nmax_loop
+align 4
+        .tail:
+        ; do remaining bytes (less than NMAX, still just one modulo)
+        cmp dword[len],0
+        je .combine ;if (len): avoid modulos if none remaining
+        .tail16:
+                cmp dword[len],16
+                jb .tail1 ;while (..>=..)
+                sub dword[len],16
+                DO16 ebx
+                add ebx,16
+                jmp .tail16
+align 4
+        .tail1:
+                cmp dword[len],0
+                je .tail_done ;while (len--): leave once nothing remains
+                movzx eax,byte[ebx]
+                inc ebx
+                add [adler],eax
+                mov eax,[adler]
+                add [sum2],eax
+                dec dword[len]
+                jmp .tail1
+align 4
+        .tail_done:
+                MOD [adler]
+                MOD [sum2]
+        ; return recombined sums
+.combine:
+        mov eax,[sum2]
+        shl eax,16
+        or eax,[adler]
+.end_f:
+;zlib_debug '  adler32.ret = %d',eax
+        ret
+endp
+; =========================================================================
+; uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2)
+;
+; Combines the Adler-32 checksums of two consecutive data blocks into the
+; checksum of their concatenation.
+; In:  adler1 = checksum of the first block
+;      adler2 = checksum of the second block
+;      len2   = length of the second block; only 32 bits are passed in this port
+; Out: eax    = combined checksum, or 0xffffffff when len2 is negative
+; ecx and edx are saved by the proc frame.
+align 4
+proc adler32_combine_ uses ecx edx, adler1:dword, adler2:dword, len2:dword
+locals
+        sum1 dd ? ;uLong
+        sum2 dd ? ;uLong
+        rem  dd ? ;unsigned, len2 % BASE
+endl
+        ; for negative len, return invalid adler32 as a clue for debugging
+        cmp dword[len2],0
+        jge .len_ok ;if (..<0), signed on purpose
+                mov eax,0xffffffff
+                jmp .end_f
+        .len_ok:
+        ; the derivation of this formula is left as an exercise for the reader
+        ; rem = len2 % BASE
+        mov eax,[len2]
+        xor edx,edx
+        mov ecx,BASE
+        div ecx
+        mov [rem],edx
+        ; sum1 = adler1 & 0xffff
+        mov eax,[adler1]
+        and eax,0xffff
+        mov [sum1],eax
+        ; sum2 = (rem * sum1) % BASE
+        ; rem < BASE and sum1 <= 0xffff, so the product fits in 32 bits and
+        ; edx is 0 after mul, which makes edx:eax a valid dividend
+        mul dword[rem]
+        div ecx
+        mov [sum2],edx
+        ; sum1 += (adler2 & 0xffff) + BASE - 1
+        mov eax,[adler2]
+        and eax,0xffff
+        add eax,BASE-1
+        add [sum1],eax
+        ; sum2 += (adler1 >> 16) + (adler2 >> 16) + BASE - rem
+        mov eax,[adler1]
+        shr eax,16
+        mov edx,[adler2]
+        shr edx,16
+        lea eax,[eax+edx+BASE]
+        sub eax,[rem]
+        add [sum2],eax
+        ; bring both sums back below BASE
+        cmp dword[sum1],BASE
+        jb @f ;if (..>=..)
+                sub dword[sum1],BASE
+        @@:
+        cmp dword[sum1],BASE
+        jb @f ;if (..>=..)
+                sub dword[sum1],BASE
+        @@:
+        cmp dword[sum2],BASE shl 1
+        jb @f ;if (..>=..)
+                sub dword[sum2],BASE shl 1
+        @@:
+        cmp dword[sum2],BASE
+        jb @f ;if (..>=..)
+                sub dword[sum2],BASE
+        @@:
+        ; return sum1 | (sum2 << 16)
+        mov eax,[sum2]
+        shl eax,16
+        or eax,[sum1]
+.end_f:
+        ret
+endp
+; =========================================================================
+; uLong adler32_combine(uLong adler1, uLong adler2, z_off_t len2)
+;
+; Public entry point: passes its three arguments unchanged to
+; adler32_combine_ and returns that procedure's result in eax.
+align 4
+proc adler32_combine, adler1:dword, adler2:dword, len2:dword
+        ; arguments are pushed right to left; the callee removes them
+        push dword[len2]
+        push dword[adler2]
+        push dword[adler1]
+        call adler32_combine_
+        ret
+endp
+; uLong adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2)
+;
+; Same as adler32_combine: passes its arguments unchanged to
+; adler32_combine_ and returns that procedure's result in eax.
+; NOTE(review): len2 is declared as a dword here although the C prototype
+; uses z_off64_t, so only 32 bits of the length are passed.
+align 4
+proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
+        ; arguments are pushed right to left; the callee removes them
+        push dword[len2]
+        push dword[adler2]
+        push dword[adler1]
+        call adler32_combine_
+        ret
+endp

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 6616 → Rev 6617