Rev 6851 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6617 | IgorA | 1 | ; adler32.asm -- compute the Adler-32 checksum of a data stream |
2 | ; Copyright (C) 1995-2011 Mark Adler |
||
3 | ; For conditions of distribution and use, see copyright notice in zlib.h |
||
4 | |||
5 | |||
; Modulus of both Adler-32 running sums.
BASE equ 65521 ;largest prime smaller than 65536

; Number of input bytes that may be summed between two modulo reductions.
; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
NMAX equ 5552
||
9 | |||
; DO1 src,ofs -- fold the single byte at [src+ofs] into both running sums:
;   [adler] += byte[src+ofs]
;   edi     += [adler]
; References a variable named `adler` and the register edi from the
; expansion site. Clobbers eax and the flags.
macro DO1 src,ofs
{
	movzx eax,byte[src+ofs]
	add [adler],eax
	add edi,[adler]
}
; DO2 src,ofs -- two DO1 steps, for the bytes at offsets ofs and ofs+1.
macro DO2 src,ofs
{
	DO1 src,ofs
	DO1 src,ofs+1
}
||
; DO4 src,ofs -- four DO1 steps, for the bytes at offsets ofs .. ofs+3.
macro DO4 src,ofs
{
	DO2 src,ofs
	DO2 src,ofs+2
}
||
; DO8 src,ofs -- eight DO1 steps, for the bytes at offsets ofs .. ofs+7.
macro DO8 src,ofs
{
	DO4 src,ofs
	DO4 src,ofs+4
}
||
; DO16 src -- sixteen DO1 steps, for the bytes at [src] .. [src+15].
; The pointer itself is not advanced; the caller adds 16 afterwards.
macro DO16 src
{
	DO8 src,0
	DO8 src,8
}
||
36 | |||
; use NO_DIVIDE if your processor does not do division in hardware --
; try it both ways to see which is faster
; note that this assumes BASE is 65521, where 65536 % 65521 == 15
; (thank you to John Reiser for pointing this out)
;
; CHOP val -- one folding step towards val mod BASE without a division:
;   tmp = val >> 16
;   val = (val & 0xffff) + (tmp << 4) - tmp
; Code is emitted only when NO_DIVIDE is 1; otherwise the macro expands
; to nothing. Clobbers eax and the flags, so val must not be eax.
macro CHOP val
{
if NO_DIVIDE eq 1
	mov eax,val
	shr eax,16 ;eax = tmp = val >> 16
	and val,0xffff
	shl eax,4
	add val,eax ;val += tmp << 4
	shr eax,4
	sub val,eax ;val -= tmp
end if
}
||
; MOD28 a -- reduce a modulo BASE.
; NO_DIVIDE = 1: one CHOP step followed by a single conditional subtraction
;   of BASE (clobbers eax and the flags; a must not be eax).
; otherwise: an unsigned 32-bit division by BASE; eax, ecx and edx are saved
;   and restored around it, so a must not be one of those three registers
;   (the result would be overwritten by the final pop).
macro MOD28 a
{
if NO_DIVIDE eq 1
local .end0
	CHOP a
	cmp a,BASE
	jb .end0 ;if (..>=..) -- the sums are unsigned, so compare unsigned
		sub a,BASE
.end0:
else
	push eax ecx edx
		mov eax,a
		xor edx,edx ;edx:eax = zero-extended dividend
		mov ecx,BASE
		div ecx
		mov a,edx ;remainder
	pop edx ecx eax
end if
}
||
; MOD a -- reduce a full 32-bit value a modulo BASE.
; NO_DIVIDE = 1: CHOP a, then MOD28 a (which chops once more and does the
;   final conditional subtraction).
; otherwise: CHOP expands to nothing and MOD28 supplies the division by
;   BASE, so the emitted code is the plain div sequence.
; The operand restrictions of CHOP and MOD28 apply.
macro MOD a
{
	CHOP a
	MOD28 a
}
||
; MOD63 a -- reduce a non-negative length a modulo BASE.
;
; Reference (zlib C code for a 64-bit operand, NO_DIVIDE variant):
;   z_off64_t tmp = a >> 32;
;   a &= 0xffffffff;
;   a += (tmp << 8) - (tmp << 5) + tmp;
;   tmp = a >> 16;
;   a &= 0xffff;
;   a += (tmp << 4) - tmp;
;   tmp = a >> 16;
;   a &= 0xffff;
;   a += (tmp << 4) - tmp;
;   if (a >= BASE) a -= BASE;
;
; Operands in this file are 32 bits wide, so the `a >> 32` step contributes
; nothing and what remains is two chop steps plus one conditional
; subtraction -- exactly what MOD emits when NO_DIVIDE is 1. Without
; NO_DIVIDE, MOD emits the division by BASE. The NO_DIVIDE branch
; previously emitted no code at all and left a unreduced.
macro MOD63 a
{
	MOD a
}
||
111 | |||
112 | ; ========================================================================= |
||
;uLong (uLong adler, const Bytef *buf, uInt len)
; Update a running Adler-32 checksum with len bytes read from buf.
; In:  adler - current checksum (sum2 in the high word, sum1 in the low word)
;      buf   - pointer to the data, or Z_NULL
;      len   - number of bytes to process
; Out: eax = updated checksum; eax = 1 when buf is Z_NULL and len is not 1
; ebx, ecx, edx and edi are preserved through the `uses` list.
; Register roles: ebx = read pointer, ecx = bytes remaining,
;   edx = 16-byte group counter, edi = sum2, [adler] = sum1, eax = scratch.
align 16
proc adler32 uses ebx ecx edx edi, adler:dword, buf:dword, len:dword
	; split Adler-32 into component sums
	mov edi,[adler]
	shr edi,16
	and dword[adler],0xffff
	mov ebx,[buf]
	mov ecx,[len]

	; in case user likes doing a byte at a time, keep it fast
	cmp ecx,1
	jne .end0 ;if (..==..)
		movzx eax,byte[ebx]
		add [adler],eax
		cmp dword[adler],BASE
		jb @f ;if (..>=..)
			sub dword[adler],BASE
		@@:
		add edi,[adler]
		cmp edi,BASE
		jb .combine ;if (..>=..) -- subtract only when sum2 reached BASE
			sub edi,BASE
		jmp .combine
align 4
.end0:

	; initial Adler-32 value (deferred check for len == 1 speed)
	cmp ebx,Z_NULL
	jne @f ;if (..==0)
		xor eax,eax
		inc eax
		jmp .end_f
align 4
@@:

	; in case short lengths are provided, keep it somewhat fast
	cmp ecx,16
	jae .cycle3 ;if (..<..)
		.cycle0:
			mov eax,ecx
			dec ecx
			test eax,eax
			je @f ;while (..)
			movzx eax,byte[ebx]
			add [adler],eax
			inc ebx
			add edi,[adler]
			jmp .cycle0
align 4
		@@:
		cmp dword[adler],BASE
		jb @f ;if (..>=..)
			sub dword[adler],BASE
		@@:
		MOD28 edi ;only added so many BASE's
		jmp .combine

	; do length NMAX blocks -- requires just one modulo operation
align 4
	.cycle3:
	cmp ecx,NMAX
	jb .cycle3end ;while (..>=..)
		sub ecx,NMAX
		mov edx,NMAX/16 ;NMAX is divisible by 16
		.cycle1: ;do
			DO16 ebx ;16 sums unrolled
			add ebx,16
			dec edx
			jne .cycle1 ;while (..)
		MOD [adler]
		MOD edi
		jmp .cycle3
align 4
	.cycle3end:

	; do remaining bytes (less than NMAX, still just one modulo)
	cmp ecx,0
	je .combine ;if (..) ;avoid modulos if none remaining
		@@:
			cmp ecx,16
			jb .cycle2 ;while (..>=..)
			sub ecx,16
			DO16 ebx
			add ebx,16
			jmp @b
align 4
		.cycle2:
			mov eax,ecx
			dec ecx
			test eax,eax
			je @f ;while (..)
			movzx eax,byte[ebx]
			add [adler],eax
			inc ebx
			add edi,[adler]
			jmp .cycle2
align 4
		@@:
		MOD [adler]
		MOD edi

	; return recombined sums
.combine:
	mov eax,edi
	shl eax,16
	or eax,[adler]
.end_f:
	ret
endp
||
223 | |||
224 | ; ========================================================================= |
||
;uLong (uLong adler1, uLong adler2, z_off64_t len2)
; Combine two Adler-32 checksums: adler1 of a first data block and adler2 of
; a second block of len2 bytes, giving the checksum of both blocks joined.
; In:  adler1, adler2 - the two checksums
;      len2           - length of the second block (taken as a signed dword here)
; Out: eax = combined checksum, or 0xffffffff when len2 is negative
; ebx, ecx and edx are preserved through the `uses` list.
align 4
proc adler32_combine_ uses ebx ecx edx, adler1:dword, adler2:dword, len2:dword
locals
	sum1 dd ? ;uLong
	sum2 dd ? ;uLong
endl
	; for negative len, return invalid adler32 as a clue for debugging
	cmp dword[len2],0
	jge @f ;if (..<0)
		mov eax,0xffffffff
		jmp .end_f
@@:

	; the derivation of this formula is left as an exercise for the reader

	; rem = len2 % BASE (len2 >= 0 here)
	mov eax,[len2]
	xor edx,edx
	mov ecx,BASE ;ecx keeps BASE for both divisions
	div ecx
	mov ebx,edx ;ebx = rem

	; sum1 = adler1 & 0xffff
	mov eax,[adler1]
	and eax,0xffff
	mov [sum1],eax

	; sum2 = (rem * sum1) % BASE
	; rem < BASE and sum1 <= 0xffff, so the product fits in eax and edx = 0
	mul ebx
	div ecx
	mov [sum2],edx

	; sum1 += (adler2 & 0xffff) + BASE - 1
	mov eax,[adler2]
	and eax,0xffff
	add eax,BASE-1
	add [sum1],eax

	; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem
	; (a 32-bit shr by 16 already leaves only the upper 16 bits)
	mov eax,[adler1]
	shr eax,16
	mov edx,[adler2]
	shr edx,16
	add eax,edx
	add eax,BASE
	sub eax,ebx
	add [sum2],eax

	; bring both sums back below BASE (unsigned compares)
	cmp dword[sum1],BASE
	jb @f ;if (..>=..)
		sub dword[sum1],BASE
@@:
	cmp dword[sum1],BASE
	jb @f ;if (..>=..)
		sub dword[sum1],BASE
@@:
	cmp dword[sum2],BASE shl 1
	jb @f ;if (..>=..)
		sub dword[sum2],BASE shl 1
@@:
	cmp dword[sum2],BASE
	jb @f ;if (..>=..)
		sub dword[sum2],BASE
@@:
	; return sum1 | (sum2 << 16)
	mov eax,[sum2]
	shl eax,16
	or eax,[sum1]
.end_f:
	ret
endp
||
270 | |||
271 | ; ========================================================================= |
||
;uLong (adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off_t len2
;
; Entry point that forwards its three arguments unchanged to
; adler32_combine_; eax is left as set by that call.
align 4
proc adler32_combine, adler1:dword, adler2:dword, len2:dword
	stdcall adler32_combine_, [adler1], [adler2], [len2]
	ret
endp
||
281 | |||
;uLong (adler1, adler2, len2)
; uLong adler1
; uLong adler2
; z_off64_t len2
;
; Entry point that forwards its three arguments unchanged to
; adler32_combine_; eax is left as set by that call.
; NOTE(review): len2 is declared as a dword here, so only a 32-bit length
; is passed on despite the z_off64_t in the C prototype -- confirm callers.
align 4
proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
	stdcall adler32_combine_, [adler1], [adler2], [len2]
	ret
endp