Subversion Repositories Kolibri OS

Rev

Rev 6851 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
6617 IgorA 1
; adler32.asm -- compute the Adler-32 checksum of a data stream
2
; Copyright (C) 1995-2011 Mark Adler
3
; For conditions of distribution and use, see copyright notice in zlib.h
4
 
5
 
6
; Modulus applied to both running sums (used by the MOD* macros and by the
; compare/subtract reduction steps in the procedures of this file).
BASE equ 65521 ;largest prime smaller than 65536
7
; Number of bytes adler32 sums per block before it applies a modulo reduction;
; adler32 uses NMAX/16 as the iteration count of its 16-byte unrolled loop.
NMAX equ 5552
8
; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
9
 
10
; DO1 buf,i -- fold one input byte into both running sums.
;   buf : base address expression (register) of the data
;   i   : constant byte offset added to buf
; Implicit operands: the dword [adler] (low sum) and edi (high sum) must be
; in scope at the expansion site. Clobbers eax and flags.
macro DO1 buf,i
{
	movzx eax, byte[buf+i]	; eax = next data byte, zero-extended
	add [adler], eax	; low sum += byte
	add edi, [adler]	; high sum += updated low sum
}
16
; DO2 buf,i -- two consecutive DO1 steps, for the bytes at buf+i and buf+i+1.
; Same implicit operands and clobbers as DO1.
macro DO2 buf,i
{
	DO1 buf, i
	DO1 buf, i+1
}
21
; DO4 buf,i -- four consecutive DO1 steps, for the bytes at buf+i .. buf+i+3.
; Same implicit operands and clobbers as DO1.
macro DO4 buf,i
{
	DO1 buf, i
	DO1 buf, i+1
	DO1 buf, i+2
	DO1 buf, i+3
}
26
; DO8 buf,i -- eight consecutive byte steps, for the bytes at buf+i .. buf+i+7,
; emitted as four DO2 pairs in ascending address order.
macro DO8 buf,i
{
	DO2 buf, i
	DO2 buf, i+2
	DO2 buf, i+4
	DO2 buf, i+6
}
31
; DO16 buf -- sixteen consecutive byte steps, for the bytes at buf+0 .. buf+15,
; emitted as four DO4 groups in ascending address order.
; buf itself is not advanced; the caller adds 16 afterwards.
macro DO16 buf
{
	DO4 buf, 0
	DO4 buf, 4
	DO4 buf, 8
	DO4 buf, 12
}
36
 
37
; use NO_DIVIDE if your processor does not do division in hardware --
38
; try it both ways to see which is faster
39
; note that this assumes BASE is 65521, where 65536 % 65521 == 15
40
; (thank you to John Reiser for pointing this out)
41
; CHOP a -- partial reduction of a modulo BASE without a division:
;   a = (a and 0xffff) + 15*(a shr 16)
; which relies on 65536 mod 65521 = 15.
;   a : 32-bit register or dword memory operand; must not be eax, because
;       eax is used as the temporary.
; Expands to nothing unless NO_DIVIDE is 1. Clobbers eax and flags.
macro CHOP a
{
if NO_DIVIDE eq 1
	mov eax, a		; eax = tmp = a
	shr eax, 16		; tmp = a >> 16
	and a, 0xffff		; a &= 0xffff
	shl eax, 4		; eax = tmp << 4
	add a, eax		; a += tmp << 4
	shr eax, 4		; eax = tmp again
	sub a, eax		; a -= tmp  -> net effect a += 15*tmp
end if
}
53
; MOD28 a -- reduce a modulo BASE.
;   a : 32-bit register or dword memory operand.
; NO_DIVIDE = 1: one CHOP followed by a single conditional subtraction of
;   BASE; a must not be eax (CHOP clobbers it). A single subtraction is only
;   enough when a is small on entry -- presumably a < 2^28, as the name
;   suggests; confirm against callers.
; otherwise: exact remainder via DIV. eax, ecx and edx are saved and restored
;   around the division, so a must not be one of them (the result would be
;   overwritten by the pops).
; Flags are clobbered in both variants.
macro MOD28 a
{
if NO_DIVIDE eq 1
local .end0
	CHOP a
	cmp a,BASE
	jb .end0 ;if (a >= BASE) -- unsigned compare, a is an unsigned sum
		sub a,BASE
	.end0:
else
	push eax ecx edx
	mov eax,a
	xor edx,edx ;dividend is edx:eax, high half must be zero
	mov ecx,BASE
	div ecx
	mov a,edx ;remainder
	pop edx ecx eax
end if
}
72
; MOD a -- reduce a modulo BASE.
;   a : 32-bit register or dword memory operand.
; NO_DIVIDE = 1: CHOP followed by MOD28 (which chops once more and then
;   subtracts BASE at most once); eax is clobbered, so a must not be eax.
; otherwise: remainder of an unsigned division by BASE; eax, ecx and edx are
;   preserved, so a must not be one of those registers.
; Flags are clobbered in both variants.
macro MOD a
{
if NO_DIVIDE eq 1
	CHOP a
	MOD28 a
else
	push eax
	push ecx
	push edx
	mov eax, a		; dividend, low half
	mov ecx, BASE		; divisor
	xor edx, edx		; dividend, high half = 0
	div ecx
	mov a, edx		; a = remainder
	pop edx
	pop ecx
	pop eax
end if
}
87
; MOD63 a -- reduce a modulo BASE; a is assumed to be non-negative.
;   a : 32-bit register or dword memory operand.
; In the C original this macro reduces a 64-bit z_off64_t. Operands in this
; file are 32 bits wide, so the "a >> 32" folding step of the C code is
; always zero and is omitted.
; NO_DIVIDE = 1: two CHOP steps bring any 32-bit value below 2*BASE
;   (first CHOP: a <= 16*0xffff; second CHOP: a <= 0xffff + 15*15), then one
;   conditional subtraction finishes the reduction. eax is clobbered, so a
;   must not be eax.
; otherwise: remainder of an unsigned division by BASE; eax, ecx and edx are
;   preserved, so a must not be one of those registers.
; Flags are clobbered in both variants.
macro MOD63 a
{
if NO_DIVIDE eq 1
local .end0
	CHOP a
	CHOP a
	cmp a,BASE
	jb .end0 ;if (a >= BASE)
		sub a,BASE
	.end0:
else
	push eax ecx edx
	mov eax,a
	xor edx,edx ;dividend is edx:eax, high half must be zero
	mov ecx,BASE
	div ecx
	mov a,edx ;remainder
	pop edx ecx eax
end if
}
111
 
112
; =========================================================================
6873 IgorA 113
;uLong (uLong adler, const Bytef *buf, uInt len)
; Updates a running Adler-32 checksum with len bytes read from buf.
; In:  adler - current checksum: low word = byte sum, high word = sum of sums
;      buf   - pointer to the data; Z_NULL requests the initial value
;      len   - number of bytes at buf
; Out: eax = updated checksum, or 1 when buf is Z_NULL.
;      The Z_NULL test is deferred until after the len == 1 fast path, so
;      buf must be a valid pointer whenever len == 1.
; Registers: ebx, ecx, edx, edi are preserved ('uses'); eax is the result.
; Internal roles: [adler] = low sum, edi = high sum, ebx = read pointer,
;      ecx = bytes remaining, edx = 16-byte group counter inside a block.
align 16
proc adler32 uses ebx ecx edx edi, adler:dword, buf:dword, len:dword
	; split Adler-32 into component sums
	mov edi,[adler]
	shr edi,16
	and dword[adler],0xffff
	mov ebx,[buf]
	mov ecx,[len]

	; in case user likes doing a byte at a time, keep it fast
	cmp ecx,1
	jne .end0 ;if (len == 1)
		movzx eax,byte[ebx]
		add [adler],eax
		cmp dword[adler],BASE
		jb @f ;if (adler >= BASE)
			sub dword[adler],BASE
		@@:
		add edi,[adler]
		cmp edi,BASE
		jb .combine ;if (sum2 >= BASE) -- subtract only when the sum overflowed BASE
			sub edi,BASE
		jmp .combine
align 4
.end0:

	; initial Adler-32 value (deferred check for len == 1 speed)
	cmp ebx,Z_NULL
	jne @f ;if (buf == Z_NULL)
		xor eax,eax
		inc eax ;return 1
		jmp .end_f
align 4
	@@:

	; in case short lengths are provided, keep it somewhat fast
	cmp ecx,16
	jae .cycle3 ;if (len < 16)
		.cycle0:
			mov eax,ecx
			dec ecx
			test eax,eax
			je @f ;while (len--)
			movzx eax,byte[ebx]
			add [adler],eax
			inc ebx
			add edi,[adler]
			jmp .cycle0
align 4
		@@:
		; fewer than 16 bytes were added, so one subtraction is enough
		cmp dword[adler],BASE
		jb @f ;if (adler >= BASE)
			sub dword[adler],BASE
		@@:
		MOD28 edi ;only added so many BASE's
		jmp .combine

	; do length NMAX blocks -- requires just one modulo operation
align 4
	.cycle3:
	cmp ecx,NMAX
	jb .cycle3end ;while (len >= NMAX)
		sub ecx,NMAX
		mov edx,NMAX/16 ;NMAX is divisible by 16
		.cycle1: ;do
			DO16 ebx ;16 sums unrolled
			add ebx,16
			dec edx
			jne .cycle1 ;while (--n)
		MOD [adler]
		MOD edi
		jmp .cycle3
align 4
	.cycle3end:

	; do remaining bytes (less than NMAX, still just one modulo)
	test ecx,ecx
	je .combine ;if (len) ;avoid modulos if none remaining
	@@:
		cmp ecx,16
		jb .cycle2 ;while (len >= 16)
			sub ecx,16
			DO16 ebx
			add ebx,16
			jmp @b
align 4
		.cycle2:
			mov eax,ecx
			dec ecx
			test eax,eax
			je @f ;while (len--)
			movzx eax,byte[ebx]
			add [adler],eax
			inc ebx
			add edi,[adler]
			jmp .cycle2
align 4
		@@:
		MOD [adler]
		MOD edi

	; return recombined sums
.combine:
	mov eax,edi
	shl eax,16
	or eax,[adler]
.end_f:
	ret
endp
223
 
224
; =========================================================================
6873 IgorA 225
;uLong (uLong adler1, uLong adler2, z_off64_t len2)
; Combines two Adler-32 checksums into the checksum of the concatenated data.
; In:  adler1 - checksum of the first sequence
;      adler2 - checksum of the second sequence
;      len2   - length of the second sequence; a dword in this port, tested
;               as a signed value
; Out: eax = combined checksum, or 0xffffffff when len2 is negative.
; Registers: ebx, ecx, edx are preserved ('uses'); eax is the result.
align 4
proc adler32_combine_ uses ebx ecx edx, adler1:dword, adler2:dword, len2:dword
locals
	sum1 dd ? ;uLong
	sum2 dd ? ;uLong
endl
	; for negative len, return invalid adler32 as a clue for debugging
	cmp dword[len2],0
	jge @f ;if (len2 < 0)
		mov eax,0xffffffff
		jmp .end_f
	@@:

	; the derivation of this formula is left as an exercise for the reader

	; rem = len2 % BASE (len2 >= 0 here, so an unsigned division is correct)
	mov ebx,BASE
	mov eax,[len2]
	xor edx,edx
	div ebx
	mov ecx,edx ;ecx = rem

	; sum1 = adler1 & 0xffff
	mov eax,[adler1]
	and eax,0xffff
	mov [sum1],eax

	; sum2 = (rem * sum1) % BASE
	; rem < BASE and sum1 <= 0xffff, so the product fits in 32 bits and
	; the edx:eax result of MUL can be divided directly
	mul ecx
	div ebx
	mov [sum2],edx

	; sum1 += (adler2 & 0xffff) + BASE - 1
	mov eax,[adler2]
	and eax,0xffff
	add eax,BASE-1
	add [sum1],eax

	; sum2 += (adler1 >> 16) + (adler2 >> 16) + BASE - rem
	mov eax,[adler1]
	shr eax,16
	mov edx,[adler2]
	shr edx,16
	add eax,edx
	add eax,BASE
	sub eax,ecx
	add [sum2],eax

	; bring both sums back below BASE using conditional subtractions only
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum2],BASE shl 1
	jb @f ;if (sum2 >= 2*BASE)
		sub dword[sum2],BASE shl 1
	@@:
	cmp dword[sum2],BASE
	jb @f ;if (sum2 >= BASE)
		sub dword[sum2],BASE
	@@:

	; return sum1 | (sum2 << 16)
	mov eax,[sum2]
	shl eax,16
	or eax,[sum1]
.end_f:
	ret
endp
270
 
271
; =========================================================================
272
;uLong (adler1, adler2, len2)
;    uLong adler1
;    uLong adler2
;    z_off_t len2
; Public entry point: passes its three arguments unchanged to
; adler32_combine_ and returns whatever that procedure leaves in eax.
align 4
proc adler32_combine, adler1:dword, adler2:dword, len2:dword
	; stdcall order: arguments pushed right to left, callee removes them
	push dword[len2]
	push dword[adler2]
	push dword[adler1]
	call adler32_combine_
	ret
endp
281
 
282
;uLong (adler1, adler2, len2)
;    uLong adler1
;    uLong adler2
;    z_off64_t len2
; Public entry point: passes its three arguments unchanged to
; adler32_combine_ and returns whatever that procedure leaves in eax.
; NOTE(review): len2 is declared as a dword here, so only 32 bits of the
; length are forwarded despite the z_off64_t in the C prototype above.
align 4
proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
	; stdcall order: arguments pushed right to left, callee removes them
	push dword[len2]
	push dword[adler2]
	push dword[adler1]
	call adler32_combine_
	ret
endp