WebSVN – Kolibri OS – Blame – /programs/fs/kfar/trunk/zlib/adler32.asm

Rev	Author	Line No.	Line
6617	IgorA	1	; adler32.asm -- compute the Adler-32 checksum of a data stream
		2	; Copyright (C) 1995-2011 Mark Adler
		3	; For conditions of distribution and use, see copyright notice in zlib.h
		4
		5
		6	BASE equ 65521 ;modulus applied to both running sums: the largest prime smaller than 65536
		7	NMAX equ 5552 ;number of bytes adler32 sums between two modulo reductions (a multiple of 16)
		8	; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
		9
		10	macro DO1 buf,i
		11	{
6873	IgorA	12	movzx eax,byte[buf+i]
6617	IgorA	13	add [adler],eax
6873	IgorA	14	add edi,[adler]
6617	IgorA	15	}
		16	macro DO2 buf,i
		17	{
		18	DO1 buf,i
		19	DO1 buf,i+1
		20	}
		21	macro DO4 buf,i
		22	{
		23	DO2 buf,i
		24	DO2 buf,i+2
		25	}
		26	macro DO8 buf,i
		27	{
		28	DO4 buf,i
		29	DO4 buf,i+4
		30	}
		31	macro DO16 buf
		32	{
		33	DO8 buf,0
		34	DO8 buf,8
		35	}
		36
		37	; use NO_DIVIDE if your processor does not do division in hardware --
		38	; try it both ways to see which is faster
		39	; note that this assumes BASE is 65521, where 65536 % 65521 == 15
		40	; (thank you to John Reiser for pointing this out)
		41	macro CHOP a
		42	{
		43	if NO_DIVIDE eq 1
		44	mov eax,a
		45	shr eax,16
		46	and a,0xffff
		47	shl eax,4
		48	add a,eax
		49	shr eax,4
		50	sub a,eax
		51	end if
		52	}
		53	macro MOD28 a
		54	{
		55	if NO_DIVIDE eq 1
		56	local .end0
		57	CHOP a
		58	cmp a,BASE
		59	jl .end0 ;if (..>=..)
		60	sub a,BASE
		61	.end0:
		62	else
		63	push eax ecx edx
		64	mov eax,a
		65	xor edx,edx
		66	mov ecx,BASE
		67	div ecx
		68	mov a,edx
		69	pop edx ecx eax
		70	end if
		71	}
		72	macro MOD a
		73	{
		74	if NO_DIVIDE eq 1
		75	CHOP a
		76	MOD28 a
		77	else
		78	push eax ecx edx
		79	mov eax,a
		80	xor edx,edx
		81	mov ecx,BASE
		82	div ecx
		83	mov a,edx
		84	pop edx ecx eax
		85	end if
		86	}
		87	macro MOD63 a
		88	{
		89	if NO_DIVIDE eq 1
		90	;this assumes a is not negative
		91	; z_off64_t tmp = a >> 32;
		92	; a &= 0xffffffff;
		93	; a += (tmp << 8) - (tmp << 5) + tmp;
		94	; tmp = a >> 16;
		95	; a &= 0xffff;
		96	; a += (tmp << 4) - tmp;
		97	; tmp = a >> 16;
		98	; a &= 0xffff;
		99	; a += (tmp << 4) - tmp;
		100	; if (a >= BASE) a -= BASE;
		101	else
		102	push eax ecx edx
		103	mov eax,a
		104	xor edx,edx
		105	mov ecx,BASE
		106	div ecx
		107	mov a,edx
		108	pop edx ecx eax
		109	end if
		110	}
		111
		112	; =========================================================================
;uLong adler32(uLong adler, const Bytef *buf, uInt len)
; Updates a running Adler-32 checksum with the len bytes found at buf.
; In:  adler - checksum so far: second sum in bits 31..16, first sum in bits 15..0
;      buf   - pointer to the data; Z_NULL asks for the initial checksum value
;      len   - number of bytes at buf
; Out: eax = updated checksum; 1 when buf is Z_NULL
;      (the Z_NULL test is deliberately made after the len == 1 fast path,
;       so buf is dereferenced without a check when len == 1)
; Preserves ebx, ecx, edx, edi (saved by `uses`).
; Register roles inside the procedure:
;      [adler] = first sum (the argument slot is reused as the accumulator)
;      edi     = second sum
;      ebx     = read pointer
;      ecx     = bytes still to process
align 16
proc adler32 uses ebx ecx edx edi, adler:dword, buf:dword, len:dword
	; split Adler-32 into component sums
	mov edi,[adler]
	shr edi,16
	and dword[adler],0xffff
	mov ebx,[buf]
	mov ecx,[len]

	; in case user likes doing a byte at a time, keep it fast
	cmp ecx,1
	jne .not_single
		movzx eax,byte[ebx]
		add [adler],eax
		cmp dword[adler],BASE
		jb @f ;if (adler >= BASE)
			sub dword[adler],BASE
		@@:
		add edi,[adler]
		cmp edi,BASE
		jb .combine ;if (sum2 >= BASE) -- skip the subtraction only when already reduced
			sub edi,BASE
		jmp .combine
align 4
.not_single:

	; initial Adler-32 value (deferred check for len == 1 speed)
	cmp ebx,Z_NULL
	jne @f ;if (buf == Z_NULL)
		xor eax,eax
		inc eax ;return 1
		jmp .end_f
align 4
	@@:

	; in case short lengths are provided, keep it somewhat fast
	cmp ecx,16
	jae .blocks ;if (len < 16)
	test ecx,ecx
	jz .short_done
	.short_loop: ;while (len--)
		movzx eax,byte[ebx]
		add [adler],eax
		inc ebx
		add edi,[adler]
		dec ecx
		jnz .short_loop
	.short_done:
	cmp dword[adler],BASE
	jb @f ;if (adler >= BASE)
		sub dword[adler],BASE
	@@:
	MOD28 edi ;only added so many BASE's
	jmp .combine

	; do length NMAX blocks -- requires just one modulo operation
align 4
.blocks:
	cmp ecx,NMAX
	jb .tail ;while (len >= NMAX)
		sub ecx,NMAX
		mov edx,NMAX/16 ;NMAX is divisible by 16
		.block16: ;do
			DO16 ebx ;16 sums unrolled
			add ebx,16
			dec edx
			jnz .block16 ;while (--n)
		MOD [adler]
		MOD edi
		jmp .blocks
align 4
.tail:

	; do remaining bytes (less than NMAX, still just one modulo)
	test ecx,ecx
	jz .combine ;avoid modulos if none remaining
	.tail16:
		cmp ecx,16
		jb .tail_bytes ;while (len >= 16)
		sub ecx,16
		DO16 ebx
		add ebx,16
		jmp .tail16
align 4
	.tail_bytes:
	test ecx,ecx
	jz .tail_done
	.tail_loop: ;while (len--)
		movzx eax,byte[ebx]
		add [adler],eax
		inc ebx
		add edi,[adler]
		dec ecx
		jnz .tail_loop
	.tail_done:
	MOD [adler]
	MOD edi

	; return recombined sums
.combine:
	mov eax,edi
	shl eax,16
	or eax,[adler]
.end_f:
	ret
endp
		223
		224	; =========================================================================
;uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2)
; Combines two Adler-32 checksums into the checksum of the concatenated data.
; In:  adler1 - checksum of the first sequence
;      adler2 - checksum of the second sequence
;      len2   - length of the second sequence (handled as a signed dword here)
; Out: eax = combined checksum, or 0xffffffff when len2 is negative
; Preserves ebx, ecx, edx (saved by `uses`).
; Register roles: ebx = rem (len2 mod BASE), ecx = BASE (divisor).
align 4
proc adler32_combine_ uses ebx ecx edx, adler1:dword, adler2:dword, len2:dword
locals
	sum1 dd ? ;uLong
	sum2 dd ? ;uLong
endl
	; for negative len, return invalid adler32 as a clue for debugging
	cmp dword[len2],0
	jge @f ;if (len2 < 0)
		mov eax,0xffffffff
		jmp .end_f
	@@:

	; the derivation of this formula is left as an exercise for the reader
	; rem = len2 % BASE (len2 >= 0 here)
	mov ecx,BASE
	mov eax,[len2]
	xor edx,edx
	div ecx
	mov ebx,edx

	; sum1 = adler1 & 0xffff
	mov eax,[adler1]
	and eax,0xffff
	mov [sum1],eax

	; sum2 = (rem * sum1) % BASE
	; rem < BASE and sum1 <= 0xffff, so the product fits in 32 bits and
	; edx:eax can be divided directly
	mul ebx
	div ecx
	mov [sum2],edx

	; sum1 += (adler2 & 0xffff) + BASE - 1
	mov eax,[adler2]
	and eax,0xffff
	add eax,BASE-1
	add [sum1],eax

	; sum2 += (adler1 >> 16) + (adler2 >> 16) + BASE - rem
	; (a dword shifted right by 16 is already <= 0xffff, no mask needed)
	mov eax,[adler1]
	shr eax,16
	mov edx,[adler2]
	shr edx,16
	lea eax,[eax+edx+BASE]
	sub eax,ebx
	add [sum2],eax

	; bring both sums back below BASE with conditional subtractions
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum1],BASE
	jb @f ;if (sum1 >= BASE)
		sub dword[sum1],BASE
	@@:
	cmp dword[sum2],BASE shl 1
	jb @f ;if (sum2 >= 2*BASE)
		sub dword[sum2],BASE shl 1
	@@:
	cmp dword[sum2],BASE
	jb @f ;if (sum2 >= BASE)
		sub dword[sum2],BASE
	@@:

	; return sum1 | (sum2 << 16)
	mov eax,[sum2]
	shl eax,16
	or eax,[sum1]
.end_f:
	ret
endp
		270
		271	; =========================================================================
		272	;uLong (adler1, adler2, len2)
		273	; uLong adler1
		274	; uLong adler2
		275	; z_off_t len2
		276	align 4
		277	proc adler32_combine, adler1:dword, adler2:dword, len2:dword
		278	stdcall adler32_combine_, [adler1], [adler2], [len2]
		279	ret
		280	endp
		281
		282	;uLong (adler1, adler2, len2)
		283	; uLong adler1
		284	; uLong adler2
		285	; z_off64_t len2
		286	align 4
		287	proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
		288	stdcall adler32_combine_, [adler1], [adler2], [len2]
		289	ret
		290	endp

Subversion Repositories Kolibri OS

(root)/programs/fs/kfar/trunk/zlib/adler32.asm @ 6851 – Rev 6873