WebSVN – Kolibri OS – Blame – /programs/fs/kfar/trunk/zlib/adler32.asm

Rev	Author	Line No.	Line
6617	IgorA	1	; adler32.asm -- compute the Adler-32 checksum of a data stream
		2	; Copyright (C) 1995-2011 Mark Adler
		3	; For conditions of distribution and use, see copyright notice in zlib.h
		4
		5
		6	BASE equ 65521 ;largest prime smaller than 65536
		7	NMAX equ 5552
		8	; NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
		9
		; DO1 buf,i -- fold one input byte into the two running sums.
		;   buf : operand that can be loaded into eax (the callers in this file pass ebx)
		;   i   : constant byte offset added to buf
		; Effect: [adler] += byte[buf+i] ; [sum2] += [adler]
		; The names `adler` and `sum2` must be defined where the macro is expanded.
		; Clobbers eax (left holding the updated [adler]) and the flags.
		macro DO1 buf,i
		{
			mov eax,buf
			movzx eax,byte[eax+i] ;eax = input byte, zero-extended
			add eax,[adler]
			mov [adler],eax ;adler += byte
			add [sum2],eax ;sum2 += adler
		}
		; DO2 buf,i -- run DO1 on the two consecutive bytes at offsets i and i+1.
		; Same clobbers as DO1 (eax, flags).
		macro DO2 buf,i
		{
			DO1 buf,(i)
			DO1 buf,(i)+1
		}
		; DO4 buf,i -- run DO1 on the four consecutive bytes at offsets i .. i+3,
		; in ascending order. Same clobbers as DO1 (eax, flags).
		macro DO4 buf,i
		{
			DO1 buf,(i)
			DO1 buf,(i)+1
			DO1 buf,(i)+2
			DO1 buf,(i)+3
		}
		; DO8 buf,i -- process the eight consecutive bytes at offsets i .. i+7,
		; in ascending order, as four byte pairs. Same clobbers as DO1 (eax, flags).
		macro DO8 buf,i
		{
			DO2 buf,(i)
			DO2 buf,(i)+2
			DO2 buf,(i)+4
			DO2 buf,(i)+6
		}
		; DO16 buf -- process the sixteen bytes at offsets 0 .. 15 from buf,
		; in ascending order, as four groups of four. Does not advance buf.
		; Same clobbers as DO1 (eax, flags).
		macro DO16 buf
		{
			DO4 buf,0
			DO4 buf,4
			DO4 buf,8
			DO4 buf,12
		}
		39
		; use NO_DIVIDE if your processor does not do division in hardware --
		; try it both ways to see which is faster
		; note that this assumes BASE is 65521, where 65536 % 65521 == 15
		; (thank you to John Reiser for pointing this out)
		;
		; CHOP a -- partial reduction of a modulo BASE without a division:
		;   tmp = a >> 16 ; a = (a & 0xffff) + 15*tmp
		; The result is congruent to the input modulo BASE but not fully reduced.
		;   a : dword memory operand or register other than eax
		; Clobbers eax (left holding tmp) and the flags.
		; Expands to nothing unless NO_DIVIDE is 1.
		macro CHOP a
		{
		if NO_DIVIDE eq 1
			mov eax,a
			and a,0xffff ;a = low half
			shr eax,16 ;eax = tmp = high half
			sub a,eax ;a = low - tmp (may wrap; the next add brings it back)
			shl eax,4
			add a,eax ;a = low - tmp + 16*tmp = low + 15*tmp
			shr eax,4 ;leave eax = tmp
		end if
		}
		; MOD28 a -- reduce a modulo BASE.
		;   a : dword memory operand (or a register other than eax, ecx, edx)
		; NO_DIVIDE = 1 : one CHOP followed by a single conditional subtraction of
		;                 BASE; clobbers eax and the flags.
		; otherwise     : a = a mod BASE via DIV; eax, ecx and edx are saved and
		;                 restored around the division, flags are clobbered.
		macro MOD28 a
		{
		local .reduced
		if NO_DIVIDE eq 1
			CHOP a
			cmp a,BASE
			jb .reduced ;if (a >= BASE)
			sub a,BASE
			.reduced:
		else
			push eax ecx edx
			mov eax,a
			mov ecx,BASE
			xor edx,edx ;dividend is edx:eax, high half must be zero
			div ecx
			mov a,edx ;remainder
			pop edx ecx eax
		end if
		}
		; MOD a -- reduce a modulo BASE.
		;   a : dword memory operand (or a register other than eax, ecx, edx)
		; NO_DIVIDE = 1 : an extra CHOP in front of MOD28, i.e. two CHOP steps and
		;                 one conditional subtraction; clobbers eax and the flags.
		; otherwise     : MOD28's division path, which preserves eax, ecx and edx.
		macro MOD a
		{
		if NO_DIVIDE eq 1
			CHOP a
		end if
			MOD28 a
		}
		; MOD63 a -- reduce a non-negative length modulo BASE.
		;   a : dword memory operand (or a register other than eax, ecx, edx)
		;
		; The C original operates on a 64-bit z_off64_t:
		;   tmp = a >> 32; a &= 0xffffffff; a += (tmp << 8) - (tmp << 5) + tmp;
		;   tmp = a >> 16; a &= 0xffff;     a += (tmp << 4) - tmp;
		;   tmp = a >> 16; a &= 0xffff;     a += (tmp << 4) - tmp;
		;   if (a >= BASE) a -= BASE;
		; The operand here is only 32 bits wide, so the first step has nothing to
		; fold in and the remainder is exactly what MOD does: two CHOP steps plus
		; one conditional subtraction when NO_DIVIDE is 1, a plain division
		; otherwise. Delegating to MOD also means the NO_DIVIDE build performs a
		; real reduction instead of expanding to nothing.
		; Clobbers: see MOD (eax and flags when NO_DIVIDE is 1, flags otherwise).
		macro MOD63 a
		{
			MOD a
		}
		114
		; =========================================================================
		; uLong adler32(uLong adler, const Bytef *buf, uInt len)
		;
		; Update a running Adler-32 checksum with len bytes read from buf.
		; In:   adler = current checksum; low 16 bits are the byte sum, high 16 bits
		;               the sum of the running byte sums
		;       buf   = pointer to the input bytes
		;       len   = number of bytes, treated as unsigned
		; Out:  eax = updated checksum.
		;       When buf == Z_NULL and len != 1, eax = 1 (the initial Adler-32 value).
		;       The Z_NULL test is deliberately placed after the len == 1 fast path,
		;       so len == 1 always dereferences buf.
		; Saves ebx and edx (`uses`). ecx is untouched except inside MOD/MOD28,
		; whose division path saves and restores it.
		; Locals: sum2 holds the high-half sum while [adler] is reused for the
		; low-half sum; the DO16 macro updates both by name.
		align 4
		proc adler32 uses ebx edx, adler:dword, buf:dword, len:dword
		locals
			sum2 dd ? ;uLong
		endl
		;zlib_debug 'adler32 adler = %d',[adler]
			; split Adler-32 into component sums
			mov eax,[adler]
			shr eax,16
			mov [sum2],eax
			and dword[adler],0xffff
			mov ebx,[buf] ;ebx = read pointer for the rest of the routine

			; in case user likes doing a byte at a time, keep it fast
			cmp dword[len],1
			jne .not_single ;if (len == 1)
				movzx eax,byte[ebx]
				add [adler],eax
				cmp dword[adler],BASE
				jb @f ;if (adler >= BASE)
					sub dword[adler],BASE
				@@:
				mov eax,[adler]
				add [sum2],eax
				cmp dword[sum2],BASE
				jb @f ;if (sum2 >= BASE)
					sub dword[sum2],BASE
				@@:
				jmp .combine
		align 4
			.not_single:

			; initial Adler-32 value (deferred check for len == 1 speed)
			cmp ebx,Z_NULL
			jne @f ;if (buf == Z_NULL)
				xor eax,eax
				inc eax ;return 1
				jmp .end_f
		align 4
			@@:

			; in case short lengths are provided, keep it somewhat fast
			cmp dword[len],16
			jae .blocks ;if (len < 16), unsigned compare
				.short_loop: ;while (len--)
					cmp dword[len],0
					je .short_done ;leave the loop once no bytes remain
					movzx eax,byte[ebx]
					inc ebx
					add [adler],eax
					mov eax,[adler]
					add [sum2],eax
					dec dword[len]
					jmp .short_loop
		align 4
				.short_done:
				; at most 15 bytes were added, so one subtraction is enough
				cmp dword[adler],BASE
				jb @f ;if (adler >= BASE)
					sub dword[adler],BASE
				@@:
				MOD28 dword[sum2] ;only added so many BASE's
				jmp .combine
		align 4
			.blocks:

			; do length NMAX blocks -- requires just one modulo operation
			.nmax_loop: ;while (len >= NMAX), unsigned compare
				cmp dword[len],NMAX
				jb .nmax_done
				sub dword[len],NMAX
				mov edx,NMAX/16 ;NMAX is divisible by 16
				.nmax_inner: ;do
					DO16 ebx ;16 sums unrolled
					add ebx,16
					dec edx
					jnz .nmax_inner ;while (--n)
				MOD [adler]
				MOD [sum2]
				jmp .nmax_loop
		align 4
			.nmax_done:

			; do remaining bytes (less than NMAX, still just one modulo)
			cmp dword[len],0
			je .combine ;if (len) ;avoid modulos if none remaining
				.tail16: ;while (len >= 16)
					cmp dword[len],16
					jb .tail1
					sub dword[len],16
					DO16 ebx
					add ebx,16
					jmp .tail16
		align 4
				.tail1: ;while (len--)
					cmp dword[len],0
					je .tail_done
					movzx eax,byte[ebx]
					inc ebx
					add [adler],eax
					mov eax,[adler]
					add [sum2],eax
					dec dword[len]
					jmp .tail1
		align 4
				.tail_done:
				MOD [adler]
				MOD [sum2]

			; return recombined sums
			.combine:
			mov eax,[sum2]
			shl eax,16
			or eax,[adler]
			.end_f:
		;zlib_debug ' adler32.ret = %d',eax
			ret
		endp
		239
		; =========================================================================
		; uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2)
		;
		; Combine two Adler-32 checksums into the checksum of the concatenated data.
		; In:   adler1 = checksum of the first sequence
		;       adler2 = checksum of the second sequence
		;       len2   = length of the second sequence; only a 32-bit value is
		;                passed here and it is tested as signed
		; Out:  eax = combined checksum, or 0xffffffff when len2 is negative
		; Saves ecx and edx (`uses`); flags are clobbered.
		align 4
		proc adler32_combine_ uses ecx edx, adler1:dword, adler2:dword, len2:dword
		locals
			sum1 dd ? ;uLong
			sum2 dd ? ;uLong
			len_rem dd ? ;unsigned, len2 mod BASE
		endl
			; for negative len, return invalid adler32 as a clue for debugging
			cmp dword[len2],0
			jge @f ;if (len2 < 0)
				mov eax,0xffffffff
				jmp .end_f
			@@:

			; the derivation of this formula is left as an exercise for the reader
			; len_rem = len2 % BASE ;len2 >= 0 here
			mov eax,[len2]
			xor edx,edx
			mov ecx,BASE ;ecx stays = BASE for the second division below
			div ecx
			mov [len_rem],edx

			; sum1 = adler1 & 0xffff
			mov eax,[adler1]
			and eax,0xffff
			mov [sum1],eax

			; sum2 = (len_rem * sum1) % BASE
			; the product is below BASE * 65536, so the quotient fits in 32 bits
			mul dword[len_rem] ;edx:eax = len_rem * sum1
			div ecx
			mov [sum2],edx

			; sum1 += (adler2 & 0xffff) + BASE - 1
			mov eax,[adler2]
			and eax,0xffff
			add eax,BASE-1
			add [sum1],eax

			; sum2 += (adler1 >> 16) + (adler2 >> 16) + BASE - len_rem
			mov eax,[adler1]
			shr eax,16
			mov edx,[adler2]
			shr edx,16
			lea eax,[eax+edx+BASE]
			sub eax,[len_rem]
			add [sum2],eax

			; bring both sums back below BASE with conditional subtractions
			cmp dword[sum1],BASE
			jb @f ;if (sum1 >= BASE)
				sub dword[sum1],BASE
			@@:
			cmp dword[sum1],BASE
			jb @f ;if (sum1 >= BASE)
				sub dword[sum1],BASE
			@@:
			cmp dword[sum2],BASE shl 1
			jb @f ;if (sum2 >= (BASE << 1))
				sub dword[sum2],BASE shl 1
			@@:
			cmp dword[sum2],BASE
			jb @f ;if (sum2 >= BASE)
				sub dword[sum2],BASE
			@@:

			; return sum1 | (sum2 << 16)
			mov eax,[sum2]
			shl eax,16
			or eax,[sum1]
			.end_f:
			ret
		endp
		289
		; =========================================================================
		; uLong adler32_combine(uLong adler1, uLong adler2, z_off_t len2)
		;
		; Public entry point: passes its three arguments unchanged to
		; adler32_combine_ and returns that routine's eax.
		align 4
		proc adler32_combine, adler1:dword, adler2:dword, len2:dword
			; arguments go on the stack right to left; the callee removes them
			push dword[len2]
			push dword[adler2]
			push dword[adler1]
			call adler32_combine_
			ret
		endp
		300
		; uLong adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2)
		;
		; Public entry point: passes its three arguments unchanged to
		; adler32_combine_ and returns that routine's eax. len2 is declared and
		; forwarded as a single dword.
		align 4
		proc adler32_combine64, adler1:dword, adler2:dword, len2:dword
			; arguments go on the stack right to left; the callee removes them
			push dword[len2]
			push dword[adler2]
			push dword[adler1]
			call adler32_combine_
			ret
		endp

Subversion Repositories Kolibri OS

(root)/programs/fs/kfar/trunk/zlib/adler32.asm – Rev 6851