/*
  Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.

  See the accompanying file LICENSE, version 2000-Apr-09 or later
  (the contents of which are also included in zip.h) for terms of use.
  If, for some reason, all these files are missing, the Info-ZIP license
  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
*/
/* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
 * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
 * Last revised: 07-Jan-2007
 *
 * Original coded (in crc_i386.asm) and put into the public domain
 * by Paul Kienitz and Christian Spieler.
 *
 * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
 *   fixed to assemble with masm by not using .model directive which makes
 *   assumptions about segment alignment.  Also,
 *   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
 *   than lodsb, and other misc. changes resulting in the following performance
 *   increases:
 *
 *      unrolled loops                NO_UNROLLED_LOOPS
 *      *8      >8      <8            *8      >8      <8
 *
 *      +54%    +42%    +35%          +82%    +52%    +25%
 *
 *   first item in each table is input buffer length, even multiple of 8
 *   second item in each table is input buffer length, > 8
 *   third item in each table is input buffer length, < 8
 *
 * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
 *   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
 *   UNIX AS source crc_i386.S.  This new code can be disabled by defining
 *   the macro symbol NO_32_BIT_LOADS.
 *
 * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
 *   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
 *   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
 *   This optimization is controlled by the macro symbol __686 and is disabled
 *   by default.  (This default is based on the assumption that most users
 *   do not yet work on a Pentium Pro or Pentium II machine ...)
 *
 * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
 *   32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
 *   confirmed correct working with MS VC++ (32-bit).
 *
 * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
 *   MSVC 5.0.  Inside preprocessor macros, each instruction is enclosed in its
 *   own __asm {...} construct.  For MSVC, a "#pragma warning" was added to
 *   shut up the "no return value" warning message.
 *
 * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
 *
 * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
 *   switching between ASM vs. non-ASM builds, when handling makefiles.
 *   Also enabled the 686 build by default, because there are hardly any
 *   pre-686 CPUs in serious use nowadays.  (See the 12-Oct-97 note above.)
 *
 * Revised 03-Jan-2006, Chr. Spieler
 *   Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
 *   data buffer in loop body (adjust pointer only once in loop body and use
 *   offsets to access each item); added additional support for the "unfolded
 *   tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
 *
 * Revised 07-Jan-2007, Chr. Spieler
 *   Recognize additional conditional flag CRC_TABLE_ONLY that prevents
 *   compilation of the crc32() function.
 *
 * FLAT memory model assumed.
 *
 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
 * This results in shorter code at the expense of reduced performance.
 *
 */
|
#include "../zip.h" |
#include "../crc32.h" |
|
#if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)

/* The 686-tuned code path (movzx based table indexing) is the default;
 * define PRE_686 to build the variant for older CPUs instead. */
#if !defined(PRE_686) && !defined(__686)
#  define __686
#endif

/* Fallback in case none of the included headers defined ZCONST. */
#ifndef ZCONST
#  define ZCONST const
#endif

/* Select whether the following inline-assembler code is supported. */
#if defined(_MSC_VER) && (_MSC_VER >= 700) && \
    defined(_M_IX86) && (_M_IX86 >= 300)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
   /* Disable warning for no return value, typical of asm functions */
#  pragma warning( disable : 4035 )
#endif

#if defined(__BORLANDC__) && (__BORLANDC__ >= 452)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
#endif
|
#ifdef MSC_INLINE_ASM_32BIT_SUPPORT |
/* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ |
|
/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte (or dword) "buf++"
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 *
 * Do_CRC consumes the low byte of eax:
 *   index = c & 0xff;  c = (c >> 8) ^ crc_table[index];
 * The two variants differ only in how the index reaches ebx: the pre-686
 * form writes bl alone (so the caller must have cleared ebx beforehand),
 * the 686 form zero-extends with movzx.
 *
 * Every instruction sits in its own __asm {...} so that the macro expands
 * correctly on a single logical line.
 */
#ifndef __686
#define Do_CRC { \
  __asm { mov   bl, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#else /* __686 */
#define Do_CRC { \
  __asm { movzx ebx, al }; \
  __asm { shr   eax, 8  }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#endif /* ?__686 */
|
/* Fold the byte at [esi] into the crc in eax and advance esi by one:
 *   c = CRC32(c, *buf++)
 */
#define Do_CRC_byte { \
  __asm { xor   al, byte ptr [esi] }; \
  __asm { inc   esi }; \
  Do_CRC; }
|
/* Fold the byte at [esi+ofs] into the crc in eax.  esi is left unchanged;
 * the caller adjusts the pointer once after a group of these. */
#define Do_CRC_byteof(ofs) { \
  __asm { xor   al, byte ptr [esi+(ofs)] }; \
  Do_CRC; }
|
/*
 * Macros for the 32-bit-load code path, plus the definition of SavLen.
 *
 * SavLen names the place where crc32() parks the remaining byte count
 * while ecx is used as the loop counter.  It is either the edx register
 * or, when edx is needed as a scratch register by the unfolded-table code,
 * the function's own "len" argument slot.
 *
 * UpdCRC_dword            : run the 4 bytes already xor-ed into eax through
 *                           the CRC table(s); esi is not touched.
 * UpdCRC_dword_sh(n)      : same, and additionally advance esi by 4*n bytes.
 * Do_CRC_dword            : xor in the dword at [esi], update, esi += 4.
 * Do_CRC_4dword           : process the 16 bytes at [esi], esi += 16.
 *
 * In the IZ_CRCOPTIM_UNFOLDTBL variant the table pointed to by edi is
 * addressed as four consecutive 256-entry dword tables (byte offsets 0,
 * 1024, 2048 and 3072), with edx accumulating the partial result.
 */
#ifndef NO_32_BIT_LOADS
#ifdef IZ_CRCOPTIM_UNFOLDTBL
# define SavLen  len            /* the edx register is needed elsewhere */
# define UpdCRC_dword { \
   __asm { movzx   ebx,al }; \
   __asm { mov     edx,[edi+ebx*4+3072] }; \
   __asm { movzx   ebx,ah }; \
   __asm { shr     eax,16 }; \
   __asm { xor     edx,[edi+ebx*4+2048] }; \
   __asm { movzx   ebx,al }; \
   __asm { shr     eax,8 }; \
   __asm { xor     edx,[edi+ebx*4+1024] }; \
   __asm { mov     eax,[edi+eax*4] }; \
   __asm { xor     eax,edx }; }
# define UpdCRC_dword_sh(dwPtrIncr) { \
   __asm { movzx   ebx,al }; \
   __asm { mov     edx,[edi+ebx*4+3072] }; \
   __asm { movzx   ebx,ah }; \
   __asm { xor     edx,[edi+ebx*4+2048] }; \
   __asm { shr     eax,16 }; \
   __asm { movzx   ebx,al }; \
   __asm { add     esi, 4*(dwPtrIncr) }; \
   __asm { shr     eax,8 }; \
   __asm { xor     edx,[edi+ebx*4+1024] }; \
   __asm { mov     eax,[edi+eax*4] }; \
   __asm { xor     eax,edx }; }
#else /* !IZ_CRCOPTIM_UNFOLDTBL */
# define SavLen  edx            /* the edx register is free for use here */
# define UpdCRC_dword { \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; }
# define UpdCRC_dword_sh(dwPtrIncr) { \
    Do_CRC; \
    Do_CRC; \
    __asm { add   esi, 4*(dwPtrIncr) }; \
    Do_CRC; \
    Do_CRC; }
#endif /* ?IZ_CRCOPTIM_UNFOLDTBL */

#define Do_CRC_dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword_sh(1); }

#define Do_CRC_4dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+4] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+8] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+12] }; \
  UpdCRC_dword_sh(4); }
#else /* NO_32_BIT_LOADS */
/* The unrolled byte-wise loop in crc32() still needs a place to save the
 * length; the byte-wise macros never touch edx, so it is free for that. */
# define SavLen  edx
#endif /* ?NO_32_BIT_LOADS */
|
/* ========================================================================= */ |
ulg crc32(crc, buf, len) |
ulg crc; /* crc shift register */ |
ZCONST uch *buf; /* pointer to bytes to pump through */ |
extent len; /* number of bytes in buf[] */ |
/* Run a set of bytes through the crc shift register. If buf is a NULL |
pointer, then initialize the crc shift register contents instead. |
Return the current crc in either case. */ |
{ |
__asm { |
push edx |
push ecx |
|
mov esi,buf ;/* 2nd arg: uch *buf */ |
sub eax,eax ;/*> if (!buf) */ |
test esi,esi ;/*> return 0; */ |
jz fine ;/*> else { */ |
|
call get_crc_table |
mov edi,eax |
mov eax,crc ;/* 1st arg: ulg crc */ |
#ifndef __686 |
sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ |
#endif |
mov ecx,len ;/* 3rd arg: extent len */ |
not eax ;/*> c = ~crc; */ |
|
test ecx,ecx |
#ifndef NO_UNROLLED_LOOPS |
jz bail |
# ifndef NO_32_BIT_LOADS |
align_loop: |
test esi,3 ;/* align buf pointer on next */ |
jz aligned_now ;/* dword boundary */ |
} |
Do_CRC_byte ; |
__asm { |
dec ecx |
jnz align_loop |
aligned_now: |
# endif /* !NO_32_BIT_LOADS */ |
mov SavLen,ecx ;/* save current len for later */ |
shr ecx,4 ;/* ecx = len / 16 */ |
jz No_Sixteens |
; align loop head at start of 486 internal cache line !! |
align 16 |
Next_Sixteen: |
} |
# ifndef NO_32_BIT_LOADS |
Do_CRC_4dword ; |
# else /* NO_32_BIT_LOADS */ |
Do_CRC_byteof(0) ; |
Do_CRC_byteof(1) ; |
Do_CRC_byteof(2) ; |
Do_CRC_byteof(3) ; |
Do_CRC_byteof(4) ; |
Do_CRC_byteof(5) ; |
Do_CRC_byteof(6) ; |
Do_CRC_byteof(7) ; |
Do_CRC_byteof(8) ; |
Do_CRC_byteof(9) ; |
Do_CRC_byteof(10) ; |
Do_CRC_byteof(11) ; |
Do_CRC_byteof(12) ; |
Do_CRC_byteof(13) ; |
Do_CRC_byteof(14) ; |
Do_CRC_byteof(15) ; |
__asm { add esi,16 }; |
# endif /* ?NO_32_BIT_LOADS */ |
__asm { |
dec ecx |
jnz Next_Sixteen |
No_Sixteens: |
mov ecx,SavLen |
and ecx,00000000FH ;/* ecx = len % 16 */ |
# ifndef NO_32_BIT_LOADS |
shr ecx,2 |
jz No_Fours |
Next_Four: |
} |
Do_CRC_dword ; |
__asm { |
dec ecx |
jnz Next_Four |
No_Fours: |
mov ecx,SavLen |
and ecx,000000003H ;/* ecx = len % 4 */ |
# endif /* !NO_32_BIT_LOADS */ |
#endif /* !NO_UNROLLED_LOOPS */ |
jz bail ;/*> if (len) */ |
; align loop head at start of 486 internal cache line !! |
align 16 |
loupe: ;/*> do { */ |
} |
Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/ |
__asm { |
dec ecx ;/*> } while (--len); */ |
jnz loupe |
|
bail: ;/*> } */ |
not eax ;/*> return ~c; */ |
fine: |
pop ecx |
pop edx |
} |
#ifdef NEED_RETURN |
return _EAX; |
#endif |
} |
#endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ |
#if defined(_MSC_VER) && (_MSC_VER >= 700) && \
    defined(_M_IX86) && (_M_IX86 >= 300)
/* Reenable missing return value warning */
#  pragma warning( default : 4035 )
#endif
#endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */ |