0,0 → 1,304 |
/* |
Copyright (c) 1990-2007 Info-ZIP. All rights reserved. |
|
See the accompanying file LICENSE, version 2000-Apr-09 or later |
(the contents of which are also included in zip.h) for terms of use. |
If, for some reason, all these files are missing, the Info-ZIP license |
also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
*/ |
/* |
* crc_i386.S, optimized CRC calculation function for Zip and UnZip, |
* created by Paul Kienitz and Christian Spieler. Last revised 07 Jan 2007. |
* |
* GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm |
* => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66) |
* |
* SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide |
* aligned reads for most of the data from buffer), can be |
* disabled by defining the macro NO_32_BIT_LOADS |
* |
* SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized |
* CPUs (like the Pentium Pro, Pentium II, and probably some |
* Pentium clones). This optimization is controlled by the |
* preprocessor switch "__686" and is disabled by default. |
* (This default is based on the assumption that most users |
* do not yet work on a Pentium Pro or Pentium II machine ...) |
* |
* COS 050116: Enabled the 686 build by default, because there are hardly any |
* pre-686 CPUs in serious use nowadays. (See SPC 971012 above; define |
* PRE_686 to get the pre-686 code instead.) |
* |
* SPC 060103: Updated code to incorporate newer optimizations found in zlib. |
* |
* SPC 070107: Added conditional switch to deactivate crc32() compilation. |
* |
* FLAT memory model assumed. Calling interface: |
* - args are pushed onto the stack from right to left, |
* - return value is given in the EAX register, |
* - all other registers (with exception of EFLAGS) are preserved. (With |
* GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving |
* them nevertheless adds only 4 single byte instructions.) |
* |
* This source generates the function |
* ulg crc32(ulg crc, ZCONST uch *buf, extent len). |
* |
* Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. |
* This results in shorter code at the expense of reduced performance. |
*/ |
|
/* This file is NOT used in conjunction with zlib, or when only creation of |
* the basic CRC_32_Table (for other purpose) is requested. |
*/ |
#if !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY) |
|
/* External symbol decoration: this file spells its C-visible symbols as |
* _crc32 and _get_crc_table. When the C compiler does not prefix external |
* symbols with an underline character '_' (preprocess with -DNO_UNDERLINE; |
* also selected whenever __ELF__ is defined), map both names to their |
* undecorated spelling. |
*/ |
#if defined(__ELF__) || defined(NO_UNDERLINE) |
# define _get_crc_table get_crc_table |
# define _crc32 crc32 |
#endif |
/* Loop heads are padded to a 16-byte boundary, filled with 0x90 (NOP). |
* Warning: the operand of ".align" depends on the assembler and object |
* format. gas for a.out-style i386 targets takes log2 of the alignment |
* (.align 4 means 16-byte alignment), whereas SVR4 assemblers and gas for |
* i386 ELF take a number of bytes (.align 16 means 16-byte alignment, and |
* .align 4 would only give 4 bytes). |
* Define ALIGNMENT on the command line to override the choice made here. |
*/ |
#ifndef ALIGNMENT |
# ifdef __ELF__ |
#  define ALIGNMENT .align 16,0x90 |
# else |
#  define ALIGNMENT .align 4,0x90 |
# endif |
#endif |
|
#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386) |
|
/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas). |
* Warning: it uses the AT&T syntax: mov source,dest |
* This file is only optional. If you want to use the C version, |
* remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string. |
*/ |
|
.file "crc_i386.S" |
|
/* Unless the builder asked for the older tuning (PRE_686) or has already |
* defined __686, optimize for Pentium Pro and compatible CPUs by default. |
*/ |
#ifndef PRE_686 |
# ifndef __686 |
#  define __686 |
# endif |
#endif |
|
/* Stack frame selection. |
* - Default: standard stack frame entry through %ebp (USE_STD_STACKFRAME), |
* because it results in smaller code! |
* - NO_STD_STACKFRAME: no frame pointer; the arguments are addressed |
* relative to %esp instead. |
* NO_STD_STACKFRAME wins when both macros are defined. It has to be tested |
* on its own: the "define by default" branch must not run when the frame |
* was explicitly declined, and the macro to remove is USE_STD_STACKFRAME |
* (the one tested by the argument macros below). |
*/ |
#ifdef NO_STD_STACKFRAME |
# undef USE_STD_STACKFRAME |
#else |
# ifndef USE_STD_STACKFRAME |
#  define USE_STD_STACKFRAME |
# endif |
#endif |
|
/* Function entry/exit sequences and the addresses of the three stack |
* arguments, for both frame styles. |
* _STD_ENTRY / _STD_LEAVE : set up / tear down the %ebp frame (empty when |
* no frame is used) |
* arg1, arg2, arg3 : the three arguments of crc32() |
*/ |
#ifndef USE_STD_STACKFRAME |
/* No frame pointer: the offsets are valid after crc32 has pushed its five |
* saved registers (5 * 4 bytes) on top of the 4-byte return address. |
*/ |
# define _STD_ENTRY |
# define _STD_LEAVE |
# define arg1 24(%esp) |
# define arg2 28(%esp) |
# define arg3 32(%esp) |
#else /* USE_STD_STACKFRAME */ |
/* %ebp frame: saved %ebp at 0(%ebp), return address at 4(%ebp). */ |
# define _STD_ENTRY pushl %ebp ; movl %esp,%ebp |
# define _STD_LEAVE popl %ebp |
# define arg1 8(%ebp) |
# define arg2 12(%ebp) |
# define arg3 16(%ebp) |
#endif /* ?USE_STD_STACKFRAME */ |
|
/* |
* Do_CRC and the byte-fetching wrappers built on top of it make up the |
* loop body of the CRC32 cruncher. Do_CRC itself performs one table step: |
* c = (c >> 8) ^ table[c & 0xFF] |
* registers modified: |
* eax : crc value "c" |
* esi : pointer to next data byte (or lword) "buf++" |
* (advanced by the wrappers, not by Do_CRC itself) |
* registers read: |
* edi : pointer to base of crc_table array |
* scratch registers: |
* ebx : index into crc_table array |
* (requires upper three bytes = 0 when __686 is undefined, because |
* that variant writes only %bl before using %ebx as the index) |
*/ |
#ifndef __686 /* optimize for 386, 486, Pentium */ |
#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\ |
movb %al, %bl ;/* tmp = c & 0xFF (only bl is written) */\ |
shrl $8, %eax ;/* c = (c >> 8) */\ |
xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] (4-byte entries) */ |
#else /* __686 : optimize for Pentium Pro and compatible CPUs */ |
#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\ |
movzbl %al, %ebx ;/* tmp = c & 0xFF (zero-extended) */\ |
shrl $8, %eax ;/* c = (c >> 8) */\ |
xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] (4-byte entries) */ |
#endif /* ?__686 */ |
|
/* Do_CRC_byte: fold the byte at (%esi) into the CRC and advance the |
* pointer by one: c = (c >> 8) ^ table[(c ^ *buf++) & 0xFF]. |
* Modifies eax, esi and the scratch register ebx. |
*/ |
#define Do_CRC_byte /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\ |
xorb (%esi), %al ;/* c ^= *buf */\ |
incl %esi ;/* buf++ */\ |
Do_CRC |
|
/* Do_CRC_byteof(ofs): fold the byte at ofs(%esi) into the CRC: |
* c = (c >> 8) ^ table[(c ^ buf[ofs]) & 0xFF]. |
* The pointer is deliberately NOT advanced here. The unrolled loop invokes |
* this macro with the offsets 0..15 and then adds 16 to esi once, so an |
* increment per invocation would make the loop read every other byte and |
* step over 32 bytes per pass. Modifies eax and the scratch register ebx. |
*/ |
#define Do_CRC_byteof(ofs) /* c = (c >> 8) ^ table[(c^buf[ofs])&0xFF] */\ |
xorb ofs(%esi), %al ;/* c ^= buf[ofs] */\ |
Do_CRC |
|
#ifndef NO_32_BIT_LOADS /* helpers that consume the input one lword (4 bytes) at a time */ |
# ifdef IZ_CRCOPTIM_UNFOLDTBL |
/* the edx register is needed in crc calculation, so the saved length |
* (SavLen) is kept in the arg3 stack slot instead. Each UpdCRC_lword step |
* looks up all four bytes of eax in four 256-entry sub-tables located at |
* byte offsets 3072, 2048, 1024 and 0 from edi and XORs the four results. |
* NOTE(review): presumably _get_crc_table returns a table holding all four |
* sub-tables when IZ_CRCOPTIM_UNFOLDTBL is defined -- confirm. */ |
# define SavLen arg3 |
# define UpdCRC_lword \ |
movzbl %al, %ebx ; /* ebx = byte 0 of c */ \ |
movl 3072(%edi,%ebx,4), %edx ; /* edx = sub-table 3 [byte 0] */ \ |
movzbl %ah, %ebx ; /* ebx = byte 1 of c */ \ |
shrl $16, %eax ; /* eax = upper 16 bits of c */ \ |
xor 2048(%edi,%ebx,4), %edx ; /* edx ^= sub-table 2 [byte 1] */ \ |
movzbl %al, %ebx ; /* ebx = byte 2 of c */ \ |
shrl $8,%eax ; /* eax = byte 3 of c */ \ |
xorl 1024(%edi,%ebx,4), %edx ; /* edx ^= sub-table 1 [byte 2] */ \ |
movl (%edi,%eax,4), %eax ; /* eax = sub-table 0 [byte 3] */ \ |
xorl %edx,%eax ; /* c = XOR of the four lookups */ |
# define UpdCRC_lword_sh(dwPtrIncr) /* same as UpdCRC_lword, plus pointer advance */ \ |
movzbl %al, %ebx ; /* ebx = byte 0 of c */ \ |
movl 3072(%edi,%ebx,4), %edx ; /* edx = sub-table 3 [byte 0] */ \ |
movzbl %ah, %ebx ; /* ebx = byte 1 of c */ \ |
shrl $16, %eax ; /* eax = upper 16 bits of c */ \ |
xor 2048(%edi,%ebx,4), %edx ; /* edx ^= sub-table 2 [byte 1] */ \ |
movzbl %al, %ebx ; /* ebx = byte 2 of c */ \ |
addl $4*(dwPtrIncr), %esi ;/* ((ulg *)buf)+=dwPtrIncr */\ |
shrl $8,%eax ; /* eax = byte 3 of c */ \ |
xorl 1024(%edi,%ebx,4), %edx ; /* edx ^= sub-table 1 [byte 2] */ \ |
movl (%edi,%eax,4),%eax ; /* eax = sub-table 0 [byte 3] */ \ |
xorl %edx,%eax ; /* c = XOR of the four lookups */ |
# else /* !IZ_CRCOPTIM_UNFOLDTBL */ |
/* the edx register is not needed anywhere else, so it holds the saved |
* length (SavLen). One lword is processed as four single-byte Do_CRC |
* table steps. */ |
# define SavLen %edx |
# define UpdCRC_lword \ |
Do_CRC \ |
Do_CRC \ |
Do_CRC \ |
Do_CRC |
# define UpdCRC_lword_sh(dwPtrIncr) /* four Do_CRC steps, plus pointer advance */ \ |
Do_CRC \ |
Do_CRC \ |
addl $4*(dwPtrIncr), %esi ;/* ((ulg *)buf)+=dwPtrIncr */\ |
Do_CRC \ |
Do_CRC |
# endif /* ?IZ_CRCOPTIM_UNFOLDTBL */ |
#define Do_CRC_lword /* consume one lword at (%esi) and advance esi by 4 */ \ |
xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\ |
UpdCRC_lword_sh(1) /* ... ((ulg *)buf)++ */ |
#define Do_CRC_4lword /* consume four lwords at (%esi) and advance esi by 16 */ \ |
xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\ |
UpdCRC_lword \ |
xorl 4(%esi), %eax ;/* c ^= *((ulg *)buf+1) */\ |
UpdCRC_lword \ |
xorl 8(%esi), %eax ;/* c ^= *((ulg *)buf+2) */\ |
UpdCRC_lword \ |
xorl 12(%esi), %eax ;/* c ^= *((ulg *)buf+3) */\ |
UpdCRC_lword_sh(4) /* ... ((ulg *)buf)+=4 */ |
#endif /* !NO_32_BIT_LOADS */ |
|
|
.text |
|
.globl _crc32 |
|
_crc32: /* ulg crc32(ulg crc, uch *buf, extent len) |
* Returns 0 in eax when buf is NULL. Otherwise starts from c = ~crc, folds |
* the len input bytes into c using the table obtained from _get_crc_table |
* and returns ~c in eax. |
* Register roles in the body: |
* eax = running CRC "c" esi = buf (current input position) |
* edi = crc table base ecx = remaining count of the current loop |
* ebx = table index scratch edx = SavLen or lookup scratch |
* edi, esi, ebx, edx and ecx are saved on entry and restored on exit. |
*/ |
_STD_ENTRY |
pushl %edi /* saved; becomes the crc table base */ |
pushl %esi /* saved; becomes the input pointer */ |
pushl %ebx /* saved; table index scratch */ |
pushl %edx /* saved; SavLen or lookup scratch */ |
pushl %ecx /* saved; loop counter */ |
|
movl arg2, %esi /* 2nd arg: uch *buf */ |
subl %eax, %eax /* > if (!buf) */ |
testl %esi, %esi /* > return 0; */ |
jz .L_fine /* > else { */ |
call _get_crc_table /* table address comes back in eax */ |
movl %eax, %edi /* edi = base of crc table */ |
movl arg1, %eax /* 1st arg: ulg crc */ |
#ifndef __686 |
subl %ebx, %ebx /* ebx=0; bl usable as dword */ |
#endif |
movl arg3, %ecx /* 3rd arg: extent len */ |
notl %eax /* > c = ~crc; */ |
|
testl %ecx, %ecx /* ZF set when len == 0 */ |
#ifndef NO_UNROLLED_LOOPS |
jz .L_bail /* nothing to process for len == 0 */ |
# ifndef NO_32_BIT_LOADS |
/* Assert now have positive length; process single bytes until buf is |
* lword aligned or the input is exhausted */ |
.L_align_loop: |
testl $3, %esi /* Align buf on lword boundary */ |
jz .L_aligned_now |
Do_CRC_byte |
decl %ecx /* one byte consumed */ |
jnz .L_align_loop |
.L_aligned_now: |
# endif /* !NO_32_BIT_LOADS */ |
movl %ecx, SavLen /* save current value of len */ |
shrl $4, %ecx /* ecx = len / 16 */ |
jz .L_No_Sixteens /* fewer than 16 bytes left */ |
/* align loop head at start of 486 internal cache line !! */ |
ALIGNMENT |
.L_Next_Sixteen: |
# ifndef NO_32_BIT_LOADS |
Do_CRC_4lword |
# else /* NO_32_BIT_LOADS */ |
Do_CRC_byteof(0) |
Do_CRC_byteof(1) |
Do_CRC_byteof(2) |
Do_CRC_byteof(3) |
Do_CRC_byteof(4) |
Do_CRC_byteof(5) |
Do_CRC_byteof(6) |
Do_CRC_byteof(7) |
Do_CRC_byteof(8) |
Do_CRC_byteof(9) |
Do_CRC_byteof(10) |
Do_CRC_byteof(11) |
Do_CRC_byteof(12) |
Do_CRC_byteof(13) |
Do_CRC_byteof(14) |
Do_CRC_byteof(15) |
addl $16,%esi ;/* buf += 16 |
* NOTE(review): this assumes the sixteen Do_CRC_byteof expansions above |
* left esi unchanged -- confirm against the macro definition */ |
# endif /* ?NO_32_BIT_LOADS */ |
decl %ecx /* one 16-byte group consumed */ |
jnz .L_Next_Sixteen |
|
.L_No_Sixteens: |
movl SavLen, %ecx /* reload the length saved before the 16-byte loop */ |
andl $15, %ecx /* ecx = len % 16 */ |
# ifndef NO_32_BIT_LOADS |
shrl $2,%ecx /* ecx = (len % 16) / 4 */ |
jz .L_No_Fours /* no whole lword left */ |
.L_Next_Four: |
Do_CRC_lword |
decl %ecx /* one lword consumed */ |
jnz .L_Next_Four |
.L_No_Fours: |
movl SavLen,%ecx /* reload the saved length once more */ |
andl $3,%ecx /* ecx = len % 4 */ |
# endif /* !NO_32_BIT_LOADS */ |
#endif /* !NO_UNROLLED_LOOPS */ |
jz .L_bail /* > if (len) |
* ZF comes from the preceding andl, or from the testl above when |
* NO_UNROLLED_LOOPS is defined */ |
/* align loop head at start of 486 internal cache line !! */ |
ALIGNMENT |
.L_loupe: /* > do { */ |
Do_CRC_byte /* c = CRC32(c,*buf++,crctab);*/ |
decl %ecx /* > } while (--len); */ |
jnz .L_loupe |
|
.L_bail: /* > } */ |
notl %eax /* > return ~c; */ |
.L_fine: |
popl %ecx /* restore in reverse order of the pushes */ |
popl %edx |
popl %ebx |
popl %esi |
popl %edi |
_STD_LEAVE |
ret |
|
#else /* not an i386 target */ |
/* The next line is not valid assembler input, so assembling this file for |
* any other target stops with an error that shows the message. |
*/ |
error: this asm version is for 386 only |
#endif /* i386 || _i386 || _I386 || __i386 */ |
|
#endif /* !USE_ZLIB && !CRC_TABLE_ONLY */ |
|
/* Mark the object as not needing an executable stack. GNU ld treats an |
* object without this note as one that may require an executable stack, |
* which makes the stack of the whole linked program executable. The note |
* is emitted outside the conditionals above so that it is present even |
* when no code is generated. |
*/ |
#if defined(__linux__) && defined(__ELF__) |
.section .note.GNU-stack,"",%progbits |
#endif |