Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.   Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
  3.  
  4.   See the accompanying file LICENSE, version 2000-Apr-09 or later
  5.   (the contents of which are also included in zip.h) for terms of use.
  6.   If, for some reason, all these files are missing, the Info-ZIP license
  7.   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
  8. */
  9. /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
  10.  * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
  11.  * Last revised: 07-Jan-2007
  12.  *
  13.  * Originally coded (in crc_i386.asm) and put into the public domain
  14.  * by Paul Kienitz and Christian Spieler.
  15.  *
  16.  * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
  17.  *   fixed to assemble with masm by not using .model directive which makes
  18.  *   assumptions about segment alignment.  Also,
  19.  *   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
  20.  *   than lodsb, and other misc. changes resulting in the following performance
  21.  *   increases:
  22.  *
  23.  *      unrolled loops                NO_UNROLLED_LOOPS
  24.  *      *8    >8      <8              *8      >8      <8
  25.  *
  26.  *      +54%  +42%    +35%            +82%    +52%    +25%
  27.  *
  28.  *   first item in each table is input buffer length, even multiple of 8
  29.  *   second item in each table is input buffer length, > 8
  30.  *   third item in each table is input buffer length, < 8
  31.  *
  32.  * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
  33.  *   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
  34.  *   UNIX AS source crc_i386.S. This new code can be disabled by defining
  35.  *   the macro symbol NO_32_BIT_LOADS.
  36.  *
  37.  * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
  38.  *   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
  39.  *   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
  40.  *   This optimization is controlled by the macro symbol __686 and is disabled
  41.  *   by default. (This default is based on the assumption that most users
  42.  *   do not yet work on a Pentium Pro or Pentium II machine ...)
  43.  *
  44.  * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
  45.  *   32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
  46.  *   confirmed correct working with MS VC++ (32-bit).
  47.  *
  48.  * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
  49.  *   MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its
  50.  *   own __asm {...} construct.  For MSVC, a "#pragma warning" was added to
  51.  *   shut up the "no return value" warning message.
  52.  *
  53.  * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
  54.  *
  55.  * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
  56.  *   switching between ASM vs. non-ASM builds, when handling makefiles.
  57.  *   Also enabled the 686 build by default, because there are hardly any
  58.  *   pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
  59.  *
  60.  * Revised 03-Jan-2006, Chr. Spieler
  61.  *   Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
  62.  *   data buffer in loop body (adjust pointer only once in loop body and use
  63.  *   offsets to access each item); added additional support for the "unfolded
  64.  *   tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
  65.  *
  66.  * Revised 07-Jan-2007, Chr. Spieler
  67.  *   Recognize additional conditional flag CRC_TABLE_ONLY that prevents
  68.  *   compilation of the crc32() function.
  69.  *
  70.  * FLAT memory model assumed.
  71.  *
  72.  * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
  73.  * This results in shorter code at the expense of reduced performance.
  74.  *
  75.  */
  76.  
  77. #include "../zip.h"
  78. #include "../crc32.h"
  79.  
  80. #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
  81.  
  82. #if !defined(PRE_686) && !defined(__686)  /* 686-tuned code is the default; defining PRE_686 keeps __686 from being set here */
  83. #  define __686
  84. #endif
  85.  
  86. #ifndef ZCONST  /* fallback used only when nothing earlier has defined ZCONST */
  87. #  define ZCONST const
  88. #endif
  89.  
  90. /* Select whether the following inline-assembler code is supported. */
  91. #if (defined(_MSC_VER) && _MSC_VER >= 700)  /* Microsoft compiler with _MSC_VER of at least 700 */
  92. #if (defined(_M_IX86) && _M_IX86 >= 300)  /* x86 target with _M_IX86 of at least 300 (presumably 80386 or newer -- confirm in compiler docs) */
  93. #  define MSC_INLINE_ASM_32BIT_SUPPORT
  94.    /* Disable warning for no return value, typical of asm functions */
  95. #  pragma warning( disable : 4035 )
  96. #endif
  97. #endif
  98.  
  99. #if (defined(__BORLANDC__) && __BORLANDC__ >= 452)  /* Borland compiler with __BORLANDC__ of at least 452 shares the same code */
  100. #  define MSC_INLINE_ASM_32BIT_SUPPORT
  101. #endif
  102.  
  103. #ifdef MSC_INLINE_ASM_32BIT_SUPPORT
  104. /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */
  105.  
  106. /*
  107.  * These two (three) macros make up the loop body of the CRC32 cruncher.
  108.  * registers modified:
  109.  *   eax  : crc value "c"
  110.  *   esi  : pointer to next data byte (or dword) "buf++"
  111.  * registers read:
  112.  *   edi  : pointer to base of crc_table array
  113.  * scratch registers:
  114.  *   ebx  : index into crc_table array
  115.  *          (requires upper three bytes = 0 when __686 is undefined)
  116.  */
  117. #ifndef __686  /* pre-686 variant: only bl is written, so the upper three bytes of ebx must already be 0 */
  118. #define Do_CRC { /* one table step: eax = (eax >> 8) ^ table[eax & 0xff], table base in edi */ \
  119.   __asm { mov   bl, al }; /* ebx = low byte of eax (upper bytes of ebx stay 0) */ \
  120.   __asm { shr   eax, 8 }; /* drop the byte just used as index */ \
  121.   __asm { xor   eax, [edi+ebx*4] }; /* fold in the 4-byte table entry selected by ebx */ }
  122. #else /* __686 */
  123. #define Do_CRC { /* one table step: eax = (eax >> 8) ^ table[eax & 0xff], table base in edi */ \
  124.   __asm { movzx ebx, al }; /* ebx = zero-extended low byte of eax; no precondition on ebx */ \
  125.   __asm { shr   eax, 8  }; /* drop the byte just used as index */ \
  126.   __asm { xor   eax, [edi+ebx*4] }; /* fold in the 4-byte table entry selected by ebx */ }
  127. #endif /* ?__686 */
  128.  
  129. #define Do_CRC_byte { /* consume one input byte at [esi] and advance esi by 1 */ \
  130.   __asm { xor   al, byte ptr [esi] }; /* mix the next data byte into the low byte of the crc */ \
  131.   __asm { inc   esi }; /* buf++ */ \
  132.   Do_CRC; /* one table step on the updated low byte */ }
  133.  
  134. #define Do_CRC_byteof(ofs) { /* consume the input byte at [esi+ofs]; esi itself is left unchanged */ \
  135.   __asm { xor   al, byte ptr [esi+(ofs)] }; /* mix that data byte into the low byte of the crc */ \
  136.   Do_CRC; /* one table step on the updated low byte */ }
  137.  
  138. #ifndef NO_32_BIT_LOADS
  139. #ifdef IZ_CRCOPTIM_UNFOLDTBL
  140. # define SavLen  len            /* the edx register is needed elsewhere, so the remaining length is parked in the len argument */
  141. # define UpdCRC_dword { /* process all four bytes of eax with four lookups at byte offsets 3072, 2048, 1024 and 0 from edi */ \
  142.    __asm { movzx   ebx,al }; /* ebx = byte 0 (bits 0-7) of eax */ \
  143.    __asm { mov     edx,[edi+ebx*4+3072] }; /* edx = entry for byte 0, taken at offset 3072 */ \
  144.    __asm { movzx   ebx,ah }; /* ebx = byte 1 (bits 8-15) of eax */ \
  145.    __asm { shr     eax,16 }; /* bring bytes 2 and 3 down into al and ah */ \
  146.    __asm { xor     edx,[edi+ebx*4+2048] }; /* fold in entry for byte 1, taken at offset 2048 */ \
  147.    __asm { movzx   ebx,al }; /* ebx = original byte 2 */ \
  148.    __asm { shr     eax,8 }; /* eax = original byte 3, zero-extended */ \
  149.    __asm { xor     edx,[edi+ebx*4+1024] }; /* fold in entry for byte 2, taken at offset 1024 */ \
  150.    __asm { mov     eax,[edi+eax*4] }; /* entry for byte 3, taken at offset 0 */ \
  151.    __asm { xor     eax,edx }; /* eax = xor of the four entries = updated crc */ }
  152. # define UpdCRC_dword_sh(dwPtrIncr) { /* same four-lookup update as UpdCRC_dword, plus esi += 4*(dwPtrIncr) bytes */ \
  153.    __asm { movzx   ebx,al }; /* ebx = byte 0 (bits 0-7) of eax */ \
  154.    __asm { mov     edx,[edi+ebx*4+3072] }; /* edx = entry for byte 0, taken at offset 3072 */ \
  155.    __asm { movzx   ebx,ah }; /* ebx = byte 1 (bits 8-15) of eax */ \
  156.    __asm { xor     edx,[edi+ebx*4+2048] }; /* fold in entry for byte 1, taken at offset 2048 */ \
  157.    __asm { shr     eax,16 }; /* bring bytes 2 and 3 down into al and ah */ \
  158.    __asm { movzx   ebx,al }; /* ebx = original byte 2 */ \
  159.    __asm { add     esi, 4*(dwPtrIncr) }; /* advance the data pointer; argument parenthesized so an expression argument scales as a whole */ \
  160.    __asm { shr     eax,8 }; /* eax = original byte 3, zero-extended */ \
  161.    __asm { xor     edx,[edi+ebx*4+1024] }; /* fold in entry for byte 2, taken at offset 1024 */ \
  162.    __asm { mov     eax,[edi+eax*4] }; /* entry for byte 3, taken at offset 0 */ \
  163.    __asm { xor     eax,edx }; /* eax = xor of the four entries = updated crc */ }
  164. #else /* !IZ_CRCOPTIM_UNFOLDTBL */
  165. # define SavLen  edx            /* the edx register is free for use here, so it holds the saved length */
  166. # define UpdCRC_dword { /* process all four bytes of eax: four single-byte table steps */ \
  167.     Do_CRC; \
  168.     Do_CRC; \
  169.     Do_CRC; \
  170.     Do_CRC; }
  171. # define UpdCRC_dword_sh(dwPtrIncr) { /* same four table steps, plus esi += 4*(dwPtrIncr) bytes in between */ \
  172.     Do_CRC; \
  173.     Do_CRC; \
  174.     __asm { add   esi, 4*(dwPtrIncr) }; /* advance the data pointer by dwPtrIncr dwords */ \
  175.     Do_CRC; \
  176.     Do_CRC; }
  177. #endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
  178.  
  179. #define Do_CRC_dword { /* consume the dword at [esi] and advance esi by 4 */ \
  180.   __asm { xor   eax, dword ptr [esi] }; /* mix four data bytes into the crc at once */ \
  181.   UpdCRC_dword_sh(1); /* four-byte crc update; also adds 4*1 to esi */ }
  182.  
  183. #define Do_CRC_4dword { /* consume 16 bytes: the dwords at esi+0, +4, +8 and +12, then advance esi by 16 */ \
  184.   __asm { xor   eax, dword ptr [esi] }; /* 1st dword */ \
  185.   UpdCRC_dword; \
  186.   __asm { xor   eax, dword ptr [esi+4] }; /* 2nd dword */ \
  187.   UpdCRC_dword; \
  188.   __asm { xor   eax, dword ptr [esi+8] }; /* 3rd dword */ \
  189.   UpdCRC_dword; \
  190.   __asm { xor   eax, dword ptr [esi+12] }; /* 4th dword */ \
  191.   UpdCRC_dword_sh(4); /* last update also adds 4*4 to esi, the only pointer adjustment in this macro */ }
  192. #endif /* !NO_32_BIT_LOADS */
  193.  
  194. /* ========================================================================= */
  195. ulg crc32(crc, buf, len)
  196.     ulg crc;                    /* crc shift register */
  197.     ZCONST uch *buf;            /* pointer to bytes to pump through */
  198.     extent len;                 /* number of bytes in buf[] */
  199. /* Run a set of bytes through the crc shift register.  If buf is a NULL
  200.    pointer, then initialize the crc shift register contents instead (eax is
  201.    cleared, so 0 results).  Return the current crc in either case; the result is left in eax. */
  202. {
  203.     __asm {
  204.                 push    edx             /* save edx and ecx; both are popped again at "fine" */
  205.                 push    ecx
  206.  
  207.                 mov     esi,buf         ;/* 2nd arg: uch *buf              */
  208.                 sub     eax,eax         ;/*> if (!buf)                     */
  209.                 test    esi,esi         ;/*>   return 0;                   */
  210.                 jz      fine            ;/*> else {                        */
  211.  
  212.                 call    get_crc_table   /* its eax result is used as the crc table base */
  213.                 mov     edi,eax         /* edi = table base, as the loop-body macros expect */
  214.                 mov     eax,crc         ;/* 1st arg: ulg crc               */
  215. #ifndef __686
  216.                 sub     ebx,ebx         ;/* ebx=0; => bl usable as a dword */
  217. #endif
  218.                 mov     ecx,len         ;/* 3rd arg: extent len            */
  219.                 not     eax             ;/*>   c = ~crc;                   */
  220.  
  221.                 test    ecx,ecx         /* ZF set when len == 0; consumed by the next jz */
  222. #ifndef NO_UNROLLED_LOOPS
  223.                 jz      bail            /* nothing to process */
  224. #  ifndef NO_32_BIT_LOADS
  225. align_loop:
  226.                 test    esi,3           ;/* align buf pointer on next      */
  227.                 jz      aligned_now     ;/*  dword boundary                */
  228.     }
  229.                 Do_CRC_byte             ; /* one byte at a time until esi is dword aligned */
  230.     __asm {
  231.                 dec     ecx             /* one byte consumed */
  232.                 jnz     align_loop      /* falls through with ecx == 0 if the data ran out first */
  233. aligned_now:
  234. #  endif /* !NO_32_BIT_LOADS */
  235.                 mov     SavLen,ecx      ;/* save current len for later  */
  236.                 shr     ecx,4           ;/* ecx = len / 16    */
  237.                 jz      No_Sixteens
  238. ; align loop head at start of 486 internal cache line !!
  239.                 align   16
  240. Next_Sixteen:
  241.     }
  242. #  ifndef NO_32_BIT_LOADS
  243.                 Do_CRC_4dword ; /* 16 bytes per turn as four dword loads; advances esi by 16 itself */
  244. #  else /* NO_32_BIT_LOADS */
  245.                 Do_CRC_byteof(0) ; /* 16 bytes per turn as single-byte loads at offsets 0..15 */
  246.                 Do_CRC_byteof(1) ;
  247.                 Do_CRC_byteof(2) ;
  248.                 Do_CRC_byteof(3) ;
  249.                 Do_CRC_byteof(4) ;
  250.                 Do_CRC_byteof(5) ;
  251.                 Do_CRC_byteof(6) ;
  252.                 Do_CRC_byteof(7) ;
  253.                 Do_CRC_byteof(8) ;
  254.                 Do_CRC_byteof(9) ;
  255.                 Do_CRC_byteof(10) ;
  256.                 Do_CRC_byteof(11) ;
  257.                 Do_CRC_byteof(12) ;
  258.                 Do_CRC_byteof(13) ;
  259.                 Do_CRC_byteof(14) ;
  260.                 Do_CRC_byteof(15) ;
  261.     __asm {     add     esi,16 }; /* single pointer adjustment for the 16 bytes just consumed */
  262. #  endif /* ?NO_32_BIT_LOADS */
  263.     __asm {
  264.                 dec     ecx             /* one 16-byte group done */
  265.                 jnz     Next_Sixteen
  266. No_Sixteens:
  267.                 mov     ecx,SavLen      /* reload the length saved before the 16-byte loop */
  268.                 and     ecx,00000000FH  ;/* ecx = len % 16    */
  269. #  ifndef NO_32_BIT_LOADS
  270.                 shr     ecx,2           /* ecx = number of whole dwords left (0..3) */
  271.                 jz      No_Fours
  272. Next_Four:
  273.     }
  274.                 Do_CRC_dword ; /* 4 bytes per turn; advances esi by 4 itself */
  275.     __asm {
  276.                 dec     ecx             /* one dword done */
  277.                 jnz     Next_Four
  278. No_Fours:
  279.                 mov     ecx,SavLen      /* reload the saved length once more */
  280.                 and     ecx,000000003H  ;/* ecx = len % 4    */
  281. #  endif /* !NO_32_BIT_LOADS */
  282. #endif /* !NO_UNROLLED_LOOPS */
  283.                 jz      bail            ;/*>  if (len)                       */
  284. ; align loop head at start of 486 internal cache line !!
  285.                 align   16
  286. loupe:                                  ;/*>    do {                         */
  287.     }
  288.                 Do_CRC_byte             ;/*       c = CRC32(c,*buf++,crctab);*/
  289.     __asm {
  290.                 dec     ecx             ;/*>    } while (--len);             */
  291.                 jnz     loupe
  292.  
  293. bail:                                   ;/*> }                               */
  294.                 not     eax             ;/*> return ~c;                      */
  295. fine:
  296.                 pop     ecx             /* restore the registers pushed on entry; eax keeps the result */
  297.                 pop     edx
  298.     }
  299. #ifdef NEED_RETURN  /* builds that define NEED_RETURN get an explicit return statement */
  300.     return _EAX; /* presumably a compiler-provided name for the eax register -- confirm for the target compiler */
  301. #endif
  302. }
  303. #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */
  304. #if (defined(_MSC_VER) && _MSC_VER >= 700)  /* same compiler test that guarded the earlier "disable" pragma */
  305. #if (defined(_M_IX86) && _M_IX86 >= 300)  /* same target test as for the earlier "disable" pragma */
  306.    /* Re-enable the missing-return-value warning (4035) by putting it back to its default state */
  307. #  pragma warning( default : 4035 )
  308. #endif
  309. #endif
  310. #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */
  311.