Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6725 siemargl 1
/*
2
  Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
3
 
4
  See the accompanying file LICENSE, version 2000-Apr-09 or later
5
  (the contents of which are also included in zip.h) for terms of use.
6
  If, for some reason, all these files are missing, the Info-ZIP license
7
  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8
*/
9
/* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
10
 * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
11
 * Last revised: 07-Jan-2007
12
 *
13
 * Originally coded (in crc_i386.asm) and put into the public domain
14
 * by Paul Kienitz and Christian Spieler.
15
 *
16
 * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
17
 *   fixed to assemble with masm by not using .model directive which makes
18
 *   assumptions about segment alignment.  Also,
19
 *   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
20
 *   than lodsb, and other misc. changes resulting in the following performance
21
 *   increases:
22
 *
23
 *      unrolled loops                NO_UNROLLED_LOOPS
24
 *      *8    >8      <8              *8      >8      <8
25
 *
26
 *      +54%  +42%    +35%            +82%    +52%    +25%
27
 *
28
 *   first item in each table is input buffer length, even multiple of 8
29
 *   second item in each table is input buffer length, > 8
30
 *   third item in each table is input buffer length, < 8
31
 *
32
 * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
33
 *   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
34
 *   UNIX AS source crc_i386.S. This new code can be disabled by defining
35
 *   the macro symbol NO_32_BIT_LOADS.
36
 *
37
 * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
38
 *   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
39
 *   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
40
 *   This optimization is controlled by the macro symbol __686 and is disabled
41
 *   by default. (This default is based on the assumption that most users
42
 *   do not yet work on a Pentium Pro or Pentium II machine ...)
43
 *
44
 * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
45
 *   32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
46
 *   confirmed correct working with MS VC++ (32-bit).
47
 *
48
 * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
49
 *   MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its
50
 *   own __asm {...} construct.  For MSVC, a "#pragma warning" was added to
51
 *   shut up the "no return value" warning message.
52
 *
53
 * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
54
 *
55
 * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
56
 *   switching between ASM vs. non-ASM builds, when handling makefiles.
57
 *   Also enabled the 686 build by default, because there are hardly any
58
 *   pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
59
 *
60
 * Revised 03-Jan-2006, Chr. Spieler
61
 *   Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
62
 *   data buffer in loop body (adjust pointer only once in loop body and use
63
 *   offsets to access each item); added additional support for the "unfolded
64
 *   tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
65
 *
66
 * Revised 07-Jan-2007, Chr. Spieler
67
 *   Recognize additional conditional flag CRC_TABLE_ONLY that prevents
68
 *   compilation of the crc32() function.
69
 *
70
 * FLAT memory model assumed.
71
 *
72
 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
73
 * This results in shorter code at the expense of reduced performance.
74
 *
75
 */
76
 
77
#include "../zip.h"
78
#include "../crc32.h"
79
 
80
#if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
81
 
82
/* Build the 686-tuned variant unless the builder asked for pre-686 code
 * (PRE_686) or has already defined __686. */
#if !defined(PRE_686) && !defined(__686)
#  define __686
#endif

/* Fallback definition in case the included headers did not supply ZCONST. */
#ifndef ZCONST
#  define ZCONST const
#endif

/* Select whether the following inline-assembler code is supported. */
#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
   /* Disable warning for no return value, typical of asm functions */
#  pragma warning( disable : 4035 )
#endif
#endif

/* NOTE(review): the threshold below is the decimal number 452; Borland
 * version macros are normally hex-coded (e.g. 0x0452) -- confirm that the
 * decimal value is really what is intended before changing it. */
#if (defined(__BORLANDC__) && __BORLANDC__ >= 452)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
#endif
102
 
103
#ifdef MSC_INLINE_ASM_32BIT_SUPPORT
104
/* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */
105
 
106
/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte (or dword) "buf++"
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 *
 * Do_CRC performs one table step on the low byte of eax:
 *   ebx = eax & 0xff;  eax >>= 8;  eax ^= dword at [edi + ebx*4]
 *
 * Every instruction sits in its own __asm {...} construct and every line of
 * the macro except the last must end in a backslash.
 */
#ifndef __686
/* Pre-686 variant: only bl is written, so the rest of ebx must already be 0. */
#define Do_CRC { \
  __asm { mov   bl, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#else /* __686 */
/* 686 variant: movzx writes all of ebx, so no prior zeroing is required. */
#define Do_CRC { \
  __asm { movzx ebx, al }; \
  __asm { shr   eax, 8  }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#endif /* ?__686 */
128
 
129
/* XOR the byte at [esi] into al, advance esi by one, then run one Do_CRC
 * table step.  Modifies eax, ebx and esi. */
#define Do_CRC_byte { \
  __asm { xor   al, byte ptr [esi] }; \
  __asm { inc   esi }; \
  Do_CRC; }
133
 
134
/* XOR the byte at [esi+ofs] into al and run one Do_CRC table step.
 * esi itself is not changed; the user of the macro adjusts the pointer. */
#define Do_CRC_byteof(ofs) { \
  __asm { xor   al, byte ptr [esi+(ofs)] }; \
  Do_CRC; }
137
 
138
#ifndef NO_32_BIT_LOADS
139
/*
 * UpdCRC_dword processes the four data bytes that have already been XORed
 * into eax.  UpdCRC_dword_sh(dwPtrIncr) does the same and additionally
 * advances esi by 4*dwPtrIncr bytes.  SavLen names the place where the
 * remaining length is parked while ecx is used as a loop counter.
 */
#ifdef IZ_CRCOPTIM_UNFOLDTBL
/* Unfolded-table variant: each of the four bytes in eax indexes its own
 * sub-table at byte offset 3072, 2048, 1024 and 0 from edi; the partial
 * results are combined in edx, which is why edx cannot hold the length. */
# define SavLen  len            /* the edx register is needed elsewhere */
# define UpdCRC_dword { \
   __asm { movzx   ebx,al }; \
   __asm { mov     edx,[edi+ebx*4+3072] }; \
   __asm { movzx   ebx,ah }; \
   __asm { shr     eax,16 }; \
   __asm { xor     edx,[edi+ebx*4+2048] }; \
   __asm { movzx   ebx,al }; \
   __asm { shr     eax,8 }; \
   __asm { xor     edx,[edi+ebx*4+1024] }; \
   __asm { mov     eax,[edi+eax*4] }; \
   __asm { xor     eax,edx }; }
/* The macro argument is parenthesized so that an expression argument is
 * scaled as a whole, matching the non-unfolded variant below. */
# define UpdCRC_dword_sh(dwPtrIncr) { \
   __asm { movzx   ebx,al }; \
   __asm { mov     edx,[edi+ebx*4+3072] }; \
   __asm { movzx   ebx,ah }; \
   __asm { xor     edx,[edi+ebx*4+2048] }; \
   __asm { shr     eax,16 }; \
   __asm { movzx   ebx,al }; \
   __asm { add     esi, 4*(dwPtrIncr) }; \
   __asm { shr     eax,8 }; \
   __asm { xor     edx,[edi+ebx*4+1024] }; \
   __asm { mov     eax,[edi+eax*4] }; \
   __asm { xor     eax,edx }; }
#else /* !IZ_CRCOPTIM_UNFOLDTBL */
/* Single-table variant: four consecutive Do_CRC byte steps. */
# define SavLen  edx            /* the edx register is free for use here */
# define UpdCRC_dword { \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; }
# define UpdCRC_dword_sh(dwPtrIncr) { \
    Do_CRC; \
    Do_CRC; \
    __asm { add   esi, 4*(dwPtrIncr) }; \
    Do_CRC; \
    Do_CRC; }
#endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
178
 
179
/* XOR the dword at [esi] into eax, process its four bytes and advance esi
 * by one dword (4 bytes). */
#define Do_CRC_dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword_sh(1); }
182
 
183
/* Process 16 bytes: the dwords at [esi], [esi+4], [esi+8] and [esi+12] are
 * folded in one after another; esi is adjusted only once, by 16 bytes, in
 * the final UpdCRC_dword_sh(4) step. */
#define Do_CRC_4dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+4] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+8] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+12] }; \
  UpdCRC_dword_sh(4); }
192
#endif /* !NO_32_BIT_LOADS */
193
 
194
/* ========================================================================= */
195
ulg crc32(crc, buf, len)
196
    ulg crc;                    /* crc shift register */
197
    ZCONST uch *buf;            /* pointer to bytes to pump through */
198
    extent len;                 /* number of bytes in buf[] */
199
/* Run a set of bytes through the crc shift register.  If buf is a NULL
200
   pointer, then initialize the crc shift register contents instead.
201
   Return the current crc in either case. */
202
{
203
    __asm {
204
                push    edx
205
                push    ecx
206
 
207
                mov     esi,buf         ;/* 2nd arg: uch *buf              */
208
                sub     eax,eax         ;/*> if (!buf)                     */
209
                test    esi,esi         ;/*>   return 0;                   */
210
                jz      fine            ;/*> else {                        */
211
 
212
                call    get_crc_table
213
                mov     edi,eax
214
                mov     eax,crc         ;/* 1st arg: ulg crc               */
215
#ifndef __686
216
                sub     ebx,ebx         ;/* ebx=0; => bl usable as a dword */
217
#endif
218
                mov     ecx,len         ;/* 3rd arg: extent len            */
219
                not     eax             ;/*>   c = ~crc;                   */
220
 
221
                test    ecx,ecx
222
#ifndef NO_UNROLLED_LOOPS
223
                jz      bail
224
#  ifndef NO_32_BIT_LOADS
225
align_loop:
226
                test    esi,3           ;/* align buf pointer on next      */
227
                jz      aligned_now     ;/*  dword boundary                */
228
    }
229
                Do_CRC_byte             ;
230
    __asm {
231
                dec     ecx
232
                jnz     align_loop
233
aligned_now:
234
#  endif /* !NO_32_BIT_LOADS */
235
                mov     SavLen,ecx      ;/* save current len for later  */
236
                shr     ecx,4           ;/* ecx = len / 16    */
237
                jz      No_Sixteens
238
; align loop head at start of 486 internal cache line !!
239
                align   16
240
Next_Sixteen:
241
    }
242
#  ifndef NO_32_BIT_LOADS
243
                Do_CRC_4dword ;
244
#  else /* NO_32_BIT_LOADS */
245
                Do_CRC_byteof(0) ;
246
                Do_CRC_byteof(1) ;
247
                Do_CRC_byteof(2) ;
248
                Do_CRC_byteof(3) ;
249
                Do_CRC_byteof(4) ;
250
                Do_CRC_byteof(5) ;
251
                Do_CRC_byteof(6) ;
252
                Do_CRC_byteof(7) ;
253
                Do_CRC_byteof(8) ;
254
                Do_CRC_byteof(9) ;
255
                Do_CRC_byteof(10) ;
256
                Do_CRC_byteof(11) ;
257
                Do_CRC_byteof(12) ;
258
                Do_CRC_byteof(13) ;
259
                Do_CRC_byteof(14) ;
260
                Do_CRC_byteof(15) ;
261
    __asm {     add     esi,16 };
262
#  endif /* ?NO_32_BIT_LOADS */
263
    __asm {
264
                dec     ecx
265
                jnz     Next_Sixteen
266
No_Sixteens:
267
                mov     ecx,SavLen
268
                and     ecx,00000000FH  ;/* ecx = len % 16    */
269
#  ifndef NO_32_BIT_LOADS
270
                shr     ecx,2
271
                jz      No_Fours
272
Next_Four:
273
    }
274
                Do_CRC_dword ;
275
    __asm {
276
                dec     ecx
277
                jnz     Next_Four
278
No_Fours:
279
                mov     ecx,SavLen
280
                and     ecx,000000003H  ;/* ecx = len % 4    */
281
#  endif /* !NO_32_BIT_LOADS */
282
#endif /* !NO_UNROLLED_LOOPS */
283
                jz      bail            ;/*>  if (len)                       */
284
; align loop head at start of 486 internal cache line !!
285
                align   16
286
loupe:                                  ;/*>    do {                         */
287
    }
288
                Do_CRC_byte             ;/*       c = CRC32(c,*buf++,crctab);*/
289
    __asm {
290
                dec     ecx             ;/*>    } while (--len);             */
291
                jnz     loupe
292
 
293
bail:                                   ;/*> }                               */
294
                not     eax             ;/*> return ~c;                      */
295
fine:
296
                pop     ecx
297
                pop     edx
298
    }
299
#ifdef NEED_RETURN
300
    return _EAX;
301
#endif
302
}
303
#endif /* MSC_INLINE_ASM_32BIT_SUPPORT */
304
/* Same compiler/target test as the one guarding the earlier
 * "#pragma warning( disable : 4035 )", so the two pragmas always pair up. */
#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
   /* Reenable missing return value warning */
#  pragma warning( default : 4035 )
#endif
#endif
310
#endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */