Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6725 siemargl 1
/*
2
  Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
3
 
4
  See the accompanying file LICENSE, version 2000-Apr-09 or later
5
  (the contents of which are also included in zip.h) for terms of use.
6
  If, for some reason, all these files are missing, the Info-ZIP license
7
  also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8
*/
9
/*
10
 * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
11
 * created by Paul Kienitz and Christian Spieler.  Last revised 07 Jan 2007.
12
 *
13
 * GRR 961110:  incorporated Scott Field optimizations from win32/crc_i386.asm
14
 *              => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
15
 *
16
 * SPC 970402:  revised for Rodney Brown's optimizations (32-bit-wide
17
 *              aligned reads for most of the data from buffer), can be
18
 *              disabled by defining the macro NO_32_BIT_LOADS
19
 *
20
 * SPC 971012:  added Rodney Brown's additional tweaks for 32-bit-optimized
21
 *              CPUs (like the Pentium Pro, Pentium II, and probably some
22
 *              Pentium clones). This optimization is controlled by the
23
 *              preprocessor switch "__686" and is disabled by default.
24
 *              (This default is based on the assumption that most users
25
 *              do not yet work on a Pentium Pro or Pentium II machine ...)
26
 *
27
 * COS 050116:  Enabled the 686 build by default, because there are hardly any
28
 *              pre-686 CPUs in serious use nowadays. (See SPC 970402 above.)
29
 *
30
 * SPC 060103:  Updated code to incorporate newer optimizations found in zlib.
31
 *
32
 * SPC 070107:  Added conditional switch to deactivate crc32() compilation.
33
 *
34
 * FLAT memory model assumed.  Calling interface:
35
 *   - args are pushed onto the stack from right to left,
36
 *   - return value is given in the EAX register,
37
 *   - all other registers (with exception of EFLAGS) are preserved. (With
38
 *     GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
39
 *     them nevertheless adds only 4 single byte instructions.)
40
 *
41
 * This source generates the function
42
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
43
 *
44
 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
45
 * This results in shorter code at the expense of reduced performance.
46
 */
47
 
48
/* This file is NOT used in conjunction with zlib, or when only creation of
49
 * the basic CRC_32_Table (for other purpose) is requested.
50
 */
51
#if !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
52
 
53
/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
 * external symbols with an underline character '_'.
 * ELF targets never use the prefix, so they are handled automatically.
 */
#if defined(NO_UNDERLINE) || defined(__ELF__)
#  define _crc32            crc32
#  define _get_crc_table    get_crc_table
#endif

/* ALIGNMENT is placed in front of the hot loop heads, which are meant to
 * start on a 16-byte boundary, padded with NOPs (0x90).
 *
 * The meaning of the ".align" operand depends on the object format:
 *   - i386 ELF / SVR4 assemblers take the alignment in BYTES,
 *   - gas for i386 a.out takes log2 of the alignment (4 means 16 bytes).
 * Using ".align 4" on ELF would therefore only give 4-byte alignment, so
 * the byte count is used there.  Define ALIGNMENT on the command line to
 * override this choice for any other assembler.
 */
#ifndef ALIGNMENT
#  ifdef __ELF__
#    define ALIGNMENT .align 16,0x90
#  else
#    define ALIGNMENT .align 4,0x90
#  endif
#endif
67
 
68
#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)
69
 
70
/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
71
 * Warning: it uses the AT&T syntax: mov source,dest
72
 * This file is only optional. If you want to use the C version,
73
 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
74
 */
75
 
76
                .file   "crc_i386.S"
77
 
78
#if !defined(PRE_686) && !defined(__686)
   /* Optimize for Pentium Pro and compatible CPUs by default. */
#  define __686
#endif

/* Stack frame selection.
 *
 * The default is to use the standard %ebp stack frame entry, because it
 * results in smaller code.  Defining NO_STD_STACKFRAME switches to
 * %esp-relative argument access and always wins over USE_STD_STACKFRAME.
 */
#ifdef NO_STD_STACKFRAME
#  undef USE_STD_STACKFRAME
#else
#  ifndef USE_STD_STACKFRAME
#    define USE_STD_STACKFRAME
#  endif
#endif

#ifdef USE_STD_STACKFRAME
   /* After "pushl %ebp; movl %esp,%ebp": 0(%ebp) = saved ebp,
    * 4(%ebp) = return address, first argument at 8(%ebp).
    */
#  define _STD_ENTRY    pushl   %ebp ; movl   %esp,%ebp
#  define arg1  8(%ebp)
#  define arg2  12(%ebp)
#  define arg3  16(%ebp)
#  define _STD_LEAVE    popl    %ebp
#else /* !USE_STD_STACKFRAME */
   /* Offsets are relative to %esp AFTER the five register pushes done at
    * function entry: 5 * 4 bytes of saved registers + 4 bytes of return
    * address = 24 bytes below the first argument.
    */
#  define _STD_ENTRY
#  define arg1  24(%esp)
#  define arg2  28(%esp)
#  define arg3  32(%esp)
#  define _STD_LEAVE
#endif /* ?USE_STD_STACKFRAME */
107
 
108
/*
 * Byte-wise building blocks of the CRC32 cruncher.
 *
 *   Do_CRC             one table step on the low byte of the crc value
 *   Do_CRC_byte        xor in *buf, advance buf by one, then Do_CRC
 *   Do_CRC_byteof(ofs) xor in buf[ofs], then Do_CRC; buf is NOT advanced,
 *                      the caller adds the total once after a run of these
 *
 * registers modified:
 *   eax  : crc value "c"
 *   esi  : pointer to next data byte "buf" (Do_CRC_byte only)
 * registers read:
 *   edi  : pointer to base of crc_table array
 * scratch registers:
 *   ebx  : index into crc_table array
 *          (requires upper three bytes = 0 when __686 is undefined)
 */
#ifndef __686   /* optimize for 386, 486, Pentium */
#define Do_CRC          /* c = (c >> 8) ^ table[c & 0xFF] */\
                movb    %al, %bl                ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax   ;/* c ^= table[tmp] */
#else   /* __686 : optimize for Pentium Pro and compatible CPUs */
#define Do_CRC          /* c = (c >> 8) ^ table[c & 0xFF] */\
                movzbl  %al, %ebx               ;/* tmp = c & 0xFF  */\
                shrl    $8, %eax                ;/* c = (c >> 8)    */\
                xorl    (%edi, %ebx, 4), %eax   ;/* c ^= table[tmp] */
#endif  /* ?__686 */

#define Do_CRC_byte             /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
                xorb    (%esi), %al     ;/* c ^= *buf  */\
                incl    %esi            ;/* buf++      */\
                Do_CRC

/* No "incl %esi" here: the offset already selects the byte.  Incrementing
 * as well would skip every second byte and, together with the caller's
 * final "addl $16,%esi", advance buf by 32 for each 16-byte block.
 */
#define Do_CRC_byteof(ofs)      /* c = (c >> 8) ^ table[(c^buf[ofs])&0xFF] */\
                xorb    ofs(%esi), %al  ;/* c ^= buf[ofs] */\
                Do_CRC
140
 
141
/*
 * Building blocks for processing the buffer one 32-bit lword at a time,
 * plus the definition of SavLen (the place where the remaining length is
 * parked while ecx is used as loop counter).
 *
 *   UpdCRC_lword          fold the four bytes already xor-ed into eax
 *   UpdCRC_lword_sh(n)    same, and advance esi by n lwords (4*n bytes)
 *   Do_CRC_lword          consume one lword from (%esi), esi += 4
 *   Do_CRC_4lword         consume four lwords from (%esi), esi += 16
 *
 * registers: eax = crc, esi = buf, edi = table base, ebx = scratch index,
 * edx = scratch (IZ_CRCOPTIM_UNFOLDTBL only).
 */
#ifndef  NO_32_BIT_LOADS
# ifdef IZ_CRCOPTIM_UNFOLDTBL
   /* Each byte of eax indexes its own 256-entry table, found at byte
    * offsets 3072, 2048, 1024 and 0 from edi.  The edx register is needed
    * in crc calculation, so the length is kept in the stack slot of arg3.
    */
#  define SavLen arg3
#  define UpdCRC_lword \
                movzbl  %al, %ebx               ; \
                movl    3072(%edi,%ebx,4), %edx ; \
                movzbl  %ah, %ebx               ; \
                shrl    $16, %eax               ; \
                xorl    2048(%edi,%ebx,4), %edx ; \
                movzbl  %al, %ebx               ; \
                shrl    $8,%eax                 ; \
                xorl    1024(%edi,%ebx,4), %edx ; \
                movl    (%edi,%eax,4), %eax     ; \
                xorl    %edx,%eax               ;
#  define UpdCRC_lword_sh(dwPtrIncr) \
                movzbl  %al, %ebx               ; \
                movl    3072(%edi,%ebx,4), %edx ; \
                movzbl  %ah, %ebx               ; \
                shrl    $16, %eax               ; \
                xorl    2048(%edi,%ebx,4), %edx ; \
                movzbl  %al, %ebx               ; \
                addl    $4*(dwPtrIncr), %esi    ;/* ((ulg *)buf)+=dwPtrIncr */\
                shrl    $8,%eax                 ; \
                xorl    1024(%edi,%ebx,4), %edx ; \
                movl    (%edi,%eax,4),%eax      ; \
                xorl    %edx,%eax               ;
# else /* !IZ_CRCOPTIM_UNFOLDTBL */
   /* the edx register is not needed anywhere else */
#  define SavLen %edx
#  define UpdCRC_lword \
                Do_CRC \
                Do_CRC \
                Do_CRC \
                Do_CRC
#  define UpdCRC_lword_sh(dwPtrIncr) \
                Do_CRC \
                Do_CRC \
                addl    $4*(dwPtrIncr), %esi    ;/* ((ulg *)buf)+=dwPtrIncr */\
                Do_CRC \
                Do_CRC
# endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
#define Do_CRC_lword \
                xorl    (%esi), %eax           ;/* c ^= *(ulg *)buf */\
                UpdCRC_lword_sh(1)              /* ... ((ulg *)buf)++ */
#define Do_CRC_4lword \
                xorl    (%esi), %eax           ;/* c ^= *(ulg *)buf */\
                UpdCRC_lword \
                xorl    4(%esi), %eax          ;/* c ^= *((ulg *)buf+1) */\
                UpdCRC_lword \
                xorl    8(%esi), %eax          ;/* c ^= *((ulg *)buf+2) */\
                UpdCRC_lword \
                xorl    12(%esi), %eax         ;/* c ^= *((ulg *)buf+3) */\
                UpdCRC_lword_sh(4)              /* ... ((ulg *)buf)+=4 */
#else   /* NO_32_BIT_LOADS */
   /* The unrolled byte-wise loop still has to park the length somewhere;
    * the byte-wise macros do not touch edx, so it is free for that.
    */
#  define SavLen %edx
#endif  /* ?NO_32_BIT_LOADS */
196
 
197
 
198
                .text

                .globl  _crc32

/*
 * ulg crc32(ulg crc, uch *buf, extent len)
 *
 * Arguments are taken from the stack (arg1..arg3), the result is returned
 * in eax.  edi, esi, ebx, edx and ecx are saved on entry and restored on
 * exit.  A NULL buf yields 0; otherwise the result is ~c after feeding
 * len bytes into c = ~crc.
 *
 * Register roles inside the function:
 *   eax = running value c      esi = buf pointer
 *   edi = crc table base       ecx = loop counter
 *   ebx = table index scratch  SavLen = parked length (see its definition)
 */
_crc32:
                _STD_ENTRY
                pushl   %edi
                pushl   %esi
                pushl   %ebx
                pushl   %edx
                pushl   %ecx

                xorl    %eax, %eax           /* result for the NULL case     */
                movl    arg2, %esi           /* esi = buf                    */
                testl   %esi, %esi
                jz      .L_restore           /* if (!buf) return 0;          */

                call    _get_crc_table       /* eax = table address          */
                movl    %eax, %edi           /* edi = table base             */
                movl    arg3, %ecx           /* ecx = len                    */
                movl    arg1, %eax           /* eax = crc                    */
                notl    %eax                 /* c = ~crc                     */
#ifndef __686
                xorl    %ebx, %ebx           /* Do_CRC writes bl only, so    */
                                             /* the rest of ebx must be 0    */
#endif

                testl   %ecx, %ecx           /* ZF consumed by next jz below */
#ifndef  NO_UNROLLED_LOOPS
                jz      .L_finish            /* nothing to do for len == 0   */
#  ifndef  NO_32_BIT_LOADS
                /* len > 0 here: eat single bytes until buf is lword aligned */
.L_head_byte:
                testl   $3, %esi
                jz      .L_head_done
                Do_CRC_byte
                decl    %ecx
                jnz     .L_head_byte
.L_head_done:
#  endif  /* !NO_32_BIT_LOADS */
                movl    %ecx, SavLen         /* remember remaining length    */
                shrl    $4, %ecx             /* ecx = number of 16-byte runs */
                jz      .L_block16_done
                /* put the loop head on an alignment boundary */
                ALIGNMENT
.L_block16:
#  ifndef NO_32_BIT_LOADS
                 Do_CRC_4lword               /* 4 lwords, advances esi by 16 */
#  else   /* NO_32_BIT_LOADS */
                 Do_CRC_byteof(0)
                 Do_CRC_byteof(1)
                 Do_CRC_byteof(2)
                 Do_CRC_byteof(3)
                 Do_CRC_byteof(4)
                 Do_CRC_byteof(5)
                 Do_CRC_byteof(6)
                 Do_CRC_byteof(7)
                 Do_CRC_byteof(8)
                 Do_CRC_byteof(9)
                 Do_CRC_byteof(10)
                 Do_CRC_byteof(11)
                 Do_CRC_byteof(12)
                 Do_CRC_byteof(13)
                 Do_CRC_byteof(14)
                 Do_CRC_byteof(15)
                 addl    $16,%esi            /* buf += 16                    */
#  endif  /* ?NO_32_BIT_LOADS */
                decl    %ecx
                jnz     .L_block16

.L_block16_done:
                movl    SavLen, %ecx
                andl    $15, %ecx            /* ecx = len % 16               */
# ifndef NO_32_BIT_LOADS
                shrl    $2,%ecx              /* ecx = remaining whole lwords */
                jz      .L_lword_done
.L_lword:
                Do_CRC_lword                 /* 1 lword, advances esi by 4   */
                decl    %ecx
                jnz     .L_lword
.L_lword_done:
                movl    SavLen,%ecx
                andl    $3,%ecx              /* ecx = len % 4                */
# endif /* !NO_32_BIT_LOADS */
#endif /* !NO_UNROLLED_LOOPS */
                /* ZF comes from the last testl/andl on ecx above */
                jz      .L_finish            /* no trailing bytes left       */
                /* put the loop head on an alignment boundary */
                ALIGNMENT
.L_tail_byte:                                /* do {                         */
                 Do_CRC_byte                 /*   c = CRC32(c,*buf++,crctab);*/
                decl    %ecx                 /* } while (--len);             */
                jnz     .L_tail_byte

.L_finish:
                notl    %eax                 /* return ~c;                   */
.L_restore:
                popl    %ecx
                popl    %edx
                popl    %ebx
                popl    %esi
                popl    %edi
                _STD_LEAVE
                ret
299
 
300
#else
301
 error: this asm version is for 386 only
302
#endif /* i386 || _i386 || _I386 || __i386 */
303
 
304
#endif /* !USE_ZLIB && !CRC_TABLE_ONLY */