Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6725 | siemargl | 1 | /* |
2 | Copyright (c) 1990-2007 Info-ZIP. All rights reserved. |
||
3 | |||
4 | See the accompanying file LICENSE, version 2000-Apr-09 or later |
||
5 | (the contents of which are also included in zip.h) for terms of use. |
||
6 | If, for some reason, all these files are missing, the Info-ZIP license |
||
7 | also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
||
8 | */ |
||
9 | /* |
||
10 | * crc_i386.S, optimized CRC calculation function for Zip and UnZip, |
||
11 | * created by Paul Kienitz and Christian Spieler. Last revised 07 Jan 2007. |
||
12 | * |
||
13 | * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm |
||
14 | * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66) |
||
15 | * |
||
16 | * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide |
||
17 | * aligned reads for most of the data from buffer), can be |
||
18 | * disabled by defining the macro NO_32_BIT_LOADS |
||
19 | * |
||
20 | * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized |
||
21 | * CPUs (like the Pentium Pro, Pentium II, and probably some |
||
22 | * Pentium clones). This optimization is controlled by the |
||
23 | * preprocessor switch "__686" and is disabled by default. |
||
24 | * (This default is based on the assumption that most users |
||
25 | * do not yet work on a Pentium Pro or Pentium II machine ...) |
||
26 | * |
||
27 | * COS 050116: Enabled the 686 build by default, because there are hardly any |
||
28 | * pre-686 CPUs in serious use nowadays. (See SPC 970402 above.) |
||
29 | * |
||
30 | * SPC 060103: Updated code to incorporate newer optimizations found in zlib. |
||
31 | * |
||
32 | * SPC 070107: Added conditional switch to deactivate crc32() compilation. |
||
33 | * |
||
34 | * FLAT memory model assumed. Calling interface: |
||
35 | * - args are pushed onto the stack from right to left, |
||
36 | * - return value is given in the EAX register, |
||
37 | * - all other registers (with exception of EFLAGS) are preserved. (With |
||
38 | * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving |
||
39 | * them nevertheless adds only 4 single byte instructions.) |
||
40 | * |
||
41 | * This source generates the function |
||
42 | * ulg crc32(ulg crc, ZCONST uch *buf, extent len). |
||
43 | * |
||
44 | * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. |
||
45 | * This results in shorter code at the expense of reduced performance. |
||
46 | */ |
||
47 | |||
48 | /* This file is NOT used in conjunction with zlib, or when only creation of |
||
49 | * the basic CRC_32_Table (for other purposes) is requested. |
||
50 | */ |
||
51 | #if !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY) |
||
52 | |||
/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
 * external symbols with an underline character '_'.  A build that defines
 * __ELF__ is treated the same way: the underscore-prefixed names used in
 * this file are mapped to their plain spelling.
 */
#if defined(NO_UNDERLINE) || defined(__ELF__)
#  define _crc32            crc32
#  define _get_crc_table    get_crc_table
#endif

/* Use 16-byte alignment if your assembler supports it.
 *
 * Warning: the meaning of the .align operand depends on the object format.
 *  - gas for a.out i386 targets takes a log2 value (.align 4 means 16-byte
 *    alignment).
 *  - gas for ELF i386 targets and SVR4 assemblers take a byte count, so
 *    ".align 4" would only request 4-byte alignment there.
 * The default below therefore requests 16 bytes explicitly on ELF and keeps
 * the log2 form everywhere else.  Define ALIGNMENT yourself to override.
 * The second operand (0x90 = nop) is the fill byte.
 */
#ifndef ALIGNMENT
#  ifdef __ELF__
#    define ALIGNMENT   .align 16,0x90
#  else
#    define ALIGNMENT   .align 4,0x90
#  endif
#endif
||
67 | |||
68 | #if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386) |
||
69 | |||
/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
 * Warning: it uses the AT&T syntax: mov source,dest
 * This file is only optional. If you want to use the C version,
 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
 */

                .file   "crc_i386.S"

#if !defined(PRE_686) && !defined(__686)
   /* Optimize for Pentium Pro and compatible CPUs by default. */
#  define __686
#endif

/* Stack frame selection.
 *  - NO_STD_STACKFRAME requests the frameless entry; it takes precedence
 *    over a USE_STD_STACKFRAME that may also have been defined.
 *  - Otherwise the standard stack frame entry is the default, because it
 *    results in smaller code!
 */
#ifdef NO_STD_STACKFRAME
#  undef USE_STD_STACKFRAME
#else
#  ifndef USE_STD_STACKFRAME
#    define USE_STD_STACKFRAME
#  endif
#endif

#ifdef USE_STD_STACKFRAME
   /* With a frame pointer: saved %ebp at 0(%ebp), return address at
    * 4(%ebp), first argument at 8(%ebp).
    */
#  define _STD_ENTRY    pushl   %ebp ; movl   %esp,%ebp
#  define arg1  8(%ebp)
#  define arg2  12(%ebp)
#  define arg3  16(%ebp)
#  define _STD_LEAVE    popl    %ebp
#else /* !USE_STD_STACKFRAME */
   /* Frameless: the offsets are relative to %esp AFTER the five register
    * pushes at the top of _crc32 (5 * 4 bytes) plus the return address
    * (4 bytes), hence the first argument sits at 24(%esp).  They are only
    * valid while %esp stays at that level.
    */
#  define _STD_ENTRY
#  define arg1  24(%esp)
#  define arg2  28(%esp)
#  define arg3  32(%esp)
#  define _STD_LEAVE
#endif /* ?USE_STD_STACKFRAME */
||
107 | |||
/*
 * Byte-wise building blocks of the CRC32 cruncher.
 *   registers modified:
 *     eax : crc value "c"
 *     esi : pointer to next data byte "buf++"   (Do_CRC_byte only)
 *   registers read:
 *     edi : pointer to base of crc_table array
 *   scratch registers:
 *     ebx : index into crc_table array
 *           (when __686 is undefined only %bl is written, so the upper
 *            three bytes of ebx must already be 0)
 */
#ifndef __686   /* optimize for 386, 486, Pentium: byte move into ebx */
#  define CRC_LoadIdx   movb    %al, %bl
#else           /* __686 : Pentium Pro and compatible: zero-extending move */
#  define CRC_LoadIdx   movzbl  %al, %ebx
#endif /* ?__686 */

#define Do_CRC      /* c = (c >> 8) ^ table[c & 0xFF] */\
                CRC_LoadIdx                   ;/* tmp = c & 0xFF   */\
                shrl    $8, %eax              ;/* c = (c >> 8)     */\
                xorl    (%edi, %ebx, 4), %eax ;/* c ^= table[tmp]  */

#define Do_CRC_byte /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
                xorb    (%esi), %al           ;/* c ^= *buf        */\
                incl    %esi                  ;/* buf++            */\
                Do_CRC
||
135 | |||
/*
 * One CRC step on the byte at a fixed offset from buf.
 * Unlike Do_CRC_byte this macro does NOT advance %esi: the offset already
 * selects the byte, so incrementing the pointer as well would make
 * consecutive offsets skip input bytes.  The user advances buf once after
 * a run of offsets.
 *   registers modified: eax (crc value "c")
 *   registers read:     esi (buf), plus whatever Do_CRC reads
 */
#define Do_CRC_byteof(ofs)  /* c = (c >> 8) ^ table[(c^buf[ofs])&0xFF] */\
                xorb    ofs(%esi), %al ;/* c ^= buf[ofs]               */\
                Do_CRC
||
140 | |||
#ifndef  NO_32_BIT_LOADS
/*
 * 32-bit-wide CRC steps.  On entry to UpdCRC_lword / UpdCRC_lword_sh the
 * next data lword has already been xor-ed into eax.
 *   UpdCRC_lword           fold the 4 bytes held in eax into the crc
 *   UpdCRC_lword_sh(n)     same, and advance esi by n lwords
 *   Do_CRC_lword           process 1 lword at (%esi), buf += 4
 *   Do_CRC_4lword          process 4 lwords at (%esi), buf += 16
 *   SavLen                 location used to hold the remaining length
 */
#  ifdef IZ_CRCOPTIM_UNFOLDTBL
     /* the edx register is needed in crc calculation, so the remaining
      * length is kept in the arg3 stack slot instead
      */
#    define SavLen arg3
#    define UpdCRC_lword \
                movzbl  %al, %ebx                ;/* byte 0 of c          */\
                movl    3072(%edi,%ebx,4), %edx  ;/* lookup at +3072      */\
                movzbl  %ah, %ebx                ;/* byte 1 of c          */\
                shrl    $16, %eax                ;/* c >>= 16             */\
                xorl    2048(%edi,%ebx,4), %edx  ;/* lookup at +2048      */\
                movzbl  %al, %ebx                ;/* byte 2 of c          */\
                shrl    $8, %eax                 ;/* eax = byte 3 of c    */\
                xorl    1024(%edi,%ebx,4), %edx  ;/* lookup at +1024      */\
                movl    (%edi,%eax,4), %eax      ;/* lookup at +0         */\
                xorl    %edx, %eax               ;/* combine all four     */
#    define UpdCRC_lword_sh(dwPtrIncr) \
                movzbl  %al, %ebx                ;/* byte 0 of c          */\
                movl    3072(%edi,%ebx,4), %edx  ;/* lookup at +3072      */\
                movzbl  %ah, %ebx                ;/* byte 1 of c          */\
                shrl    $16, %eax                ;/* c >>= 16             */\
                xorl    2048(%edi,%ebx,4), %edx  ;/* lookup at +2048      */\
                movzbl  %al, %ebx                ;/* byte 2 of c          */\
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                shrl    $8, %eax                 ;/* eax = byte 3 of c    */\
                xorl    1024(%edi,%ebx,4), %edx  ;/* lookup at +1024      */\
                movl    (%edi,%eax,4), %eax      ;/* lookup at +0         */\
                xorl    %edx, %eax               ;/* combine all four     */
#  else /* !IZ_CRCOPTIM_UNFOLDTBL */
     /* the edx register is not needed anywhere else */
#    define SavLen %edx
#    define UpdCRC_lword \
                Do_CRC \
                Do_CRC \
                Do_CRC \
                Do_CRC
#    define UpdCRC_lword_sh(dwPtrIncr) \
                Do_CRC \
                Do_CRC \
                addl    $4*(dwPtrIncr), %esi     ;/* ((ulg *)buf)+=dwPtrIncr */\
                Do_CRC \
                Do_CRC
#  endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
#define Do_CRC_lword \
                xorl    (%esi), %eax             ;/* c ^= *(ulg *)buf     */\
                UpdCRC_lword_sh(1)               /* ... ((ulg *)buf)++    */
#define Do_CRC_4lword \
                xorl    (%esi), %eax             ;/* c ^= *(ulg *)buf     */\
                UpdCRC_lword \
                xorl    4(%esi), %eax            ;/* c ^= *((ulg *)buf+1) */\
                UpdCRC_lword \
                xorl    8(%esi), %eax            ;/* c ^= *((ulg *)buf+2) */\
                UpdCRC_lword \
                xorl    12(%esi), %eax           ;/* c ^= *((ulg *)buf+3) */\
                UpdCRC_lword_sh(4)               /* ... ((ulg *)buf)+=4   */
#endif  /* !NO_32_BIT_LOADS */
||
196 | |||
197 | |||
#ifndef SavLen
   /* SavLen is otherwise only defined together with the 32-bit load macros.
    * The unrolled loop below needs it even when NO_32_BIT_LOADS is set, so
    * fall back to %edx, which this function saves and restores and does not
    * use for anything else on that path.
    */
#  define SavLen %edx
#endif

                .text

                .globl  _crc32

/*
 * ulg crc32(ulg crc, ZCONST uch *buf, extent len)
 *
 * Arguments are read from the stack through arg1..arg3; the result is
 * returned in %eax.  %edi, %esi, %ebx, %edx and %ecx are pushed on entry
 * and restored on exit (EFLAGS is not preserved).  Returns 0 when buf is
 * NULL; otherwise returns ~(update of ~crc over len bytes of buf).
 *
 * Register use:
 *   %eax    running crc value "c" (inverted on entry and again on exit)
 *   %esi    read pointer into buf
 *   %edi    table base returned by get_crc_table()
 *   %ecx    loop counter
 *   %ebx    table index scratch (cleared once when __686 is undefined)
 *   SavLen  length still to process after the optional alignment loop
 *
 * NOTE: in the frameless configuration the argN offsets assume exactly the
 * five pushes below; keep both in sync.
 */
_crc32:
                _STD_ENTRY
                pushl   %edi
                pushl   %esi
                pushl   %ebx
                pushl   %edx
                pushl   %ecx

                movl    arg2, %esi           /* 2nd arg: uch *buf            */
                subl    %eax, %eax           /* > if (!buf)                  */
                testl   %esi, %esi           /* >   return 0;                */
                jz      .L_fine              /* > else {                     */
                call    _get_crc_table
                movl    %eax, %edi           /* edi = base of crc table      */
                movl    arg1, %eax           /* 1st arg: ulg crc             */
#ifndef __686
                subl    %ebx, %ebx           /* ebx=0; bl usable as dword    */
#endif
                movl    arg3, %ecx           /* 3rd arg: extent len          */
                notl    %eax                 /* >   c = ~crc;                */

                testl   %ecx, %ecx           /* ZF set <=> len == 0          */
#ifndef  NO_UNROLLED_LOOPS
                jz      .L_bail
#  ifndef NO_32_BIT_LOADS
                /* Assert now have positive length */
.L_align_loop:
                testl   $3, %esi             /* Align buf on lword boundary  */
                jz      .L_aligned_now
                Do_CRC_byte
                decl    %ecx
                jnz     .L_align_loop
.L_aligned_now:
#  endif  /* !NO_32_BIT_LOADS */
                movl    %ecx, SavLen         /* save current value of len    */
                shrl    $4, %ecx             /* ecx = len / 16               */
                jz      .L_No_Sixteens
/*  align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_Next_Sixteen:
#  ifndef NO_32_BIT_LOADS
                Do_CRC_4lword                /* 16 bytes, buf += 16          */
#  else   /* NO_32_BIT_LOADS */
                /* 16 single-byte steps; each Do_CRC_byte reads *buf and
                 * advances %esi itself, so no extra pointer update follows.
                 */
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
                Do_CRC_byte
#  endif  /* ?NO_32_BIT_LOADS */
                decl    %ecx
                jnz     .L_Next_Sixteen

.L_No_Sixteens:
                movl    SavLen, %ecx
                andl    $15, %ecx            /* ecx = len % 16               */
#  ifndef NO_32_BIT_LOADS
                shrl    $2, %ecx             /* ecx = (len % 16) / 4         */
                jz      .L_No_Fours
.L_Next_Four:
                Do_CRC_lword
                decl    %ecx
                jnz     .L_Next_Four
.L_No_Fours:
                movl    SavLen, %ecx
                andl    $3, %ecx             /* ecx = len % 4                */
#  endif  /* !NO_32_BIT_LOADS */
#endif /* !NO_UNROLLED_LOOPS */
                /* ZF comes from the last testl/andl on ecx above */
                jz      .L_bail              /* > if (len)                   */
/*  align loop head at start of 486 internal cache line !! */
                ALIGNMENT
.L_loupe:                                    /* >   do {                     */
                Do_CRC_byte                  /*       c = CRC32(c,*buf++,crctab);*/
                decl    %ecx                 /* >   } while (--len);         */
                jnz     .L_loupe

.L_bail:                                     /* > }                          */
                notl    %eax                 /* > return ~c;                 */
.L_fine:
                popl    %ecx
                popl    %edx
                popl    %ebx
                popl    %esi
                popl    %edi
                _STD_LEAVE
                ret
||
299 | |||
#else
   /* Stop in the preprocessor with a clear diagnostic instead of feeding
    * free text to the assembler.
    */
#  error this asm version is for 386 only
#endif /* i386 || _i386 || _I386 || __i386 */

#endif /* !USE_ZLIB && !CRC_TABLE_ONLY */

#if defined(__ELF__) && defined(__linux__)
   /* This object needs no executable stack.  Without this note GNU ld
    * treats a hand-written assembler object as requiring one.
    */
                .section .note.GNU-stack,"",%progbits
#endif