Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6725 | siemargl | 1 | /* |
2 | Copyright (c) 1990-2007 Info-ZIP. All rights reserved. |
||
3 | |||
4 | See the accompanying file LICENSE, version 2000-Apr-09 or later |
||
5 | (the contents of which are also included in zip.h) for terms of use. |
||
6 | If, for some reason, all these files are missing, the Info-ZIP license |
||
7 | also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
||
8 | */ |
||
9 | /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm |
||
10 | * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler. |
||
11 | * Last revised: 07-Jan-2007 |
||
12 | * |
||
13 | * Originally coded (in crc_i386.asm) and put into the public domain |
||
14 | * by Paul Kienitz and Christian Spieler. |
||
15 | * |
||
16 | * Revised 06-Oct-96, Scott Field (sfield@microsoft.com) |
||
17 | * fixed to assemble with masm by not using .model directive which makes |
||
18 | * assumptions about segment alignment. Also, |
||
19 | * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather |
||
20 | * than lodsb, and other misc. changes resulting in the following performance |
||
21 | * increases: |
||
22 | * |
||
23 | * unrolled loops NO_UNROLLED_LOOPS |
||
24 | * *8 >8 <8 *8 >8 <8 |
||
25 | * |
||
26 | * +54% +42% +35% +82% +52% +25% |
||
27 | * |
||
28 | * first item in each table is input buffer length, even multiple of 8 |
||
29 | * second item in each table is input buffer length, > 8 |
||
30 | * third item in each table is input buffer length, < 8 |
||
31 | * |
||
32 | * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) |
||
33 | * Incorporated Rodney Brown's 32-bit-reads optimization as found in the |
||
34 | * UNIX AS source crc_i386.S. This new code can be disabled by defining |
||
35 | * the macro symbol NO_32_BIT_LOADS. |
||
36 | * |
||
37 | * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) |
||
38 | * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs |
||
39 | * (like the Pentium Pro, Pentium II, and probably some Pentium clones). |
||
40 | * This optimization is controlled by the macro symbol __686 and is disabled |
||
41 | * by default. (This default is based on the assumption that most users |
||
42 | * do not yet work on a Pentium Pro or Pentium II machine ...) |
||
43 | * |
||
44 | * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++ |
||
45 | * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic, |
||
46 | * confirmed correct working with MS VC++ (32-bit). |
||
47 | * |
||
48 | * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke |
||
49 | * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its |
||
50 | * own __asm {...} construct. For MSVC, a "#pragma warning" was added to |
||
51 | * shut up the "no return value" warning message. |
||
52 | * |
||
53 | * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file. |
||
54 | * |
||
55 | * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier |
||
56 | * switching between ASM vs. non-ASM builds, when handling makefiles. |
||
57 | * Also enabled the 686 build by default, because there are hardly any |
||
58 | * pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.) |
||
59 | * |
||
60 | * Revised 03-Jan-2006, Chr. Spieler |
||
61 | * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to |
||
62 | * data buffer in loop body (adjust pointer only once in loop body and use |
||
63 | * offsets to access each item); added additional support for the "unfolded |
||
64 | * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL). |
||
65 | * |
||
66 | * Revised 07-Jan-2007, Chr. Spieler |
||
67 | * Recognize additional conditional flag CRC_TABLE_ONLY that prevents |
||
68 | * compilation of the crc32() function. |
||
69 | * |
||
70 | * FLAT memory model assumed. |
||
71 | * |
||
72 | * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. |
||
73 | * This results in shorter code at the expense of reduced performance. |
||
74 | * |
||
75 | */ |
||
76 | |||
77 | #include "../zip.h" |
||
78 | #include "../crc32.h" |
||
79 | |||
80 | #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY) |
||
81 | |||
82 | #if !defined(PRE_686) && !defined(__686) |
||
83 | # define __686 |
||
84 | #endif |
||
85 | |||
86 | #ifndef ZCONST |
||
87 | # define ZCONST const |
||
88 | #endif |
||
89 | |||
90 | /* Select whether the following inline-assembler code is supported. */ |
||
91 | #if (defined(_MSC_VER) && _MSC_VER >= 700) |
||
92 | #if (defined(_M_IX86) && _M_IX86 >= 300) |
||
93 | # define MSC_INLINE_ASM_32BIT_SUPPORT |
||
94 | /* Disable warning for no return value, typical of asm functions */ |
||
95 | # pragma warning( disable : 4035 ) |
||
96 | #endif |
||
97 | #endif |
||
98 | |||
99 | #if (defined(__BORLANDC__) && __BORLANDC__ >= 452) |
||
100 | # define MSC_INLINE_ASM_32BIT_SUPPORT |
||
101 | #endif |
||
102 | |||
103 | #ifdef MSC_INLINE_ASM_32BIT_SUPPORT |
||
104 | /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ |
||
105 | |||
106 | /* |
||
107 | * These two (three) macros make up the loop body of the CRC32 cruncher. |
||
108 | * registers modified: |
||
109 | * eax : crc value "c" |
||
110 | * esi : pointer to next data byte (or dword) "buf++" |
||
111 | * registers read: |
||
112 | * edi : pointer to base of crc_table array |
||
113 | * scratch registers: |
||
114 | * ebx : index into crc_table array |
||
115 | * (requires upper three bytes = 0 when __686 is undefined) |
||
116 | */ |
||
117 | #ifndef __686 |
||
118 | #define Do_CRC { \ |
||
119 | __asm { mov bl, al }; \ |
||
120 | __asm { shr eax, 8 }; \ |
||
121 | __asm { xor eax, [edi+ebx*4] }; } |
||
122 | #else /* __686 */ |
||
123 | #define Do_CRC { \ |
||
124 | __asm { movzx ebx, al }; \ |
||
125 | __asm { shr eax, 8 }; \ |
||
126 | __asm { xor eax, [edi+ebx*4] }; } |
||
127 | #endif /* ?__686 */ |
||
128 | |||
129 | #define Do_CRC_byte { \ |
||
130 | __asm { xor al, byte ptr [esi] }; \ |
||
131 | __asm { inc esi }; \ |
||
132 | Do_CRC; } |
||
133 | |||
134 | #define Do_CRC_byteof(ofs) { \ |
||
135 | __asm { xor al, byte ptr [esi+(ofs)] }; \ |
||
136 | Do_CRC; } |
||
137 | |||
138 | #ifndef NO_32_BIT_LOADS |
||
139 | #ifdef IZ_CRCOPTIM_UNFOLDTBL |
||
140 | # define SavLen len /* the edx register is needed elsewhere */ |
||
141 | # define UpdCRC_dword { \ |
||
142 | __asm { movzx ebx,al }; \ |
||
143 | __asm { mov edx,[edi+ebx*4+3072] }; \ |
||
144 | __asm { movzx ebx,ah }; \ |
||
145 | __asm { shr eax,16 }; \ |
||
146 | __asm { xor edx,[edi+ebx*4+2048] }; \ |
||
147 | __asm { movzx ebx,al }; \ |
||
148 | __asm { shr eax,8 }; \ |
||
149 | __asm { xor edx,[edi+ebx*4+1024] }; \ |
||
150 | __asm { mov eax,[edi+eax*4] }; \ |
||
151 | __asm { xor eax,edx }; } |
||
152 | # define UpdCRC_dword_sh(dwPtrIncr) { \ |
||
153 | __asm { movzx ebx,al }; \ |
||
154 | __asm { mov edx,[edi+ebx*4+3072] }; \ |
||
155 | __asm { movzx ebx,ah }; \ |
||
156 | __asm { xor edx,[edi+ebx*4+2048] }; \ |
||
157 | __asm { shr eax,16 }; \ |
||
158 | __asm { movzx ebx,al }; \ |
||
159 | __asm { add esi, 4*(dwPtrIncr) }; \ |
||
160 | __asm { shr eax,8 }; \ |
||
161 | __asm { xor edx,[edi+ebx*4+1024] }; \ |
||
162 | __asm { mov eax,[edi+eax*4] }; \ |
||
163 | __asm { xor eax,edx }; } |
||
164 | #else /* !IZ_CRCOPTIM_UNFOLDTBL */ |
||
165 | # define SavLen edx /* the edx register is free for use here */ |
||
166 | # define UpdCRC_dword { \ |
||
167 | Do_CRC; \ |
||
168 | Do_CRC; \ |
||
169 | Do_CRC; \ |
||
170 | Do_CRC; } |
||
171 | # define UpdCRC_dword_sh(dwPtrIncr) { \ |
||
172 | Do_CRC; \ |
||
173 | Do_CRC; \ |
||
174 | __asm { add esi, 4*(dwPtrIncr) }; \ |
||
175 | Do_CRC; \ |
||
176 | Do_CRC; } |
||
177 | #endif /* ?IZ_CRCOPTIM_UNFOLDTBL */ |
||
178 | |||
179 | #define Do_CRC_dword { \ |
||
180 | __asm { xor eax, dword ptr [esi] }; \ |
||
181 | UpdCRC_dword_sh(1); } |
||
182 | |||
183 | #define Do_CRC_4dword { \ |
||
184 | __asm { xor eax, dword ptr [esi] }; \ |
||
185 | UpdCRC_dword; \ |
||
186 | __asm { xor eax, dword ptr [esi+4] }; \ |
||
187 | UpdCRC_dword; \ |
||
188 | __asm { xor eax, dword ptr [esi+8] }; \ |
||
189 | UpdCRC_dword; \ |
||
190 | __asm { xor eax, dword ptr [esi+12] }; \ |
||
191 | UpdCRC_dword_sh(4); } |
||
192 | #endif /* !NO_32_BIT_LOADS */ |
||
193 | |||
194 | /* ========================================================================= */ |
||
195 | ulg crc32(crc, buf, len) |
||
196 | ulg crc; /* crc shift register */ |
||
197 | ZCONST uch *buf; /* pointer to bytes to pump through */ |
||
198 | extent len; /* number of bytes in buf[] */ |
||
199 | /* Run a set of bytes through the crc shift register. If buf is a NULL |
||
200 | pointer, no bytes are processed and 0 (the initial crc register value) |
||
201 | is returned instead. Otherwise the updated crc is returned. */ |
||
202 | { |
||
203 | __asm { |
||
204 | push edx |
||
205 | push ecx |
||
206 | |||
207 | mov esi,buf ;/* 2nd arg: uch *buf */ |
||
208 | sub eax,eax ;/*> if (!buf) */ |
||
209 | test esi,esi ;/*> return 0; */ |
||
210 | jz fine ;/*> else { */ |
||
211 | |||
212 | call get_crc_table |
||
213 | mov edi,eax |
||
214 | mov eax,crc ;/* 1st arg: ulg crc */ |
||
215 | #ifndef __686 |
||
216 | sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ |
||
217 | #endif |
||
218 | mov ecx,len ;/* 3rd arg: extent len */ |
||
219 | not eax ;/*> c = ~crc; */ |
||
220 | |||
221 | test ecx,ecx |
||
222 | #ifndef NO_UNROLLED_LOOPS |
||
223 | jz bail |
||
224 | # ifndef NO_32_BIT_LOADS |
||
225 | align_loop: |
||
226 | test esi,3 ;/* align buf pointer on next */ |
||
227 | jz aligned_now ;/* dword boundary */ |
||
228 | } |
||
229 | Do_CRC_byte ; |
||
230 | __asm { |
||
231 | dec ecx |
||
232 | jnz align_loop |
||
233 | aligned_now: |
||
234 | # endif /* !NO_32_BIT_LOADS */ |
||
235 | mov SavLen,ecx ;/* save current len for later */ |
||
236 | shr ecx,4 ;/* ecx = len / 16 */ |
||
237 | jz No_Sixteens |
||
238 | ; align loop head at start of 486 internal cache line !! |
||
239 | align 16 |
||
240 | Next_Sixteen: |
||
241 | } |
||
242 | # ifndef NO_32_BIT_LOADS |
||
243 | Do_CRC_4dword ; |
||
244 | # else /* NO_32_BIT_LOADS */ |
||
245 | Do_CRC_byteof(0) ; |
||
246 | Do_CRC_byteof(1) ; |
||
247 | Do_CRC_byteof(2) ; |
||
248 | Do_CRC_byteof(3) ; |
||
249 | Do_CRC_byteof(4) ; |
||
250 | Do_CRC_byteof(5) ; |
||
251 | Do_CRC_byteof(6) ; |
||
252 | Do_CRC_byteof(7) ; |
||
253 | Do_CRC_byteof(8) ; |
||
254 | Do_CRC_byteof(9) ; |
||
255 | Do_CRC_byteof(10) ; |
||
256 | Do_CRC_byteof(11) ; |
||
257 | Do_CRC_byteof(12) ; |
||
258 | Do_CRC_byteof(13) ; |
||
259 | Do_CRC_byteof(14) ; |
||
260 | Do_CRC_byteof(15) ; |
||
261 | __asm { add esi,16 }; |
||
262 | # endif /* ?NO_32_BIT_LOADS */ |
||
263 | __asm { |
||
264 | dec ecx |
||
265 | jnz Next_Sixteen |
||
266 | No_Sixteens: |
||
267 | mov ecx,SavLen |
||
268 | and ecx,00000000FH ;/* ecx = len % 16 */ |
||
269 | # ifndef NO_32_BIT_LOADS |
||
270 | shr ecx,2 |
||
271 | jz No_Fours |
||
272 | Next_Four: |
||
273 | } |
||
274 | Do_CRC_dword ; |
||
275 | __asm { |
||
276 | dec ecx |
||
277 | jnz Next_Four |
||
278 | No_Fours: |
||
279 | mov ecx,SavLen |
||
280 | and ecx,000000003H ;/* ecx = len % 4 */ |
||
281 | # endif /* !NO_32_BIT_LOADS */ |
||
282 | #endif /* !NO_UNROLLED_LOOPS */ |
||
283 | jz bail ;/*> if (len) */ |
||
284 | ; align loop head at start of 486 internal cache line !! |
||
285 | align 16 |
||
286 | loupe: ;/*> do { */ |
||
287 | } |
||
288 | Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/ |
||
289 | __asm { |
||
290 | dec ecx ;/*> } while (--len); */ |
||
291 | jnz loupe |
||
292 | |||
293 | bail: ;/*> } */ |
||
294 | not eax ;/*> return ~c; */ |
||
295 | fine: |
||
296 | pop ecx |
||
297 | pop edx |
||
298 | } |
||
299 | #ifdef NEED_RETURN |
||
300 | return _EAX; |
||
301 | #endif |
||
302 | } |
||
303 | #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ |
||
304 | #if (defined(_MSC_VER) && _MSC_VER >= 700) |
||
305 | #if (defined(_M_IX86) && _M_IX86 >= 300) |
||
306 | /* Reenable missing return value warning */ |
||
307 | # pragma warning( default : 4035 ) |
||
308 | #endif |
||
309 | #endif |
||
310 | #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */ |