; Origin: Kolibri OS Subversion repository, revision 6725 (author: siemargl).
;===========================================================================
; Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
;
; See the accompanying file LICENSE, version 2000-Apr-09 or later
; (the contents of which are also included in zip.h) for terms of use.
; If, for some reason, all these files are missing, the Info-ZIP license
; also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
;===========================================================================
; crc_i386.asm, optimized CRC calculation function for Zip and UnZip,
; created by Paul Kienitz and Christian Spieler.  Last revised 07 Jan 2007.
;
; Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
;   fixed to assemble with masm by not using .model directive which makes
;   assumptions about segment alignment.  Also,
;   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
;   than lodsb, and other misc. changes resulting in the following performance
;   increases:
;
;      unrolled loops                NO_UNROLLED_LOOPS
;      *8    >8      <8              *8      >8      <8
;
;      +54%  +42%    +35%            +82%    +52%    +25%
;
;   first item in each table is input buffer length, even multiple of 8
;   second item in each table is input buffer length, > 8
;   third item in each table is input buffer length, < 8
;
; Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
;   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
;   UNIX AS source crc_i386.S. This new code can be disabled by defining
;   the macro symbol NO_32_BIT_LOADS.
;
; Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
;   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
;   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
;   This optimization is controlled by the macro symbol __686 and is disabled
;   by default. (This default is based on the assumption that most users
;   do not yet work on a Pentium Pro or Pentium II machine ...)
;
; Revised 25-Mar-98, Cosmin Truta (cosmint@cs.ubbcluj.ro)
;   Working without .model directive caused tasm32 version 5.0 to produce
;   bad object code. The optimized alignments can be optionally disabled
;   by defining NO_ALIGN, thus allowing to use .model flat. There is no need
;   to define this macro if using other versions of tasm.
;
; Revised 16-Jan-2005, Cosmin Truta (cosmint@cs.ubbcluj.ro)
;   Enabled the 686 build by default, because there are hardly any pre-686 CPUs
;   in serious use nowadays. (See the 12-Oct-97 note above.)
;
; Revised 03-Jan-2006, Chr. Spieler
;   Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
;   data buffer in loop body (adjust pointer only once in loop body and use
;   offsets to access each item); added additional support for the "unfolded
;   tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
;
; Revised 07-Jan-2007, Chr. Spieler
;   Recognize additional conditional flag CRC_TABLE_ONLY that prevents
;   compilation of the crc32() function.
;
; FLAT memory model assumed.
;
; Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
; This results in shorter code at the expense of reduced performance.
;
;==============================================================================
;
; Do NOT assemble this source if external crc32 routine from zlib gets used,
; or only the precomputed CRC_32_Table is needed.
;
    IFNDEF USE_ZLIB
    IFNDEF CRC_TABLE_ONLY
;
; Both guards above stay open for the whole module; their matching ENDIFs
; are the last directives before the final "end".
;
        .386p
        name    crc_i386

    ; NO_ALIGN selects the flat model instead of the hand-aligned segment.
    IFDEF NO_ALIGN
        .model flat
    ENDIF

    ; The 686-tuned code path is the default; define PRE_686 to opt out.
    IFNDEF PRE_686
    IFNDEF __686
__686   EQU     1 ; optimize for Pentium Pro, Pentium II and compatible CPUs
    ENDIF
    ENDIF

; External C helper; its return value (eax) is used as the crc table base.
extrn   _get_crc_table:near    ; ZCONST ulg near *get_crc_table(void);

;
    IFNDEF NO_STD_STACKFRAME
        ; Use a `standard' stack frame setup on routine entry and exit.
        ; Actually, this option is set as default, because it results
        ; in smaller code !!
        ;
        ; STD_ENTRY saves ebp and copies esp into it.  The saved ebp is at
        ; [ebp] and the near return address at [ebp+4], so the three dword
        ; arguments are found at ebp+08H, ebp+0CH and ebp+10H.
STD_ENTRY       MACRO
                push    ebp
                mov     ebp,esp
        ENDM

        Arg1    EQU     08H[ebp]
        Arg2    EQU     0CH[ebp]
        Arg3    EQU     10H[ebp]

        ; STD_LEAVE only restores ebp; esp must already be back at the
        ; saved-ebp slot (all other pushes popped) when it is expanded.
STD_LEAVE       MACRO
                pop     ebp
        ENDM

    ELSE  ; NO_STD_STACKFRAME

        ; No frame pointer: entry and exit macros expand to nothing and the
        ; arguments are addressed relative to esp.
STD_ENTRY       MACRO
        ENDM

        ; These offsets are only valid after _crc32 has pushed its five
        ; registers (edi, esi, ebx, edx, ecx): 5*4 bytes of saved registers
        ; plus the 4-byte return address put the first argument at esp+18H.
        ; Changing the number of pushes in _crc32 requires adjusting them.
        Arg1    EQU     18H[esp]
        Arg2    EQU     1CH[esp]
        Arg3    EQU     20H[esp]

STD_LEAVE       MACRO
        ENDM

    ENDIF ; ?NO_STD_STACKFRAME

; These two (three) macros make up the loop body of the CRC32 cruncher.
; registers modified:
;   eax  : crc value "c"
;   esi  : pointer to next data byte (or dword) "buf++"
; registers read:
;   edi  : pointer to base of crc_table array
; scratch registers:
;   ebx  : index into crc_table array
;          (requires upper three bytes = 0 when __686 is undefined)
;
; Do_CRC        : one table step, c = (c >> 8) ^ table[c & 0xFF]; the data
;                 byte must already have been xor-ed into al.
; Do_CRC_byte   : xor in the byte at [esi], advance esi by one, then Do_CRC.
; Do_CRC_byteof : xor in the byte at [esi+ofs], then Do_CRC; esi is NOT
;                 advanced, the caller adjusts it once per unrolled turn.
    IFNDEF  __686 ; optimize for 386, 486, Pentium
        ; Only bl is written here, so ebx is a valid table index only while
        ; its upper three bytes are zero.
Do_CRC  MACRO
                mov     bl,al                ; tmp = c & 0xFF
                shr     eax,8                ; c = (c >> 8)
                xor     eax,[edi+ebx*4]      ;  ^ table[tmp]
        ENDM
    ELSE ; __686 : optimize for Pentium Pro, Pentium II and compatible CPUs
        ; movzx rewrites all of ebx, so no prior clearing of ebx is needed.
Do_CRC  MACRO
                movzx   ebx,al                 ; tmp = c & 0xFF
                shr     eax,8                  ; c = (c >> 8)
                xor     eax,[edi+ebx*4]        ;  ^ table[tmp]
        ENDM
    ENDIF ; ?__686
Do_CRC_byte     MACRO
                xor     al, byte ptr [esi]     ; c ^= *buf
                inc     esi                    ; buf++
                Do_CRC                         ; c = (c >> 8) ^ table[c & 0xFF]
        ENDM
Do_CRC_byteof   MACRO   ofs
                xor     al, byte ptr [esi+ofs] ; c ^= *(buf+ofs)
                Do_CRC                         ; c = (c >> 8) ^ table[c & 0xFF]
        ENDM
    IFNDEF  NO_32_BIT_LOADS
152
      IFDEF IZ_CRCOPTIM_UNFOLDTBL
153
        ; the edx register is needed in crc calculation
154
        SavLen  EQU     Arg3
155
 
156
UpdCRC_dword    MACRO
157
                movzx   ebx,al                 ; tmp = c & 0xFF
158
                mov     edx,[edi+ebx*4+3072]   ;  table[256*3+tmp]
159
                movzx   ebx,ah                 ; tmp = (c>>8) & 0xFF
160
                shr     eax,16                 ;
161
                xor     edx,[edi+ebx*4+2048]   ;  ^ table[256*2+tmp]
162
                movzx   ebx,al                 ; tmp = (c>>16) & 0xFF
163
                shr     eax,8                  ; tmp = (c>>24)
164
                xor     edx,[edi+ebx*4+1024]   ;  ^ table[256*1+tmp]
165
                mov     eax,[edi+eax*4]        ;  ^ table[256*0+tmp]
166
                xor     eax,edx                ; ..
167
        ENDM
168
UpdCRC_dword_sh MACRO   dwPtrIncr
169
                movzx   ebx,al                 ; tmp = c & 0xFF
170
                mov     edx,[edi+ebx*4+3072]   ;  table[256*3+tmp]
171
                movzx   ebx,ah                 ; tmp = (c>>8) & 0xFF
172
                xor     edx,[edi+ebx*4+2048]   ;  ^ table[256*2+tmp]
173
                shr     eax,16                 ;
174
                movzx   ebx,al                 ; tmp = (c>>16) & 0xFF
175
                add     esi, 4*dwPtrIncr       ; ((ulg *)buf) += dwPtrIncr
176
                shr     eax,8                  ; tmp = (c>>24)
177
                xor     edx,[edi+ebx*4+1024]   ;  ^ table[256*1+tmp]
178
                mov     eax,[edi+eax*4]        ;  ^ table[256*0+tmp]
179
                xor     eax,edx                ; ..
180
        ENDM
181
      ELSE ; IZ_CRCOPTIM_UNFOLDTBL
182
        ; the edx register is not needed anywhere else
183
        SavLen  EQU     edx
184
 
185
UpdCRC_dword    MACRO
186
                Do_CRC
187
                Do_CRC
188
                Do_CRC
189
                Do_CRC
190
        ENDM
191
UpdCRC_dword_sh MACRO   dwPtrIncr
192
                Do_CRC
193
                Do_CRC
194
                add     esi, 4*dwPtrIncr       ; ((ulg *)buf) += dwPtrIncr
195
                Do_CRC
196
                Do_CRC
197
        ENDM
198
      ENDIF ; ?IZ_CRCOPTIM_UNFOLDTBL
199
Do_CRC_dword    MACRO
200
                xor     eax, dword ptr [esi]   ; c ^= *(ulg *)buf
201
                UpdCRC_dword_sh 1              ; ... ((ulg *)buf)++
202
        ENDM
203
Do_CRC_4dword   MACRO
204
                xor     eax, dword ptr [esi]    ; c ^= *(ulg *)buf
205
                UpdCRC_dword
206
                xor     eax, dword ptr [esi+4]  ; c ^= *((ulg *)buf+1)
207
                UpdCRC_dword
208
                xor     eax, dword ptr [esi+8]  ; c ^= *((ulg *)buf+2)
209
                UpdCRC_dword
210
                xor     eax, dword ptr [esi+12] ; c ^= *((ulg *)buf]+3
211
                UpdCRC_dword_sh	4               ; ... ((ulg *)buf)+=4
212
        ENDM
213
    ENDIF ; !NO_32_BIT_LOADS
214
 
215
    IFNDEF NO_ALIGN
_TEXT   segment use32 para public 'CODE'
    ELSE
_TEXT   segment use32
    ENDIF
        assume  CS: _TEXT

;-----------------------------------------------------------------------------
; ulg crc32(ulg crc, ZCONST uch *buf, extent len)
;
; In:    Arg1 = crc (running value), Arg2 = buf, Arg3 = len, all on the stack
; Out:   eax  = updated crc; 0 when buf is NULL
; Saved: edi, esi, ebx, edx, ecx are pushed on entry and popped on exit
;        (ebp too when the standard stack frame is in use)
; Calls: _get_crc_table; its result in eax becomes the table base in edi
;
; Register roles inside the routine:
;   eax = c (complemented crc), esi = buf, edi = table base,
;   ecx = loop counter, ebx = table index scratch,
;   SavLen = length remaining after dword alignment
;
; Note: SavLen is defined together with the loop macros.  When it is equated
; to Arg3, "mov SavLen,ecx" overwrites the len argument's stack slot.
;-----------------------------------------------------------------------------
        public  _crc32
_crc32          proc    near  ; ulg crc32(ulg crc, ZCONST uch *buf, extent len)
                STD_ENTRY
                ; exactly five pushes: the esp-relative Arg offsets of the
                ; NO_STD_STACKFRAME variant depend on this count
                push    edi
                push    esi
                push    ebx
                push    edx
                push    ecx

                mov     esi,Arg2            ; 2nd arg: uch *buf
                sub     eax,eax             ;> if (!buf)
                test    esi,esi             ;>   return 0;
                jz      fine                ;> else {

                call    _get_crc_table
                mov     edi,eax
                mov     eax,Arg1            ; 1st arg: ulg crc
    IFNDEF __686
                sub     ebx,ebx             ; ebx=0; make bl usable as a dword
    ENDIF
                mov     ecx,Arg3            ; 3rd arg: extent len
                not     eax                 ;>   c = ~crc;

                test    ecx,ecx             ; ZF set when len == 0
    IFNDEF  NO_UNROLLED_LOOPS
                jz      bail
    IFNDEF  NO_32_BIT_LOADS
; Consume single bytes until buf is dword aligned or len runs out; if len
; reaches zero here, all later stages see a zero count and fall through.
align_loop:
                test    esi,3               ; align buf pointer on next
                jz      SHORT aligned_now   ;  dword boundary
                Do_CRC_byte
                dec     ecx
                jnz     align_loop
aligned_now:
    ENDIF ; !NO_32_BIT_LOADS
                mov     SavLen,ecx          ; save current len for later
                shr     ecx,4               ; ecx = len / 16
                jz      No_Sixteens
    IFNDEF NO_ALIGN
; align loop head at start of 486 internal cache line !!
                align   16
    ENDIF
Next_Sixteen:                               ; 16 bytes per turn
    IFNDEF  NO_32_BIT_LOADS
                Do_CRC_4dword
    ELSE ; NO_32_BIT_LOADS
                Do_CRC_byteof   0
                Do_CRC_byteof   1
                Do_CRC_byteof   2
                Do_CRC_byteof   3
                Do_CRC_byteof   4
                Do_CRC_byteof   5
                Do_CRC_byteof   6
                Do_CRC_byteof   7
                Do_CRC_byteof   8
                Do_CRC_byteof   9
                Do_CRC_byteof   10
                Do_CRC_byteof   11
                Do_CRC_byteof   12
                Do_CRC_byteof   13
                Do_CRC_byteof   14
                Do_CRC_byteof   15
                add     esi, 16                 ; buf += 16
    ENDIF ; ?NO_32_BIT_LOADS
                dec     ecx
                jnz     Next_Sixteen
No_Sixteens:
                mov     ecx,SavLen
                and     ecx,00000000FH      ; ecx = len % 16
    IFNDEF  NO_32_BIT_LOADS
                shr     ecx,2               ; ecx = (len % 16) / 4
                jz      SHORT No_Fours
Next_Four:                                  ; 4 bytes per turn
                Do_CRC_dword
                dec     ecx
                jnz     Next_Four
No_Fours:
                mov     ecx,SavLen
                and     ecx,000000003H      ; ecx = len % 4
    ENDIF ; !NO_32_BIT_LOADS
    ENDIF ; !NO_UNROLLED_LOOPS
                ; ZF here comes from the last "and" above, or from
                ; "test ecx,ecx" when NO_UNROLLED_LOOPS is defined
                jz      SHORT bail          ;>   if (len)
    IFNDEF NO_ALIGN
; align loop head at start of 486 internal cache line !!
                align   16
    ENDIF
loupe:                                      ;>     do {
                Do_CRC_byte                 ;        c = CRC32(c,*buf++,crctab);
                dec     ecx                 ;>     } while (--len);
                jnz     loupe

bail:                                       ;> }
                not     eax                 ;> return ~c;
fine:                                       ; NULL-buf exit joins here, eax = 0
                pop     ecx
                pop     edx
                pop     ebx
                pop     esi
                pop     edi
                STD_LEAVE
                ret
_crc32          endp

_TEXT   ends
;
; Close the two guards opened at the top of the module; "end" stays outside
; of them so that it is assembled in every configuration.
    ENDIF ; !CRC_TABLE_ONLY
    ENDIF ; !USE_ZLIB
;
end