Rev 6639 | Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6617 | IgorA | 1 | ; crc32.asm -- compute the CRC-32 of a data stream |
2 | ; Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler |
||
3 | ; For conditions of distribution and use, see copyright notice in zlib.inc |
||
4 | |||
5 | ; Thanks to Rodney Brown |
||
6 | ; CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing |
||
7 | ; tables for updating the shift register in one step with three exclusive-ors |
||
8 | ; instead of four steps with four exclusive-ors. This results in about a |
||
9 | ; factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. |
||
10 | |||
11 | |||
12 | ; Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
||
13 | ; protection on the static variables used to control the first-use generation |
||
14 | ; of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
||
15 | ; first call get_crc_table() to initialize the tables before allowing more than |
||
16 | ; one thread to use crc32(). |
||
17 | |||
18 | ; Definitions for doing the crc four data bytes at a time. |
||
19 | |||
20 | TBLS equ 1 |
||
21 | |||
22 | if DYNAMIC_CRC_TABLE eq 1 |
||
23 | |||
24 | align 4 |
||
25 | crc_table_empty dd 1 |
||
26 | align 4 |
||
27 | crc_table rd TBLS*256 |
||
28 | |||
29 | ; Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
||
30 | ; x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
||
31 | |||
32 | ; Polynomials over GF(2) are represented in binary, one bit per coefficient, |
||
33 | ; with the lowest powers in the most significant bit. Then adding polynomials |
||
34 | ; is just exclusive-or, and multiplying a polynomial by x is a right shift by |
||
35 | ; one. If we call the above polynomial p, and represent a byte as the |
||
36 | ; polynomial q, also with the lowest power in the most significant bit (so the |
||
37 | ; byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, |
||
38 | ; where a mod b means the remainder after dividing a by b. |
||
39 | |||
40 | ; This calculation is done using the shift-register method of multiplying and |
||
41 | ; taking the remainder. The register is initialized to zero, and for each |
||
42 | ; incoming bit, x^32 is added mod p to the register if the bit is a one (where |
||
43 | ; x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by |
||
44 | ; x (which is shifting right by one and adding x^32 mod p if the bit shifted |
||
45 | ; out is a one). We start with the highest power (least significant bit) of |
||
46 | ; q and repeat for all eight bits of q. |
||
47 | |||
48 | ; The first table is simply the CRC of all possible eight bit values. This is |
||
49 | ; all the information needed to generate CRCs on data a byte at a time for all |
||
50 | ; combinations of CRC register values and incoming bytes. The remaining tables |
||
51 | ; allow for word-at-a-time CRC calculation for both big-endian and little- |
||
52 | ; endian machines, where a word is four bytes. |
||
53 | |||
54 | ;void () |
||
55 | align 4 |
||
56 | proc make_crc_table uses ecx edx edi |
||
57 | zlib_debug 'make_crc_table' |
||
58 | |||
59 | ; generate a crc for every 8-bit value |
||
60 | xor edx, edx |
||
61 | mov edi, crc_table |
||
62 | .1: |
||
63 | mov ecx, 8 |
||
64 | mov eax, edx |
||
65 | .2: |
||
66 | shr eax, 1 |
||
67 | jnc @f |
||
68 | xor eax, 0xEDB88320 |
||
69 | @@: |
||
70 | loop .2 |
||
71 | stosd |
||
72 | inc dl |
||
73 | jnz .1 |
||
74 | |||
75 | mov dword[crc_table_empty],0 |
||
76 | ret |
||
77 | endp |
||
78 | |||
79 | else ;!DYNAMIC_CRC_TABLE |
||
80 | ; ======================================================================== |
||
81 | ; Tables of CRC-32s of all single-byte values, made by make_crc_table(). |
||
82 | |||
83 | ;include 'crc32.inc' |
||
84 | end if ;DYNAMIC_CRC_TABLE |
||
85 | |||
86 | ; ========================================================================= |
||
87 | ; This function can be used by asm versions of crc32() |
||
88 | |||
89 | ;const z_crc_t* () |
||
90 | align 4 |
||
91 | proc get_crc_table |
||
92 | if DYNAMIC_CRC_TABLE eq 1 |
||
93 | cmp dword[crc_table_empty],0 |
||
94 | je @f ;if (..) |
||
95 | call make_crc_table |
||
96 | @@: |
||
97 | end if ;DYNAMIC_CRC_TABLE |
||
98 | mov eax,crc_table |
||
99 | ret |
||
100 | endp |
||
101 | |||
102 | ; ========================================================================= |
||
103 | macro DO1 |
||
104 | { |
||
105 | xor al,byte[esi] |
||
106 | xor al,ah |
||
107 | mov eax,[crc_table+eax*4] |
||
108 | inc esi |
||
109 | } |
||
110 | macro DO8 |
||
111 | { |
||
112 | DO1 |
||
113 | DO1 |
||
114 | DO1 |
||
115 | DO1 |
||
116 | DO1 |
||
117 | DO1 |
||
118 | DO1 |
||
119 | DO1 |
||
120 | } |
||
121 | |||
122 | ; ========================================================================= |
||
123 | ;unsigned long (crc, buf, len) |
||
124 | ; unsigned long crc |
||
125 | ; unsigned char *buf |
||
126 | ; uInt len |
||
127 | align 4 |
||
128 | proc calc_crc32 uses ecx esi, p1crc:dword, buf:dword, len:dword |
||
129 | xor eax,eax |
||
130 | mov esi,[buf] |
||
131 | zlib_debug 'calc_crc32 buf = %d',esi |
||
132 | cmp esi,Z_NULL |
||
133 | je .end_f ;if (..==0) return 0 |
||
134 | |||
135 | if DYNAMIC_CRC_TABLE eq 1 |
||
136 | cmp dword[crc_table_empty],0 |
||
137 | je @f ;if (..) |
||
138 | call make_crc_table |
||
139 | @@: |
||
140 | end if |
||
141 | |||
142 | mov eax,[p1crc] |
||
143 | xor eax,0xffffffff |
||
144 | mov [p1crc],eax |
||
145 | mov ecx,[len] |
||
146 | align 4 |
||
147 | .cycle0: |
||
148 | cmp ecx,8 |
||
149 | jl @f |
||
150 | DO8 |
||
151 | sub ecx,8 |
||
152 | jmp .cycle0 |
||
153 | align 4 |
||
154 | @@: |
||
155 | cmp ecx,1 |
||
156 | jl @f |
||
157 | DO1 |
||
158 | dec ecx |
||
159 | jmp @b |
||
160 | @@: |
||
161 | mov eax,[p1crc] |
||
162 | xor eax,0xffffffff |
||
163 | .end_f: |
||
164 | ret |
||
165 | endp |
||
166 | |||
167 | GF2_DIM equ 32 ;dimension of GF(2) vectors (length of CRC) |
||
168 | |||
169 | ; ========================================================================= |
||
170 | ;unsigned long (mat, vec) |
||
171 | ; unsigned long *mat |
||
172 | ; unsigned long vec |
||
173 | align 4 |
||
174 | proc gf2_matrix_times, mat:dword, vec:dword |
||
175 | ; unsigned long sum; |
||
176 | |||
177 | ; sum = 0; |
||
178 | ; while (vec) { |
||
179 | ; if (vec & 1) |
||
180 | ; sum ^= *mat; |
||
181 | ; vec >>= 1; |
||
182 | ; mat++; |
||
183 | ; } |
||
184 | ; return sum; |
||
185 | ret |
||
186 | endp |
||
187 | |||
188 | ; ========================================================================= |
||
189 | ;local void (square, mat) |
||
190 | ; unsigned long *square |
||
191 | ; unsigned long *mat |
||
192 | align 4 |
||
193 | proc gf2_matrix_square, square:dword, mat:dword |
||
194 | ; int n; |
||
195 | |||
196 | ; for (n = 0; n < GF2_DIM; n++) |
||
197 | ; square[n] = gf2_matrix_times(mat, mat[n]); |
||
198 | ret |
||
199 | endp |
||
200 | |||
201 | ; ========================================================================= |
||
202 | ;uLong (crc1, crc2, len2) |
||
203 | ; uLong crc1 |
||
204 | ; uLong crc2 |
||
205 | ; z_off64_t len2 |
||
206 | align 4 |
||
207 | proc crc32_combine_, crc1:dword, crc2:dword, len2:dword |
||
208 | ; int n; |
||
209 | ; unsigned long row; |
||
210 | ; unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ |
||
211 | ; unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ |
||
212 | |||
213 | ; degenerate case (also disallow negative lengths) |
||
214 | ; if (len2 <= 0) |
||
215 | ; return crc1; |
||
216 | |||
217 | ; put operator for one zero bit in odd |
||
218 | ; odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
||
219 | ; row = 1; |
||
220 | ; for (n = 1; n < GF2_DIM; n++) { |
||
221 | ; odd[n] = row; |
||
222 | ; row <<= 1; |
||
223 | ; } |
||
224 | |||
225 | ; put operator for two zero bits in even |
||
226 | ; gf2_matrix_square(even, odd); |
||
227 | |||
228 | ; put operator for four zero bits in odd |
||
229 | ; gf2_matrix_square(odd, even); |
||
230 | |||
231 | ; apply len2 zeros to crc1 (first square will put the operator for one |
||
232 | ; zero byte, eight zero bits, in even) |
||
233 | ; do { |
||
234 | ; apply zeros operator for this bit of len2 |
||
235 | ; gf2_matrix_square(even, odd); |
||
236 | ; if (len2 & 1) |
||
237 | ; crc1 = gf2_matrix_times(even, crc1); |
||
238 | ; len2 >>= 1; |
||
239 | |||
240 | ; if no more bits set, then done |
||
241 | ; if (len2 == 0) |
||
242 | ; break; |
||
243 | |||
244 | ; another iteration of the loop with odd and even swapped |
||
245 | ; gf2_matrix_square(odd, even); |
||
246 | ; if (len2 & 1) |
||
247 | ; crc1 = gf2_matrix_times(odd, crc1); |
||
248 | ; len2 >>= 1; |
||
249 | |||
250 | ; if no more bits set, then done |
||
251 | ; } while (len2 != 0); |
||
252 | |||
253 | ; return combined crc |
||
254 | ; crc1 ^= crc2; |
||
255 | ; return crc1; |
||
256 | ret |
||
257 | endp |
||
258 | |||
259 | ; ========================================================================= |
||
260 | ;uLong (crc1, crc2, len2) |
||
261 | ; uLong crc1 |
||
262 | ; uLong crc2 |
||
263 | ; z_off_t len2 |
||
264 | align 4 |
||
265 | proc crc32_combine, crc1:dword, crc2:dword, len2:dword |
||
266 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
||
267 | ret |
||
268 | endp |
||
269 | |||
270 | ;uLong (crc1, crc2, len2) |
||
271 | ; uLong crc1 |
||
272 | ; uLong crc2 |
||
273 | ; z_off64_t len2 |
||
274 | align 4 |
||
275 | proc crc32_combine64, crc1:dword, crc2:dword, len2:dword |
||
276 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
||
277 | ret |
||
278 | endp=><=>>=>> |