Rev 6617 | Rev 6673 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 6617 | Rev 6639 | ||
---|---|---|---|
1 | ; crc32.asm -- compute the CRC-32 of a data stream |
1 | ; crc32.asm -- compute the CRC-32 of a data stream |
2 | ; Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler |
2 | ; Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler |
3 | ; For conditions of distribution and use, see copyright notice in zlib.inc |
3 | ; For conditions of distribution and use, see copyright notice in zlib.inc |
4 | 4 | ||
5 | ; Thanks to Rodney Brown |
5 | ; Thanks to Rodney Brown |
6 | ; CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing |
6 | ; CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing |
7 | ; tables for updating the shift register in one step with three exclusive-ors |
7 | ; tables for updating the shift register in one step with three exclusive-ors |
8 | ; instead of four steps with four exclusive-ors. This results in about a |
8 | ; instead of four steps with four exclusive-ors. This results in about a |
9 | ; factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. |
9 | ; factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. |
10 | 10 | ||
11 | 11 | ||
12 | ; Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
12 | ; Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
13 | ; protection on the static variables used to control the first-use generation |
13 | ; protection on the static variables used to control the first-use generation |
14 | ; of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
14 | ; of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
15 | ; first call get_crc_table() to initialize the tables before allowing more than |
15 | ; first call get_crc_table() to initialize the tables before allowing more than |
16 | ; one thread to use crc32(). |
16 | ; one thread to use crc32(). |
17 | 17 | ||
18 | ; Definitions for doing the crc four data bytes at a time. |
18 | ; Definitions for doing the crc four data bytes at a time. |
19 | 19 | ||
20 | TBLS equ 1 |
20 | TBLS equ 1 |
21 | 21 | ||
22 | if DYNAMIC_CRC_TABLE eq 1 |
22 | if DYNAMIC_CRC_TABLE eq 1 |
23 | 23 | ||
24 | align 4 |
24 | align 4 |
25 | crc_table_empty dd 1 |
25 | crc_table_empty dd 1 |
26 | align 4 |
26 | align 4 |
27 | crc_table rd TBLS*256 |
27 | crc_table rd TBLS*256 |
28 | 28 | ||
29 | ; Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
29 | ; Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
30 | ; x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
30 | ; x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
31 | 31 | ||
32 | ; Polynomials over GF(2) are represented in binary, one bit per coefficient, |
32 | ; Polynomials over GF(2) are represented in binary, one bit per coefficient, |
33 | ; with the lowest powers in the most significant bit. Then adding polynomials |
33 | ; with the lowest powers in the most significant bit. Then adding polynomials |
34 | ; is just exclusive-or, and multiplying a polynomial by x is a right shift by |
34 | ; is just exclusive-or, and multiplying a polynomial by x is a right shift by |
35 | ; one. If we call the above polynomial p, and represent a byte as the |
35 | ; one. If we call the above polynomial p, and represent a byte as the |
36 | ; polynomial q, also with the lowest power in the most significant bit (so the |
36 | ; polynomial q, also with the lowest power in the most significant bit (so the |
37 | ; byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, |
37 | ; byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, |
38 | ; where a mod b means the remainder after dividing a by b. |
38 | ; where a mod b means the remainder after dividing a by b. |
39 | 39 | ||
40 | ; This calculation is done using the shift-register method of multiplying and |
40 | ; This calculation is done using the shift-register method of multiplying and |
41 | ; taking the remainder. The register is initialized to zero, and for each |
41 | ; taking the remainder. The register is initialized to zero, and for each |
42 | ; incoming bit, x^32 is added mod p to the register if the bit is a one (where |
42 | ; incoming bit, x^32 is added mod p to the register if the bit is a one (where |
43 | ; x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by |
43 | ; x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by |
44 | ; x (which is shifting right by one and adding x^32 mod p if the bit shifted |
44 | ; x (which is shifting right by one and adding x^32 mod p if the bit shifted |
45 | ; out is a one). We start with the highest power (least significant bit) of |
45 | ; out is a one). We start with the highest power (least significant bit) of |
46 | ; q and repeat for all eight bits of q. |
46 | ; q and repeat for all eight bits of q. |
47 | 47 | ||
48 | ; The first table is simply the CRC of all possible eight bit values. This is |
48 | ; The first table is simply the CRC of all possible eight bit values. This is |
49 | ; all the information needed to generate CRCs on data a byte at a time for all |
49 | ; all the information needed to generate CRCs on data a byte at a time for all |
50 | ; combinations of CRC register values and incoming bytes. The remaining tables |
50 | ; combinations of CRC register values and incoming bytes. The remaining tables |
51 | ; allow for word-at-a-time CRC calculation for both big-endian and little- |
51 | ; allow for word-at-a-time CRC calculation for both big-endian and little- |
52 | ; endian machines, where a word is four bytes. |
52 | ; endian machines, where a word is four bytes. |
53 | 53 | ||
54 | ;void () |
54 | ;void () |
55 | align 4 |
55 | align 4 |
56 | proc make_crc_table uses ecx edx edi |
56 | proc make_crc_table uses ecx edx edi |
57 | zlib_debug 'make_crc_table' |
57 | zlib_debug 'make_crc_table' |
58 | 58 | ||
59 | ; generate a crc for every 8-bit value |
59 | ; generate a crc for every 8-bit value |
60 | xor edx, edx |
60 | xor edx, edx |
61 | mov edi, crc_table |
61 | mov edi, crc_table |
62 | .1: |
62 | .1: |
63 | mov ecx, 8 |
63 | mov ecx, 8 |
64 | mov eax, edx |
64 | mov eax, edx |
65 | .2: |
65 | .2: |
66 | shr eax, 1 |
66 | shr eax, 1 |
67 | jnc @f |
67 | jnc @f |
68 | xor eax, 0xEDB88320 |
68 | xor eax, 0xEDB88320 |
69 | @@: |
69 | @@: |
70 | loop .2 |
70 | loop .2 |
71 | stosd |
71 | stosd |
72 | inc dl |
72 | inc dl |
73 | jnz .1 |
73 | jnz .1 |
74 | 74 | ||
75 | mov dword[crc_table_empty],0 |
75 | mov dword[crc_table_empty],0 |
76 | ret |
76 | ret |
77 | endp |
77 | endp |
78 | 78 | ||
79 | else ;!DYNAMIC_CRC_TABLE |
79 | else ;!DYNAMIC_CRC_TABLE |
80 | ; ======================================================================== |
80 | ; ======================================================================== |
81 | ; Tables of CRC-32s of all single-byte values, made by make_crc_table(). |
81 | ; Tables of CRC-32s of all single-byte values, made by make_crc_table(). |
82 | 82 | ||
83 | ;include 'crc32.inc' |
83 | ;include 'crc32.inc' |
84 | end if ;DYNAMIC_CRC_TABLE |
84 | end if ;DYNAMIC_CRC_TABLE |
85 | 85 | ||
86 | ; ========================================================================= |
86 | ; ========================================================================= |
87 | ; This function can be used by asm versions of crc32() |
87 | ; This function can be used by asm versions of crc32() |
88 | 88 | ||
89 | ;const z_crc_t* () |
89 | ;const z_crc_t* () |
90 | align 4 |
90 | align 4 |
91 | proc get_crc_table |
91 | proc get_crc_table |
92 | if DYNAMIC_CRC_TABLE eq 1 |
92 | if DYNAMIC_CRC_TABLE eq 1 |
93 | cmp dword[crc_table_empty],0 |
93 | cmp dword[crc_table_empty],0 |
94 | je @f ;if (..) |
94 | je @f ;if (..) |
95 | call make_crc_table |
95 | call make_crc_table |
96 | @@: |
96 | @@: |
97 | end if ;DYNAMIC_CRC_TABLE |
97 | end if |
98 | mov eax,crc_table |
98 | mov eax,crc_table |
99 | ret |
99 | ret |
100 | endp |
100 | endp |
101 | 101 | ||
102 | ; ========================================================================= |
102 | ; ========================================================================= |
103 | macro DO1 |
- | |
104 | { |
- | |
105 | xor al,byte[esi] |
- | |
106 | xor al,ah |
- | |
107 | mov eax,[crc_table+eax*4] |
- | |
108 | inc esi |
- | |
109 | } |
- | |
110 | macro DO8 |
- | |
111 | { |
- | |
112 | DO1 |
- | |
113 | DO1 |
- | |
114 | DO1 |
- | |
115 | DO1 |
- | |
116 | DO1 |
- | |
117 | DO1 |
- | |
118 | DO1 |
- | |
119 | DO1 |
- | |
120 | } |
- | |
121 | - | ||
122 | ; ========================================================================= |
- | |
123 | ;unsigned long (crc, buf, len) |
103 | ;unsigned long (crc, buf, len) |
124 | ; unsigned long crc |
104 | ; unsigned long crc |
125 | ; unsigned char *buf |
105 | ; unsigned char *buf |
126 | ; uInt len |
106 | ; uInt len |
127 | align 4 |
107 | align 4 |
128 | proc calc_crc32 uses ecx esi, p1crc:dword, buf:dword, len:dword |
108 | proc calc_crc32 uses ecx esi, p1crc:dword, buf:dword, len:dword |
129 | xor eax,eax |
109 | xor eax,eax |
130 | mov esi,[buf] |
110 | mov esi,[buf] |
131 | zlib_debug 'calc_crc32 buf = %d',esi |
111 | zlib_debug 'calc_crc32 buf = %d',esi |
132 | cmp esi,Z_NULL |
112 | cmp esi,Z_NULL |
133 | je .end_f ;if (..==0) return 0 |
113 | je .end_f ;if (..==0) return 0 |
134 | 114 | ||
135 | if DYNAMIC_CRC_TABLE eq 1 |
115 | if DYNAMIC_CRC_TABLE eq 1 |
136 | cmp dword[crc_table_empty],0 |
116 | cmp dword[crc_table_empty],0 |
137 | je @f ;if (..) |
117 | je @f ;if (..) |
138 | call make_crc_table |
118 | call make_crc_table |
139 | @@: |
119 | @@: |
140 | end if |
120 | end if |
141 | 121 | ||
142 | mov eax,[p1crc] |
122 | mov eax,[p1crc] |
143 | xor eax,0xffffffff |
- | |
144 | mov [p1crc],eax |
- | |
145 | mov ecx,[len] |
123 | mov ecx,[len] |
146 | align 4 |
- | |
147 | .cycle0: |
- | |
148 | cmp ecx,8 |
124 | call crc |
149 | jl @f |
- | |
150 | DO8 |
- | |
151 | sub ecx,8 |
- | |
152 | jmp .cycle0 |
- | |
153 | align 4 |
- | |
154 | @@: |
- | |
155 | cmp ecx,1 |
- | |
156 | jl @f |
- | |
157 | DO1 |
- | |
158 | dec ecx |
- | |
159 | jmp @b |
- | |
160 | @@: |
- | |
161 | mov eax,[p1crc] |
- | |
162 | xor eax,0xffffffff |
- | |
163 | .end_f: |
125 | .end_f: |
164 | ret |
126 | ret |
165 | endp |
127 | endp |
166 | 128 | ||
167 | GF2_DIM equ 32 ;dimension of GF(2) vectors (length of CRC) |
129 | GF2_DIM equ 32 ;dimension of GF(2) vectors (length of CRC) |
168 | 130 | ||
169 | ; ========================================================================= |
131 | ; ========================================================================= |
170 | ;unsigned long (mat, vec) |
132 | ;unsigned long (mat, vec) |
171 | ; unsigned long *mat |
133 | ; unsigned long *mat |
172 | ; unsigned long vec |
134 | ; unsigned long vec |
173 | align 4 |
135 | align 4 |
174 | proc gf2_matrix_times, mat:dword, vec:dword |
136 | proc gf2_matrix_times, mat:dword, vec:dword |
175 | ; unsigned long sum; |
137 | ; unsigned long sum; |
176 | 138 | ||
177 | ; sum = 0; |
139 | ; sum = 0; |
178 | ; while (vec) { |
140 | ; while (vec) { |
179 | ; if (vec & 1) |
141 | ; if (vec & 1) |
180 | ; sum ^= *mat; |
142 | ; sum ^= *mat; |
181 | ; vec >>= 1; |
143 | ; vec >>= 1; |
182 | ; mat++; |
144 | ; mat++; |
183 | ; } |
145 | ; } |
184 | ; return sum; |
146 | ; return sum; |
185 | ret |
147 | ret |
186 | endp |
148 | endp |
187 | 149 | ||
188 | ; ========================================================================= |
150 | ; ========================================================================= |
189 | ;local void (square, mat) |
151 | ;local void (square, mat) |
190 | ; unsigned long *square |
152 | ; unsigned long *square |
191 | ; unsigned long *mat |
153 | ; unsigned long *mat |
192 | align 4 |
154 | align 4 |
193 | proc gf2_matrix_square, square:dword, mat:dword |
155 | proc gf2_matrix_square, square:dword, mat:dword |
194 | ; int n; |
156 | ; int n; |
195 | 157 | ||
196 | ; for (n = 0; n < GF2_DIM; n++) |
158 | ; for (n = 0; n < GF2_DIM; n++) |
197 | ; square[n] = gf2_matrix_times(mat, mat[n]); |
159 | ; square[n] = gf2_matrix_times(mat, mat[n]); |
198 | ret |
160 | ret |
199 | endp |
161 | endp |
200 | 162 | ||
201 | ; ========================================================================= |
163 | ; ========================================================================= |
202 | ;uLong (crc1, crc2, len2) |
164 | ;uLong (crc1, crc2, len2) |
203 | ; uLong crc1 |
165 | ; uLong crc1 |
204 | ; uLong crc2 |
166 | ; uLong crc2 |
205 | ; z_off64_t len2 |
167 | ; z_off64_t len2 |
206 | align 4 |
168 | align 4 |
207 | proc crc32_combine_, crc1:dword, crc2:dword, len2:dword |
169 | proc crc32_combine_, crc1:dword, crc2:dword, len2:dword |
208 | ; int n; |
170 | ; int n; |
209 | ; unsigned long row; |
171 | ; unsigned long row; |
210 | ; unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ |
172 | ; unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ |
211 | ; unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ |
173 | ; unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ |
212 | 174 | ||
213 | ; degenerate case (also disallow negative lengths) |
175 | ; degenerate case (also disallow negative lengths) |
214 | ; if (len2 <= 0) |
176 | ; if (len2 <= 0) |
215 | ; return crc1; |
177 | ; return crc1; |
216 | 178 | ||
217 | ; put operator for one zero bit in odd |
179 | ; put operator for one zero bit in odd |
218 | ; odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
180 | ; odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
219 | ; row = 1; |
181 | ; row = 1; |
220 | ; for (n = 1; n < GF2_DIM; n++) { |
182 | ; for (n = 1; n < GF2_DIM; n++) { |
221 | ; odd[n] = row; |
183 | ; odd[n] = row; |
222 | ; row <<= 1; |
184 | ; row <<= 1; |
223 | ; } |
185 | ; } |
224 | 186 | ||
225 | ; put operator for two zero bits in even |
187 | ; put operator for two zero bits in even |
226 | ; gf2_matrix_square(even, odd); |
188 | ; gf2_matrix_square(even, odd); |
227 | 189 | ||
228 | ; put operator for four zero bits in odd |
190 | ; put operator for four zero bits in odd |
229 | ; gf2_matrix_square(odd, even); |
191 | ; gf2_matrix_square(odd, even); |
230 | 192 | ||
231 | ; apply len2 zeros to crc1 (first square will put the operator for one |
193 | ; apply len2 zeros to crc1 (first square will put the operator for one |
232 | ; zero byte, eight zero bits, in even) |
194 | ; zero byte, eight zero bits, in even) |
233 | ; do { |
195 | ; do { |
234 | ; apply zeros operator for this bit of len2 |
196 | ; apply zeros operator for this bit of len2 |
235 | ; gf2_matrix_square(even, odd); |
197 | ; gf2_matrix_square(even, odd); |
236 | ; if (len2 & 1) |
198 | ; if (len2 & 1) |
237 | ; crc1 = gf2_matrix_times(even, crc1); |
199 | ; crc1 = gf2_matrix_times(even, crc1); |
238 | ; len2 >>= 1; |
200 | ; len2 >>= 1; |
239 | 201 | ||
240 | ; if no more bits set, then done |
202 | ; if no more bits set, then done |
241 | ; if (len2 == 0) |
203 | ; if (len2 == 0) |
242 | ; break; |
204 | ; break; |
243 | 205 | ||
244 | ; another iteration of the loop with odd and even swapped |
206 | ; another iteration of the loop with odd and even swapped |
245 | ; gf2_matrix_square(odd, even); |
207 | ; gf2_matrix_square(odd, even); |
246 | ; if (len2 & 1) |
208 | ; if (len2 & 1) |
247 | ; crc1 = gf2_matrix_times(odd, crc1); |
209 | ; crc1 = gf2_matrix_times(odd, crc1); |
248 | ; len2 >>= 1; |
210 | ; len2 >>= 1; |
249 | 211 | ||
250 | ; if no more bits set, then done |
212 | ; if no more bits set, then done |
251 | ; } while (len2 != 0); |
213 | ; } while (len2 != 0); |
252 | 214 | ||
253 | ; return combined crc |
215 | ; return combined crc |
254 | ; crc1 ^= crc2; |
216 | ; crc1 ^= crc2; |
255 | ; return crc1; |
217 | ; return crc1; |
256 | ret |
218 | ret |
257 | endp |
219 | endp |
258 | 220 | ||
259 | ; ========================================================================= |
221 | ; ========================================================================= |
260 | ;uLong (crc1, crc2, len2) |
222 | ;uLong (crc1, crc2, len2) |
261 | ; uLong crc1 |
223 | ; uLong crc1 |
262 | ; uLong crc2 |
224 | ; uLong crc2 |
263 | ; z_off_t len2 |
225 | ; z_off_t len2 |
264 | align 4 |
226 | align 4 |
265 | proc crc32_combine, crc1:dword, crc2:dword, len2:dword |
227 | proc crc32_combine, crc1:dword, crc2:dword, len2:dword |
266 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
228 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
267 | ret |
229 | ret |
268 | endp |
230 | endp |
269 | 231 | ||
270 | ;uLong (crc1, crc2, len2) |
232 | ;uLong (crc1, crc2, len2) |
271 | ; uLong crc1 |
233 | ; uLong crc1 |
272 | ; uLong crc2 |
234 | ; uLong crc2 |
273 | ; z_off64_t len2 |
235 | ; z_off64_t len2 |
274 | align 4 |
236 | align 4 |
275 | proc crc32_combine64, crc1:dword, crc2:dword, len2:dword |
237 | proc crc32_combine64, crc1:dword, crc2:dword, len2:dword |
276 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
238 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
277 | ret |
239 | ret |
278 | endp=><=>>=>> |
240 | endp=><=>>=>> |