Rev 6639 | Rev 6881 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6617 | IgorA | 1 | ; crc32.asm -- compute the CRC-32 of a data stream |
2 | ; Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler |
||
3 | ; For conditions of distribution and use, see copyright notice in zlib.inc |
||
4 | |||
5 | ; Thanks to Rodney Brown |
||
6 | ; CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing |
||
7 | ; tables for updating the shift register in one step with three exclusive-ors |
||
8 | ; instead of four steps with four exclusive-ors. This results in about a |
||
9 | ; factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. |
||
10 | |||
11 | |||
12 | ; Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore |
||
13 | ; protection on the static variables used to control the first-use generation |
||
14 | ; of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should |
||
15 | ; first call get_crc_table() to initialize the tables before allowing more than |
||
16 | ; one thread to use crc32(). |
||
17 | |||
18 | ; Definitions for doing the crc four data bytes at a time. |
||
19 | |||
20 | TBLS equ 1 |
||
21 | |||
22 | if DYNAMIC_CRC_TABLE eq 1 |
||
23 | |||
24 | align 4 |
||
25 | crc_table_empty dd 1 |
||
6673 | IgorA | 26 | ;align 4 |
27 | ;crc_table rd TBLS*256 |
||
6617 | IgorA | 28 | |
29 | ; Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: |
||
30 | ; x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. |
||
31 | |||
32 | ; Polynomials over GF(2) are represented in binary, one bit per coefficient, |
||
33 | ; with the lowest powers in the most significant bit. Then adding polynomials |
||
34 | ; is just exclusive-or, and multiplying a polynomial by x is a right shift by |
||
35 | ; one. If we call the above polynomial p, and represent a byte as the |
||
36 | ; polynomial q, also with the lowest power in the most significant bit (so the |
||
37 | ; byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, |
||
38 | ; where a mod b means the remainder after dividing a by b. |
||
39 | |||
40 | ; This calculation is done using the shift-register method of multiplying and |
||
41 | ; taking the remainder. The register is initialized to zero, and for each |
||
42 | ; incoming bit, x^32 is added mod p to the register if the bit is a one (where |
||
43 | ; x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by |
||
44 | ; x (which is shifting right by one and adding x^32 mod p if the bit shifted |
||
45 | ; out is a one). We start with the highest power (least significant bit) of |
||
46 | ; q and repeat for all eight bits of q. |
||
47 | |||
48 | ; The first table is simply the CRC of all possible eight bit values. This is |
||
49 | ; all the information needed to generate CRCs on data a byte at a time for all |
||
50 | ; combinations of CRC register values and incoming bytes. The remaining tables |
||
51 | ; allow for word-at-a-time CRC calculation for both big-endian and little- |
||
52 | ; endian machines, where a word is four bytes. |
||
53 | |||
54 | ;void () |
||
55 | align 4 |
||
56 | proc make_crc_table uses ecx edx edi |
||
57 | zlib_debug 'make_crc_table' |
||
58 | |||
59 | ; generate a crc for every 8-bit value |
||
60 | xor edx, edx |
||
61 | mov edi, crc_table |
||
62 | .1: |
||
63 | mov ecx, 8 |
||
64 | mov eax, edx |
||
65 | .2: |
||
66 | shr eax, 1 |
||
67 | jnc @f |
||
68 | xor eax, 0xEDB88320 |
||
69 | @@: |
||
70 | loop .2 |
||
71 | stosd |
||
72 | inc dl |
||
73 | jnz .1 |
||
74 | |||
75 | mov dword[crc_table_empty],0 |
||
76 | ret |
||
77 | endp |
||
78 | |||
79 | else ;!DYNAMIC_CRC_TABLE |
||
80 | ; ======================================================================== |
||
81 | ; Tables of CRC-32s of all single-byte values, made by make_crc_table(). |
||
82 | |||
83 | ;include 'crc32.inc' |
||
84 | end if ;DYNAMIC_CRC_TABLE |
||
85 | |||
86 | ; ========================================================================= |
||
87 | ; This function can be used by asm versions of crc32() |
||
88 | |||
89 | ;const z_crc_t* () |
||
90 | align 4 |
||
91 | proc get_crc_table |
||
92 | if DYNAMIC_CRC_TABLE eq 1 |
||
93 | cmp dword[crc_table_empty],0 |
||
94 | je @f ;if (..) |
||
95 | call make_crc_table |
||
96 | @@: |
||
6639 | IgorA | 97 | end if |
6617 | IgorA | 98 | mov eax,crc_table |
99 | ret |
||
100 | endp |
||
101 | |||
102 | ; ========================================================================= |
||
103 | ;unsigned long (crc, buf, len) |
||
104 | ; unsigned long crc |
||
105 | ; unsigned char *buf |
||
106 | ; uInt len |
||
107 | align 4 |
||
108 | proc calc_crc32 uses ecx esi, p1crc:dword, buf:dword, len:dword |
||
109 | xor eax,eax |
||
110 | mov esi,[buf] |
||
111 | zlib_debug 'calc_crc32 buf = %d',esi |
||
112 | cmp esi,Z_NULL |
||
113 | je .end_f ;if (..==0) return 0 |
||
114 | |||
115 | if DYNAMIC_CRC_TABLE eq 1 |
||
116 | cmp dword[crc_table_empty],0 |
||
117 | je @f ;if (..) |
||
118 | call make_crc_table |
||
119 | @@: |
||
120 | end if |
||
121 | |||
122 | mov eax,[p1crc] |
||
123 | mov ecx,[len] |
||
6639 | IgorA | 124 | call crc |
6617 | IgorA | 125 | .end_f: |
126 | ret |
||
127 | endp |
||
128 | |||
129 | GF2_DIM equ 32 ;dimension of GF(2) vectors (length of CRC) |
||
130 | |||
131 | ; ========================================================================= |
||
132 | ;unsigned long (mat, vec) |
||
133 | ; unsigned long *mat |
||
134 | ; unsigned long vec |
||
135 | align 4 |
||
136 | proc gf2_matrix_times, mat:dword, vec:dword |
||
137 | ; unsigned long sum; |
||
138 | |||
139 | ; sum = 0; |
||
140 | ; while (vec) { |
||
141 | ; if (vec & 1) |
||
142 | ; sum ^= *mat; |
||
143 | ; vec >>= 1; |
||
144 | ; mat++; |
||
145 | ; } |
||
146 | ; return sum; |
||
147 | ret |
||
148 | endp |
||
149 | |||
150 | ; ========================================================================= |
||
151 | ;local void (square, mat) |
||
152 | ; unsigned long *square |
||
153 | ; unsigned long *mat |
||
154 | align 4 |
||
155 | proc gf2_matrix_square, square:dword, mat:dword |
||
156 | ; int n; |
||
157 | |||
158 | ; for (n = 0; n < GF2_DIM; n++) |
||
159 | ; square[n] = gf2_matrix_times(mat, mat[n]); |
||
160 | ret |
||
161 | endp |
||
162 | |||
163 | ; ========================================================================= |
||
164 | ;uLong (crc1, crc2, len2) |
||
165 | ; uLong crc1 |
||
166 | ; uLong crc2 |
||
167 | ; z_off64_t len2 |
||
168 | align 4 |
||
169 | proc crc32_combine_, crc1:dword, crc2:dword, len2:dword |
||
170 | ; int n; |
||
171 | ; unsigned long row; |
||
172 | ; unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ |
||
173 | ; unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ |
||
174 | |||
175 | ; degenerate case (also disallow negative lengths) |
||
176 | ; if (len2 <= 0) |
||
177 | ; return crc1; |
||
178 | |||
179 | ; put operator for one zero bit in odd |
||
180 | ; odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ |
||
181 | ; row = 1; |
||
182 | ; for (n = 1; n < GF2_DIM; n++) { |
||
183 | ; odd[n] = row; |
||
184 | ; row <<= 1; |
||
185 | ; } |
||
186 | |||
187 | ; put operator for two zero bits in even |
||
188 | ; gf2_matrix_square(even, odd); |
||
189 | |||
190 | ; put operator for four zero bits in odd |
||
191 | ; gf2_matrix_square(odd, even); |
||
192 | |||
193 | ; apply len2 zeros to crc1 (first square will put the operator for one |
||
194 | ; zero byte, eight zero bits, in even) |
||
195 | ; do { |
||
196 | ; apply zeros operator for this bit of len2 |
||
197 | ; gf2_matrix_square(even, odd); |
||
198 | ; if (len2 & 1) |
||
199 | ; crc1 = gf2_matrix_times(even, crc1); |
||
200 | ; len2 >>= 1; |
||
201 | |||
202 | ; if no more bits set, then done |
||
203 | ; if (len2 == 0) |
||
204 | ; break; |
||
205 | |||
206 | ; another iteration of the loop with odd and even swapped |
||
207 | ; gf2_matrix_square(odd, even); |
||
208 | ; if (len2 & 1) |
||
209 | ; crc1 = gf2_matrix_times(odd, crc1); |
||
210 | ; len2 >>= 1; |
||
211 | |||
212 | ; if no more bits set, then done |
||
213 | ; } while (len2 != 0); |
||
214 | |||
215 | ; return combined crc |
||
216 | ; crc1 ^= crc2; |
||
217 | ; return crc1; |
||
218 | ret |
||
219 | endp |
||
220 | |||
221 | ; ========================================================================= |
||
222 | ;uLong (crc1, crc2, len2) |
||
223 | ; uLong crc1 |
||
224 | ; uLong crc2 |
||
225 | ; z_off_t len2 |
||
226 | align 4 |
||
227 | proc crc32_combine, crc1:dword, crc2:dword, len2:dword |
||
228 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
||
229 | ret |
||
230 | endp |
||
231 | |||
232 | ;uLong (crc1, crc2, len2) |
||
233 | ; uLong crc1 |
||
234 | ; uLong crc2 |
||
235 | ; z_off64_t len2 |
||
236 | align 4 |
||
237 | proc crc32_combine64, crc1:dword, crc2:dword, len2:dword |
||
238 | stdcall crc32_combine_, [crc1], [crc2], [len2] |
||
239 | ret |
||
240 | endp=><=>>=>> |