Rev 1896 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1896 | serge | 1 | /* inffast.c -- fast decoding |
3926 | Serge | 2 | * Copyright (C) 1995-2008, 2010, 2013 Mark Adler |
1896 | serge | 3 | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | */ |
||
5 | |||
6 | #include "zutil.h" |
||
7 | #include "inftrees.h" |
||
8 | #include "inflate.h" |
||
9 | #include "inffast.h" |
||
10 | |||
11 | #ifndef ASMINF |
||
12 | |||
13 | /* Allow machine dependent optimization for post-increment or pre-increment. |
||
14 | Based on testing to date, |
||
15 | Pre-increment preferred for: |
||
16 | - PowerPC G3 (Adler) |
||
17 | - MIPS R5000 (Randers-Pehrson) |
||
18 | Post-increment preferred for: |
||
19 | - none |
||
20 | No measurable difference: |
||
21 | - Pentium III (Anderson) |
||
22 | - M68060 (Nikl) |
||
23 | */ |
||
24 | #ifdef POSTINC |
||
25 | # define OFF 0 |
||
26 | # define PUP(a) *(a)++ |
||
27 | #else |
||
28 | # define OFF 1 |
||
29 | # define PUP(a) *++(a) |
||
30 | #endif |
||
31 | |||
32 | /* |
||
33 | Decode literal, length, and distance codes and write out the resulting |
||
34 | literal and match bytes until either not enough input or output is |
||
35 | available, an end-of-block is encountered, or a data error is encountered. |
||
36 | When large enough input and output buffers are supplied to inflate(), for |
||
37 | example, a 16K input buffer and a 64K output buffer, more than 95% of the |
||
38 | inflate execution time is spent in this routine. |
||
39 | |||
40 | Entry assumptions: |
||
41 | |||
42 | state->mode == LEN |
||
43 | strm->avail_in >= 6 |
||
44 | strm->avail_out >= 258 |
||
45 | start >= strm->avail_out |
||
46 | state->bits < 8 |
||
47 | |||
48 | On return, state->mode is one of: |
||
49 | |||
50 | LEN -- ran out of enough output space or enough available input |
||
51 | TYPE -- reached end of block code, inflate() to interpret next block |
||
52 | BAD -- error in block data |
||
53 | |||
54 | Notes: |
||
55 | |||
56 | - The maximum input bits used by a length/distance pair is 15 bits for the |
||
57 | length code, 5 bits for the length extra, 15 bits for the distance code, |
||
58 | and 13 bits for the distance extra. This totals 48 bits, or six bytes. |
||
59 | Therefore if strm->avail_in >= 6, then there is enough input to avoid |
||
60 | checking for available input while decoding. |
||
61 | |||
62 | - The maximum bytes that a single length/distance pair can output is 258 |
||
63 | bytes, which is the maximum length that can be coded. inflate_fast() |
||
64 | requires strm->avail_out >= 258 for each loop to avoid checking for |
||
65 | output space. |
||
66 | */ |
||
67 | void ZLIB_INTERNAL inflate_fast(strm, start) |
||
68 | z_streamp strm; |
||
69 | unsigned start; /* inflate()'s starting value for strm->avail_out */ |
||
70 | { |
||
71 | struct inflate_state FAR *state; |
||
3926 | Serge | 72 | z_const unsigned char FAR *in; /* local strm->next_in */ |
73 | z_const unsigned char FAR *last; /* have enough input while in < last */ |
||
1896 | serge | 74 | unsigned char FAR *out; /* local strm->next_out */ |
75 | unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ |
||
76 | unsigned char FAR *end; /* while out < end, enough space available */ |
||
77 | #ifdef INFLATE_STRICT |
||
78 | unsigned dmax; /* maximum distance from zlib header */ |
||
79 | #endif |
||
80 | unsigned wsize; /* window size or zero if not using window */ |
||
81 | unsigned whave; /* valid bytes in the window */ |
||
82 | unsigned wnext; /* window write index */ |
||
83 | unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ |
||
84 | unsigned long hold; /* local strm->hold */ |
||
85 | unsigned bits; /* local strm->bits */ |
||
86 | code const FAR *lcode; /* local strm->lencode */ |
||
87 | code const FAR *dcode; /* local strm->distcode */ |
||
88 | unsigned lmask; /* mask for first level of length codes */ |
||
89 | unsigned dmask; /* mask for first level of distance codes */ |
||
90 | code here; /* retrieved table entry */ |
||
91 | unsigned op; /* code bits, operation, extra bits, or */ |
||
92 | /* window position, window bytes to copy */ |
||
93 | unsigned len; /* match length, unused bytes */ |
||
94 | unsigned dist; /* match distance */ |
||
95 | unsigned char FAR *from; /* where to copy match from */ |
||
96 | |||
97 | /* copy state to local variables */ |
||
98 | state = (struct inflate_state FAR *)strm->state; |
||
99 | in = strm->next_in - OFF; |
||
100 | last = in + (strm->avail_in - 5); |
||
101 | out = strm->next_out - OFF; |
||
102 | beg = out - (start - strm->avail_out); |
||
103 | end = out + (strm->avail_out - 257); |
||
104 | #ifdef INFLATE_STRICT |
||
105 | dmax = state->dmax; |
||
106 | #endif |
||
107 | wsize = state->wsize; |
||
108 | whave = state->whave; |
||
109 | wnext = state->wnext; |
||
110 | window = state->window; |
||
111 | hold = state->hold; |
||
112 | bits = state->bits; |
||
113 | lcode = state->lencode; |
||
114 | dcode = state->distcode; |
||
115 | lmask = (1U << state->lenbits) - 1; |
||
116 | dmask = (1U << state->distbits) - 1; |
||
117 | |||
118 | /* decode literals and length/distances until end-of-block or not enough |
||
119 | input data or output space */ |
||
120 | do { |
||
121 | if (bits < 15) { |
||
122 | hold += (unsigned long)(PUP(in)) << bits; |
||
123 | bits += 8; |
||
124 | hold += (unsigned long)(PUP(in)) << bits; |
||
125 | bits += 8; |
||
126 | } |
||
127 | here = lcode[hold & lmask]; |
||
128 | dolen: |
||
129 | op = (unsigned)(here.bits); |
||
130 | hold >>= op; |
||
131 | bits -= op; |
||
132 | op = (unsigned)(here.op); |
||
133 | if (op == 0) { /* literal */ |
||
134 | Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? |
||
135 | "inflate: literal '%c'\n" : |
||
136 | "inflate: literal 0x%02x\n", here.val)); |
||
137 | PUP(out) = (unsigned char)(here.val); |
||
138 | } |
||
139 | else if (op & 16) { /* length base */ |
||
140 | len = (unsigned)(here.val); |
||
141 | op &= 15; /* number of extra bits */ |
||
142 | if (op) { |
||
143 | if (bits < op) { |
||
144 | hold += (unsigned long)(PUP(in)) << bits; |
||
145 | bits += 8; |
||
146 | } |
||
147 | len += (unsigned)hold & ((1U << op) - 1); |
||
148 | hold >>= op; |
||
149 | bits -= op; |
||
150 | } |
||
151 | Tracevv((stderr, "inflate: length %u\n", len)); |
||
152 | if (bits < 15) { |
||
153 | hold += (unsigned long)(PUP(in)) << bits; |
||
154 | bits += 8; |
||
155 | hold += (unsigned long)(PUP(in)) << bits; |
||
156 | bits += 8; |
||
157 | } |
||
158 | here = dcode[hold & dmask]; |
||
159 | dodist: |
||
160 | op = (unsigned)(here.bits); |
||
161 | hold >>= op; |
||
162 | bits -= op; |
||
163 | op = (unsigned)(here.op); |
||
164 | if (op & 16) { /* distance base */ |
||
165 | dist = (unsigned)(here.val); |
||
166 | op &= 15; /* number of extra bits */ |
||
167 | if (bits < op) { |
||
168 | hold += (unsigned long)(PUP(in)) << bits; |
||
169 | bits += 8; |
||
170 | if (bits < op) { |
||
171 | hold += (unsigned long)(PUP(in)) << bits; |
||
172 | bits += 8; |
||
173 | } |
||
174 | } |
||
175 | dist += (unsigned)hold & ((1U << op) - 1); |
||
176 | #ifdef INFLATE_STRICT |
||
177 | if (dist > dmax) { |
||
178 | strm->msg = (char *)"invalid distance too far back"; |
||
179 | state->mode = BAD; |
||
180 | break; |
||
181 | } |
||
182 | #endif |
||
183 | hold >>= op; |
||
184 | bits -= op; |
||
185 | Tracevv((stderr, "inflate: distance %u\n", dist)); |
||
186 | op = (unsigned)(out - beg); /* max distance in output */ |
||
187 | if (dist > op) { /* see if copy from window */ |
||
188 | op = dist - op; /* distance back in window */ |
||
189 | if (op > whave) { |
||
190 | if (state->sane) { |
||
191 | strm->msg = |
||
192 | (char *)"invalid distance too far back"; |
||
193 | state->mode = BAD; |
||
194 | break; |
||
195 | } |
||
196 | #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR |
||
197 | if (len <= op - whave) { |
||
198 | do { |
||
199 | PUP(out) = 0; |
||
200 | } while (--len); |
||
201 | continue; |
||
202 | } |
||
203 | len -= op - whave; |
||
204 | do { |
||
205 | PUP(out) = 0; |
||
206 | } while (--op > whave); |
||
207 | if (op == 0) { |
||
208 | from = out - dist; |
||
209 | do { |
||
210 | PUP(out) = PUP(from); |
||
211 | } while (--len); |
||
212 | continue; |
||
213 | } |
||
214 | #endif |
||
215 | } |
||
216 | from = window - OFF; |
||
217 | if (wnext == 0) { /* very common case */ |
||
218 | from += wsize - op; |
||
219 | if (op < len) { /* some from window */ |
||
220 | len -= op; |
||
221 | do { |
||
222 | PUP(out) = PUP(from); |
||
223 | } while (--op); |
||
224 | from = out - dist; /* rest from output */ |
||
225 | } |
||
226 | } |
||
227 | else if (wnext < op) { /* wrap around window */ |
||
228 | from += wsize + wnext - op; |
||
229 | op -= wnext; |
||
230 | if (op < len) { /* some from end of window */ |
||
231 | len -= op; |
||
232 | do { |
||
233 | PUP(out) = PUP(from); |
||
234 | } while (--op); |
||
235 | from = window - OFF; |
||
236 | if (wnext < len) { /* some from start of window */ |
||
237 | op = wnext; |
||
238 | len -= op; |
||
239 | do { |
||
240 | PUP(out) = PUP(from); |
||
241 | } while (--op); |
||
242 | from = out - dist; /* rest from output */ |
||
243 | } |
||
244 | } |
||
245 | } |
||
246 | else { /* contiguous in window */ |
||
247 | from += wnext - op; |
||
248 | if (op < len) { /* some from window */ |
||
249 | len -= op; |
||
250 | do { |
||
251 | PUP(out) = PUP(from); |
||
252 | } while (--op); |
||
253 | from = out - dist; /* rest from output */ |
||
254 | } |
||
255 | } |
||
256 | while (len > 2) { |
||
257 | PUP(out) = PUP(from); |
||
258 | PUP(out) = PUP(from); |
||
259 | PUP(out) = PUP(from); |
||
260 | len -= 3; |
||
261 | } |
||
262 | if (len) { |
||
263 | PUP(out) = PUP(from); |
||
264 | if (len > 1) |
||
265 | PUP(out) = PUP(from); |
||
266 | } |
||
267 | } |
||
268 | else { |
||
269 | from = out - dist; /* copy direct from output */ |
||
270 | do { /* minimum length is three */ |
||
271 | PUP(out) = PUP(from); |
||
272 | PUP(out) = PUP(from); |
||
273 | PUP(out) = PUP(from); |
||
274 | len -= 3; |
||
275 | } while (len > 2); |
||
276 | if (len) { |
||
277 | PUP(out) = PUP(from); |
||
278 | if (len > 1) |
||
279 | PUP(out) = PUP(from); |
||
280 | } |
||
281 | } |
||
282 | } |
||
283 | else if ((op & 64) == 0) { /* 2nd level distance code */ |
||
284 | here = dcode[here.val + (hold & ((1U << op) - 1))]; |
||
285 | goto dodist; |
||
286 | } |
||
287 | else { |
||
288 | strm->msg = (char *)"invalid distance code"; |
||
289 | state->mode = BAD; |
||
290 | break; |
||
291 | } |
||
292 | } |
||
293 | else if ((op & 64) == 0) { /* 2nd level length code */ |
||
294 | here = lcode[here.val + (hold & ((1U << op) - 1))]; |
||
295 | goto dolen; |
||
296 | } |
||
297 | else if (op & 32) { /* end-of-block */ |
||
298 | Tracevv((stderr, "inflate: end of block\n")); |
||
299 | state->mode = TYPE; |
||
300 | break; |
||
301 | } |
||
302 | else { |
||
303 | strm->msg = (char *)"invalid literal/length code"; |
||
304 | state->mode = BAD; |
||
305 | break; |
||
306 | } |
||
307 | } while (in < last && out < end); |
||
308 | |||
309 | /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ |
||
310 | len = bits >> 3; |
||
311 | in -= len; |
||
312 | bits -= len << 3; |
||
313 | hold &= (1U << bits) - 1; |
||
314 | |||
315 | /* update state and return */ |
||
316 | strm->next_in = in + OFF; |
||
317 | strm->next_out = out + OFF; |
||
318 | strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); |
||
319 | strm->avail_out = (unsigned)(out < end ? |
||
320 | 257 + (end - out) : 257 - (out - end)); |
||
321 | state->hold = hold; |
||
322 | state->bits = bits; |
||
323 | return; |
||
324 | } |
||
325 | |||
326 | /* |
||
327 | inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): |
||
328 | - Using bit fields for code structure |
||
329 | - Different op definition to avoid & for extra bits (do & for table bits) |
||
330 | - Three separate decoding do-loops for direct, window, and wnext == 0 |
||
331 | - Special case for distance > 1 copies to do overlapped load and store copy |
||
332 | - Explicit branch predictions (based on measured branch probabilities) |
||
333 | - Deferring match copy and interspersed it with decoding subsequent codes |
||
334 | - Swapping literal/length else |
||
335 | - Swapping window/direct else |
||
336 | - Larger unrolled copy loops (three is about right) |
||
337 | - Moving len -= 3 statement into middle of loop |
||
338 | */ |
||
339 | |||
340 | #endif /* !ASMINF */>>><>><>>>>><>><>>>>>>=>><>><>>><>>><>><>>><>><>>>><>><>>><>><>>>> |