Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6725 | siemargl | 1 | /* |
2 | Copyright (c) 1990-2008 Info-ZIP. All rights reserved. |
||
3 | |||
4 | See the accompanying file LICENSE, version 2000-Apr-09 or later |
||
5 | (the contents of which are also included in unzip.h) for terms of use. |
||
6 | If, for some reason, all these files are missing, the Info-ZIP license |
||
7 | also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html |
||
8 | */ |
||
9 | /*--------------------------------------------------------------------------- |
||
10 | |||
11 | unshrink.c version 1.22 19 Mar 2008 |
||
12 | |||
13 | |||
14 | NOTE: This code may or may not infringe on the so-called "Welch |
||
15 | patent" owned by Unisys. (From reading the patent, it appears |
||
16 | that a pure LZW decompressor is *not* covered, but this claim has |
||
17 | not been tested in court, and Unisys is reported to believe other- |
||
18 | wise.) It is therefore the responsibility of the user to acquire |
||
19 | whatever license(s) may be required for legal use of this code. |
||
20 | |||
21 | THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE |
||
22 | IN VIOLATION OF APPLICABLE PATENT LAW. |
||
23 | |||
24 | |||
25 | Shrinking is basically a dynamic LZW algorithm with allowed code sizes of |
||
26 | up to 13 bits; in addition, there is provision for partial clearing of |
||
27 | leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a |
||
28 | change in code size or a partial clear of the code tree: 256,1 for the |
||
29 | former and 256,2 for the latter. [Note that partial clearing can "orphan" |
||
30 | nodes: the parent-to-be can be cleared before its new child is added, |
||
31 | but the child is added anyway (as an orphan, as though the parent still |
||
32 | existed). When the tree fills up to the point where the parent node is |
||
33 | reused, the orphan is effectively "adopted." Versions prior to 1.05 were |
||
34 | affected more due to greater use of pointers (to children and siblings |
||
35 | as well as parents).] |
||
36 | |||
37 | This replacement version of unshrink.c was written from scratch. It is |
||
38 | based only on the algorithms described in Mark Nelson's _The Data Compres- |
||
39 | sion Book_ and in Terry Welch's original paper in the June 1984 issue of |
||
40 | IEEE _Computer_; no existing source code, including any in Nelson's book, |
||
41 | was used. |
||
42 | |||
43 | Memory requirements have been reduced in this version and are now no more |
||
44 | than the original Sam Smith code. This is still larger than any of the |
||
45 | other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming |
||
46 | 16-bit short ints, and this does not even include the output buffer (the |
||
47 | other algorithms leave the uncompressed data in the work area, typically |
||
48 | called slide[]). For machines with a 64KB data space this is a problem, |
||
49 | particularly when text conversion is required and line endings have more |
||
50 | than one character. UnZip's solution is to use two roughly equal halves |
||
51 | of outbuf for the ASCII conversion in such a case; the "unshrink" argument |
||
52 | to flush() signals that this is the case. |
||
53 | |||
54 | For large-memory machines, a second outbuf is allocated for translations, |
||
55 | but only if unshrinking and only if translations are required. |
||
56 | |||
57 | | binary mode | text mode |
||
58 | --------------------------------------------------- |
||
59 | big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here |
||
60 | small mem | small outbuf | half + half small outbuf |
||
61 | |||
62 | Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING" |
||
63 | in UnZip 5.20 (or later) source or binary distributions. |
||
64 | |||
65 | ---------------------------------------------------------------------------*/ |
||
66 | |||
67 | |||
68 | #define __UNSHRINK_C /* identifies this source module */ |
||
69 | #define UNZIP_INTERNAL |
||
70 | #include "unzip.h" |
||
71 | |||
72 | |||
73 | #ifndef LZW_CLEAN |
||
74 | |||
75 | static void partial_clear OF((__GPRO__ int lastcodeused)); |
||
76 | |||
77 | #ifdef DEBUG |
||
78 | # define OUTDBG(c) \ |
||
79 | if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr); |
||
80 | #else |
||
81 | # define OUTDBG(c) |
||
82 | #endif |
||
83 | |||
84 | /* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */ |
||
85 | #define BOGUSCODE 256 |
||
86 | #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */ |
||
87 | #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */ |
||
88 | #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */ |
||
89 | #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */ |
||
90 | |||
91 | #define parent G.area.shrink.Parent |
||
92 | #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */ |
||
93 | #define stack G.area.shrink.Stack |
||
94 | |||
95 | |||
96 | /***********************/ |
||
97 | /* Function unshrink() */ |
||
98 | /***********************/ |
||
99 | |||
100 | int unshrink(__G) |
||
101 | __GDEF |
||
102 | { |
||
103 | uch *stacktop = stack + (HSIZE - 1); |
||
104 | register uch *newstr; |
||
105 | uch finalval; |
||
106 | int codesize=9, len, error; |
||
107 | shrint code, oldcode, curcode; |
||
108 | shrint lastfreecode; |
||
109 | unsigned int outbufsiz; |
||
110 | #if (defined(DLL) && !defined(NO_SLIDE_REDIR)) |
||
111 | /* Normally realbuf and outbuf will be the same. However, if the data |
||
112 | * are redirected to a large memory buffer, realbuf will point to the |
||
113 | * new location while outbuf will remain pointing to the malloc'd |
||
114 | * memory buffer. */ |
||
115 | uch *realbuf = G.outbuf; |
||
116 | #else |
||
117 | # define realbuf G.outbuf |
||
118 | #endif |
||
119 | |||
120 | |||
121 | /*--------------------------------------------------------------------------- |
||
122 | Initialize various variables. |
||
123 | ---------------------------------------------------------------------------*/ |
||
124 | |||
125 | lastfreecode = BOGUSCODE; |
||
126 | |||
127 | #ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */ |
||
128 | #ifndef SMALL_MEM |
||
129 | /* non-memory-limited machines: allocate second (large) buffer for |
||
130 | * textmode conversion in flush(), but only if needed */ |
||
131 | if (G.pInfo->textmode && !G.outbuf2 && |
||
132 | (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL) |
||
133 | return PK_MEM3; |
||
134 | #endif |
||
135 | #endif /* !VMS */ |
||
136 | |||
137 | for (code = 0; code < BOGUSCODE; ++code) { |
||
138 | Value[code] = (uch)code; |
||
139 | parent[code] = BOGUSCODE; |
||
140 | } |
||
141 | for (code = BOGUSCODE+1; code < HSIZE; ++code) |
||
142 | parent[code] = FREE_CODE; |
||
143 | |||
144 | #if (defined(DLL) && !defined(NO_SLIDE_REDIR)) |
||
145 | if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */ |
||
146 | realbuf = G.redirect_buffer; |
||
147 | outbufsiz = (unsigned)G.redirect_size; |
||
148 | } else |
||
149 | #endif |
||
150 | #ifdef DLL |
||
151 | if (G.pInfo->textmode && !G.redirect_data) |
||
152 | #else |
||
153 | if (G.pInfo->textmode) |
||
154 | #endif |
||
155 | outbufsiz = RAWBUFSIZ; |
||
156 | else |
||
157 | outbufsiz = OUTBUFSIZ; |
||
158 | G.outptr = realbuf; |
||
159 | G.outcnt = 0L; |
||
160 | |||
161 | /*--------------------------------------------------------------------------- |
||
162 | Get and output first code, then loop over remaining ones. |
||
163 | ---------------------------------------------------------------------------*/ |
||
164 | |||
165 | READBITS(codesize, oldcode) |
||
166 | if (G.zipeof) |
||
167 | return PK_OK; |
||
168 | |||
169 | finalval = (uch)oldcode; |
||
170 | OUTDBG(finalval) |
||
171 | *G.outptr++ = finalval; |
||
172 | ++G.outcnt; |
||
173 | |||
174 | while (TRUE) { |
||
175 | READBITS(codesize, code) |
||
176 | if (G.zipeof) |
||
177 | break; |
||
178 | if (code == BOGUSCODE) { /* possible to have consecutive escapes? */ |
||
179 | READBITS(codesize, code) |
||
180 | if (G.zipeof) |
||
181 | break; |
||
182 | if (code == 1) { |
||
183 | ++codesize; |
||
184 | Trace((stderr, " (codesize now %d bits)\n", codesize)); |
||
185 | if (codesize > MAX_BITS) return PK_ERR; |
||
186 | } else if (code == 2) { |
||
187 | Trace((stderr, " (partial clear code)\n")); |
||
188 | /* clear leafs (nodes with no children) */ |
||
189 | partial_clear(__G__ lastfreecode); |
||
190 | Trace((stderr, " (done with partial clear)\n")); |
||
191 | lastfreecode = BOGUSCODE; /* reset start of free-node search */ |
||
192 | } |
||
193 | continue; |
||
194 | } |
||
195 | |||
196 | /*----------------------------------------------------------------------- |
||
197 | Translate code: traverse tree from leaf back to root. |
||
198 | -----------------------------------------------------------------------*/ |
||
199 | |||
200 | newstr = stacktop; |
||
201 | curcode = code; |
||
202 | |||
203 | if (parent[code] == FREE_CODE) { |
||
204 | /* or (FLAG_BITS[code] & FREE_CODE)? */ |
||
205 | Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code, |
||
206 | oldcode)); |
||
207 | *newstr-- = finalval; |
||
208 | code = oldcode; |
||
209 | } |
||
210 | |||
211 | while (code != BOGUSCODE) { |
||
212 | if (newstr < stack) { |
||
213 | /* Bogus compression stream caused buffer underflow! */ |
||
214 | Trace((stderr, "unshrink stack overflow!\n")); |
||
215 | return PK_ERR; |
||
216 | } |
||
217 | if (parent[code] == FREE_CODE) { |
||
218 | /* or (FLAG_BITS[code] & FREE_CODE)? */ |
||
219 | Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", |
||
220 | code, oldcode)); |
||
221 | *newstr-- = finalval; |
||
222 | code = oldcode; |
||
223 | } else { |
||
224 | *newstr-- = Value[code]; |
||
225 | code = (shrint)(parent[code] & CODE_MASK); |
||
226 | } |
||
227 | } |
||
228 | |||
229 | len = (int)(stacktop - newstr++); |
||
230 | finalval = *newstr; |
||
231 | |||
232 | /*----------------------------------------------------------------------- |
||
233 | Write expanded string in reverse order to output buffer. |
||
234 | -----------------------------------------------------------------------*/ |
||
235 | |||
236 | Trace((stderr, |
||
237 | "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode, |
||
238 | oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr, |
||
239 | len)); |
||
240 | |||
241 | { |
||
242 | register uch *p; |
||
243 | |||
244 | for (p = newstr; p < newstr+len; ++p) { |
||
245 | *G.outptr++ = *p; |
||
246 | OUTDBG(*p) |
||
247 | if (++G.outcnt == outbufsiz) { |
||
248 | Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt)); |
||
249 | if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) { |
||
250 | Trace((stderr, "unshrink: flush() error (%d)\n", |
||
251 | error)); |
||
252 | return error; |
||
253 | } |
||
254 | G.outptr = realbuf; |
||
255 | G.outcnt = 0L; |
||
256 | Trace((stderr, "done with flush()\n")); |
||
257 | } |
||
258 | } |
||
259 | } |
||
260 | |||
261 | /*----------------------------------------------------------------------- |
||
262 | Add new leaf (first character of newstr) to tree as child of oldcode. |
||
263 | -----------------------------------------------------------------------*/ |
||
264 | |||
265 | /* search for freecode */ |
||
266 | code = (shrint)(lastfreecode + 1); |
||
267 | /* add if-test before loop for speed? */ |
||
268 | while ((code < HSIZE) && (parent[code] != FREE_CODE)) |
||
269 | ++code; |
||
270 | lastfreecode = code; |
||
271 | Trace((stderr, "]; newcode %d\n", code)); |
||
272 | if (code >= HSIZE) |
||
273 | /* invalid compressed data caused max-code overflow! */ |
||
274 | return PK_ERR; |
||
275 | |||
276 | Value[code] = finalval; |
||
277 | parent[code] = oldcode; |
||
278 | oldcode = curcode; |
||
279 | |||
280 | } |
||
281 | |||
282 | /*--------------------------------------------------------------------------- |
||
283 | Flush any remaining data and return to sender... |
||
284 | ---------------------------------------------------------------------------*/ |
||
285 | |||
286 | if (G.outcnt > 0L) { |
||
287 | Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt)); |
||
288 | if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) { |
||
289 | Trace((stderr, "unshrink: flush() error (%d)\n", error)); |
||
290 | return error; |
||
291 | } |
||
292 | Trace((stderr, "done with flush()\n")); |
||
293 | } |
||
294 | |||
295 | return PK_OK; |
||
296 | |||
297 | } /* end function unshrink() */ |
||
298 | |||
299 | |||
300 | |||
301 | |||
302 | |||
303 | /****************************/ |
||
304 | /* Function partial_clear() */ /* no longer recursive... */ |
||
305 | /****************************/ |
||
306 | |||
307 | static void partial_clear(__G__ lastcodeused) |
||
308 | __GDEF |
||
309 | int lastcodeused; |
||
310 | { |
||
311 | register shrint code; |
||
312 | |||
313 | /* clear all nodes which have no children (i.e., leaf nodes only) */ |
||
314 | |||
315 | /* first loop: mark each parent as such */ |
||
316 | for (code = BOGUSCODE+1; code <= lastcodeused; ++code) { |
||
317 | register shrint cparent = (shrint)(parent[code] & CODE_MASK); |
||
318 | |||
319 | if (cparent > BOGUSCODE) |
||
320 | FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */ |
||
321 | } |
||
322 | |||
323 | /* second loop: clear all nodes *not* marked as parents; reset flag bits */ |
||
324 | for (code = BOGUSCODE+1; code <= lastcodeused; ++code) { |
||
325 | if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */ |
||
326 | FLAG_BITS[code] &= ~HAS_CHILD; |
||
327 | else { /* leaf: lose it */ |
||
328 | Trace((stderr, "%d\n", code)); |
||
329 | parent[code] = FREE_CODE; |
||
330 | } |
||
331 | } |
||
332 | |||
333 | return; |
||
334 | } |
||
335 | |||
336 | #endif /* !LZW_CLEAN */=>=>>>32>>>>><>32>-> |