Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.   Copyright (c) 1990-2008 Info-ZIP.  All rights reserved.
  3.  
  4.   See the accompanying file LICENSE, version 2000-Apr-09 or later
  5.   (the contents of which are also included in unzip.h) for terms of use.
  6.   If, for some reason, all these files are missing, the Info-ZIP license
  7.   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
  8. */
  9. /*---------------------------------------------------------------------------
  10.  
  11.   unshrink.c                     version 1.22                     19 Mar 2008
  12.  
  13.  
  14.        NOTE:  This code may or may not infringe on the so-called "Welch
  15.        patent" owned by Unisys.  (From reading the patent, it appears
  16.        that a pure LZW decompressor is *not* covered, but this claim has
  17.        not been tested in court, and Unisys is reported to believe other-
  18.        wise.)  It is therefore the responsibility of the user to acquire
  19.        whatever license(s) may be required for legal use of this code.
  20.  
  21.        THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
  22.        IN VIOLATION OF APPLICABLE PATENT LAW.
  23.  
  24.  
  25.   Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
  26.   up to 13 bits; in addition, there is provision for partial clearing of
  27.   leaf nodes.  PKWARE uses the special code 256 (decimal) to indicate a
  28.   change in code size or a partial clear of the code tree:  256,1 for the
  29.   former and 256,2 for the latter.  [Note that partial clearing can "orphan"
  30.   nodes:  the parent-to-be can be cleared before its new child is added,
  31.   but the child is added anyway (as an orphan, as though the parent still
  32.   existed).  When the tree fills up to the point where the parent node is
  33.   reused, the orphan is effectively "adopted."  Versions prior to 1.05 were
  34.   affected more due to greater use of pointers (to children and siblings
  35.   as well as parents).]
  36.  
  37.   This replacement version of unshrink.c was written from scratch.  It is
  38.   based only on the algorithms described in Mark Nelson's _The Data Compres-
  39.   sion Book_ and in Terry Welch's original paper in the June 1984 issue of
  40.   IEEE _Computer_; no existing source code, including any in Nelson's book,
  41.   was used.
  42.  
  43.   Memory requirements have been reduced in this version and are now no more
  44.   than the original Sam Smith code.  This is still larger than any of the
  45.   other algorithms:  at a minimum, 8K+8K+16K (stack+values+parents) assuming
  46.   16-bit short ints, and this does not even include the output buffer (the
  47.   other algorithms leave the uncompressed data in the work area, typically
  48.   called slide[]).  For machines with a 64KB data space this is a problem,
  49.   particularly when text conversion is required and line endings have more
  50.   than one character.  UnZip's solution is to use two roughly equal halves
  51.   of outbuf for the ASCII conversion in such a case; the "unshrink" argument
  52.   to flush() signals that this is the case.
  53.  
  54.   For large-memory machines, a second outbuf is allocated for translations,
  55.   but only if unshrinking and only if translations are required.
  56.  
  57.               | binary mode  |        text mode
  58.     ---------------------------------------------------
  59.     big mem   |  big outbuf  | big outbuf + big outbuf2  <- malloc'd here
  60.     small mem | small outbuf | half + half small outbuf
  61.  
  62.   Copyright 1994, 1995 Greg Roelofs.  See the accompanying file "COPYING"
  63.   in UnZip 5.20 (or later) source or binary distributions.
  64.  
  65.   ---------------------------------------------------------------------------*/
  66.  
  67.  
  68. #define __UNSHRINK_C    /* identifies this source module */
  69. #define UNZIP_INTERNAL
  70. #include "unzip.h"
  71.  
  72.  
  73. #ifndef LZW_CLEAN
  74.  
  75. static void  partial_clear  OF((__GPRO__ int lastcodeused));
  76.  
  77. #ifdef DEBUG
  78. #  define OUTDBG(c) \
  79.    if ((c)<32 || (c)>=127) fprintf(stderr,"\\x%02x",(c)); else putc((c),stderr);
  80. #else
  81. #  define OUTDBG(c)
  82. #endif
  83.  
  84. /* HSIZE is defined as 2^13 (8192) in unzip.h (resp. unzpriv.h */
  85. #define BOGUSCODE  256
  86. #define FLAG_BITS  parent        /* upper bits of parent[] used as flag bits */
  87. #define CODE_MASK  (HSIZE - 1)   /* 0x1fff (lower bits are parent's index) */
  88. #define FREE_CODE  HSIZE         /* 0x2000 (code is unused or was cleared) */
  89. #define HAS_CHILD  (HSIZE << 1)  /* 0x4000 (code has a child--do not clear) */
  90.  
  91. #define parent G.area.shrink.Parent
  92. #define Value  G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
  93. #define stack  G.area.shrink.Stack
  94.  
  95.  
  96. /***********************/
  97. /* Function unshrink() */
  98. /***********************/
  99.  
  100. int unshrink(__G)
  101.      __GDEF
  102. {
  103.     uch *stacktop = stack + (HSIZE - 1);
  104.     register uch *newstr;
  105.     uch finalval;
  106.     int codesize=9, len, error;
  107.     shrint code, oldcode, curcode;
  108.     shrint lastfreecode;
  109.     unsigned int outbufsiz;
  110. #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
  111.     /* Normally realbuf and outbuf will be the same.  However, if the data
  112.      * are redirected to a large memory buffer, realbuf will point to the
  113.      * new location while outbuf will remain pointing to the malloc'd
  114.      * memory buffer. */
  115.     uch *realbuf = G.outbuf;
  116. #else
  117. #   define realbuf G.outbuf
  118. #endif
  119.  
  120.  
  121. /*---------------------------------------------------------------------------
  122.     Initialize various variables.
  123.   ---------------------------------------------------------------------------*/
  124.  
  125.     lastfreecode = BOGUSCODE;
  126.  
  127. #ifndef VMS     /* VMS uses its own buffer scheme for textmode flush(). */
  128. #ifndef SMALL_MEM
  129.     /* non-memory-limited machines:  allocate second (large) buffer for
  130.      * textmode conversion in flush(), but only if needed */
  131.     if (G.pInfo->textmode && !G.outbuf2 &&
  132.         (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
  133.         return PK_MEM3;
  134. #endif
  135. #endif /* !VMS */
  136.  
  137.     for (code = 0;  code < BOGUSCODE;  ++code) {
  138.         Value[code] = (uch)code;
  139.         parent[code] = BOGUSCODE;
  140.     }
  141.     for (code = BOGUSCODE+1;  code < HSIZE;  ++code)
  142.         parent[code] = FREE_CODE;
  143.  
  144. #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
  145.     if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
  146.         realbuf = G.redirect_buffer;
  147.         outbufsiz = (unsigned)G.redirect_size;
  148.     } else
  149. #endif
  150. #ifdef DLL
  151.     if (G.pInfo->textmode && !G.redirect_data)
  152. #else
  153.     if (G.pInfo->textmode)
  154. #endif
  155.         outbufsiz = RAWBUFSIZ;
  156.     else
  157.         outbufsiz = OUTBUFSIZ;
  158.     G.outptr = realbuf;
  159.     G.outcnt = 0L;
  160.  
  161. /*---------------------------------------------------------------------------
  162.     Get and output first code, then loop over remaining ones.
  163.   ---------------------------------------------------------------------------*/
  164.  
  165.     READBITS(codesize, oldcode)
  166.     if (G.zipeof)
  167.         return PK_OK;
  168.  
  169.     finalval = (uch)oldcode;
  170.     OUTDBG(finalval)
  171.     *G.outptr++ = finalval;
  172.     ++G.outcnt;
  173.  
  174.     while (TRUE) {
  175.         READBITS(codesize, code)
  176.         if (G.zipeof)
  177.             break;
  178.         if (code == BOGUSCODE) {   /* possible to have consecutive escapes? */
  179.             READBITS(codesize, code)
  180.             if (G.zipeof)
  181.                 break;
  182.             if (code == 1) {
  183.                 ++codesize;
  184.                 Trace((stderr, " (codesize now %d bits)\n", codesize));
  185.                 if (codesize > MAX_BITS) return PK_ERR;
  186.             } else if (code == 2) {
  187.                 Trace((stderr, " (partial clear code)\n"));
  188.                 /* clear leafs (nodes with no children) */
  189.                 partial_clear(__G__ lastfreecode);
  190.                 Trace((stderr, " (done with partial clear)\n"));
  191.                 lastfreecode = BOGUSCODE; /* reset start of free-node search */
  192.             }
  193.             continue;
  194.         }
  195.  
  196.     /*-----------------------------------------------------------------------
  197.         Translate code:  traverse tree from leaf back to root.
  198.       -----------------------------------------------------------------------*/
  199.  
  200.         newstr = stacktop;
  201.         curcode = code;
  202.  
  203.         if (parent[code] == FREE_CODE) {
  204.             /* or (FLAG_BITS[code] & FREE_CODE)? */
  205.             Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
  206.               oldcode));
  207.             *newstr-- = finalval;
  208.             code = oldcode;
  209.         }
  210.  
  211.         while (code != BOGUSCODE) {
  212.             if (newstr < stack) {
  213.                 /* Bogus compression stream caused buffer underflow! */
  214.                 Trace((stderr, "unshrink stack overflow!\n"));
  215.                 return PK_ERR;
  216.             }
  217.             if (parent[code] == FREE_CODE) {
  218.                 /* or (FLAG_BITS[code] & FREE_CODE)? */
  219.                 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n",
  220.                   code, oldcode));
  221.                 *newstr-- = finalval;
  222.                 code = oldcode;
  223.             } else {
  224.                 *newstr-- = Value[code];
  225.                 code = (shrint)(parent[code] & CODE_MASK);
  226.             }
  227.         }
  228.  
  229.         len = (int)(stacktop - newstr++);
  230.         finalval = *newstr;
  231.  
  232.     /*-----------------------------------------------------------------------
  233.         Write expanded string in reverse order to output buffer.
  234.       -----------------------------------------------------------------------*/
  235.  
  236.         Trace((stderr,
  237.           "code %4d; oldcode %4d; char %3d (%c); len %d; string [", curcode,
  238.           oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr,
  239.           len));
  240.  
  241.         {
  242.             register uch *p;
  243.  
  244.             for (p = newstr;  p < newstr+len;  ++p) {
  245.                 *G.outptr++ = *p;
  246.                 OUTDBG(*p)
  247.                 if (++G.outcnt == outbufsiz) {
  248.                     Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
  249.                     if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
  250.                         Trace((stderr, "unshrink:  flush() error (%d)\n",
  251.                           error));
  252.                         return error;
  253.                     }
  254.                     G.outptr = realbuf;
  255.                     G.outcnt = 0L;
  256.                     Trace((stderr, "done with flush()\n"));
  257.                 }
  258.             }
  259.         }
  260.  
  261.     /*-----------------------------------------------------------------------
  262.         Add new leaf (first character of newstr) to tree as child of oldcode.
  263.       -----------------------------------------------------------------------*/
  264.  
  265.         /* search for freecode */
  266.         code = (shrint)(lastfreecode + 1);
  267.         /* add if-test before loop for speed? */
  268.         while ((code < HSIZE) && (parent[code] != FREE_CODE))
  269.             ++code;
  270.         lastfreecode = code;
  271.         Trace((stderr, "]; newcode %d\n", code));
  272.         if (code >= HSIZE)
  273.             /* invalid compressed data caused max-code overflow! */
  274.             return PK_ERR;
  275.  
  276.         Value[code] = finalval;
  277.         parent[code] = oldcode;
  278.         oldcode = curcode;
  279.  
  280.     }
  281.  
  282. /*---------------------------------------------------------------------------
  283.     Flush any remaining data and return to sender...
  284.   ---------------------------------------------------------------------------*/
  285.  
  286.     if (G.outcnt > 0L) {
  287.         Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
  288.         if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) {
  289.             Trace((stderr, "unshrink:  flush() error (%d)\n", error));
  290.             return error;
  291.         }
  292.         Trace((stderr, "done with flush()\n"));
  293.     }
  294.  
  295.     return PK_OK;
  296.  
  297. } /* end function unshrink() */
  298.  
  299.  
  300.  
  301.  
  302.  
  303. /****************************/
  304. /* Function partial_clear() */      /* no longer recursive... */
  305. /****************************/
  306.  
  307. static void partial_clear(__G__ lastcodeused)
  308.     __GDEF
  309.     int lastcodeused;
  310. {
  311.     register shrint code;
  312.  
  313.     /* clear all nodes which have no children (i.e., leaf nodes only) */
  314.  
  315.     /* first loop:  mark each parent as such */
  316.     for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
  317.         register shrint cparent = (shrint)(parent[code] & CODE_MASK);
  318.  
  319.         if (cparent > BOGUSCODE)
  320.             FLAG_BITS[cparent] |= HAS_CHILD;   /* set parent's child-bit */
  321.     }
  322.  
  323.     /* second loop:  clear all nodes *not* marked as parents; reset flag bits */
  324.     for (code = BOGUSCODE+1;  code <= lastcodeused;  ++code) {
  325.         if (FLAG_BITS[code] & HAS_CHILD)    /* just clear child-bit */
  326.             FLAG_BITS[code] &= ~HAS_CHILD;
  327.         else {                              /* leaf:  lose it */
  328.             Trace((stderr, "%d\n", code));
  329.             parent[code] = FREE_CODE;
  330.         }
  331.     }
  332.  
  333.     return;
  334. }
  335.  
  336. #endif /* !LZW_CLEAN */
  337.