0,0 → 1,353 |
; deflate.inc -- internal compression state |
; Copyright (C) 1995-2012 Jean-loup Gailly |
; For conditions of distribution and use, see copyright notice in zlib.inc |
|
; WARNING: this file should *not* be used by applications. It is |
; part of the implementation of the compression library and is |
; subject to change. Applications should only use zlib.inc. |
|
include 'zutil.inc' |
|
; =========================================================================== |
; Internal compression state. |
|
|
; --- Sizes of the Huffman alphabets and related limits ---

; number of length codes, not counting the special END_BLOCK code
LENGTH_CODES equ 29

; number of literal bytes 0..255
LITERALS     equ 256

; number of Literal or Length codes, including the END_BLOCK code
L_CODES      equ (LITERALS+1+LENGTH_CODES)

; number of distance codes
D_CODES      equ 30

; number of codes used to transfer the bit lengths
BL_CODES     equ 19

; maximum heap size
HEAP_SIZE    equ (2*L_CODES+1)

; All codes must not exceed MAX_BITS bits
MAX_BITS     equ 15

; size of bit buffer in bi_buf
Buf_size     equ 16
|
; Stream status: the values stored in deflate_state.status.
; They only need to be distinct from each other; the numbers are kept
; identical to the ones used by the reference zlib implementation so that
; a status seen in a debugger or a dump can be matched against it.
INIT_STATE    equ 42
EXTRA_STATE   equ 69
NAME_STATE    equ 73
COMMENT_STATE equ 91
HCRC_STATE    equ 103
BUSY_STATE    equ 113
FINISH_STATE  equ 666
|
; Data structure describing a single value and its code string.
; Both 16-bit fields are unions: each one is shared by two logical members.
struct ct_data ;ct_data_s
	fc   dw ? ;union of:
		;uint_16 freq ;frequency count
		;uint_16 code ;bit string
	dale dw ? ;union of:
		;uint_16 dad ;father node in Huffman tree
		;uint_16 len ;length of bit string
ends

; Member names of the two unions, mapped onto the field that stores them.
Freq equ ct_data.fc   ;.freq
Code equ ct_data.fc   ;.code
Dad  equ ct_data.dale ;.dad
Len  equ ct_data.dale ;.len
|
; Descriptor of one Huffman tree: three 32-bit fields.
struct tree_desc ;tree_desc_s
	dyn_tree  dd ? ;ct_data *           ;the dynamic tree
	max_code  dd ? ;int                 ;largest code with non zero frequency
	stat_desc dd ? ;static_tree_desc *  ;the corresponding static tree
ends
|
; A Pos is an index in the character window. We use short instead of int to |
; save space in the various tables. IPos is used only for parameter passing. |
|
; Internal compression state.
; The trailing comment of every field gives its C type followed by its meaning.
; Longer explanations are placed in front of the field they describe.
struct deflate_state ;internal_state
	strm dd ? ;z_streamp ;pointer back to this zlib stream
	status dd ? ;int ;as the name implies
	pending_buf dd ? ;Bytef * ;output still pending
	pending_buf_size dd ? ;ulg ;size of pending_buf
	pending_out dd ? ;Bytef * ;next pending byte to output to the stream
	; NOTE(review): stored as a 16-bit word although annotated uInt and
	; although pending_buf_size is 32 bits wide -- confirm that the pending
	; buffer can never hold more than 64K-1 bytes.
	pending dw ? ;uInt ;nb of bytes in the pending buffer
	wrap dd ? ;int ;bit 0 true for zlib, bit 1 true for gzip
	gzhead dd ? ;gz_headerp ;gzip header information to write
	gzindex dd ? ;uInt ;where in extra, name, or comment
	method db ? ;Byte ;can only be DEFLATED
	last_flush dd ? ;int ;value of flush param for previous deflate call

	; ------------------------------------------------------------------
	; used by deflate.asm:
	; ------------------------------------------------------------------

	w_size dd ? ;uInt ;LZ77 window size (32K by default)
	w_bits dd ? ;uInt ;log2(w_size) (8..16)
	w_mask dd ? ;uInt ;w_size - 1

	; Sliding window. Input bytes are read into the second half of the window,
	; and move to the first half later to keep a dictionary of at least wSize
	; bytes. With this organization, matches are limited to a distance of
	; wSize-MAX_MATCH bytes, but this ensures that IO is always
	; performed with a length multiple of the block size. Also, it limits
	; the window size to 64K, which is quite useful on MSDOS.
	; To do: use the user input buffer as sliding window.
	window dd ? ;Bytef *

	; Actual size of window: 2*wSize, except when the user input buffer
	; is directly used as sliding window.
	window_size dd ? ;ulg

	; Link to older string with same hash index. To limit the size of this
	; array to 64K, this link is maintained only for the last 32K strings.
	; An index in this array is thus a window index modulo 32K.
	prev dd ? ;Posf *

	head dd ? ;Posf * ;Heads of the hash chains or NIL.

	ins_h dd ? ;uInt ;hash index of string to be inserted
	hash_size dd ? ;uInt ;number of elements in hash table
	hash_bits dd ? ;uInt ;log2(hash_size)
	hash_mask dd ? ;uInt ;hash_size-1

	; Number of bits by which ins_h must be shifted at each input
	; step. It must be such that after MIN_MATCH steps, the oldest
	; byte no longer takes part in the hash key, that is:
	;   hash_shift * MIN_MATCH >= hash_bits
	hash_shift dd ? ;uInt

	; Window position at the beginning of the current output block. Gets
	; negative when the window is moved backwards.
	block_start dd ? ;long

	match_length dd ? ;uInt ;length of best match
	prev_match dd ? ;IPos ;previous match
	match_available dd ? ;int ;set if previous match exists
	strstart dd ? ;uInt ;start of string to insert
	match_start dd ? ;uInt ;start of matching string
	lookahead dd ? ;uInt ;number of valid bytes ahead in window

	; Length of the best match at previous step. Matches not greater than this
	; are discarded. This is used in the lazy match evaluation.
	prev_length dd ? ;uInt

	; To speed up deflation, hash chains are never searched beyond this
	; length. A higher limit improves compression ratio but degrades the
	; speed.
	max_chain_length dd ? ;uInt

	; Attempt to find a better match only when the current match is strictly
	; smaller than this value. This mechanism is used only for compression
	; levels >= 4.
	max_lazy_match dd ? ;uInt

	;# define max_insert_length max_lazy_match
	; Insert new strings in the hash table only if the match length is not
	; greater than this length. This saves time but degrades compression.
	; max_insert_length is used only for compression levels <= 3.

	level dw ? ;int ;compression level (1..9)
	strategy dw ? ;int ;favor or force Huffman coding

	; Use a faster search when the previous match is longer than this
	good_match dd ? ;uInt

	nice_match dd ? ;int ;Stop searching when current match exceeds this

	; ------------------------------------------------------------------
	; used by trees.asm:
	; ------------------------------------------------------------------
	; Didn't use ct_data typedef below to suppress compiler warning
	dyn_ltree rb sizeof.ct_data * HEAP_SIZE ;literal and length tree
	dyn_dtree rb sizeof.ct_data * (2*D_CODES+1) ;distance tree
	bl_tree rb sizeof.ct_data * (2*BL_CODES+1) ;Huffman tree for bit lengths

	l_desc tree_desc ;desc. for literal tree
	d_desc tree_desc ;desc. for distance tree
	bl_desc tree_desc ;desc. for bit length tree

	; number of codes at each bit length for an optimal tree
	bl_count rw MAX_BITS+1 ;uint_16[]

	; The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
	; The same heap array is used to build all trees.
	; (HEAP_SIZE is 2*L_CODES+1.)
	heap rw HEAP_SIZE ;int[] ;heap used to build the Huffman trees
	heap_len dd ? ;int ;number of elements in the heap
	heap_max dd ? ;int ;element of largest frequency

	; Depth of each subtree used as tie breaker for trees of equal frequency
	depth rb HEAP_SIZE ;uch[]

	l_buf dd ? ;uchf * ;buffer for literals or lengths

	; Size of match buffer for literals/lengths. There are 4 reasons for
	; limiting lit_bufsize to 64K:
	;   - frequencies can be kept in 16 bit counters
	;   - if compression is not successful for the first block, all input
	;     data is still in the window so we can still emit a stored block even
	;     when input comes from standard input. (This can also be done for
	;     all blocks if lit_bufsize is not greater than 32K.)
	;   - if compression is not successful for a file smaller than 64K, we can
	;     even emit a stored file instead of a stored block (saving 5 bytes).
	;     This is applicable only for zip (not gzip or zlib).
	;   - creating new Huffman trees less frequently may not provide fast
	;     adaptation to changes in the input data statistics. (Take for
	;     example a binary file with poorly compressible code followed by
	;     a highly compressible string table.) Smaller buffer sizes give
	;     fast adaptation but have of course the overhead of transmitting
	;     trees more frequently.
	;   - I can't count above 4
	lit_bufsize dd ? ;uInt

	last_lit dd ? ;uInt ;running index in l_buf

	; Buffer for distances. To simplify the code, d_buf and l_buf have
	; the same number of elements. To use different lengths, an extra flag
	; array would be necessary.
	d_buf dd ? ;uint_16p

	opt_len dd ? ;ulg ;bit length of current block with optimal trees
	static_len dd ? ;ulg ;bit length of current block with static trees
	matches dd ? ;uInt ;number of string matches in current block
	insert dd ? ;uInt ;bytes at end of window left to insert

	; The two counters below exist only in DEBUG builds, so the offsets of
	; all following fields depend on the DEBUG setting.
if DEBUG eq 1
	compressed_len dd ? ;ulg ;total bit length of compressed file mod 2^32
	bits_sent dd ? ;ulg ;bit length of compressed data sent mod 2^32
end if

	; Output buffer. bits are inserted starting at the bottom (least
	; significant bits).
	bi_buf dw ? ;uint_16

	; Number of valid bits in bi_buf. All bits above the last valid bit
	; are always zero.
	bi_valid dd ? ;int

	; High water mark offset in window for initialized bytes -- bytes above
	; this are set to zero in order to avoid memory check warnings when
	; longest match routines access bytes past the input. This is then
	; updated to the new high water mark.
	high_water dd ? ;ulg
ends
|
; Output a byte on the stream: pending_buf[pending++] = c
; IN assertion: there is enough room in pending_buf.
;   s - base of the deflate_state, used as [s+field]
;   c - byte to store (source operand of a byte-sized mov); it must not
;       depend on eax, because eax is overwritten before c is stored
; Clobbers: eax (left holding the address of the stored byte) and flags.
macro put_byte s, c
{
	; disabled debug trace:
	;xor eax,eax
	;mov al,c
	;zlib_debug '(%d)',eax
	movzx eax,word[s+deflate_state.pending]	;eax = pending (16-bit counter)
	add eax,[s+deflate_state.pending_buf]	;eax = pending_buf + pending
	mov byte[eax],c
	inc word[s+deflate_state.pending]	;one more byte is pending
}
; Output four bytes on the stream with a single 32-bit store:
;   *(dword*)(pending_buf + pending) = d;  pending += 4
; No check for free room in pending_buf is made here.
;   s - base of the deflate_state, used as [s+field]
;   d - dword to store (source operand of a dword-sized mov); it must not
;       depend on eax, because eax is overwritten before d is stored
; Clobbers: eax (left holding the address of the stored dword) and flags.
macro put_dword s, d
{
	; disabled debug trace:
	;mov eax,d
	;zlib_debug '(%d)',eax
	movzx eax,word[s+deflate_state.pending]	;eax = pending (16-bit counter)
	add eax,[s+deflate_state.pending_buf]	;eax = pending_buf + pending
	mov dword[eax],d
	add word[s+deflate_state.pending],4	;four more bytes are pending
}
|
; Minimum amount of lookahead, except at the end of the input file.
; See deflate.asm for comments about the MIN_MATCH+1.
MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
|
; In order to simplify the code, particularly on 16 bit machines, match
; distances are limited to MAX_DIST instead of WSIZE.
;   s   - base of the deflate_state, used as [s+field]
;   out - eax = w_size - MIN_LOOKAHEAD
; Clobbers: eax, flags.
macro MAX_DIST s
{
	mov eax,[s+deflate_state.w_size]
	sub eax,MIN_LOOKAHEAD
}
|
; Number of bytes after end of data in window to initialize in order to avoid
; memory checker errors from longest match routines
WIN_INIT equ MAX_MATCH
|
; Mapping from a distance to a distance code. dist is the distance - 1 and
; must not have side effects. _dist_code[256] and _dist_code[257] are never
; used.
;   dist - 32-bit source operand holding (distance - 1), an unsigned value
;   out  - eax = distance code read from the _dist_code byte table
; Clobbers: eax, flags.
macro d_code dist
{
;if (dist < 256) _dist_code[dist]
;else _dist_code[ 256+(dist>>7) ]
local .end0
	mov eax,dist
	cmp eax,256
	jb .end0	;unsigned compare: dist is an unsigned quantity, like in the C macro
	shr eax,7
	add eax,256	;index into the upper part of the table: 256+(dist>>7)
.end0:
	movzx eax,byte[eax+_dist_code]
}
|
; Record a literal byte in the block buffers and tell whether the block
; should be flushed.
;   s     - base of the deflate_state, used as [s+field]; must not be eax or ecx
;   c     - the literal, a 32-bit operand (only its low byte is recorded);
;           no load is emitted when it is already eax
;   flush - destination that receives the result: in the inline version
;           1 when last_lit == lit_bufsize-1 after the update, otherwise 0.
;           It is written before ecx is restored, so it must not be ecx.
; With DEBUG eq 0 the work is done inline (eax clobbered, ecx preserved);
; otherwise _tr_tally is called and its return value is stored in flush.
macro _tr_tally_lit s, c, flush
{
local .end0
if DEBUG eq 0
; Inline version of _tr_tally for speed:
if c eq eax
else
	mov eax,c
end if
	push ecx
	; d_buf[last_lit] = 0 (d_buf holds 16-bit entries; 0 marks a literal)
	mov ecx,[s+deflate_state.last_lit]
	add ecx,ecx
	add ecx,[s+deflate_state.d_buf]
	mov word[ecx],0
	; l_buf[last_lit++] = c
	mov ecx,[s+deflate_state.last_lit]
	add ecx,[s+deflate_state.l_buf]
	mov byte[ecx],al
	inc dword[s+deflate_state.last_lit]
	; dyn_ltree[c].Freq++
	movzx eax,al
	imul eax,sizeof.ct_data
	add eax,s
	inc word[eax+deflate_state.dyn_ltree+Freq]
	; flush = (last_lit == lit_bufsize-1)
	xor eax,eax
	mov ecx,[s+deflate_state.lit_bufsize]
	dec ecx
	cmp [s+deflate_state.last_lit],ecx
	jne .end0
		inc eax
	.end0:
	mov flush, eax
	pop ecx
else
	; out-of-line version: distance argument 0 denotes a literal
	stdcall _tr_tally, s, 0, c
	mov flush, eax
end if
}
; Record a (distance, length) match through _tr_tally.
;   s        - base of the deflate_state
;   distance - match distance, passed as second argument of _tr_tally
;   length   - match length value, passed as third argument of _tr_tally
;   flush    - destination that receives the value returned in eax
; The inline variant in the first branch is switched off with 'if 0': it is
; unfinished (the frequency updates and the flush computation exist only as
; C comments), so the call to _tr_tally is always the code that is assembled.
macro _tr_tally_dist s, distance, length, flush
{
if 0 ;;;DEBUG eq 0
	; --- disabled, incomplete inline version ---
	push ecx
	; uch len = (length)
if distance eq eax
else
	mov eax,distance
end if
	; d_buf[last_lit] = distance
	mov ecx,[s+deflate_state.last_lit]
	shl ecx,1
	add ecx,[s+deflate_state.d_buf]
	mov word[ecx],ax
	; l_buf[last_lit++] = length
	mov ecx,[s+deflate_state.last_lit]
	add ecx,[s+deflate_state.l_buf]
	mov byte[ecx],length
	inc dword[s+deflate_state.last_lit]
	dec eax
	; still to be translated:
	; s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
	; s->dyn_dtree[d_code(dist)].Freq++;
	; flush = (s->last_lit == s->lit_bufsize-1);
	pop ecx
else
	stdcall _tr_tally, s, distance, length
	mov flush, eax
end if
}