Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* cairo - a vector graphics library with display and print output |
2 | * |
||
3 | * Copyright © 2006 Red Hat, Inc. |
||
4 | * |
||
5 | * This library is free software; you can redistribute it and/or |
||
6 | * modify it either under the terms of the GNU Lesser General Public |
||
7 | * License version 2.1 as published by the Free Software Foundation |
||
8 | * (the "LGPL") or, at your option, under the terms of the Mozilla |
||
9 | * Public License Version 1.1 (the "MPL"). If you do not alter this |
||
10 | * notice, a recipient may use your version of this file under either |
||
11 | * the MPL or the LGPL. |
||
12 | * |
||
13 | * You should have received a copy of the LGPL along with this library |
||
14 | * in the file COPYING-LGPL-2.1; if not, write to the Free Software |
||
15 | * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA |
||
16 | * You should have received a copy of the MPL along with this library |
||
17 | * in the file COPYING-MPL-1.1 |
||
18 | * |
||
19 | * The contents of this file are subject to the Mozilla Public License |
||
20 | * Version 1.1 (the "License"); you may not use this file except in |
||
21 | * compliance with the License. You may obtain a copy of the License at |
||
22 | * http://www.mozilla.org/MPL/ |
||
23 | * |
||
24 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY |
||
25 | * OF ANY KIND, either express or implied. See the LGPL or the MPL for |
||
26 | * the specific language governing rights and limitations. |
||
27 | * |
||
28 | * The Original Code is the cairo graphics library. |
||
29 | * |
||
30 | * The Initial Developer of the Original Code is University of Southern |
||
31 | * California. |
||
32 | * |
||
33 | * Contributor(s): |
||
34 | * Carl D. Worth |
||
35 | */ |
||
36 | |||
37 | #include "cairoint.h" |
||
38 | #include "cairo-error-private.h" |
||
39 | |||
40 | typedef struct _lzw_buf { |
||
41 | cairo_status_t status; |
||
42 | |||
43 | unsigned char *data; |
||
44 | int data_size; |
||
45 | int num_data; |
||
46 | uint32_t pending; |
||
47 | unsigned int pending_bits; |
||
48 | } lzw_buf_t; |
||
49 | |||
50 | /* An lzw_buf_t is a simple, growable chunk of memory for holding |
||
51 | * variable-size objects of up to 16 bits each. |
||
52 | * |
||
53 | * Initialize an lzw_buf_t to the given size in bytes. |
||
54 | * |
||
55 | * To store objects into the lzw_buf_t, call _lzw_buf_store_bits and |
||
56 | * when finished, call _lzw_buf_store_pending, (which flushes out the |
||
57 | * last few bits that hadn't yet made a complete byte yet). |
||
58 | * |
||
59 | * Instead of returning failure from any functions, lzw_buf_t provides |
||
60 | * a status value that the caller can query, (and should query at |
||
61 | * least once when done with the object). The status value will be |
||
62 | * either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY; |
||
63 | */ |
||
64 | static void |
||
65 | _lzw_buf_init (lzw_buf_t *buf, int size) |
||
66 | { |
||
67 | if (size == 0) |
||
68 | size = 16; |
||
69 | |||
70 | buf->status = CAIRO_STATUS_SUCCESS; |
||
71 | buf->data_size = size; |
||
72 | buf->num_data = 0; |
||
73 | buf->pending = 0; |
||
74 | buf->pending_bits = 0; |
||
75 | |||
76 | buf->data = malloc (size); |
||
77 | if (unlikely (buf->data == NULL)) { |
||
78 | buf->data_size = 0; |
||
79 | buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY); |
||
80 | return; |
||
81 | } |
||
82 | } |
||
83 | |||
84 | /* Increase the buffer size by doubling. |
||
85 | * |
||
86 | * Returns %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY |
||
87 | */ |
||
88 | static cairo_status_t |
||
89 | _lzw_buf_grow (lzw_buf_t *buf) |
||
90 | { |
||
91 | int new_size = buf->data_size * 2; |
||
92 | unsigned char *new_data; |
||
93 | |||
94 | if (buf->status) |
||
95 | return buf->status; |
||
96 | |||
97 | new_data = NULL; |
||
98 | /* check for integer overflow */ |
||
99 | if (new_size / 2 == buf->data_size) |
||
100 | new_data = realloc (buf->data, new_size); |
||
101 | |||
102 | if (unlikely (new_data == NULL)) { |
||
103 | free (buf->data); |
||
104 | buf->data_size = 0; |
||
105 | buf->status = _cairo_error (CAIRO_STATUS_NO_MEMORY); |
||
106 | return buf->status; |
||
107 | } |
||
108 | |||
109 | buf->data = new_data; |
||
110 | buf->data_size = new_size; |
||
111 | |||
112 | return CAIRO_STATUS_SUCCESS; |
||
113 | } |
||
114 | |||
115 | /* Store the lowest num_bits bits of values into buf. |
||
116 | * |
||
117 | * Note: The bits of value above size_in_bits must be 0, (so don't lie |
||
118 | * about the size). |
||
119 | * |
||
120 | * See also _lzw_buf_store_pending which must be called after the last |
||
121 | * call to _lzw_buf_store_bits. |
||
122 | * |
||
123 | * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY. |
||
124 | */ |
||
125 | static void |
||
126 | _lzw_buf_store_bits (lzw_buf_t *buf, uint16_t value, int num_bits) |
||
127 | { |
||
128 | cairo_status_t status; |
||
129 | |||
130 | assert (value <= (1 << num_bits) - 1); |
||
131 | |||
132 | if (buf->status) |
||
133 | return; |
||
134 | |||
135 | buf->pending = (buf->pending << num_bits) | value; |
||
136 | buf->pending_bits += num_bits; |
||
137 | |||
138 | while (buf->pending_bits >= 8) { |
||
139 | if (buf->num_data >= buf->data_size) { |
||
140 | status = _lzw_buf_grow (buf); |
||
141 | if (unlikely (status)) |
||
142 | return; |
||
143 | } |
||
144 | buf->data[buf->num_data++] = buf->pending >> (buf->pending_bits - 8); |
||
145 | buf->pending_bits -= 8; |
||
146 | } |
||
147 | } |
||
148 | |||
149 | /* Store the last remaining pending bits into the buffer. |
||
150 | * |
||
151 | * Note: This function must be called after the last call to |
||
152 | * _lzw_buf_store_bits. |
||
153 | * |
||
154 | * Sets buf->status to either %CAIRO_STATUS_SUCCESS or %CAIRO_STATUS_NO_MEMORY. |
||
155 | */ |
||
156 | static void |
||
157 | _lzw_buf_store_pending (lzw_buf_t *buf) |
||
158 | { |
||
159 | cairo_status_t status; |
||
160 | |||
161 | if (buf->status) |
||
162 | return; |
||
163 | |||
164 | if (buf->pending_bits == 0) |
||
165 | return; |
||
166 | |||
167 | assert (buf->pending_bits < 8); |
||
168 | |||
169 | if (buf->num_data >= buf->data_size) { |
||
170 | status = _lzw_buf_grow (buf); |
||
171 | if (unlikely (status)) |
||
172 | return; |
||
173 | } |
||
174 | |||
175 | buf->data[buf->num_data++] = buf->pending << (8 - buf->pending_bits); |
||
176 | buf->pending_bits = 0; |
||
177 | } |
||
178 | |||
179 | /* LZW defines a few magic code values */ |
||
180 | #define LZW_CODE_CLEAR_TABLE 256 |
||
181 | #define LZW_CODE_EOD 257 |
||
182 | #define LZW_CODE_FIRST 258 |
||
183 | |||
184 | /* We pack three separate values into a symbol as follows: |
||
185 | * |
||
186 | * 12 bits (31 down to 20): CODE: code value used to represent this symbol |
||
187 | * 12 bits (19 down to 8): PREV: previous code value in chain |
||
188 | * 8 bits ( 7 down to 0): NEXT: next byte value in chain |
||
189 | */ |
||
190 | typedef uint32_t lzw_symbol_t; |
||
191 | |||
192 | #define LZW_SYMBOL_SET(sym, prev, next) ((sym) = ((prev) << 8)|(next)) |
||
193 | #define LZW_SYMBOL_SET_CODE(sym, code, prev, next) ((sym) = ((code << 20)|(prev) << 8)|(next)) |
||
194 | #define LZW_SYMBOL_GET_CODE(sym) (((sym) >> 20)) |
||
195 | #define LZW_SYMBOL_GET_PREV(sym) (((sym) >> 8) & 0x7ff) |
||
196 | #define LZW_SYMBOL_GET_BYTE(sym) (((sym) >> 0) & 0x0ff) |
||
197 | |||
198 | /* The PREV+NEXT fields can be seen as the key used to fetch values |
||
199 | * from the hash table, while the code is the value fetched. |
||
200 | */ |
||
201 | #define LZW_SYMBOL_KEY_MASK 0x000fffff |
||
202 | |||
203 | /* Since code values are only stored starting with 258 we can safely |
||
204 | * use a zero value to represent free slots in the hash table. */ |
||
205 | #define LZW_SYMBOL_FREE 0x00000000 |
||
206 | |||
207 | /* These really aren't very free for modifying. First, the PostScript |
||
208 | * specification sets the 9-12 bit range. Second, the encoding of |
||
209 | * lzw_symbol_t above also relies on 2 of LZW_BITS_MAX plus one byte |
||
210 | * fitting within 32 bits. |
||
211 | * |
||
212 | * But other than that, the LZW compression scheme could function with |
||
213 | * more bits per code. |
||
214 | */ |
||
215 | #define LZW_BITS_MIN 9 |
||
216 | #define LZW_BITS_MAX 12 |
||
217 | #define LZW_BITS_BOUNDARY(bits) ((1<<(bits))-1) |
||
218 | #define LZW_MAX_SYMBOLS (1< |
||
219 | |||
220 | #define LZW_SYMBOL_TABLE_SIZE 9013 |
||
221 | #define LZW_SYMBOL_MOD1 LZW_SYMBOL_TABLE_SIZE |
||
222 | #define LZW_SYMBOL_MOD2 9011 |
||
223 | |||
224 | typedef struct _lzw_symbol_table { |
||
225 | lzw_symbol_t table[LZW_SYMBOL_TABLE_SIZE]; |
||
226 | } lzw_symbol_table_t; |
||
227 | |||
228 | /* Initialize the hash table to entirely empty */ |
||
229 | static void |
||
230 | _lzw_symbol_table_init (lzw_symbol_table_t *table) |
||
231 | { |
||
232 | memset (table->table, 0, LZW_SYMBOL_TABLE_SIZE * sizeof (lzw_symbol_t)); |
||
233 | } |
||
234 | |||
235 | /* Lookup a symbol in the symbol table. The PREV and NEXT fields of |
||
236 | * symbol form the key for the lookup. |
||
237 | * |
||
238 | * If successful, then this function returns %TRUE and slot_ret will be |
||
239 | * left pointing at the result that will have the CODE field of |
||
240 | * interest. |
||
241 | * |
||
242 | * If the lookup fails, then this function returns %FALSE and slot_ret |
||
243 | * will be pointing at the location in the table to which a new CODE |
||
244 | * value should be stored along with PREV and NEXT. |
||
245 | */ |
||
246 | static cairo_bool_t |
||
247 | _lzw_symbol_table_lookup (lzw_symbol_table_t *table, |
||
248 | lzw_symbol_t symbol, |
||
249 | lzw_symbol_t **slot_ret) |
||
250 | { |
||
251 | /* The algorithm here is identical to that in cairo-hash.c. We |
||
252 | * copy it here to allow for a rather more efficient |
||
253 | * implementation due to several circumstances that do not apply |
||
254 | * to the more general case: |
||
255 | * |
||
256 | * 1) We have a known bound on the total number of symbols, so we |
||
257 | * have a fixed-size table without any copying when growing |
||
258 | * |
||
259 | * 2) We never delete any entries, so we don't need to |
||
260 | * support/check for DEAD entries during lookup. |
||
261 | * |
||
262 | * 3) The object fits in 32 bits so we store each object in its |
||
263 | * entirety within the table rather than storing objects |
||
264 | * externally and putting pointers in the table, (which here |
||
265 | * would just double the storage requirements and have negative |
||
266 | * impacts on memory locality). |
||
267 | */ |
||
268 | int i, idx, step, hash = symbol & LZW_SYMBOL_KEY_MASK; |
||
269 | lzw_symbol_t candidate; |
||
270 | |||
271 | idx = hash % LZW_SYMBOL_MOD1; |
||
272 | step = 0; |
||
273 | |||
274 | *slot_ret = NULL; |
||
275 | for (i = 0; i < LZW_SYMBOL_TABLE_SIZE; i++) |
||
276 | { |
||
277 | candidate = table->table[idx]; |
||
278 | if (candidate == LZW_SYMBOL_FREE) |
||
279 | { |
||
280 | *slot_ret = &table->table[idx]; |
||
281 | return FALSE; |
||
282 | } |
||
283 | else /* candidate is LIVE */ |
||
284 | { |
||
285 | if ((candidate & LZW_SYMBOL_KEY_MASK) == |
||
286 | (symbol & LZW_SYMBOL_KEY_MASK)) |
||
287 | { |
||
288 | *slot_ret = &table->table[idx]; |
||
289 | return TRUE; |
||
290 | } |
||
291 | } |
||
292 | |||
293 | if (step == 0) { |
||
294 | step = hash % LZW_SYMBOL_MOD2; |
||
295 | if (step == 0) |
||
296 | step = 1; |
||
297 | } |
||
298 | |||
299 | idx += step; |
||
300 | if (idx >= LZW_SYMBOL_TABLE_SIZE) |
||
301 | idx -= LZW_SYMBOL_TABLE_SIZE; |
||
302 | } |
||
303 | |||
304 | return FALSE; |
||
305 | } |
||
306 | |||
307 | /* Compress a bytestream using the LZW algorithm. |
||
308 | * |
||
309 | * This is an original implementation based on reading the |
||
310 | * specification of the LZWDecode filter in the PostScript Language |
||
311 | * Reference. The free parameters in the LZW algorithm are set to the |
||
312 | * values mandated by PostScript, (symbols encoded with widths from 9 |
||
313 | * to 12 bits). |
||
314 | * |
||
315 | * This function returns a pointer to a newly allocated buffer holding |
||
316 | * the compressed data, or %NULL if an out-of-memory situation |
||
317 | * occurs. |
||
318 | * |
||
319 | * Notice that any one of the _lzw_buf functions called here could |
||
320 | * trigger an out-of-memory condition. But lzw_buf_t uses cairo's |
||
321 | * shutdown-on-error idiom, so it's safe to continue to call into |
||
322 | * lzw_buf without having to check for errors, (until a final check at |
||
323 | * the end). |
||
324 | */ |
||
325 | unsigned char * |
||
326 | _cairo_lzw_compress (unsigned char *data, unsigned long *size_in_out) |
||
327 | { |
||
328 | int bytes_remaining = *size_in_out; |
||
329 | lzw_buf_t buf; |
||
330 | lzw_symbol_table_t table; |
||
331 | lzw_symbol_t symbol, *slot = NULL; /* just to squelch a warning */ |
||
332 | int code_next = LZW_CODE_FIRST; |
||
333 | int code_bits = LZW_BITS_MIN; |
||
334 | int prev, next = 0; /* just to squelch a warning */ |
||
335 | |||
336 | if (*size_in_out == 0) |
||
337 | return NULL; |
||
338 | |||
339 | _lzw_buf_init (&buf, *size_in_out); |
||
340 | |||
341 | _lzw_symbol_table_init (&table); |
||
342 | |||
343 | /* The LZW header is a clear table code. */ |
||
344 | _lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits); |
||
345 | |||
346 | while (1) { |
||
347 | |||
348 | /* Find the longest existing code in the symbol table that |
||
349 | * matches the current input, if any. */ |
||
350 | prev = *data++; |
||
351 | bytes_remaining--; |
||
352 | if (bytes_remaining) { |
||
353 | do |
||
354 | { |
||
355 | next = *data++; |
||
356 | bytes_remaining--; |
||
357 | LZW_SYMBOL_SET (symbol, prev, next); |
||
358 | if (_lzw_symbol_table_lookup (&table, symbol, &slot)) |
||
359 | prev = LZW_SYMBOL_GET_CODE (*slot); |
||
360 | } while (bytes_remaining && *slot != LZW_SYMBOL_FREE); |
||
361 | if (*slot == LZW_SYMBOL_FREE) { |
||
362 | data--; |
||
363 | bytes_remaining++; |
||
364 | } |
||
365 | } |
||
366 | |||
367 | /* Write the code into the output. This is either a byte read |
||
368 | * directly from the input, or a code from the last successful |
||
369 | * lookup. */ |
||
370 | _lzw_buf_store_bits (&buf, prev, code_bits); |
||
371 | |||
372 | if (bytes_remaining == 0) |
||
373 | break; |
||
374 | |||
375 | LZW_SYMBOL_SET_CODE (*slot, code_next++, prev, next); |
||
376 | |||
377 | if (code_next > LZW_BITS_BOUNDARY(code_bits)) |
||
378 | { |
||
379 | code_bits++; |
||
380 | if (code_bits > LZW_BITS_MAX) { |
||
381 | _lzw_symbol_table_init (&table); |
||
382 | _lzw_buf_store_bits (&buf, LZW_CODE_CLEAR_TABLE, code_bits - 1); |
||
383 | code_bits = LZW_BITS_MIN; |
||
384 | code_next = LZW_CODE_FIRST; |
||
385 | } |
||
386 | } |
||
387 | } |
||
388 | |||
389 | /* The LZW footer is an end-of-data code. */ |
||
390 | _lzw_buf_store_bits (&buf, LZW_CODE_EOD, code_bits); |
||
391 | |||
392 | _lzw_buf_store_pending (&buf); |
||
393 | |||
394 | /* See if we ever ran out of memory while writing to buf. */ |
||
395 | if (buf.status == CAIRO_STATUS_NO_MEMORY) { |
||
396 | *size_in_out = 0; |
||
397 | return NULL; |
||
398 | } |
||
399 | |||
400 | assert (buf.status == CAIRO_STATUS_SUCCESS); |
||
401 | |||
402 | *size_in_out = buf.num_data; |
||
403 | return buf.data; |
||
404 | }> |