Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5191 | serge | 1 | /* An expandable hash tables datatype. |
2 | Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2009, 2010 |
||
3 | Free Software Foundation, Inc. |
||
4 | Contributed by Vladimir Makarov (vmakarov@cygnus.com). |
||
5 | |||
6 | This file is part of the libiberty library. |
||
7 | Libiberty is free software; you can redistribute it and/or |
||
8 | modify it under the terms of the GNU Library General Public |
||
9 | License as published by the Free Software Foundation; either |
||
10 | version 2 of the License, or (at your option) any later version. |
||
11 | |||
12 | Libiberty is distributed in the hope that it will be useful, |
||
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | Library General Public License for more details. |
||
16 | |||
17 | You should have received a copy of the GNU Library General Public |
||
18 | License along with libiberty; see the file COPYING.LIB. If |
||
19 | not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, |
||
20 | Boston, MA 02110-1301, USA. */ |
||
21 | |||
22 | /* This package implements basic hash table functionality. It is possible |
||
23 | to search for an entry, create an entry and destroy an entry. |
||
24 | |||
25 | Elements in the table are generic pointers. |
||
26 | |||
27 | The size of the table is not fixed; if the occupancy of the table |
||
28 | grows too high the hash table will be expanded. |
||
29 | |||
30 | The abstract data implementation is based on generalized Algorithm D |
||
31 | from Knuth's book "The art of computer programming". Hash table is |
||
32 | expanded by creation of new hash table and transferring elements from |
||
33 | the old table to the new table. */ |
||
34 | |||
35 | #ifdef HAVE_CONFIG_H |
||
36 | #include "config.h" |
||
37 | #endif |
||
38 | |||
39 | #include |
||
40 | |||
41 | #ifdef HAVE_STDLIB_H |
||
42 | #include |
||
43 | #endif |
||
44 | #ifdef HAVE_STRING_H |
||
45 | #include |
||
46 | #endif |
||
47 | #ifdef HAVE_MALLOC_H |
||
48 | #include |
||
49 | #endif |
||
50 | #ifdef HAVE_LIMITS_H |
||
51 | #include |
||
52 | #endif |
||
53 | #ifdef HAVE_INTTYPES_H |
||
54 | #include |
||
55 | #endif |
||
56 | #ifdef HAVE_STDINT_H |
||
57 | #include |
||
58 | #endif |
||
59 | |||
60 | #include |
||
61 | |||
62 | #include "libiberty.h" |
||
63 | #include "ansidecl.h" |
||
64 | #include "hashtab.h" |
||
65 | |||
66 | #ifndef CHAR_BIT |
||
67 | #define CHAR_BIT 8 |
||
68 | #endif |
||
69 | |||
70 | static unsigned int higher_prime_index (unsigned long); |
||
71 | static hashval_t htab_mod_1 (hashval_t, hashval_t, hashval_t, int); |
||
72 | static hashval_t htab_mod (hashval_t, htab_t); |
||
73 | static hashval_t htab_mod_m2 (hashval_t, htab_t); |
||
74 | static hashval_t hash_pointer (const void *); |
||
75 | static int eq_pointer (const void *, const void *); |
||
76 | static int htab_expand (htab_t); |
||
77 | static PTR *find_empty_slot_for_expand (htab_t, hashval_t); |
||
78 | |||
79 | /* At some point, we could make these be NULL, and modify the |
||
80 | hash-table routines to handle NULL specially; that would avoid |
||
81 | function-call overhead for the common case of hashing pointers. */ |
||
82 | htab_hash htab_hash_pointer = hash_pointer; |
||
83 | htab_eq htab_eq_pointer = eq_pointer; |
||
84 | |||
85 | /* Table of primes and multiplicative inverses. |
||
86 | |||
87 | Note that these are not minimally reduced inverses. Unlike when generating |
||
88 | code to divide by a constant, we want to be able to use the same algorithm |
||
89 | all the time. All of these inverses (are implied to) have bit 32 set. |
||
90 | |||
91 | For the record, here's the function that computed the table; it's a |
||
92 | vastly simplified version of the function of the same name from gcc. */ |
||
93 | |||
94 | #if 0 |
||
95 | unsigned int |
||
96 | ceil_log2 (unsigned int x) |
||
97 | { |
||
98 | int i; |
||
99 | for (i = 31; i >= 0 ; --i) |
||
100 | if (x > (1u << i)) |
||
101 | return i+1; |
||
102 | abort (); |
||
103 | } |
||
104 | |||
105 | unsigned int |
||
106 | choose_multiplier (unsigned int d, unsigned int *mlp, unsigned char *shiftp) |
||
107 | { |
||
108 | unsigned long long mhigh; |
||
109 | double nx; |
||
110 | int lgup, post_shift; |
||
111 | int pow, pow2; |
||
112 | int n = 32, precision = 32; |
||
113 | |||
114 | lgup = ceil_log2 (d); |
||
115 | pow = n + lgup; |
||
116 | pow2 = n + lgup - precision; |
||
117 | |||
118 | nx = ldexp (1.0, pow) + ldexp (1.0, pow2); |
||
119 | mhigh = nx / d; |
||
120 | |||
121 | *shiftp = lgup - 1; |
||
122 | *mlp = mhigh; |
||
123 | return mhigh >> 32; |
||
124 | } |
||
125 | #endif |
||
126 | |||
127 | struct prime_ent |
||
128 | { |
||
129 | hashval_t prime; |
||
130 | hashval_t inv; |
||
131 | hashval_t inv_m2; /* inverse of prime-2 */ |
||
132 | hashval_t shift; |
||
133 | }; |
||
134 | |||
135 | static struct prime_ent const prime_tab[] = { |
||
136 | { 7, 0x24924925, 0x9999999b, 2 }, |
||
137 | { 13, 0x3b13b13c, 0x745d1747, 3 }, |
||
138 | { 31, 0x08421085, 0x1a7b9612, 4 }, |
||
139 | { 61, 0x0c9714fc, 0x15b1e5f8, 5 }, |
||
140 | { 127, 0x02040811, 0x0624dd30, 6 }, |
||
141 | { 251, 0x05197f7e, 0x073260a5, 7 }, |
||
142 | { 509, 0x01824366, 0x02864fc8, 8 }, |
||
143 | { 1021, 0x00c0906d, 0x014191f7, 9 }, |
||
144 | { 2039, 0x0121456f, 0x0161e69e, 10 }, |
||
145 | { 4093, 0x00300902, 0x00501908, 11 }, |
||
146 | { 8191, 0x00080041, 0x00180241, 12 }, |
||
147 | { 16381, 0x000c0091, 0x00140191, 13 }, |
||
148 | { 32749, 0x002605a5, 0x002a06e6, 14 }, |
||
149 | { 65521, 0x000f00e2, 0x00110122, 15 }, |
||
150 | { 131071, 0x00008001, 0x00018003, 16 }, |
||
151 | { 262139, 0x00014002, 0x0001c004, 17 }, |
||
152 | { 524287, 0x00002001, 0x00006001, 18 }, |
||
153 | { 1048573, 0x00003001, 0x00005001, 19 }, |
||
154 | { 2097143, 0x00004801, 0x00005801, 20 }, |
||
155 | { 4194301, 0x00000c01, 0x00001401, 21 }, |
||
156 | { 8388593, 0x00001e01, 0x00002201, 22 }, |
||
157 | { 16777213, 0x00000301, 0x00000501, 23 }, |
||
158 | { 33554393, 0x00001381, 0x00001481, 24 }, |
||
159 | { 67108859, 0x00000141, 0x000001c1, 25 }, |
||
160 | { 134217689, 0x000004e1, 0x00000521, 26 }, |
||
161 | { 268435399, 0x00000391, 0x000003b1, 27 }, |
||
162 | { 536870909, 0x00000019, 0x00000029, 28 }, |
||
163 | { 1073741789, 0x0000008d, 0x00000095, 29 }, |
||
164 | { 2147483647, 0x00000003, 0x00000007, 30 }, |
||
165 | /* Avoid "decimal constant so large it is unsigned" for 4294967291. */ |
||
166 | { 0xfffffffb, 0x00000006, 0x00000008, 31 } |
||
167 | }; |
||
168 | |||
169 | /* The following function returns an index into the above table of the |
||
170 | nearest prime number which is greater than N, and near a power of two. */ |
||
171 | |||
172 | static unsigned int |
||
173 | higher_prime_index (unsigned long n) |
||
174 | { |
||
175 | unsigned int low = 0; |
||
176 | unsigned int high = sizeof(prime_tab) / sizeof(prime_tab[0]); |
||
177 | |||
178 | while (low != high) |
||
179 | { |
||
180 | unsigned int mid = low + (high - low) / 2; |
||
181 | if (n > prime_tab[mid].prime) |
||
182 | low = mid + 1; |
||
183 | else |
||
184 | high = mid; |
||
185 | } |
||
186 | |||
187 | /* If we've run out of primes, abort. */ |
||
188 | if (n > prime_tab[low].prime) |
||
189 | { |
||
190 | fprintf (stderr, "Cannot find prime bigger than %lu\n", n); |
||
191 | abort (); |
||
192 | } |
||
193 | |||
194 | return low; |
||
195 | } |
||
196 | |||
197 | /* Returns non-zero if P1 and P2 are equal. */ |
||
198 | |||
199 | static int |
||
200 | eq_pointer (const PTR p1, const PTR p2) |
||
201 | { |
||
202 | return p1 == p2; |
||
203 | } |
||
204 | |||
205 | |||
206 | /* The parens around the function names in the next two definitions |
||
207 | are essential in order to prevent macro expansions of the name. |
||
208 | The bodies, however, are expanded as expected, so they are not |
||
209 | recursive definitions. */ |
||
210 | |||
211 | /* Return the current size of given hash table. */ |
||
212 | |||
213 | #define htab_size(htab) ((htab)->size) |
||
214 | |||
215 | size_t |
||
216 | (htab_size) (htab_t htab) |
||
217 | { |
||
218 | return htab_size (htab); |
||
219 | } |
||
220 | |||
221 | /* Return the current number of elements in given hash table. */ |
||
222 | |||
223 | #define htab_elements(htab) ((htab)->n_elements - (htab)->n_deleted) |
||
224 | |||
225 | size_t |
||
226 | (htab_elements) (htab_t htab) |
||
227 | { |
||
228 | return htab_elements (htab); |
||
229 | } |
||
230 | |||
231 | /* Return X % Y. */ |
||
232 | |||
233 | static inline hashval_t |
||
234 | htab_mod_1 (hashval_t x, hashval_t y, hashval_t inv, int shift) |
||
235 | { |
||
236 | /* The multiplicative inverses computed above are for 32-bit types, and |
||
237 | requires that we be able to compute a highpart multiply. */ |
||
238 | #ifdef UNSIGNED_64BIT_TYPE |
||
239 | __extension__ typedef UNSIGNED_64BIT_TYPE ull; |
||
240 | if (sizeof (hashval_t) * CHAR_BIT <= 32) |
||
241 | { |
||
242 | hashval_t t1, t2, t3, t4, q, r; |
||
243 | |||
244 | t1 = ((ull)x * inv) >> 32; |
||
245 | t2 = x - t1; |
||
246 | t3 = t2 >> 1; |
||
247 | t4 = t1 + t3; |
||
248 | q = t4 >> shift; |
||
249 | r = x - (q * y); |
||
250 | |||
251 | return r; |
||
252 | } |
||
253 | #endif |
||
254 | |||
255 | /* Otherwise just use the native division routines. */ |
||
256 | return x % y; |
||
257 | } |
||
258 | |||
259 | /* Compute the primary hash for HASH given HTAB's current size. */ |
||
260 | |||
261 | static inline hashval_t |
||
262 | htab_mod (hashval_t hash, htab_t htab) |
||
263 | { |
||
264 | const struct prime_ent *p = &prime_tab[htab->size_prime_index]; |
||
265 | return htab_mod_1 (hash, p->prime, p->inv, p->shift); |
||
266 | } |
||
267 | |||
268 | /* Compute the secondary hash for HASH given HTAB's current size. */ |
||
269 | |||
270 | static inline hashval_t |
||
271 | htab_mod_m2 (hashval_t hash, htab_t htab) |
||
272 | { |
||
273 | const struct prime_ent *p = &prime_tab[htab->size_prime_index]; |
||
274 | return 1 + htab_mod_1 (hash, p->prime - 2, p->inv_m2, p->shift); |
||
275 | } |
||
276 | |||
277 | /* This function creates table with length slightly longer than given |
||
278 | source length. Created hash table is initiated as empty (all the |
||
279 | hash table entries are HTAB_EMPTY_ENTRY). The function returns the |
||
280 | created hash table, or NULL if memory allocation fails. */ |
||
281 | |||
282 | htab_t |
||
283 | htab_create_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, |
||
284 | htab_del del_f, htab_alloc alloc_f, htab_free free_f) |
||
285 | { |
||
286 | return htab_create_typed_alloc (size, hash_f, eq_f, del_f, alloc_f, alloc_f, |
||
287 | free_f); |
||
288 | } |
||
289 | |||
290 | /* As above, but uses the variants of ALLOC_F and FREE_F which accept |
||
291 | an extra argument. */ |
||
292 | |||
293 | htab_t |
||
294 | htab_create_alloc_ex (size_t size, htab_hash hash_f, htab_eq eq_f, |
||
295 | htab_del del_f, void *alloc_arg, |
||
296 | htab_alloc_with_arg alloc_f, |
||
297 | htab_free_with_arg free_f) |
||
298 | { |
||
299 | htab_t result; |
||
300 | unsigned int size_prime_index; |
||
301 | |||
302 | size_prime_index = higher_prime_index (size); |
||
303 | size = prime_tab[size_prime_index].prime; |
||
304 | |||
305 | result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab)); |
||
306 | if (result == NULL) |
||
307 | return NULL; |
||
308 | result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR)); |
||
309 | if (result->entries == NULL) |
||
310 | { |
||
311 | if (free_f != NULL) |
||
312 | (*free_f) (alloc_arg, result); |
||
313 | return NULL; |
||
314 | } |
||
315 | result->size = size; |
||
316 | result->size_prime_index = size_prime_index; |
||
317 | result->hash_f = hash_f; |
||
318 | result->eq_f = eq_f; |
||
319 | result->del_f = del_f; |
||
320 | result->alloc_arg = alloc_arg; |
||
321 | result->alloc_with_arg_f = alloc_f; |
||
322 | result->free_with_arg_f = free_f; |
||
323 | return result; |
||
324 | } |
||
325 | |||
326 | /* |
||
327 | |||
328 | @deftypefn Supplemental htab_t htab_create_typed_alloc (size_t @var{size}, @ |
||
329 | htab_hash @var{hash_f}, htab_eq @var{eq_f}, htab_del @var{del_f}, @ |
||
330 | htab_alloc @var{alloc_tab_f}, htab_alloc @var{alloc_f}, @ |
||
331 | htab_free @var{free_f}) |
||
332 | |||
333 | This function creates a hash table that uses two different allocators |
||
334 | @var{alloc_tab_f} and @var{alloc_f} to use for allocating the table itself |
||
335 | and its entries respectively. This is useful when variables of different |
||
336 | types need to be allocated with different allocators. |
||
337 | |||
338 | The created hash table is slightly larger than @var{size} and it is |
||
339 | initially empty (all the hash table entries are @code{HTAB_EMPTY_ENTRY}). |
||
340 | The function returns the created hash table, or @code{NULL} if memory |
||
341 | allocation fails. |
||
342 | |||
343 | @end deftypefn |
||
344 | |||
345 | */ |
||
346 | |||
347 | htab_t |
||
348 | htab_create_typed_alloc (size_t size, htab_hash hash_f, htab_eq eq_f, |
||
349 | htab_del del_f, htab_alloc alloc_tab_f, |
||
350 | htab_alloc alloc_f, htab_free free_f) |
||
351 | { |
||
352 | htab_t result; |
||
353 | unsigned int size_prime_index; |
||
354 | |||
355 | size_prime_index = higher_prime_index (size); |
||
356 | size = prime_tab[size_prime_index].prime; |
||
357 | |||
358 | result = (htab_t) (*alloc_tab_f) (1, sizeof (struct htab)); |
||
359 | if (result == NULL) |
||
360 | return NULL; |
||
361 | result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR)); |
||
362 | if (result->entries == NULL) |
||
363 | { |
||
364 | if (free_f != NULL) |
||
365 | (*free_f) (result); |
||
366 | return NULL; |
||
367 | } |
||
368 | result->size = size; |
||
369 | result->size_prime_index = size_prime_index; |
||
370 | result->hash_f = hash_f; |
||
371 | result->eq_f = eq_f; |
||
372 | result->del_f = del_f; |
||
373 | result->alloc_f = alloc_f; |
||
374 | result->free_f = free_f; |
||
375 | return result; |
||
376 | } |
||
377 | |||
378 | |||
379 | /* Update the function pointers and allocation parameter in the htab_t. */ |
||
380 | |||
381 | void |
||
382 | htab_set_functions_ex (htab_t htab, htab_hash hash_f, htab_eq eq_f, |
||
383 | htab_del del_f, PTR alloc_arg, |
||
384 | htab_alloc_with_arg alloc_f, htab_free_with_arg free_f) |
||
385 | { |
||
386 | htab->hash_f = hash_f; |
||
387 | htab->eq_f = eq_f; |
||
388 | htab->del_f = del_f; |
||
389 | htab->alloc_arg = alloc_arg; |
||
390 | htab->alloc_with_arg_f = alloc_f; |
||
391 | htab->free_with_arg_f = free_f; |
||
392 | } |
||
393 | |||
394 | /* These functions exist solely for backward compatibility. */ |
||
395 | |||
396 | #undef htab_create |
||
397 | htab_t |
||
398 | htab_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) |
||
399 | { |
||
400 | return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free); |
||
401 | } |
||
402 | |||
403 | htab_t |
||
404 | htab_try_create (size_t size, htab_hash hash_f, htab_eq eq_f, htab_del del_f) |
||
405 | { |
||
406 | return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free); |
||
407 | } |
||
408 | |||
409 | /* This function frees all memory allocated for given hash table. |
||
410 | Naturally the hash table must already exist. */ |
||
411 | |||
412 | void |
||
413 | htab_delete (htab_t htab) |
||
414 | { |
||
415 | size_t size = htab_size (htab); |
||
416 | PTR *entries = htab->entries; |
||
417 | int i; |
||
418 | |||
419 | if (htab->del_f) |
||
420 | for (i = size - 1; i >= 0; i--) |
||
421 | if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) |
||
422 | (*htab->del_f) (entries[i]); |
||
423 | |||
424 | if (htab->free_f != NULL) |
||
425 | { |
||
426 | (*htab->free_f) (entries); |
||
427 | (*htab->free_f) (htab); |
||
428 | } |
||
429 | else if (htab->free_with_arg_f != NULL) |
||
430 | { |
||
431 | (*htab->free_with_arg_f) (htab->alloc_arg, entries); |
||
432 | (*htab->free_with_arg_f) (htab->alloc_arg, htab); |
||
433 | } |
||
434 | } |
||
435 | |||
436 | /* This function clears all entries in the given hash table. */ |
||
437 | |||
438 | void |
||
439 | htab_empty (htab_t htab) |
||
440 | { |
||
441 | size_t size = htab_size (htab); |
||
442 | PTR *entries = htab->entries; |
||
443 | int i; |
||
444 | |||
445 | if (htab->del_f) |
||
446 | for (i = size - 1; i >= 0; i--) |
||
447 | if (entries[i] != HTAB_EMPTY_ENTRY && entries[i] != HTAB_DELETED_ENTRY) |
||
448 | (*htab->del_f) (entries[i]); |
||
449 | |||
450 | /* Instead of clearing megabyte, downsize the table. */ |
||
451 | if (size > 1024*1024 / sizeof (PTR)) |
||
452 | { |
||
453 | int nindex = higher_prime_index (1024 / sizeof (PTR)); |
||
454 | int nsize = prime_tab[nindex].prime; |
||
455 | |||
456 | if (htab->free_f != NULL) |
||
457 | (*htab->free_f) (htab->entries); |
||
458 | else if (htab->free_with_arg_f != NULL) |
||
459 | (*htab->free_with_arg_f) (htab->alloc_arg, htab->entries); |
||
460 | if (htab->alloc_with_arg_f != NULL) |
||
461 | htab->entries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, |
||
462 | sizeof (PTR *)); |
||
463 | else |
||
464 | htab->entries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); |
||
465 | htab->size = nsize; |
||
466 | htab->size_prime_index = nindex; |
||
467 | } |
||
468 | else |
||
469 | memset (entries, 0, size * sizeof (PTR)); |
||
470 | htab->n_deleted = 0; |
||
471 | htab->n_elements = 0; |
||
472 | } |
||
473 | |||
474 | /* Similar to htab_find_slot, but without several unwanted side effects: |
||
475 | - Does not call htab->eq_f when it finds an existing entry. |
||
476 | - Does not change the count of elements/searches/collisions in the |
||
477 | hash table. |
||
478 | This function also assumes there are no deleted entries in the table. |
||
479 | HASH is the hash value for the element to be inserted. */ |
||
480 | |||
481 | static PTR * |
||
482 | find_empty_slot_for_expand (htab_t htab, hashval_t hash) |
||
483 | { |
||
484 | hashval_t index = htab_mod (hash, htab); |
||
485 | size_t size = htab_size (htab); |
||
486 | PTR *slot = htab->entries + index; |
||
487 | hashval_t hash2; |
||
488 | |||
489 | if (*slot == HTAB_EMPTY_ENTRY) |
||
490 | return slot; |
||
491 | else if (*slot == HTAB_DELETED_ENTRY) |
||
492 | abort (); |
||
493 | |||
494 | hash2 = htab_mod_m2 (hash, htab); |
||
495 | for (;;) |
||
496 | { |
||
497 | index += hash2; |
||
498 | if (index >= size) |
||
499 | index -= size; |
||
500 | |||
501 | slot = htab->entries + index; |
||
502 | if (*slot == HTAB_EMPTY_ENTRY) |
||
503 | return slot; |
||
504 | else if (*slot == HTAB_DELETED_ENTRY) |
||
505 | abort (); |
||
506 | } |
||
507 | } |
||
508 | |||
509 | /* The following function changes size of memory allocated for the |
||
510 | entries and repeatedly inserts the table elements. The occupancy |
||
511 | of the table after the call will be about 50%. Naturally the hash |
||
512 | table must already exist. Remember also that the place of the |
||
513 | table entries is changed. If memory allocation failures are allowed, |
||
514 | this function will return zero, indicating that the table could not be |
||
515 | expanded. If all goes well, it will return a non-zero value. */ |
||
516 | |||
517 | static int |
||
518 | htab_expand (htab_t htab) |
||
519 | { |
||
520 | PTR *oentries; |
||
521 | PTR *olimit; |
||
522 | PTR *p; |
||
523 | PTR *nentries; |
||
524 | size_t nsize, osize, elts; |
||
525 | unsigned int oindex, nindex; |
||
526 | |||
527 | oentries = htab->entries; |
||
528 | oindex = htab->size_prime_index; |
||
529 | osize = htab->size; |
||
530 | olimit = oentries + osize; |
||
531 | elts = htab_elements (htab); |
||
532 | |||
533 | /* Resize only when table after removal of unused elements is either |
||
534 | too full or too empty. */ |
||
535 | if (elts * 2 > osize || (elts * 8 < osize && osize > 32)) |
||
536 | { |
||
537 | nindex = higher_prime_index (elts * 2); |
||
538 | nsize = prime_tab[nindex].prime; |
||
539 | } |
||
540 | else |
||
541 | { |
||
542 | nindex = oindex; |
||
543 | nsize = osize; |
||
544 | } |
||
545 | |||
546 | if (htab->alloc_with_arg_f != NULL) |
||
547 | nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize, |
||
548 | sizeof (PTR *)); |
||
549 | else |
||
550 | nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *)); |
||
551 | if (nentries == NULL) |
||
552 | return 0; |
||
553 | htab->entries = nentries; |
||
554 | htab->size = nsize; |
||
555 | htab->size_prime_index = nindex; |
||
556 | htab->n_elements -= htab->n_deleted; |
||
557 | htab->n_deleted = 0; |
||
558 | |||
559 | p = oentries; |
||
560 | do |
||
561 | { |
||
562 | PTR x = *p; |
||
563 | |||
564 | if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) |
||
565 | { |
||
566 | PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x)); |
||
567 | |||
568 | *q = x; |
||
569 | } |
||
570 | |||
571 | p++; |
||
572 | } |
||
573 | while (p < olimit); |
||
574 | |||
575 | if (htab->free_f != NULL) |
||
576 | (*htab->free_f) (oentries); |
||
577 | else if (htab->free_with_arg_f != NULL) |
||
578 | (*htab->free_with_arg_f) (htab->alloc_arg, oentries); |
||
579 | return 1; |
||
580 | } |
||
581 | |||
582 | /* This function searches for a hash table entry equal to the given |
||
583 | element. It cannot be used to insert or delete an element. */ |
||
584 | |||
585 | PTR |
||
586 | htab_find_with_hash (htab_t htab, const PTR element, hashval_t hash) |
||
587 | { |
||
588 | hashval_t index, hash2; |
||
589 | size_t size; |
||
590 | PTR entry; |
||
591 | |||
592 | htab->searches++; |
||
593 | size = htab_size (htab); |
||
594 | index = htab_mod (hash, htab); |
||
595 | |||
596 | entry = htab->entries[index]; |
||
597 | if (entry == HTAB_EMPTY_ENTRY |
||
598 | || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) |
||
599 | return entry; |
||
600 | |||
601 | hash2 = htab_mod_m2 (hash, htab); |
||
602 | for (;;) |
||
603 | { |
||
604 | htab->collisions++; |
||
605 | index += hash2; |
||
606 | if (index >= size) |
||
607 | index -= size; |
||
608 | |||
609 | entry = htab->entries[index]; |
||
610 | if (entry == HTAB_EMPTY_ENTRY |
||
611 | || (entry != HTAB_DELETED_ENTRY && (*htab->eq_f) (entry, element))) |
||
612 | return entry; |
||
613 | } |
||
614 | } |
||
615 | |||
616 | /* Like htab_find_slot_with_hash, but compute the hash value from the |
||
617 | element. */ |
||
618 | |||
619 | PTR |
||
620 | htab_find (htab_t htab, const PTR element) |
||
621 | { |
||
622 | return htab_find_with_hash (htab, element, (*htab->hash_f) (element)); |
||
623 | } |
||
624 | |||
625 | /* This function searches for a hash table slot containing an entry |
||
626 | equal to the given element. To delete an entry, call this with |
||
627 | insert=NO_INSERT, then call htab_clear_slot on the slot returned |
||
628 | (possibly after doing some checks). To insert an entry, call this |
||
629 | with insert=INSERT, then write the value you want into the returned |
||
630 | slot. When inserting an entry, NULL may be returned if memory |
||
631 | allocation fails. */ |
||
632 | |||
633 | PTR * |
||
634 | htab_find_slot_with_hash (htab_t htab, const PTR element, |
||
635 | hashval_t hash, enum insert_option insert) |
||
636 | { |
||
637 | PTR *first_deleted_slot; |
||
638 | hashval_t index, hash2; |
||
639 | size_t size; |
||
640 | PTR entry; |
||
641 | |||
642 | size = htab_size (htab); |
||
643 | if (insert == INSERT && size * 3 <= htab->n_elements * 4) |
||
644 | { |
||
645 | if (htab_expand (htab) == 0) |
||
646 | return NULL; |
||
647 | size = htab_size (htab); |
||
648 | } |
||
649 | |||
650 | index = htab_mod (hash, htab); |
||
651 | |||
652 | htab->searches++; |
||
653 | first_deleted_slot = NULL; |
||
654 | |||
655 | entry = htab->entries[index]; |
||
656 | if (entry == HTAB_EMPTY_ENTRY) |
||
657 | goto empty_entry; |
||
658 | else if (entry == HTAB_DELETED_ENTRY) |
||
659 | first_deleted_slot = &htab->entries[index]; |
||
660 | else if ((*htab->eq_f) (entry, element)) |
||
661 | return &htab->entries[index]; |
||
662 | |||
663 | hash2 = htab_mod_m2 (hash, htab); |
||
664 | for (;;) |
||
665 | { |
||
666 | htab->collisions++; |
||
667 | index += hash2; |
||
668 | if (index >= size) |
||
669 | index -= size; |
||
670 | |||
671 | entry = htab->entries[index]; |
||
672 | if (entry == HTAB_EMPTY_ENTRY) |
||
673 | goto empty_entry; |
||
674 | else if (entry == HTAB_DELETED_ENTRY) |
||
675 | { |
||
676 | if (!first_deleted_slot) |
||
677 | first_deleted_slot = &htab->entries[index]; |
||
678 | } |
||
679 | else if ((*htab->eq_f) (entry, element)) |
||
680 | return &htab->entries[index]; |
||
681 | } |
||
682 | |||
683 | empty_entry: |
||
684 | if (insert == NO_INSERT) |
||
685 | return NULL; |
||
686 | |||
687 | if (first_deleted_slot) |
||
688 | { |
||
689 | htab->n_deleted--; |
||
690 | *first_deleted_slot = HTAB_EMPTY_ENTRY; |
||
691 | return first_deleted_slot; |
||
692 | } |
||
693 | |||
694 | htab->n_elements++; |
||
695 | return &htab->entries[index]; |
||
696 | } |
||
697 | |||
698 | /* Like htab_find_slot_with_hash, but compute the hash value from the |
||
699 | element. */ |
||
700 | |||
701 | PTR * |
||
702 | htab_find_slot (htab_t htab, const PTR element, enum insert_option insert) |
||
703 | { |
||
704 | return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element), |
||
705 | insert); |
||
706 | } |
||
707 | |||
708 | /* This function deletes an element with the given value from hash |
||
709 | table (the hash is computed from the element). If there is no matching |
||
710 | element in the hash table, this function does nothing. */ |
||
711 | |||
712 | void |
||
713 | htab_remove_elt (htab_t htab, PTR element) |
||
714 | { |
||
715 | htab_remove_elt_with_hash (htab, element, (*htab->hash_f) (element)); |
||
716 | } |
||
717 | |||
718 | |||
719 | /* This function deletes an element with the given value from hash |
||
720 | table. If there is no matching element in the hash table, this |
||
721 | function does nothing. */ |
||
722 | |||
723 | void |
||
724 | htab_remove_elt_with_hash (htab_t htab, PTR element, hashval_t hash) |
||
725 | { |
||
726 | PTR *slot; |
||
727 | |||
728 | slot = htab_find_slot_with_hash (htab, element, hash, NO_INSERT); |
||
729 | if (*slot == HTAB_EMPTY_ENTRY) |
||
730 | return; |
||
731 | |||
732 | if (htab->del_f) |
||
733 | (*htab->del_f) (*slot); |
||
734 | |||
735 | *slot = HTAB_DELETED_ENTRY; |
||
736 | htab->n_deleted++; |
||
737 | } |
||
738 | |||
739 | /* This function clears a specified slot in a hash table. It is |
||
740 | useful when you've already done the lookup and don't want to do it |
||
741 | again. */ |
||
742 | |||
743 | void |
||
744 | htab_clear_slot (htab_t htab, PTR *slot) |
||
745 | { |
||
746 | if (slot < htab->entries || slot >= htab->entries + htab_size (htab) |
||
747 | || *slot == HTAB_EMPTY_ENTRY || *slot == HTAB_DELETED_ENTRY) |
||
748 | abort (); |
||
749 | |||
750 | if (htab->del_f) |
||
751 | (*htab->del_f) (*slot); |
||
752 | |||
753 | *slot = HTAB_DELETED_ENTRY; |
||
754 | htab->n_deleted++; |
||
755 | } |
||
756 | |||
757 | /* This function scans over the entire hash table calling |
||
758 | CALLBACK for each live entry. If CALLBACK returns false, |
||
759 | the iteration stops. INFO is passed as CALLBACK's second |
||
760 | argument. */ |
||
761 | |||
762 | void |
||
763 | htab_traverse_noresize (htab_t htab, htab_trav callback, PTR info) |
||
764 | { |
||
765 | PTR *slot; |
||
766 | PTR *limit; |
||
767 | |||
768 | slot = htab->entries; |
||
769 | limit = slot + htab_size (htab); |
||
770 | |||
771 | do |
||
772 | { |
||
773 | PTR x = *slot; |
||
774 | |||
775 | if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) |
||
776 | if (!(*callback) (slot, info)) |
||
777 | break; |
||
778 | } |
||
779 | while (++slot < limit); |
||
780 | } |
||
781 | |||
782 | /* Like htab_traverse_noresize, but does resize the table when it is |
||
783 | too empty to improve effectivity of subsequent calls. */ |
||
784 | |||
785 | void |
||
786 | htab_traverse (htab_t htab, htab_trav callback, PTR info) |
||
787 | { |
||
788 | size_t size = htab_size (htab); |
||
789 | if (htab_elements (htab) * 8 < size && size > 32) |
||
790 | htab_expand (htab); |
||
791 | |||
792 | htab_traverse_noresize (htab, callback, info); |
||
793 | } |
||
794 | |||
795 | /* Return the fraction of fixed collisions during all work with given |
||
796 | hash table. */ |
||
797 | |||
798 | double |
||
799 | htab_collisions (htab_t htab) |
||
800 | { |
||
801 | if (htab->searches == 0) |
||
802 | return 0.0; |
||
803 | |||
804 | return (double) htab->collisions / (double) htab->searches; |
||
805 | } |
||
806 | |||
807 | /* Hash P as a null-terminated string. |
||
808 | |||
809 | Copied from gcc/hashtable.c. Zack had the following to say with respect |
||
810 | to applicability, though note that unlike hashtable.c, this hash table |
||
811 | implementation re-hashes rather than chain buckets. |
||
812 | |||
813 | http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html |
||
814 | From: Zack Weinberg |
||
815 | Date: Fri, 17 Aug 2001 02:15:56 -0400 |
||
816 | |||
817 | I got it by extracting all the identifiers from all the source code |
||
818 | I had lying around in mid-1999, and testing many recurrences of |
||
819 | the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either |
||
820 | prime numbers or the appropriate identity. This was the best one. |
||
821 | I don't remember exactly what constituted "best", except I was |
||
822 | looking at bucket-length distributions mostly. |
||
823 | |||
824 | So it should be very good at hashing identifiers, but might not be |
||
825 | as good at arbitrary strings. |
||
826 | |||
827 | I'll add that it thoroughly trounces the hash functions recommended |
||
828 | for this use at http://burtleburtle.net/bob/hash/index.html, both |
||
829 | on speed and bucket distribution. I haven't tried it against the |
||
830 | function they just started using for Perl's hashes. */ |
||
831 | |||
832 | hashval_t |
||
833 | htab_hash_string (const PTR p) |
||
834 | { |
||
835 | const unsigned char *str = (const unsigned char *) p; |
||
836 | hashval_t r = 0; |
||
837 | unsigned char c; |
||
838 | |||
839 | while ((c = *str++) != 0) |
||
840 | r = r * 67 + c - 113; |
||
841 | |||
842 | return r; |
||
843 | } |
||
844 | |||
845 | /* DERIVED FROM: |
||
846 | -------------------------------------------------------------------- |
||
847 | lookup2.c, by Bob Jenkins, December 1996, Public Domain. |
||
848 | hash(), hash2(), hash3, and mix() are externally useful functions. |
||
849 | Routines to test the hash are included if SELF_TEST is defined. |
||
850 | You can use this free for any purpose. It has no warranty. |
||
851 | -------------------------------------------------------------------- |
||
852 | */ |
||
853 | |||
854 | /* |
||
855 | -------------------------------------------------------------------- |
||
856 | mix -- mix 3 32-bit values reversibly. |
||
857 | For every delta with one or two bit set, and the deltas of all three |
||
858 | high bits or all three low bits, whether the original value of a,b,c |
||
859 | is almost all zero or is uniformly distributed, |
||
860 | * If mix() is run forward or backward, at least 32 bits in a,b,c |
||
861 | have at least 1/4 probability of changing. |
||
862 | * If mix() is run forward, every bit of c will change between 1/3 and |
||
863 | 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.) |
||
864 | mix() was built out of 36 single-cycle latency instructions in a |
||
865 | structure that could supported 2x parallelism, like so: |
||
866 | a -= b; |
||
867 | a -= c; x = (c>>13); |
||
868 | b -= c; a ^= x; |
||
869 | b -= a; x = (a<<8); |
||
870 | c -= a; b ^= x; |
||
871 | c -= b; x = (b>>13); |
||
872 | ... |
||
873 | Unfortunately, superscalar Pentiums and Sparcs can't take advantage |
||
874 | of that parallelism. They've also turned some of those single-cycle |
||
875 | latency instructions into multi-cycle latency instructions. Still, |
||
876 | this is the fastest good hash I could find. There were about 2^^68 |
||
877 | to choose from. I only looked at a billion or so. |
||
878 | -------------------------------------------------------------------- |
||
879 | */ |
||
880 | /* same, but slower, works on systems that might have 8 byte hashval_t's */ |
||
881 | #define mix(a,b,c) \ |
||
882 | { \ |
||
883 | a -= b; a -= c; a ^= (c>>13); \ |
||
884 | b -= c; b -= a; b ^= (a<< 8); \ |
||
885 | c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \ |
||
886 | a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \ |
||
887 | b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \ |
||
888 | c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \ |
||
889 | a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \ |
||
890 | b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \ |
||
891 | c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \ |
||
892 | } |
||
893 | |||
894 | /* |
||
895 | -------------------------------------------------------------------- |
||
896 | hash() -- hash a variable-length key into a 32-bit value |
||
897 | k : the key (the unaligned variable-length array of bytes) |
||
898 | len : the length of the key, counting by bytes |
||
899 | level : can be any 4-byte value |
||
900 | Returns a 32-bit value. Every bit of the key affects every bit of |
||
901 | the return value. Every 1-bit and 2-bit delta achieves avalanche. |
||
902 | About 36+6len instructions. |
||
903 | |||
904 | The best hash table sizes are powers of 2. There is no need to do |
||
905 | mod a prime (mod is sooo slow!). If you need less than 32 bits, |
||
906 | use a bitmask. For example, if you need only 10 bits, do |
||
907 | h = (h & hashmask(10)); |
||
908 | In which case, the hash table should have hashsize(10) elements. |
||
909 | |||
910 | If you are hashing n strings (ub1 **)k, do it like this: |
||
911 | for (i=0, h=0; i |
||
912 | |||
913 | By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this |
||
914 | code any way you wish, private, educational, or commercial. It's free. |
||
915 | |||
916 | See http://burtleburtle.net/bob/hash/evahash.html |
||
917 | Use for hash table lookup, or anything where one collision in 2^32 is |
||
918 | acceptable. Do NOT use for cryptographic purposes. |
||
919 | -------------------------------------------------------------------- |
||
920 | */ |
||
921 | |||
922 | hashval_t |
||
923 | iterative_hash (const PTR k_in /* the key */, |
||
924 | register size_t length /* the length of the key */, |
||
925 | register hashval_t initval /* the previous hash, or |
||
926 | an arbitrary value */) |
||
927 | { |
||
928 | register const unsigned char *k = (const unsigned char *)k_in; |
||
929 | register hashval_t a,b,c,len; |
||
930 | |||
931 | /* Set up the internal state */ |
||
932 | len = length; |
||
933 | a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ |
||
934 | c = initval; /* the previous hash value */ |
||
935 | |||
936 | /*---------------------------------------- handle most of the key */ |
||
937 | #ifndef WORDS_BIGENDIAN |
||
938 | /* On a little-endian machine, if the data is 4-byte aligned we can hash |
||
939 | by word for better speed. This gives nondeterministic results on |
||
940 | big-endian machines. */ |
||
941 | if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0) |
||
942 | while (len >= 12) /* aligned */ |
||
943 | { |
||
944 | a += *(hashval_t *)(k+0); |
||
945 | b += *(hashval_t *)(k+4); |
||
946 | c += *(hashval_t *)(k+8); |
||
947 | mix(a,b,c); |
||
948 | k += 12; len -= 12; |
||
949 | } |
||
950 | else /* unaligned */ |
||
951 | #endif |
||
952 | while (len >= 12) |
||
953 | { |
||
954 | a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24)); |
||
955 | b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24)); |
||
956 | c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24)); |
||
957 | mix(a,b,c); |
||
958 | k += 12; len -= 12; |
||
959 | } |
||
960 | |||
961 | /*------------------------------------- handle the last 11 bytes */ |
||
962 | c += length; |
||
963 | switch(len) /* all the case statements fall through */ |
||
964 | { |
||
965 | case 11: c+=((hashval_t)k[10]<<24); |
||
966 | case 10: c+=((hashval_t)k[9]<<16); |
||
967 | case 9 : c+=((hashval_t)k[8]<<8); |
||
968 | /* the first byte of c is reserved for the length */ |
||
969 | case 8 : b+=((hashval_t)k[7]<<24); |
||
970 | case 7 : b+=((hashval_t)k[6]<<16); |
||
971 | case 6 : b+=((hashval_t)k[5]<<8); |
||
972 | case 5 : b+=k[4]; |
||
973 | case 4 : a+=((hashval_t)k[3]<<24); |
||
974 | case 3 : a+=((hashval_t)k[2]<<16); |
||
975 | case 2 : a+=((hashval_t)k[1]<<8); |
||
976 | case 1 : a+=k[0]; |
||
977 | /* case 0: nothing left to add */ |
||
978 | } |
||
979 | mix(a,b,c); |
||
980 | /*-------------------------------------------- report the result */ |
||
981 | return c; |
||
982 | } |
||
983 | |||
984 | /* Returns a hash code for pointer P. Simplified version of evahash */ |
||
985 | |||
986 | static hashval_t |
||
987 | hash_pointer (const PTR p) |
||
988 | { |
||
989 | intptr_t v = (intptr_t) p; |
||
990 | unsigned a, b, c; |
||
991 | |||
992 | a = b = 0x9e3779b9; |
||
993 | a += v >> (sizeof (intptr_t) * CHAR_BIT / 2); |
||
994 | b += v & (((intptr_t) 1 << (sizeof (intptr_t) * CHAR_BIT / 2)) - 1); |
||
995 | c = 0x42135234; |
||
996 | mix (a, b, c); |
||
997 | return c; |
||
998 | }><>8); |