Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright 2006 Rob Kendrick <rjek@rjek.com>
  3.  * Copyright 2006 Richard Wilson <info@tinct.net>
  4.  *
  5.  * This file is part of NetSurf, http://www.netsurf-browser.org/
  6.  *
  7.  * NetSurf is free software; you can redistribute it and/or modify
  8.  * it under the terms of the GNU General Public License as published by
  9.  * the Free Software Foundation; version 2 of the License.
  10.  *
  11.  * NetSurf is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14.  * GNU General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU General Public License
  17.  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18.  */
  19.  
  20. /** \file
  21.  * Write-Once hash table for string to string mappings */
  22.  
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <stdbool.h>
  26. #ifdef TEST_RIG
  27. #include <assert.h>
  28. #include <stdio.h>
  29. #endif
  30. #include "utils/hashtable.h"
  31. #include "utils/log.h"
  32.  
  33.  
  34. struct hash_entry {
  35.         char *pairing;           /**< block containing 'key\0value\0' */
  36.         unsigned int key_length; /**< length of key */
  37.         struct hash_entry *next; /**< next entry */
  38. };
  39.  
  40. struct hash_table {
  41.         unsigned int nchains;
  42.         struct hash_entry **chain;
  43. };
  44.  
  45. /**
  46.  * Hash a string, returning a 32bit value.  The hash algorithm used is
  47.  * Fowler Noll Vo - a very fast and simple hash, ideal for short strings.
  48.  * See http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash for more details.
  49.  *
  50.  * \param  datum   The string to hash.
  51.  * \param  len     Pointer to unsigned integer to record datum's length in.
  52.  * \return The calculated hash value for the datum.
  53.  */
  54.  
  55. static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len)
  56. {
  57.         unsigned int z = 0x811c9dc5;
  58.         const char *start = datum;
  59.         *len = 0;
  60.  
  61.         if (datum == NULL)
  62.                 return 0;
  63.  
  64.         while (*datum) {
  65.                 z *= 0x01000193;
  66.                 z ^= *datum++;
  67.         }
  68.         *len = datum - start;
  69.  
  70.         return z;
  71. }
  72.  
  73.  
  74. /**
  75.  * Create a new hash table, and return a context for it.  The memory consumption
  76.  * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty.
  77.  *
  78.  * \param  chains Number of chains/buckets this hash table will have.  This
  79.  *                should be a prime number, and ideally a prime number just
  80.  *                over a power of two, for best performance and distribution.
  81.  * \return struct hash_table containing the context of this hash table or NULL
  82.  *         if there is insufficent memory to create it and its chains.
  83.  */
  84.  
  85. struct hash_table *hash_create(unsigned int chains)
  86. {
  87.         struct hash_table *r = malloc(sizeof(struct hash_table));
  88.  
  89.         if (r == NULL) {
  90.                 LOG(("Not enough memory for hash table."));
  91.                 return NULL;
  92.         }
  93.  
  94.         r->nchains = chains;
  95.         r->chain = calloc(chains, sizeof(struct hash_entry));
  96.  
  97.         if (r->chain == NULL) {
  98.                 LOG(("Not enough memory for %d hash table chains.", chains));
  99.                 free(r);
  100.                 return NULL;
  101.         }
  102.  
  103.         return r;
  104. }
  105.  
  106. /**
  107.  * Destroys a hash table, freeing all memory associated with it.
  108.  *
  109.  * \param  ht    Hash table to destroy.  After the function returns, this
  110.  *               will nolonger be valid.
  111.  */
  112.  
  113. void hash_destroy(struct hash_table *ht)
  114. {
  115.         unsigned int i;
  116.  
  117.         if (ht == NULL)
  118.                 return;
  119.  
  120.         for (i = 0; i < ht->nchains; i++) {
  121.                 if (ht->chain[i] != NULL) {
  122.                         struct hash_entry *e = ht->chain[i];
  123.                         while (e) {
  124.                                 struct hash_entry *n = e->next;
  125.                                 free(e->pairing);
  126.                                 free(e);
  127.                                 e = n;
  128.                         }
  129.                 }
  130.         }
  131.  
  132.         free(ht->chain);
  133.         free(ht);
  134. }
  135.  
  136. /**
  137.  * Adds a key/value pair to a hash table.  If the key you're adding is already
  138.  * in the hash table, it does not replace it, but it does take precedent over
  139.  * it.  The old key/value pair will be inaccessable but still in memory until
  140.  * hash_destroy() is called on the hash table.
  141.  *
  142.  * \param  ht     The hash table context to add the key/value pair to.
  143.  * \param  key    The key to associate the value with.  A copy is made.
  144.  * \param  value  The value to associate the key with.  A copy is made.
  145.  * \return true if the add succeeded, false otherwise.  (Failure most likely
  146.  *         indicates insufficent memory to make copies of the key and value.
  147.  */
  148.  
  149. bool hash_add(struct hash_table *ht, const char *key, const char *value)
  150. {
  151.         unsigned int h, c, v;
  152.         struct hash_entry *e;
  153.  
  154.         if (ht == NULL || key == NULL || value == NULL)
  155.                 return false;
  156.  
  157.         e = malloc(sizeof(struct hash_entry));
  158.         if (e == NULL) {
  159.                 LOG(("Not enough memory for hash entry."));
  160.                 return false;
  161.         }
  162.  
  163.         h = hash_string_fnv(key, &(e->key_length));
  164.         c = h % ht->nchains;
  165.  
  166.         v = strlen(value) ;
  167.         e->pairing = malloc(v + e->key_length + 2);
  168.         if (e->pairing == NULL) {
  169.                 LOG(("Not enough memory for string duplication."));
  170.                 free(e);
  171.                 return false;
  172.         }
  173.         memcpy(e->pairing, key, e->key_length + 1);
  174.         memcpy(e->pairing + e->key_length + 1, value, v + 1);
  175.  
  176.         e->next = ht->chain[c];
  177.         ht->chain[c] = e;
  178.  
  179.         return true;
  180. }
  181.  
  182. /**
  183.  * Looks up a the value associated with with a key from a specific hash table.
  184.  *
  185.  * \param  ht     The hash table context to look up the key in.
  186.  * \param  key    The key to search for.
  187.  * \return The value associated with the key, or NULL if it was not found.
  188.  */
  189.  
  190. const char *hash_get(struct hash_table *ht, const char *key)
  191. {
  192.         unsigned int h, c, key_length;
  193.         struct hash_entry *e;
  194.  
  195.         if (ht == NULL || key == NULL)
  196.                 return NULL;
  197.  
  198.         h = hash_string_fnv(key, &key_length);
  199.         c = h % ht->nchains;
  200.  
  201.         for (e = ht->chain[c]; e; e = e->next)
  202.                 if ((key_length == e->key_length) &&
  203.                                 (memcmp(key, e->pairing, key_length) == 0))
  204.                         return e->pairing + key_length + 1;
  205.  
  206.         return NULL;
  207. }
  208.  
  209. /**
  210.  * Iterate through all available hash keys.
  211.  *
  212.  * \param  ht   The hash table context to iterate.
  213.  * \param  c1   Pointer to first context
  214.  * \param  c2   Pointer to second context (set to 0 on first call)
  215.  * \return The next hash key, or NULL for no more keys
  216.  */
  217.  
  218. const char *hash_iterate(struct hash_table *ht, unsigned int *c1, unsigned int **c2) {
  219.         struct hash_entry **he = (struct hash_entry **)c2;
  220.  
  221.         if (ht == NULL)
  222.                 return NULL;
  223.  
  224.         if (!*he)
  225.                 *c1 = -1;
  226.         else
  227.                 *he = (*he)->next;
  228.  
  229.         if (*he)
  230.                 return (*he)->pairing;
  231.  
  232.         while (!*he) {
  233.                 (*c1)++;
  234.                 if (*c1 >= ht->nchains)
  235.                         return NULL;
  236.                 *he = ht->chain[*c1];
  237.         }
  238.         return (*he)->pairing;
  239. }
  240.  
  241. /* A simple test rig.  To compile, use:
  242.  * gcc -o hashtest -I../ -DTEST_RIG utils/hashtable.c
  243.  *
  244.  * If you make changes to this hash table implementation, please rerun this
  245.  * test, and if possible, through valgrind to make sure there are no memory
  246.  * leaks or invalid memory accesses.  If you add new functionality, please
  247.  * include a test for it that has good coverage along side the other tests.
  248.  */
  249.  
  250. #ifdef TEST_RIG
  251.  
  252. int main(int argc, char *argv[])
  253. {
  254.         struct hash_table *a, *b;
  255.         FILE *dict;
  256.         char keybuf[BUFSIZ], valbuf[BUFSIZ];
  257.         int i;
  258.  
  259.         a = hash_create(79);
  260.         assert(a != NULL);
  261.  
  262.         b = hash_create(103);
  263.         assert(b != NULL);
  264.  
  265.         hash_add(a, "cow", "moo");
  266.         hash_add(b, "moo", "cow");
  267.  
  268.         hash_add(a, "pig", "oink");
  269.         hash_add(b, "oink", "pig");
  270.  
  271.         hash_add(a, "chicken", "cluck");
  272.         hash_add(b, "cluck", "chicken");
  273.  
  274.         hash_add(a, "dog", "woof");
  275.         hash_add(b, "woof", "dog");
  276.  
  277.         hash_add(a, "cat", "meow");
  278.         hash_add(b, "meow", "cat");
  279.  
  280. #define MATCH(x,y) assert(!strcmp(hash_get(a, x), y)); assert(!strcmp(hash_get(b, y), x))
  281.         MATCH("cow", "moo");
  282.         MATCH("pig", "oink");
  283.         MATCH("chicken", "cluck");
  284.         MATCH("dog", "woof");
  285.         MATCH("cat", "meow");
  286.  
  287.         hash_destroy(a);
  288.         hash_destroy(b);
  289.  
  290.         /* this test requires /usr/share/dict/words - a large list of English
  291.          * words.  We load the entire file - odd lines are used as keys, and
  292.          * even lines are used as the values for the previous line.  we then
  293.          * work through it again making sure everything matches.
  294.          *
  295.          * We do this twice - once in a hash table with many chains, and once
  296.          * with a hash table with fewer chains.
  297.          */
  298.  
  299.         a = hash_create(1031);
  300.         b = hash_create(7919);
  301.  
  302.         dict = fopen("/usr/share/dict/words", "r");
  303.         if (dict == NULL) {
  304.                 fprintf(stderr, "Unable to open /usr/share/dict/words - extensive testing skipped.\n");
  305.                 exit(0);
  306.         }
  307.  
  308.         while (!feof(dict)) {
  309.                 fscanf(dict, "%s", keybuf);
  310.                 fscanf(dict, "%s", valbuf);
  311.                 hash_add(a, keybuf, valbuf);
  312.                 hash_add(b, keybuf, valbuf);
  313.         }
  314.  
  315.         for (i = 0; i < 5; i++) {
  316.                 fseek(dict, 0, SEEK_SET);
  317.  
  318.                 while (!feof(dict)) {
  319.                         fscanf(dict, "%s", keybuf);
  320.                         fscanf(dict, "%s", valbuf);
  321.                         assert(strcmp(hash_get(a, keybuf), valbuf) == 0);
  322.                         assert(strcmp(hash_get(b, keybuf), valbuf) == 0);
  323.                 }
  324.         }
  325.  
  326.         hash_destroy(a);
  327.         hash_destroy(b);
  328.  
  329.         fclose(dict);
  330.  
  331.         return 0;
  332. }
  333.  
  334. #endif
  335.