LTP GCOV extension - code coverage report
Current view: directory - csrc/cl - hashtable.c
Test: app.info
Date: 2007-12-10 Instrumented lines: 197
Code covered: 60.9 % Executed lines: 120

       1                 : /*
       2                 :         Copyright (C) 2007, Bruce Ediger
       3                 : 
       4                 :     This file is part of cl.
       5                 : 
       6                 :     cl is free software; you can redistribute it and/or modify
       7                 :     it under the terms of the GNU General Public License as published by
       8                 :     the Free Software Foundation; either version 2 of the License, or
       9                 :     (at your option) any later version.
      10                 : 
      11                 :     cl is distributed in the hope that it will be useful,
      12                 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :     GNU General Public License for more details.
      15                 : 
      16                 :     You should have received a copy of the GNU General Public License
      17                 :     along with cl; if not, write to the Free Software
      18                 :     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      19                 : 
      20                 : */
      21                 : #include <stdio.h>
      22                 : #include <stdlib.h>
      23                 : #include <string.h>
      24                 : 
      25                 : #include <hashtable.h>
      26                 : 
      27                 : #include <node.h>
      28                 : #include <abbreviations.h>  /* free_graph() */
      29                 : 
      30                 : unsigned int hash_djb2(const char *s);
      31                 : void rehash_hashtable(struct hashtable *h);
      32                 : void reallocate_buckets(struct hashtable *h);
      33                 : int  sparsebit(long i);
      34                 : int  msbbit(long i);
      35                 : 
      36                 : /* chain is the dummy node at head of hash chain */
      37                 : void
      38                 : insert_node(struct hashnode *chain, struct hashnode *n)
      39               0 : {
      40               0 :         n->next = chain->next;
      41               0 :         chain->next = n;
      42               0 :         n->next->prev = n;
      43               0 :         n->prev = chain;
      44               0 : }
      45                 : 
      46                 : struct hashtable *
      47                 : init_hashtable(int buckets, int maxload)
      48              56 : {
      49                 :         int i;
      50                 :         int f, z;
      51                 : 
      52                 :         struct hashtable *h;
      53                 : 
      54              56 :         h = malloc(sizeof(*h));
      55                 : 
      56              56 :         h->flags = 0;
      57                 : 
      58                 :         /* Insure that bucket array size is always a multiple of 2.
      59                 :          * This amounted to about 10% run-time savings for small files
      60                 :          * by avoiding the '%' operator. */
      61                 : 
      62              56 :         f = msbbit(buckets);
      63              56 :         z = 1 << f;
      64                 : 
      65              56 :         if (z < buckets)
      66               0 :                 z <<= 1;
      67                 : 
      68              56 :         buckets = z;
      69                 : 
      70                 :         /* buckets and sentinels */
      71              56 :         h->buckets =
      72                 :                 (struct hashnode **)malloc(buckets*sizeof(*h->buckets));
      73              56 :         memset((char *)h->buckets, '\0', buckets*sizeof(*h->buckets));
      74                 : 
      75              56 :         h->sentinels =
      76                 :                 (struct hashnode **)malloc(buckets*sizeof(*h->sentinels));
      77              56 :         memset((char *)h->sentinels, '\0', buckets*sizeof(*h->sentinels));
      78                 : 
      79            3640 :         for (i = 0; i < buckets; ++i)
      80                 :         {
      81                 :                 /* first node is a dummy - for convenience and to hold chain stats */
      82                 : 
      83            3584 :                 h->buckets[i] = (struct hashnode *)malloc(sizeof(*h->buckets[i]));
      84            3584 :                 h->buckets[i]->data = NULL;
      85            3584 :                 h->buckets[i]->string = NULL;
      86            3584 :                 h->buckets[i]->string_length = 0;
      87                 : 
      88            3584 :                 h->sentinels[i] = (struct hashnode *)malloc(sizeof(*h->sentinels[i]));
      89            3584 :                 h->sentinels[i]->data = NULL;
      90                 : 
      91            3584 :                 h->buckets[i]->next = h->sentinels[i];
      92            3584 :                 h->buckets[i]->prev = h->buckets[i];
      93            3584 :                 h->sentinels[i]->next = h->sentinels[i];
      94            3584 :                 h->sentinels[i]->prev = h->buckets[i];
      95                 : 
      96            3584 :                 h->buckets[i]->string = NULL;
      97            3584 :                 h->buckets[i]->value = 0;    /* split count of chain */
      98            3584 :                 h->buckets[i]->nodes_in_chain = 0;    /* split count of chain */
      99                 :         }
     100                 : 
     101                 :         /* bucket allocation */
     102              56 :         h->currentsize = buckets;
     103              56 :         h->allocated = buckets;
     104                 : 
     105              56 :         h->maxload = maxload;
     106                 : 
     107                 :         /* bucket splitting */
     108              56 :         h->p = 0;
     109              56 :         h->maxp = buckets;
     110                 : 
     111              56 :         h->node_cnt = 0;
     112              56 :         h->rehash_cnt = 0;
     113                 : 
     114              56 :         return h;
     115                 : }
     116                 : 
     117                 : void
     118                 : rehash_hashtable(struct hashtable *h)
     119               0 : {
     120                 :         struct hashnode *l;
     121                 :         struct hashnode *oldbucket, *oldtail;
     122                 :         int newindex;
     123                 : 
     124               0 :         ++h->rehash_cnt;
     125                 : 
     126               0 :         if (h->currentsize >= h->allocated)
     127               0 :                 reallocate_buckets(h);
     128                 : 
     129               0 :         oldbucket = h->buckets[h->p];
     130               0 :         oldtail = h->sentinels[h->p];
     131               0 :         l = oldbucket->next;
     132                 : 
     133               0 :         oldbucket->next = oldtail;
     134               0 :         oldtail->prev = oldbucket;
     135                 : 
     136               0 :         newindex = h->p + h->maxp;
     137                 : 
     138               0 :         if (h->flags > 1)
     139                 :         {
     140               0 :                 printf("# %d rehashes, %d total nodes, old bucket %d, new bucket %d\n",
     141                 :                         h->rehash_cnt, h->node_cnt, h->p, newindex);
     142               0 :                 printf("# %d nodes in old bucket\n", oldbucket->nodes_in_chain);
     143                 :         }
     144                 : 
     145               0 :         oldbucket->nodes_in_chain = 0;  /* may not be any lines in chain after rehash */
     146               0 :         ++oldbucket->value;      /* number of splits */
     147                 : 
     148               0 :         ++h->p;
     149                 : 
     150               0 :         if (h->p == h->maxp)
     151                 :         {
     152               0 :                 h->maxp *= 2;
     153               0 :                 h->p = 0;
     154                 :         }
     155                 : 
     156               0 :         ++h->currentsize;
     157                 : 
     158               0 :         while (oldtail != l)
     159                 :         {
     160               0 :                 struct hashnode *t = l->next;
     161               0 :                 int idx = MOD(l->value, h->maxp);
     162                 : 
     163               0 :                 if (idx < h->p)
     164               0 :                         idx = MOD(l->value, (2*h->maxp));
     165                 : 
     166               0 :                 if (idx == newindex)
     167                 :                 {
     168               0 :                         insert_node(h->buckets[newindex], l);
     169               0 :                         ++h->buckets[newindex]->nodes_in_chain;
     170                 :                 } else {
     171               0 :                         insert_node(oldbucket, l);
     172               0 :                         ++oldbucket->nodes_in_chain;
     173                 :                 }
     174                 : 
     175               0 :                 l = t;
     176                 :         }
     177                 : 
     178               0 :         if (h->flags > 1)
     179                 :         {
     180               0 :                 printf("# split: %d in old chain, %d in new chain\n",
     181                 :                         oldbucket->nodes_in_chain, h->buckets[newindex]->nodes_in_chain);
     182                 :         }
     183               0 : }
     184                 : 
     185                 : void
     186                 : reallocate_buckets(struct hashtable *h)
     187               0 : {
     188                 :         int i;
     189                 :         int newallocated;
     190                 : 
     191               0 :         newallocated = h->allocated * 2;
     192                 : 
     193               0 :         h->buckets = (struct hashnode **)realloc(
     194                 :                 h->buckets,
     195                 :                 sizeof(struct hashnode *)*newallocated
     196                 :         );
     197                 : 
     198               0 :         h->sentinels = (struct hashnode **)realloc(
     199                 :                 h->sentinels,
     200                 :                 sizeof(struct hashnode *)*newallocated
     201                 :         );
     202                 : 
     203               0 :         for (i = h->allocated; i < newallocated; ++i)
     204                 :         {
     205               0 :                 h->buckets[i] = (struct hashnode *)malloc(sizeof(*h->buckets[i]));
     206               0 :                 h->sentinels[i] = (struct hashnode *)malloc(sizeof(*h->sentinels[i]));
     207                 : 
     208               0 :                 h->buckets[i]->next = h->sentinels[i];
     209               0 :                 h->sentinels[i]->prev = h->buckets[i];
     210               0 :                 h->sentinels[i]->next = h->sentinels[i];
     211               0 :                 h->buckets[i]->prev = h->buckets[i];
     212                 : 
     213               0 :                 h->buckets[i]->string = NULL;
     214               0 :                 h->buckets[i]->data = NULL;
     215               0 :                 h->sentinels[i]->string = NULL;
     216               0 :                 h->sentinels[i]->data = NULL;
     217                 : 
     218               0 :                 h->buckets[i]->nodes_in_chain = 0;
     219               0 :                 h->buckets[i]->value = 0;    /* number of times chain splits, too */
     220                 :         }
     221                 : 
     222               0 :         h->allocated = newallocated;
     223               0 : }
     224                 : 
     225                 : const char *
     226                 : add_string(struct hashtable *h, const char *string)
     227            1398 : {
     228            1398 :         return (const char *)add_data(h, string, NULL);
     229                 : }
     230                 : 
     231                 : void *
     232                 : add_data(struct hashtable *h, const char *string, void *data)
     233            1398 : {
     234                 :         struct hashnode *n, *head;
     235                 :         int bucket_index;
     236                 :         unsigned int hv;
     237                 :         int hash_mod;
     238                 : 
     239            1398 :         if((n = node_lookup(h, string, &hv)))
     240                 :         {
     241            1029 :                 void *r = NULL;
     242            1029 :                 if (data)
     243                 :                 {
     244               0 :                         r = (void *)n->data;
     245               0 :                         n->data = data;
     246                 :                 } else {
     247            1029 :                         r = (void *)n->string;
     248                 :                 }
     249            1029 :                 return r;
     250                 :         }
     251                 : 
     252             369 :         bucket_index = MOD(hv, h->maxp);
     253             369 :         hash_mod = h->maxp;
     254             369 :         if (bucket_index < h->p)
     255                 :         {
     256               0 :                 bucket_index = MOD(hv, (2*h->maxp));
     257               0 :                 hash_mod = 2*h->maxp;
     258                 :         }
     259                 : 
     260             369 :         if (h->flags > 1)
     261               0 :                 printf("# \"%s\": hash value %u, base %d, bucket %d\n", string, hv, hash_mod, bucket_index);
     262                 : 
     263                 :         /* allocate new node */
     264             369 :         n = (struct hashnode *)malloc(sizeof(*n));
     265                 : 
     266                 :         /* fill in newly allocated node */
     267             369 :         n->value = hv;
     268             369 :         n->string_length = strlen(string);
     269             369 :         n->string = malloc(n->string_length+1);
     270             369 :         memcpy(n->string, string, n->string_length+1);
     271             369 :         n->data = data;
     272                 : 
     273                 :         /* add newly allocated node to appropriate chain */
     274             369 :         head = h->buckets[bucket_index];
     275                 : 
     276                 :         /* just push it on the chain */
     277             369 :         n->next = head->next;
     278             369 :         head->next = n;
     279             369 :         n->next->prev = n;
     280             369 :         n->prev = head;
     281                 : 
     282             369 :         ++h->buckets[bucket_index]->nodes_in_chain;
     283                 : 
     284             369 :         ++h->node_cnt;
     285                 : 
     286                 :         /* "load" on hashtable too high? */
     287             369 :         if (h->node_cnt/h->currentsize > h->maxload)
     288               0 :                 rehash_hashtable(h);
     289                 : 
     290             369 :         return (NULL == data)? (void *)n->string: NULL;
     291                 : }
     292                 : 
     293                 : void free_hashtable(struct hashtable *h)
     294              56 : {
     295                 :         unsigned int i;
     296                 : 
     297            3640 :         for (i = 0; i < h->currentsize; ++i)
     298                 :         {
     299            3584 :                 struct hashnode *chain = h->buckets[i];
     300           11121 :                 while (chain != h->sentinels[i])
     301                 :                 {
     302            3953 :                         struct hashnode *tmp = chain->next;
     303            3953 :                         if (chain->string)
     304                 :                         {
     305             369 :                                 free(chain->string);
     306             369 :                                 chain->string = NULL;
     307                 :                         }
     308            3953 :                         free_graph(chain->data);
     309            3953 :                         chain->data = NULL;
     310            3953 :                         free(chain);
     311            3953 :                         chain = tmp;
     312                 :                 }
     313            3584 :                 free(h->sentinels[i]);
     314            3584 :                 h->sentinels[i] = NULL;
     315                 :         }
     316                 : 
     317                 :         /* free the dummy heads and sentinels that the table
     318                 :          * doesn't currently use - it doubles bucket count
     319                 :          * every time the "load" gets too high */
     320              56 :         for (i = h->currentsize; i < h->allocated; ++i)
     321                 :         {
     322               0 :                 free(h->buckets[i]);
     323               0 :                 h->buckets[i] = NULL;
     324               0 :                 free(h->sentinels[i]);
     325               0 :                 h->sentinels[i] = NULL;
     326                 :         }
     327                 : 
     328              56 :         free(h->buckets);
     329              56 :         free(h->sentinels);
     330              56 :         h->buckets = NULL;
     331              56 :         h->sentinels = NULL;
     332                 : 
     333              56 :         free(h);
     334              56 :         h = NULL;
     335              56 : }
     336                 : 
     337                 : const char *
     338                 : string_lookup(struct hashtable *h, const char *string_to_lookup)
     339               0 : {
     340               0 :         struct hashnode *hn = NULL;
     341               0 :         char *r = NULL;
     342                 :         unsigned int hv;  /* dummy - not used in this function */
     343                 : 
     344               0 :         if (NULL != (hn = node_lookup(h, string_to_lookup, &hv)))
     345               0 :                 r = hn->string;
     346                 : 
     347               0 :         return r;
     348                 : }
     349                 : 
     350                 : void *
     351                 : data_lookup(struct hashtable *h, const char *string_to_lookup)
     352            1017 : {
     353            1017 :         struct hashnode *hn = NULL;
     354            1017 :         void *r = NULL;
     355                 :         unsigned int hv;  /* dummy - not used in this function */
     356                 : 
     357            1017 :         if (NULL != (hn = node_lookup(h, string_to_lookup, &hv)))
     358            1017 :                 r = hn->data;
     359                 : 
     360            1017 :         return r;
     361                 : }
     362                 : 
     363                 : /* note that 3rd argument must evaluate to non-NULL - returns hash value
     364                 :  * of string_to_lookup to add_data() to allow it to skip doing a hash
     365                 :  */
     366                 : struct hashnode *
     367                 : node_lookup(struct hashtable *h, const char *string_to_lookup, unsigned int *rhv)
     368            2538 : {
     369            2538 :         struct hashnode *r = NULL;
     370            2538 :         unsigned int hashval = hash_djb2(string_to_lookup);
     371            2538 :         int idx = MOD(hashval, h->maxp);
     372                 :         struct hashnode *chain;
     373                 : 
     374            2538 :         *rhv = hashval;
     375                 : 
     376            2538 :         if (idx < h->p)
     377               0 :                 idx = MOD(hashval, (2*h->maxp));
     378                 : 
     379            2538 :         chain = h->buckets[idx]->next;
     380                 : 
     381            5268 :         while (chain != h->sentinels[idx])
     382                 :         {
     383                 :                 /* checking the equivalence of hash value first prevents
     384                 :                  * expensive byte-by-byte strcmp().  Seems to improve performance. */
     385            2361 :                 if (chain->value == hashval &&
     386                 :                         !strcmp(chain->string, string_to_lookup))
     387                 :                 {
     388            2169 :                         r = chain;
     389            2169 :                         break;
     390                 :                 }
     391                 : 
     392             192 :                 chain = chain->next;
     393                 :         }
     394                 : 
     395            2538 :         return r;
     396                 : }
     397                 : 
     398                 : unsigned int
     399                 : hash_djb2(const char *str)
     400            2538 : {
     401            2538 :         unsigned long hash = 5381;
     402                 :         int c;
     403                 : 
     404           12254 :         while ((c = *str++))
     405            7178 :                 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
     406                 : 
     407            2538 :         return hash;
     408                 : }
     409                 : 
     410                 : int
     411                 : sparsebit(long i)
     412              56 : {
     413              56 :         int p = 0;
     414                 : 
     415              56 :         if (i == 0) return(-1);     /* no bits set */
     416                 : 
     417              56 :         if ((i & (-i)) != i) return(-1);    /* two or more bits set */
     418                 : 
     419              56 :         if (i & 0xAAAAAAAA) p++;
     420              56 :         if (i & 0xCCCCCCCC) p += 2;
     421              56 :         if (i & 0xF0F0F0F0) p += 4;
     422              56 :         if (i & 0xFF00FF00) p += 8;
     423              56 :         if (i & 0xFFFF0000) p += 16;
     424                 : 
     425              56 :         return p;
     426                 : }
     427                 : 
     428                 : int
     429                 : msbbit(long i)
     430              56 : {
     431                 :         long i2;
     432                 : 
     433              56 :         if (i<0) return(31); /* speed-up assumes a 'long's msb is at pos 31 */
     434                 : 
     435              56 :         while ((i2 = (i & (i-1)))) i = i2;
     436                 : 
     437              56 :         return(sparsebit(i));
     438                 : }

Generated by: LTP GCOV extension version 1.6