rpm  5.4.10
rpmhash.c
Go to the documentation of this file.
1 
6 #include "system.h"
7 #include <rpmiotypes.h>
8 #include <rpmio.h>
9 #include <rpmhash.h>
10 #include "debug.h"
11 
12 /*@unchecked@*/
13 int _ht_debug = 0;
14 
15 typedef /*@owned@*/ const void * voidptr;
16 
17 typedef struct hashBucket_s * hashBucket;
18 
21 struct hashBucket_s {
23 /*@owned@*/ voidptr * data;
24  int dataCount;
25 /*@dependent@*/hashBucket next;
26 };
27 
30 struct hashTable_s {
31  struct rpmioItem_s _item;
32  int numBuckets;
33  size_t keySize;
34  int freeData;
35  hashBucket * buckets;
36 /*@relnull@*/
38 /*@relnull@*/
40 #if defined(__LCLINT__)
41 /*@refs@*/
42  int nrefs;
43 #endif
44 };
45 
46 #ifdef __cplusplus
47 GENfree(hashBucket)
48 GENfree(hashBucket *)
49 GENfree(voidptr *)
50 #endif /* __cplusplus */
51 
58 static /*@shared@*/ /*@null@*/
59 hashBucket findEntry(hashTable ht, const void * key)
60  /*@*/
61 {
62  rpmuint32_t hash = 0;
63  hashBucket b;
64 
65  /*@-modunconnomods@*/
66  hash = ht->fn(hash, key, 0) % ht->numBuckets;
67  b = ht->buckets[hash];
68 
69  while (b && b->key && ht->eq(b->key, key))
70  b = b->next;
71  /*@=modunconnomods@*/
72 
73  return b;
74 }
75 
76 int hashEqualityString(const void * key1, const void * key2)
77 {
78  const char *k1 = (const char *)key1;
79  const char *k2 = (const char *)key2;
80  return strcmp(k1, k2);
81 }
82 
83 rpmuint32_t hashFunctionString(rpmuint32_t h, const void * data, size_t size)
84 {
85  const char *key = (const char *) data;
86 
87  if (size == 0)
88  size = strlen(key);
89 
90  /*
91  * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
92  *
93  * This is Daniel J. Bernstein's popular `times 33' hash function as
94  * posted by him years ago on comp.lang.c. It basically uses a function
95  * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
96  * known hash functions for strings. Because it is both computed very
97  * fast and distributes very well.
98  *
99  * The magic of number 33, i.e. why it works better than many other
100  * constants, prime or not, has never been adequately explained by
101  * anyone. So I try an explanation: if one experimentally tests all
102  * multipliers between 1 and 256 (as RSE did now) one detects that even
103  * numbers are not useable at all. The remaining 128 odd numbers
104  * (except for the number 1) work more or less all equally well. They
105  * all distribute in an acceptable way and this way fill a hash table
106  * with an average percent of approx. 86%.
107  *
108  * If one compares the Chi^2 values of the variants, the number 33 not
109  * even has the best value. But the number 33 and a few other equally
110  * good numbers like 17, 31, 63, 127 and 129 have nevertheless a great
111  * advantage to the remaining numbers in the large set of possible
112  * multipliers: their multiply operation can be replaced by a faster
113  * operation based on just one shift plus either a single addition
114  * or subtraction operation. And because a hash function has to both
115  * distribute good _and_ has to be very fast to compute, those few
116  * numbers should be preferred and seems to be the reason why Daniel J.
117  * Bernstein also preferred it.
118  *
119  * Below you can find the variant implemented with the
120  * multiplication optimized via bit shifts and hash unrolled eight
121  * times for speed.
122  * -- Ralf S. Engelschall <rse@engelschall.com>
123  */
124  if (h == 0)
125  h = 5381;
126  for (; size >= 8; size -= 8) {
127  h = ((h << 5) + h) + (rpmuint32_t)*key++;
128  h = ((h << 5) + h) + (rpmuint32_t)*key++;
129  h = ((h << 5) + h) + (rpmuint32_t)*key++;
130  h = ((h << 5) + h) + (rpmuint32_t)*key++;
131  h = ((h << 5) + h) + (rpmuint32_t)*key++;
132  h = ((h << 5) + h) + (rpmuint32_t)*key++;
133  h = ((h << 5) + h) + (rpmuint32_t)*key++;
134  h = ((h << 5) + h) + (rpmuint32_t)*key++;
135  }
136  switch (size) {
137  case 7: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
138  case 6: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
139  case 5: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
140  case 4: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
141  case 3: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
142  case 2: h = ((h << 5) + h) + (rpmuint32_t)*key++; /*@fallthrough@*/
143  case 1: h = ((h << 5) + h) + (rpmuint32_t)*key++; break;
144  default: /* case 0: */ break;
145  }
146 
147  return h;
148 }
149 
150 void htAddEntry(hashTable ht, const void * key, const void * data)
151 {
152  rpmuint32_t hash = 0;
153  hashBucket b;
154 
155  hash = ht->fn(hash, key, 0) % ht->numBuckets;
156  b = ht->buckets[hash];
157 
158  while (b && b->key && ht->eq(b->key, key))
159  b = b->next;
160 
161  if (b == NULL) {
162  b = (hashBucket) xmalloc(sizeof(*b));
163  if (ht->keySize) {
164  char *k = (char *) xmalloc(ht->keySize);
165  memcpy(k, key, ht->keySize);
166  b->key = k;
167  } else {
168  b->key = key;
169  }
170  b->dataCount = 0;
171  b->next = ht->buckets[hash];
172  b->data = NULL;
173  ht->buckets[hash] = b;
174  }
175 
176  b->data = (voidptr *)
177  xrealloc(b->data, sizeof(*b->data) * (b->dataCount + 1));
178  b->data[b->dataCount++] = data;
179 }
180 
181 int htHasEntry(hashTable ht, const void * key)
182 {
183  hashBucket b;
184 
185  if (!(b = findEntry(ht, key))) return 0; else return 1;
186 }
187 
188 int htGetEntry(hashTable ht, const void * key, const void * data,
189  int * dataCount, const void * tableKey)
190 {
191  hashBucket b;
192 
193  if ((b = findEntry(ht, key)) == NULL)
194  return 1;
195 
196  if (data)
197  *(const void ***)data = (const void **) b->data;
198  if (dataCount)
199  *dataCount = b->dataCount;
200  if (tableKey)
201  *(const void **)tableKey = b->key;
202 
203  return 0;
204 }
205 
206 const void ** htGetKeys(hashTable ht)
207 {
208  const void ** keys = (const void **)
209  xcalloc(ht->numBuckets+1, sizeof(const void*));
210  const void ** keypointer = keys;
211  hashBucket b, n;
212  int i;
213 
214  for (i = 0; i < ht->numBuckets; i++) {
215  b = ht->buckets[i];
216  if (b == NULL)
217  continue;
218  if (b->data)
219  *(keys++) = b->key;
220  do {
221  n = b->next;
222  if(n != NULL)
223  *(keys++) = n->key;
224  } while ((b = n) != NULL);
225  }
226 
227  return keypointer;
228 }
229 
230 /*@-mustmod@*/ /* XXX splint on crack */
231 static void htFini(void * _ht)
232  /*@modifies _ht @*/
233 {
234  hashTable ht = (hashTable) _ht;
235  hashBucket b, n;
236  int i;
237 
238  for (i = 0; i < ht->numBuckets; i++) {
239  b = ht->buckets[i];
240  if (b == NULL)
241  continue;
242  ht->buckets[i] = NULL;
243  if (ht->keySize > 0)
244  b->key = _free(b->key);
245  do {
246  n = b->next;
247  if (b->data) {
248  if (ht->freeData)
249  *b->data = _free(*b->data);
250  b->data = _free(b->data);
251  }
252  b = _free(b);
253  } while ((b = n) != NULL);
254  }
255 
256  ht->buckets = _free(ht->buckets);
257 }
258 /*@=mustmod@*/
259 
260 /*@unchecked@*/ /*@only@*/ /*@null@*/
262 
263 static hashTable htGetPool(/*@null@*/ rpmioPool pool)
264  /*@globals _htPool, fileSystem @*/
265  /*@modifies pool, _htPool, fileSystem @*/
266 {
267  hashTable ht;
268 
269  if (_htPool == NULL) {
270  _htPool = rpmioNewPool("ht", sizeof(*ht), -1, _ht_debug,
271  NULL, NULL, htFini);
272  pool = _htPool;
273  }
274  return (hashTable) rpmioGetPool(pool, sizeof(*ht));
275 }
276 
277 hashTable htCreate(int numBuckets, size_t keySize, int freeData,
279 {
280  hashTable ht = htGetPool(_htPool);
281 
282  ht->numBuckets = numBuckets;
283  ht->buckets = (hashBucket *) xcalloc(numBuckets, sizeof(*ht->buckets));
284  ht->keySize = keySize;
285  ht->freeData = freeData;
286  /*@-assignexpose@*/
287  ht->fn = (fn != NULL ? fn : hashFunctionString);
288  ht->eq = (eq != NULL ? eq : hashEqualityString);
289  /*@=assignexpose@*/
290 
291  return htLink(ht);
292 }