CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 // NOLINTNEXTLINE
10 #define __STDC_FORMAT_MACROS
11 #endif
12 
13 #include <gtest/gtest_prod.h>
14 #include <inttypes.h>
15 #include <pthread.h>
16 #include <stdint.h>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdlib>
21 #include <new>
22 
23 #include "util/atomic.h"
24 #include "util/murmur.hxx"
25 #include "util/prng.h"
26 #include "util/smalloc.h"
27 
33 template<class Key, class Value, class Derived>
35  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
36 
37  public:
38  static const double kLoadFactor; // mainly useless for the dynamic version
39  static const double kThresholdGrow; // only used for resizable version
40  static const double kThresholdShrink; // only used for resizable version
41 
43  keys_ = NULL;
44  values_ = NULL;
45  hasher_ = NULL;
46  bytes_allocated_ = 0;
47  num_collisions_ = 0;
48  max_collisions_ = 0;
49 
50  // Properly initialized by Init()
51  capacity_ = 0;
53  size_ = 0;
54  }
55 
57 
58  void Init(uint32_t expected_size, Key empty,
59  uint32_t (*hasher)(const Key &key)) {
60  hasher_ = hasher;
61  empty_key_ = empty;
62  capacity_ = static_cast<uint32_t>(static_cast<double>(expected_size)
63  / kLoadFactor);
65  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
66  AllocMemory();
67  this->DoClear(false);
68  }
69 
70  bool Lookup(const Key &key, Value *value) const {
71  uint32_t bucket;
72  uint32_t collisions;
73  const bool found = DoLookup(key, &bucket, &collisions);
74  if (found)
75  *value = values_[bucket];
76  return found;
77  }
78 
86  bool LookupEx(Key *key, Value *value) const {
87  uint32_t bucket = ScaleHash(*key);
88  while (!(keys_[bucket] == empty_key_)) {
89  if (keys_[bucket] == *key) {
90  *key = keys_[bucket];
91  *value = values_[bucket];
92  return true;
93  }
94  bucket = (bucket + 1) % capacity_;
95  }
96  return false;
97  }
98 
99  bool Contains(const Key &key) const {
100  uint32_t bucket;
101  uint32_t collisions;
102  const bool found = DoLookup(key, &bucket, &collisions);
103  return found;
104  }
105 
106  void Insert(const Key &key, const Value &value) {
107  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
108  const bool overwritten = DoInsert(key, value, true);
109  size_ += !overwritten; // size + 1 if the key was not yet in the map
110  }
111 
112  bool Erase(const Key &key) {
113  uint32_t bucket;
114  uint32_t collisions;
115  const bool found = DoLookup(key, &bucket, &collisions);
116  if (found) {
117  keys_[bucket] = empty_key_;
118  size_--;
119  bucket = (bucket + 1) % capacity_;
120  while (!(keys_[bucket] == empty_key_)) {
121  Key rehash = keys_[bucket];
122  keys_[bucket] = empty_key_;
123  DoInsert(rehash, values_[bucket], false);
124  bucket = (bucket + 1) % capacity_;
125  }
126  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
127  }
128  return found;
129  }
130 
131  void Clear() { DoClear(true); }
132 
133  uint64_t bytes_allocated() const { return bytes_allocated_; }
134  static double GetEntrySize() {
135  const double unit = sizeof(Key) + sizeof(Value);
136  return unit / kLoadFactor;
137  }
138 
139  void GetCollisionStats(uint64_t *num_collisions,
140  uint32_t *max_collisions) const {
141  *num_collisions = num_collisions_;
142  *max_collisions = max_collisions_;
143  }
144 
145  // Careful with the direct access TODO: iterator
146  uint32_t capacity() const { return capacity_; }
147  Key empty_key() const { return empty_key_; }
148  Key *keys() const { return keys_; }
149  Value *values() const { return values_; }
150 
151  // Only needed by compat
152  void SetHasher(uint32_t (*hasher)(const Key &key)) { hasher_ = hasher; }
153 
154  protected:
155  uint32_t ScaleHash(const Key &key) const {
156  double bucket = (static_cast<double>(hasher_(key))
157  * static_cast<double>(capacity_)
158  / static_cast<double>(static_cast<uint32_t>(-1)));
159  return static_cast<uint32_t>(bucket) % capacity_;
160  }
161 
162  void AllocMemory() {
163  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
164  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
165  for (uint32_t i = 0; i < capacity_; ++i) {
166  /*keys_[i] =*/new (keys_ + i) Key();
167  }
168  for (uint32_t i = 0; i < capacity_; ++i) {
169  /*values_[i] =*/new (values_ + i) Value();
170  }
171  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
172  }
173 
174  void DeallocMemory(Key *k, Value *v, uint32_t c) {
175  for (uint32_t i = 0; i < c; ++i) {
176  k[i].~Key();
177  }
178  for (uint32_t i = 0; i < c; ++i) {
179  v[i].~Value();
180  }
181  if (k)
182  smunmap(k);
183  if (v)
184  smunmap(v);
185  k = NULL;
186  v = NULL;
187  }
188 
189  // Returns true iff the key is overwritten
190  bool DoInsert(const Key &key, const Value &value,
191  const bool count_collisions) {
192  uint32_t bucket;
193  uint32_t collisions;
194  const bool overwritten = DoLookup(key, &bucket, &collisions);
195  if (count_collisions) {
196  num_collisions_ += collisions;
197  max_collisions_ = std::max(collisions, max_collisions_);
198  }
199  keys_[bucket] = key;
200  values_[bucket] = value;
201  return overwritten;
202  }
203 
204  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
205  *bucket = ScaleHash(key);
206  *collisions = 0;
207  while (!(keys_[*bucket] == empty_key_)) {
208  if (keys_[*bucket] == key)
209  return true;
210  *bucket = (*bucket + 1) % capacity_;
211  (*collisions)++;
212  }
213  return false;
214  }
215 
216  void DoClear(const bool reset_capacity) {
217  if (reset_capacity)
218  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
219  for (uint32_t i = 0; i < capacity_; ++i)
220  keys_[i] = empty_key_;
221  size_ = 0;
222  }
223 
224  // Methods for resizable version
225  void SetThresholds() { }
226  void Grow() { }
227  void Shrink() { }
228  void ResetCapacity() { }
229 
230  // Separate key and value arrays for better locality
231  Key *keys_;
232  Value *values_;
233  uint32_t capacity_;
235  uint32_t size_;
236  uint32_t (*hasher_)(const Key &key);
238  uint64_t num_collisions_;
239  uint32_t max_collisions_;
241 };
242 
243 
244 template<class Key, class Value>
246  : public SmallHashBase<Key, Value, SmallHashFixed<Key, Value> > {
247  friend class SmallHashBase<Key, Value, SmallHashFixed<Key, Value> >;
248 
249  protected:
250  // No-ops
251  void SetThresholds() { }
252  void Grow() { }
253  void Shrink() { }
254  void ResetCapacity() { }
255 };
256 
257 
258 template<class Key, class Value>
260  : public SmallHashBase<Key, Value, SmallHashDynamic<Key, Value> > {
261  friend class SmallHashBase<Key, Value, SmallHashDynamic<Key, Value> >;
262 
263  public:
265  static const double kThresholdGrow;
266  static const double kThresholdShrink;
267 
269  num_migrates_ = 0;
270 
271  // Properly set by Init
272  threshold_grow_ = 0;
273  threshold_shrink_ = 0;
274  }
275 
277  num_migrates_ = 0;
278  CopyFrom(other);
279  }
280 
282  const SmallHashDynamic<Key, Value> &other) {
283  if (&other == this)
284  return *this;
285 
286  CopyFrom(other);
287  return *this;
288  }
289 
290  uint32_t capacity() const { return Base::capacity_; }
291  uint32_t size() const { return Base::size_; }
292  uint32_t num_migrates() const { return num_migrates_; }
293 
294  protected:
295  void SetThresholds() {
296  threshold_grow_ = static_cast<uint32_t>(static_cast<double>(capacity())
297  * kThresholdGrow);
298  threshold_shrink_ = static_cast<uint32_t>(static_cast<double>(capacity())
299  * kThresholdShrink);
300  }
301 
302  void Grow() {
303  if (size() > threshold_grow_)
304  Migrate(capacity() * 2);
305  }
306 
307  void Shrink() {
308  if (size() < threshold_shrink_) {
309  uint32_t target_capacity = capacity() / 2;
310  if (target_capacity >= Base::initial_capacity_)
311  Migrate(target_capacity);
312  }
313  }
314 
315  void ResetCapacity() {
319  SetThresholds();
320  }
321 
322  private:
323  // Returns a random permutation of indices [0..N-1] that is allocated
324  // by smmap (Knuth's shuffle algorithm)
325  uint32_t *ShuffleIndices(const uint32_t N) {
326  uint32_t *shuffled = static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
327  // Init with identity
328  for (unsigned i = 0; i < N; ++i)
329  shuffled[i] = i;
330  // Shuffle (no shuffling for the last element)
331  for (unsigned i = 0; i < N - 1; ++i) {
332  const uint32_t swap_idx = i + g_prng.Next(N - i);
333  uint32_t tmp = shuffled[i];
334  shuffled[i] = shuffled[swap_idx];
335  shuffled[swap_idx] = tmp;
336  }
337  return shuffled;
338  }
339 
340  void Migrate(const uint32_t new_capacity) {
341  Key *old_keys = Base::keys_;
342  Value *old_values = Base::values_;
343  uint32_t old_capacity = capacity();
344  uint32_t old_size = size();
345 
346  Base::capacity_ = new_capacity;
347  SetThresholds();
349  Base::DoClear(false);
350  if (new_capacity < old_capacity) {
351  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
352  for (uint32_t i = 0; i < old_capacity; ++i) {
353  if (old_keys[shuffled_indices[i]] != Base::empty_key_) {
354  Base::Insert(old_keys[shuffled_indices[i]],
355  old_values[shuffled_indices[i]]);
356  }
357  }
358  smunmap(shuffled_indices);
359  } else {
360  for (uint32_t i = 0; i < old_capacity; ++i) {
361  if (old_keys[i] != Base::empty_key_)
362  Base::Insert(old_keys[i], old_values[i]);
363  }
364  }
365  assert(size() == old_size);
366 
367  Base::DeallocMemory(old_keys, old_values, old_capacity);
368  num_migrates_++;
369  }
370 
372  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
373  for (uint32_t i = 0; i < other.capacity_; ++i) {
374  if (other.keys_[shuffled_indices[i]] != other.empty_key_) {
375  this->Insert(other.keys_[shuffled_indices[i]],
376  other.values_[shuffled_indices[i]]);
377  }
378  }
379  smunmap(shuffled_indices);
380  }
381 
382  uint32_t num_migrates_;
383  uint32_t threshold_grow_;
385  static Prng g_prng;
386 };
387 
388 
393 template<class Key, class Value>
394 class MultiHash {
395  public:
397  num_hashmaps_ = 0;
398  hashmaps_ = NULL;
399  locks_ = NULL;
400  }
401 
402  void Init(const uint8_t num_hashmaps, const Key &empty_key,
403  uint32_t (*hasher)(const Key &key)) {
404  assert(num_hashmaps > 0);
405  const uint8_t N = num_hashmaps;
406  num_hashmaps_ = N;
408  locks_ = static_cast<pthread_mutex_t *>(
409  smalloc(N * sizeof(pthread_mutex_t)));
410  for (uint8_t i = 0; i < N; ++i) {
411  int retval = pthread_mutex_init(&locks_[i], NULL);
412  assert(retval == 0);
413  hashmaps_[i].Init(128, empty_key, hasher);
414  }
415  }
416 
418  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
419  pthread_mutex_destroy(&locks_[i]);
420  }
421  free(locks_);
422  delete[] hashmaps_;
423  }
424 
425  bool Lookup(const Key &key, Value *value) {
426  uint8_t target = SelectHashmap(key);
427  Lock(target);
428  const bool result = hashmaps_[target].Lookup(key, value);
429  Unlock(target);
430  return result;
431  }
432 
433  void Insert(const Key &key, const Value &value) {
434  uint8_t target = SelectHashmap(key);
435  Lock(target);
436  hashmaps_[target].Insert(key, value);
437  Unlock(target);
438  }
439 
440  void Erase(const Key &key) {
441  uint8_t target = SelectHashmap(key);
442  Lock(target);
443  hashmaps_[target].Erase(key);
444  Unlock(target);
445  }
446 
447  void Clear() {
448  for (uint8_t i = 0; i < num_hashmaps_; ++i)
449  Lock(i);
450  for (uint8_t i = 0; i < num_hashmaps_; ++i)
451  hashmaps_[i].Clear();
452  for (uint8_t i = 0; i < num_hashmaps_; ++i)
453  Unlock(i);
454  }
455 
456  uint8_t num_hashmaps() const { return num_hashmaps_; }
457 
458  void GetSizes(uint32_t *sizes) {
459  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
460  Lock(i);
461  sizes[i] = hashmaps_[i].size();
462  Unlock(i);
463  }
464  }
465 
466  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
467  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
468  Lock(i);
469  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
470  Unlock(i);
471  }
472  }
473 
474  private:
475  inline uint8_t SelectHashmap(const Key &key) {
476  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
477  double bucket = static_cast<double>(hash)
478  * static_cast<double>(num_hashmaps_)
479  / static_cast<double>(static_cast<uint32_t>(-1));
480  return static_cast<uint32_t>(bucket) % num_hashmaps_;
481  }
482 
483  inline void Lock(const uint8_t target) {
484  int retval = pthread_mutex_lock(&locks_[target]);
485  assert(retval == 0);
486  }
487 
488  inline void Unlock(const uint8_t target) {
489  int retval = pthread_mutex_unlock(&locks_[target]);
490  assert(retval == 0);
491  }
492 
493  uint8_t num_hashmaps_;
495  pthread_mutex_t *locks_;
496 };
497 
498 
499 // initialize the static fields
500 template<class Key, class Value>
502 
503 template<class Key, class Value, class Derived>
505 
506 template<class Key, class Value>
508 
509 template<class Key, class Value>
511 
512 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:27
uint32_t capacity() const
Definition: smallhash.h:146
Value * values_
Definition: smallhash.h:232
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:494
void SetThresholds()
Definition: smallhash.h:251
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:139
void ResetCapacity()
Definition: smallhash.h:315
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:236
void Unlock(const uint8_t target)
Definition: smallhash.h:488
~MultiHash()
Definition: smallhash.h:417
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:402
void ResetCapacity()
Definition: smallhash.h:228
void SetThresholds()
Definition: smallhash.h:295
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:433
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:458
void Shrink()
Definition: smallhash.h:253
uint64_t bytes_allocated() const
Definition: smallhash.h:133
uint64_t num_collisions_
Definition: smallhash.h:238
void SetThresholds()
Definition: smallhash.h:225
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:155
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:325
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:371
static const double kThresholdGrow
Definition: smallhash.h:265
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:425
void ResetCapacity()
Definition: smallhash.h:254
uint32_t num_migrates_
Definition: smallhash.h:382
void Clear()
Definition: smallhash.h:447
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:475
void Lock(const uint8_t target)
Definition: smallhash.h:483
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:276
static Prng g_prng
Definition: smallhash.h:385
uint32_t threshold_shrink_
Definition: smallhash.h:384
bool LookupEx(Key *key, Value *value) const
Definition: smallhash.h:86
static double GetEntrySize()
Definition: smallhash.h:134
uint32_t size_
Definition: smallhash.h:235
uint32_t capacity() const
Definition: smallhash.h:290
pthread_mutex_t * locks_
Definition: smallhash.h:495
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:106
Key * keys() const
Definition: smallhash.h:148
static const double kThresholdGrow
Definition: smallhash.h:39
uint8_t num_hashmaps_
Definition: smallhash.h:493
uint64_t bytes_allocated_
Definition: smallhash.h:237
void Erase(const Key &key)
Definition: smallhash.h:440
uint32_t threshold_grow_
Definition: smallhash.h:383
static const double kLoadFactor
Definition: smallhash.h:38
bool Contains(const Key &key) const
Definition: smallhash.h:99
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:190
void DoClear(const bool reset_capacity)
Definition: smallhash.h:216
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:466
uint32_t max_collisions_
Definition: smallhash.h:239
void Clear()
Definition: smallhash.h:131
void Shrink()
Definition: smallhash.h:227
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:152
uint32_t size() const
Definition: smallhash.h:291
uint32_t num_migrates() const
Definition: smallhash.h:292
Value * values() const
Definition: smallhash.h:149
bool Erase(const Key &key)
Definition: smallhash.h:112
uint8_t num_hashmaps() const
Definition: smallhash.h:456
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
uint32_t initial_capacity_
Definition: smallhash.h:234
static const double kThresholdShrink
Definition: smallhash.h:40
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:70
void Grow()
Definition: smallhash.h:226
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:174
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:58
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:204
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:264
Key empty_key() const
Definition: smallhash.h:147
uint32_t capacity_
Definition: smallhash.h:233
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:340
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.hxx:23
static const double kThresholdShrink
Definition: smallhash.h:266
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:281
uint32_t Next(const uint64_t boundary)
Definition: prng.h:44
void AllocMemory()
Definition: smallhash.h:162