CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 // NOLINTNEXTLINE
10 #define __STDC_FORMAT_MACROS
11 #endif
12 
13 #include <gtest/gtest_prod.h>
14 #include <inttypes.h>
15 #include <pthread.h>
16 #include <stdint.h>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdlib>
21 #include <new>
22 
23 #include "util/atomic.h"
24 #include "util/murmur.hxx"
25 #include "util/prng.h"
26 #include "util/smalloc.h"
27 
33 template<class Key, class Value, class Derived>
35  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
36 
37  public:
38  static const double kLoadFactor; // mainly useless for the dynamic version
39  static const double kThresholdGrow; // only used for resizable version
40  static const double kThresholdShrink; // only used for resizable version
41 
43  keys_ = NULL;
44  values_ = NULL;
45  hasher_ = NULL;
46  bytes_allocated_ = 0;
47  num_collisions_ = 0;
48  max_collisions_ = 0;
49 
50  // Properly initialized by Init()
51  capacity_ = 0;
53  size_ = 0;
54  }
55 
58  }
59 
60  void Init(uint32_t expected_size, Key empty,
61  uint32_t (*hasher)(const Key &key))
62  {
63  hasher_ = hasher;
64  empty_key_ = empty;
65  capacity_ =
66  static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
68  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
69  AllocMemory();
70  this->DoClear(false);
71  }
72 
73  bool Lookup(const Key &key, Value *value) const {
74  uint32_t bucket;
75  uint32_t collisions;
76  const bool found = DoLookup(key, &bucket, &collisions);
77  if (found)
78  *value = values_[bucket];
79  return found;
80  }
81 
89  bool LookupEx(Key *key, Value *value) const {
90  uint32_t bucket = ScaleHash(*key);
91  while (!(keys_[bucket] == empty_key_)) {
92  if (keys_[bucket] == *key) {
93  *key = keys_[bucket];
94  *value = values_[bucket];
95  return true;
96  }
97  bucket = (bucket + 1) % capacity_;
98  }
99  return false;
100  }
101 
102  bool Contains(const Key &key) const {
103  uint32_t bucket;
104  uint32_t collisions;
105  const bool found = DoLookup(key, &bucket, &collisions);
106  return found;
107  }
108 
109  void Insert(const Key &key, const Value &value) {
110  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
111  const bool overwritten = DoInsert(key, value, true);
112  size_ += !overwritten; // size + 1 if the key was not yet in the map
113  }
114 
115  bool Erase(const Key &key) {
116  uint32_t bucket;
117  uint32_t collisions;
118  const bool found = DoLookup(key, &bucket, &collisions);
119  if (found) {
120  keys_[bucket] = empty_key_;
121  size_--;
122  bucket = (bucket+1) % capacity_;
123  while (!(keys_[bucket] == empty_key_)) {
124  Key rehash = keys_[bucket];
125  keys_[bucket] = empty_key_;
126  DoInsert(rehash, values_[bucket], false);
127  bucket = (bucket+1) % capacity_;
128  }
129  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
130  }
131  return found;
132  }
133 
134  void Clear() {
135  DoClear(true);
136  }
137 
138  uint64_t bytes_allocated() const { return bytes_allocated_; }
139  static double GetEntrySize() {
140  const double unit = sizeof(Key) + sizeof(Value);
141  return unit/kLoadFactor;
142  }
143 
144  void GetCollisionStats(uint64_t *num_collisions,
145  uint32_t *max_collisions) const
146  {
147  *num_collisions = num_collisions_;
148  *max_collisions = max_collisions_;
149  }
150 
151  // Careful with the direct access TODO: iterator
152  uint32_t capacity() const { return capacity_; }
153  Key empty_key() const { return empty_key_; }
154  Key *keys() const { return keys_; }
155  Value *values() const { return values_; }
156 
157  // Only needed by compat
158  void SetHasher(uint32_t (*hasher)(const Key &key)) {
159  hasher_ = hasher;
160  }
161 
162  protected:
163  uint32_t ScaleHash(const Key &key) const {
164  double bucket =
165  (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
166  static_cast<double>(static_cast<uint32_t>(-1)));
167  return static_cast<uint32_t>(bucket) % capacity_;
168  }
169 
170  void AllocMemory() {
171  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
172  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
173  for (uint32_t i = 0; i < capacity_; ++i) {
174  /*keys_[i] =*/ new (keys_ + i) Key();
175  }
176  for (uint32_t i = 0; i < capacity_; ++i) {
177  /*values_[i] =*/ new (values_ + i) Value();
178  }
179  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
180  }
181 
182  void DeallocMemory(Key *k, Value *v, uint32_t c) {
183  for (uint32_t i = 0; i < c; ++i) {
184  k[i].~Key();
185  }
186  for (uint32_t i = 0; i < c; ++i) {
187  v[i].~Value();
188  }
189  if (k)
190  smunmap(k);
191  if (v)
192  smunmap(v);
193  k = NULL;
194  v = NULL;
195  }
196 
197  // Returns true iff the key is overwritten
198  bool DoInsert(const Key &key, const Value &value,
199  const bool count_collisions)
200  {
201  uint32_t bucket;
202  uint32_t collisions;
203  const bool overwritten = DoLookup(key, &bucket, &collisions);
204  if (count_collisions) {
205  num_collisions_ += collisions;
206  max_collisions_ = std::max(collisions, max_collisions_);
207  }
208  keys_[bucket] = key;
209  values_[bucket] = value;
210  return overwritten;
211  }
212 
213  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
214  *bucket = ScaleHash(key);
215  *collisions = 0;
216  while (!(keys_[*bucket] == empty_key_)) {
217  if (keys_[*bucket] == key)
218  return true;
219  *bucket = (*bucket+1) % capacity_;
220  (*collisions)++;
221  }
222  return false;
223  }
224 
225  void DoClear(const bool reset_capacity) {
226  if (reset_capacity)
227  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
228  for (uint32_t i = 0; i < capacity_; ++i)
229  keys_[i] = empty_key_;
230  size_ = 0;
231  }
232 
233  // Methods for resizable version
234  void SetThresholds() { }
235  void Grow() { }
236  void Shrink() { }
237  void ResetCapacity() { }
238 
239  // Separate key and value arrays for better locality
240  Key *keys_;
241  Value *values_;
242  uint32_t capacity_;
244  uint32_t size_;
245  uint32_t (*hasher_)(const Key &key);
247  uint64_t num_collisions_;
248  uint32_t max_collisions_;
250 };
251 
252 
253 template<class Key, class Value>
255  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
256 {
257  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
258  protected:
259  // No-ops
260  void SetThresholds() { }
261  void Grow() { }
262  void Shrink() { }
263  void ResetCapacity() { }
264 };
265 
266 
267 template<class Key, class Value>
269  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
270 {
271  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
272  public:
274  static const double kThresholdGrow;
275  static const double kThresholdShrink;
276 
278  num_migrates_ = 0;
279 
280  // Properly set by Init
281  threshold_grow_ = 0;
282  threshold_shrink_ = 0;
283  }
284 
286  {
287  num_migrates_ = 0;
288  CopyFrom(other);
289  }
290 
292  const SmallHashDynamic<Key, Value> &other)
293  {
294  if (&other == this)
295  return *this;
296 
297  CopyFrom(other);
298  return *this;
299  }
300 
301  uint32_t capacity() const { return Base::capacity_; }
302  uint32_t size() const { return Base::size_; }
303  uint32_t num_migrates() const { return num_migrates_; }
304 
305  protected:
306  void SetThresholds() {
308  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
310  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
311  }
312 
313  void Grow() {
314  if (size() > threshold_grow_)
315  Migrate(capacity() * 2);
316  }
317 
318  void Shrink() {
319  if (size() < threshold_shrink_) {
320  uint32_t target_capacity = capacity() / 2;
321  if (target_capacity >= Base::initial_capacity_)
322  Migrate(target_capacity);
323  }
324  }
325 
326  void ResetCapacity() {
330  SetThresholds();
331  }
332 
333  private:
334  // Returns a random permutation of indices [0..N-1] that is allocated
335  // by smmap (Knuth's shuffle algorithm)
336  uint32_t *ShuffleIndices(const uint32_t N) {
337  uint32_t *shuffled =
338  static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
339  // Init with identity
340  for (unsigned i = 0; i < N; ++i)
341  shuffled[i] = i;
342  // Shuffle (no shuffling for the last element)
343  for (unsigned i = 0; i < N-1; ++i) {
344  const uint32_t swap_idx = i + g_prng.Next(N - i);
345  uint32_t tmp = shuffled[i];
346  shuffled[i] = shuffled[swap_idx];
347  shuffled[swap_idx] = tmp;
348  }
349  return shuffled;
350  }
351 
352  void Migrate(const uint32_t new_capacity) {
353  Key *old_keys = Base::keys_;
354  Value *old_values = Base::values_;
355  uint32_t old_capacity = capacity();
356  uint32_t old_size = size();
357 
358  Base::capacity_ = new_capacity;
359  SetThresholds();
361  Base::DoClear(false);
362  if (new_capacity < old_capacity) {
363  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
364  for (uint32_t i = 0; i < old_capacity; ++i) {
365  if (old_keys[shuffled_indices[i]] != Base::empty_key_) {
366  Base::Insert(old_keys[shuffled_indices[i]],
367  old_values[shuffled_indices[i]]);
368  }
369  }
370  smunmap(shuffled_indices);
371  } else {
372  for (uint32_t i = 0; i < old_capacity; ++i) {
373  if (old_keys[i] != Base::empty_key_)
374  Base::Insert(old_keys[i], old_values[i]);
375  }
376  }
377  assert(size() == old_size);
378 
379  Base::DeallocMemory(old_keys, old_values, old_capacity);
380  num_migrates_++;
381  }
382 
384  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
385  for (uint32_t i = 0; i < other.capacity_; ++i) {
386  if (other.keys_[shuffled_indices[i]] != other.empty_key_) {
387  this->Insert(other.keys_[shuffled_indices[i]],
388  other.values_[shuffled_indices[i]]);
389  }
390  }
391  smunmap(shuffled_indices);
392  }
393 
394  uint32_t num_migrates_;
395  uint32_t threshold_grow_;
397  static Prng g_prng;
398 };
399 
400 
405 template<class Key, class Value>
406 class MultiHash {
407  public:
409  num_hashmaps_ = 0;
410  hashmaps_ = NULL;
411  locks_ = NULL;
412  }
413 
414  void Init(const uint8_t num_hashmaps, const Key &empty_key,
415  uint32_t (*hasher)(const Key &key))
416  {
417  assert(num_hashmaps > 0);
418  const uint8_t N = num_hashmaps;
419  num_hashmaps_ = N;
421  locks_ =
422  static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
423  for (uint8_t i = 0; i < N; ++i) {
424  int retval = pthread_mutex_init(&locks_[i], NULL);
425  assert(retval == 0);
426  hashmaps_[i].Init(128, empty_key, hasher);
427  }
428  }
429 
431  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
432  pthread_mutex_destroy(&locks_[i]);
433  }
434  free(locks_);
435  delete[] hashmaps_;
436  }
437 
438  bool Lookup(const Key &key, Value *value) {
439  uint8_t target = SelectHashmap(key);
440  Lock(target);
441  const bool result = hashmaps_[target].Lookup(key, value);
442  Unlock(target);
443  return result;
444  }
445 
446  void Insert(const Key &key, const Value &value) {
447  uint8_t target = SelectHashmap(key);
448  Lock(target);
449  hashmaps_[target].Insert(key, value);
450  Unlock(target);
451  }
452 
453  void Erase(const Key &key) {
454  uint8_t target = SelectHashmap(key);
455  Lock(target);
456  hashmaps_[target].Erase(key);
457  Unlock(target);
458  }
459 
460  void Clear() {
461  for (uint8_t i = 0; i < num_hashmaps_; ++i)
462  Lock(i);
463  for (uint8_t i = 0; i < num_hashmaps_; ++i)
464  hashmaps_[i].Clear();
465  for (uint8_t i = 0; i < num_hashmaps_; ++i)
466  Unlock(i);
467  }
468 
469  uint8_t num_hashmaps() const { return num_hashmaps_; }
470 
471  void GetSizes(uint32_t *sizes) {
472  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
473  Lock(i);
474  sizes[i] = hashmaps_[i].size();
475  Unlock(i);
476  }
477  }
478 
479  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
480  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
481  Lock(i);
482  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
483  Unlock(i);
484  }
485  }
486 
487  private:
488  inline uint8_t SelectHashmap(const Key &key) {
489  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
490  double bucket =
491  static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
492  static_cast<double>(static_cast<uint32_t>(-1));
493  return static_cast<uint32_t>(bucket) % num_hashmaps_;
494  }
495 
496  inline void Lock(const uint8_t target) {
497  int retval = pthread_mutex_lock(&locks_[target]);
498  assert(retval == 0);
499  }
500 
501  inline void Unlock(const uint8_t target) {
502  int retval = pthread_mutex_unlock(&locks_[target]);
503  assert(retval == 0);
504  }
505 
506  uint8_t num_hashmaps_;
508  pthread_mutex_t *locks_;
509 };
510 
511 
512 // initialize the static fields
513 template<class Key, class Value>
515 
516 template<class Key, class Value, class Derived>
518 
519 template<class Key, class Value>
521 
522 template<class Key, class Value>
524 
525 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:28
uint32_t capacity() const
Definition: smallhash.h:152
Value * values_
Definition: smallhash.h:241
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:507
void SetThresholds()
Definition: smallhash.h:260
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:144
void ResetCapacity()
Definition: smallhash.h:326
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:245
void Unlock(const uint8_t target)
Definition: smallhash.h:501
~MultiHash()
Definition: smallhash.h:430
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:414
void ResetCapacity()
Definition: smallhash.h:237
void SetThresholds()
Definition: smallhash.h:306
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:446
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:471
void Shrink()
Definition: smallhash.h:262
uint64_t bytes_allocated() const
Definition: smallhash.h:138
uint64_t num_collisions_
Definition: smallhash.h:247
void SetThresholds()
Definition: smallhash.h:234
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:163
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:336
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:383
static const double kThresholdGrow
Definition: smallhash.h:274
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:438
void ResetCapacity()
Definition: smallhash.h:263
uint32_t num_migrates_
Definition: smallhash.h:394
void Clear()
Definition: smallhash.h:460
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:488
void Lock(const uint8_t target)
Definition: smallhash.h:496
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:285
static Prng g_prng
Definition: smallhash.h:397
uint32_t threshold_shrink_
Definition: smallhash.h:396
bool LookupEx(Key *key, Value *value) const
Definition: smallhash.h:89
static double GetEntrySize()
Definition: smallhash.h:139
uint32_t size_
Definition: smallhash.h:244
uint32_t capacity() const
Definition: smallhash.h:301
pthread_mutex_t * locks_
Definition: smallhash.h:508
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:109
Key * keys() const
Definition: smallhash.h:154
static const double kThresholdGrow
Definition: smallhash.h:39
uint8_t num_hashmaps_
Definition: smallhash.h:506
uint64_t bytes_allocated_
Definition: smallhash.h:246
void Erase(const Key &key)
Definition: smallhash.h:453
uint32_t threshold_grow_
Definition: smallhash.h:395
static const double kLoadFactor
Definition: smallhash.h:38
bool Contains(const Key &key) const
Definition: smallhash.h:102
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:198
void DoClear(const bool reset_capacity)
Definition: smallhash.h:225
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:479
uint32_t max_collisions_
Definition: smallhash.h:248
void Clear()
Definition: smallhash.h:134
void Shrink()
Definition: smallhash.h:236
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:158
uint32_t size() const
Definition: smallhash.h:302
uint32_t num_migrates() const
Definition: smallhash.h:303
Value * values() const
Definition: smallhash.h:155
bool Erase(const Key &key)
Definition: smallhash.h:115
uint8_t num_hashmaps() const
Definition: smallhash.h:469
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:273
uint32_t initial_capacity_
Definition: smallhash.h:243
static const double kThresholdShrink
Definition: smallhash.h:40
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:73
void Grow()
Definition: smallhash.h:235
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:182
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:60
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:213
Key empty_key() const
Definition: smallhash.h:153
uint32_t capacity_
Definition: smallhash.h:242
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:352
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.hxx:23
static const double kThresholdShrink
Definition: smallhash.h:275
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:291
uint32_t Next(const uint64_t boundary)
Definition: prng.h:49
void AllocMemory()
Definition: smallhash.h:170