CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 // NOLINTNEXTLINE
10 #define __STDC_FORMAT_MACROS
11 #endif
12 
13 #include <gtest/gtest_prod.h>
14 #include <inttypes.h>
15 #include <pthread.h>
16 #include <stdint.h>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdlib>
21 #include <new>
22 
23 #include "util/atomic.h"
24 #include "util/murmur.hxx"
25 #include "util/prng.h"
26 #include "util/smalloc.h"
27 
33 template<class Key, class Value, class Derived>
35  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
36 
37  public:
38  static const double kLoadFactor; // mainly useless for the dynamic version
39  static const double kThresholdGrow; // only used for resizable version
40  static const double kThresholdShrink; // only used for resizable version
41 
43  keys_ = NULL;
44  values_ = NULL;
45  hasher_ = NULL;
46  bytes_allocated_ = 0;
47  num_collisions_ = 0;
48  max_collisions_ = 0;
49 
50  // Properly initialized by Init()
51  capacity_ = 0;
53  size_ = 0;
54  }
55 
58  }
59 
60  void Init(uint32_t expected_size, Key empty,
61  uint32_t (*hasher)(const Key &key))
62  {
63  hasher_ = hasher;
64  empty_key_ = empty;
65  capacity_ =
66  static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
68  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
69  AllocMemory();
70  this->DoClear(false);
71  }
72 
73  bool Lookup(const Key &key, Value *value) const {
74  uint32_t bucket;
75  uint32_t collisions;
76  const bool found = DoLookup(key, &bucket, &collisions);
77  if (found)
78  *value = values_[bucket];
79  return found;
80  }
81 
89  bool LookupEx(Key *key, Value *value) const {
90  uint32_t bucket = ScaleHash(*key);
91  while (!(keys_[bucket] == empty_key_)) {
92  if (keys_[bucket] == *key) {
93  *key = keys_[bucket];
94  *value = values_[bucket];
95  return true;
96  }
97  bucket = (bucket + 1) % capacity_;
98  }
99  return false;
100  }
101 
102  bool Contains(const Key &key) const {
103  uint32_t bucket;
104  uint32_t collisions;
105  const bool found = DoLookup(key, &bucket, &collisions);
106  return found;
107  }
108 
109  void Insert(const Key &key, const Value &value) {
110  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
111  const bool overwritten = DoInsert(key, value, true);
112  size_ += !overwritten; // size + 1 if the key was not yet in the map
113  }
114 
115  bool Erase(const Key &key) {
116  uint32_t bucket;
117  uint32_t collisions;
118  const bool found = DoLookup(key, &bucket, &collisions);
119  if (found) {
120  keys_[bucket] = empty_key_;
121  size_--;
122  bucket = (bucket+1) % capacity_;
123  while (!(keys_[bucket] == empty_key_)) {
124  Key rehash = keys_[bucket];
125  keys_[bucket] = empty_key_;
126  DoInsert(rehash, values_[bucket], false);
127  bucket = (bucket+1) % capacity_;
128  }
129  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
130  }
131  return found;
132  }
133 
134  void Clear() {
135  DoClear(true);
136  }
137 
138  uint64_t bytes_allocated() const { return bytes_allocated_; }
139  static double GetEntrySize() {
140  const double unit = sizeof(Key) + sizeof(Value);
141  return unit/kLoadFactor;
142  }
143 
144  void GetCollisionStats(uint64_t *num_collisions,
145  uint32_t *max_collisions) const
146  {
147  *num_collisions = num_collisions_;
148  *max_collisions = max_collisions_;
149  }
150 
151  // Careful with the direct access TODO: iterator
152  uint32_t capacity() const { return capacity_; }
153  Key empty_key() const { return empty_key_; }
154  Key *keys() const { return keys_; }
155  Value *values() const { return values_; }
156 
157  // Only needed by compat
158  void SetHasher(uint32_t (*hasher)(const Key &key)) {
159  hasher_ = hasher;
160  }
161 
162  protected:
163  uint32_t ScaleHash(const Key &key) const {
164  double bucket =
165  (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
166  static_cast<double>(static_cast<uint32_t>(-1)));
167  return static_cast<uint32_t>(bucket) % capacity_;
168  }
169 
170  void AllocMemory() {
171  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
172  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
173  for (uint32_t i = 0; i < capacity_; ++i) {
174  /*keys_[i] =*/ new (keys_ + i) Key();
175  }
176  for (uint32_t i = 0; i < capacity_; ++i) {
177  /*values_[i] =*/ new (values_ + i) Value();
178  }
179  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
180  }
181 
182  void DeallocMemory(Key *k, Value *v, uint32_t c) {
183  for (uint32_t i = 0; i < c; ++i) {
184  k[i].~Key();
185  }
186  for (uint32_t i = 0; i < c; ++i) {
187  v[i].~Value();
188  }
189  smunmap(k);
190  smunmap(v);
191  k = NULL;
192  v = NULL;
193  }
194 
195  // Returns true iff the key is overwritten
196  bool DoInsert(const Key &key, const Value &value,
197  const bool count_collisions)
198  {
199  uint32_t bucket;
200  uint32_t collisions;
201  const bool overwritten = DoLookup(key, &bucket, &collisions);
202  if (count_collisions) {
203  num_collisions_ += collisions;
204  max_collisions_ = std::max(collisions, max_collisions_);
205  }
206  keys_[bucket] = key;
207  values_[bucket] = value;
208  return overwritten;
209  }
210 
211  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
212  *bucket = ScaleHash(key);
213  *collisions = 0;
214  while (!(keys_[*bucket] == empty_key_)) {
215  if (keys_[*bucket] == key)
216  return true;
217  *bucket = (*bucket+1) % capacity_;
218  (*collisions)++;
219  }
220  return false;
221  }
222 
223  void DoClear(const bool reset_capacity) {
224  if (reset_capacity)
225  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
226  for (uint32_t i = 0; i < capacity_; ++i)
227  keys_[i] = empty_key_;
228  size_ = 0;
229  }
230 
231  // Methods for resizable version
232  void SetThresholds() { }
233  void Grow() { }
234  void Shrink() { }
235  void ResetCapacity() { }
236 
237  // Separate key and value arrays for better locality
238  Key *keys_;
239  Value *values_;
240  uint32_t capacity_;
242  uint32_t size_;
243  uint32_t (*hasher_)(const Key &key);
245  uint64_t num_collisions_;
246  uint32_t max_collisions_;
248 };
249 
250 
251 template<class Key, class Value>
253  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
254 {
255  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
256  protected:
257  // No-ops
258  void SetThresholds() { }
259  void Grow() { }
260  void Shrink() { }
261  void ResetCapacity() { }
262 };
263 
264 
265 template<class Key, class Value>
267  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
268 {
269  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
270  public:
272  static const double kThresholdGrow;
273  static const double kThresholdShrink;
274 
276  num_migrates_ = 0;
277 
278  // Properly set by Init
279  threshold_grow_ = 0;
280  threshold_shrink_ = 0;
281  }
282 
284  {
285  num_migrates_ = 0;
286  CopyFrom(other);
287  }
288 
290  const SmallHashDynamic<Key, Value> &other)
291  {
292  if (&other == this)
293  return *this;
294 
295  CopyFrom(other);
296  return *this;
297  }
298 
299  uint32_t capacity() const { return Base::capacity_; }
300  uint32_t size() const { return Base::size_; }
301  uint32_t num_migrates() const { return num_migrates_; }
302 
303  protected:
304  void SetThresholds() {
306  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
308  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
309  }
310 
311  void Grow() {
312  if (size() > threshold_grow_)
313  Migrate(capacity() * 2);
314  }
315 
316  void Shrink() {
317  if (size() < threshold_shrink_) {
318  uint32_t target_capacity = capacity() / 2;
319  if (target_capacity >= Base::initial_capacity_)
320  Migrate(target_capacity);
321  }
322  }
323 
324  void ResetCapacity() {
328  SetThresholds();
329  }
330 
331  private:
332  // Returns a random permutation of indices [0..N-1] that is allocated
333  // by smmap (Knuth's shuffle algorithm)
334  uint32_t *ShuffleIndices(const uint32_t N) {
335  uint32_t *shuffled =
336  static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
337  // Init with identity
338  for (unsigned i = 0; i < N; ++i)
339  shuffled[i] = i;
340  // Shuffle (no shuffling for the last element)
341  for (unsigned i = 0; i < N-1; ++i) {
342  const uint32_t swap_idx = i + g_prng.Next(N - i);
343  uint32_t tmp = shuffled[i];
344  shuffled[i] = shuffled[swap_idx];
345  shuffled[swap_idx] = tmp;
346  }
347  return shuffled;
348  }
349 
350  void Migrate(const uint32_t new_capacity) {
351  Key *old_keys = Base::keys_;
352  Value *old_values = Base::values_;
353  uint32_t old_capacity = capacity();
354  uint32_t old_size = size();
355 
356  Base::capacity_ = new_capacity;
357  SetThresholds();
359  Base::DoClear(false);
360  if (new_capacity < old_capacity) {
361  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
362  for (uint32_t i = 0; i < old_capacity; ++i) {
363  if (old_keys[shuffled_indices[i]] != Base::empty_key_) {
364  Base::Insert(old_keys[shuffled_indices[i]],
365  old_values[shuffled_indices[i]]);
366  }
367  }
368  smunmap(shuffled_indices);
369  } else {
370  for (uint32_t i = 0; i < old_capacity; ++i) {
371  if (old_keys[i] != Base::empty_key_)
372  Base::Insert(old_keys[i], old_values[i]);
373  }
374  }
375  assert(size() == old_size);
376 
377  Base::DeallocMemory(old_keys, old_values, old_capacity);
378  num_migrates_++;
379  }
380 
382  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
383  for (uint32_t i = 0; i < other.capacity_; ++i) {
384  if (other.keys_[shuffled_indices[i]] != other.empty_key_) {
385  this->Insert(other.keys_[shuffled_indices[i]],
386  other.values_[shuffled_indices[i]]);
387  }
388  }
389  smunmap(shuffled_indices);
390  }
391 
392  uint32_t num_migrates_;
393  uint32_t threshold_grow_;
395  static Prng g_prng;
396 };
397 
398 
403 template<class Key, class Value>
404 class MultiHash {
405  public:
407  num_hashmaps_ = 0;
408  hashmaps_ = NULL;
409  locks_ = NULL;
410  }
411 
412  void Init(const uint8_t num_hashmaps, const Key &empty_key,
413  uint32_t (*hasher)(const Key &key))
414  {
415  assert(num_hashmaps > 0);
416  const uint8_t N = num_hashmaps;
417  num_hashmaps_ = N;
419  locks_ =
420  static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
421  for (uint8_t i = 0; i < N; ++i) {
422  int retval = pthread_mutex_init(&locks_[i], NULL);
423  assert(retval == 0);
424  hashmaps_[i].Init(128, empty_key, hasher);
425  }
426  }
427 
429  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
430  pthread_mutex_destroy(&locks_[i]);
431  }
432  free(locks_);
433  delete[] hashmaps_;
434  }
435 
436  bool Lookup(const Key &key, Value *value) {
437  uint8_t target = SelectHashmap(key);
438  Lock(target);
439  const bool result = hashmaps_[target].Lookup(key, value);
440  Unlock(target);
441  return result;
442  }
443 
444  void Insert(const Key &key, const Value &value) {
445  uint8_t target = SelectHashmap(key);
446  Lock(target);
447  hashmaps_[target].Insert(key, value);
448  Unlock(target);
449  }
450 
451  void Erase(const Key &key) {
452  uint8_t target = SelectHashmap(key);
453  Lock(target);
454  hashmaps_[target].Erase(key);
455  Unlock(target);
456  }
457 
458  void Clear() {
459  for (uint8_t i = 0; i < num_hashmaps_; ++i)
460  Lock(i);
461  for (uint8_t i = 0; i < num_hashmaps_; ++i)
462  hashmaps_[i].Clear();
463  for (uint8_t i = 0; i < num_hashmaps_; ++i)
464  Unlock(i);
465  }
466 
467  uint8_t num_hashmaps() const { return num_hashmaps_; }
468 
469  void GetSizes(uint32_t *sizes) {
470  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
471  Lock(i);
472  sizes[i] = hashmaps_[i].size();
473  Unlock(i);
474  }
475  }
476 
477  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
478  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
479  Lock(i);
480  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
481  Unlock(i);
482  }
483  }
484 
485  private:
486  inline uint8_t SelectHashmap(const Key &key) {
487  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
488  double bucket =
489  static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
490  static_cast<double>(static_cast<uint32_t>(-1));
491  return static_cast<uint32_t>(bucket) % num_hashmaps_;
492  }
493 
494  inline void Lock(const uint8_t target) {
495  int retval = pthread_mutex_lock(&locks_[target]);
496  assert(retval == 0);
497  }
498 
499  inline void Unlock(const uint8_t target) {
500  int retval = pthread_mutex_unlock(&locks_[target]);
501  assert(retval == 0);
502  }
503 
504  uint8_t num_hashmaps_;
506  pthread_mutex_t *locks_;
507 };
508 
509 
510 // initialize the static fields
511 template<class Key, class Value>
513 
514 template<class Key, class Value, class Derived>
516 
517 template<class Key, class Value>
519 
520 template<class Key, class Value>
522 
523 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:28
uint32_t capacity() const
Definition: smallhash.h:152
Value * values_
Definition: smallhash.h:239
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:505
void SetThresholds()
Definition: smallhash.h:258
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:144
void ResetCapacity()
Definition: smallhash.h:324
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:243
void Unlock(const uint8_t target)
Definition: smallhash.h:499
~MultiHash()
Definition: smallhash.h:428
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:412
void ResetCapacity()
Definition: smallhash.h:235
void SetThresholds()
Definition: smallhash.h:304
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:444
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:469
void Shrink()
Definition: smallhash.h:260
uint64_t bytes_allocated() const
Definition: smallhash.h:138
uint64_t num_collisions_
Definition: smallhash.h:245
void SetThresholds()
Definition: smallhash.h:232
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:163
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:334
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:381
static const double kThresholdGrow
Definition: smallhash.h:272
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:436
void ResetCapacity()
Definition: smallhash.h:261
uint32_t num_migrates_
Definition: smallhash.h:392
void Clear()
Definition: smallhash.h:458
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:486
void Lock(const uint8_t target)
Definition: smallhash.h:494
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:283
static Prng g_prng
Definition: smallhash.h:395
uint32_t threshold_shrink_
Definition: smallhash.h:394
bool LookupEx(Key *key, Value *value) const
Definition: smallhash.h:89
static double GetEntrySize()
Definition: smallhash.h:139
uint32_t size_
Definition: smallhash.h:242
uint32_t capacity() const
Definition: smallhash.h:299
pthread_mutex_t * locks_
Definition: smallhash.h:506
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:109
Key * keys() const
Definition: smallhash.h:154
static const double kThresholdGrow
Definition: smallhash.h:39
uint8_t num_hashmaps_
Definition: smallhash.h:504
uint64_t bytes_allocated_
Definition: smallhash.h:244
void Erase(const Key &key)
Definition: smallhash.h:451
uint32_t threshold_grow_
Definition: smallhash.h:393
static const double kLoadFactor
Definition: smallhash.h:38
bool Contains(const Key &key) const
Definition: smallhash.h:102
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:196
void DoClear(const bool reset_capacity)
Definition: smallhash.h:223
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:477
uint32_t max_collisions_
Definition: smallhash.h:246
void Clear()
Definition: smallhash.h:134
void Shrink()
Definition: smallhash.h:234
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:158
uint32_t size() const
Definition: smallhash.h:300
uint32_t num_migrates() const
Definition: smallhash.h:301
Value * values() const
Definition: smallhash.h:155
bool Erase(const Key &key)
Definition: smallhash.h:115
uint8_t num_hashmaps() const
Definition: smallhash.h:467
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:271
uint32_t initial_capacity_
Definition: smallhash.h:241
static const double kThresholdShrink
Definition: smallhash.h:40
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:73
void Grow()
Definition: smallhash.h:233
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:182
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:60
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:211
Key empty_key() const
Definition: smallhash.h:153
uint32_t capacity_
Definition: smallhash.h:240
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:350
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.hxx:23
static const double kThresholdShrink
Definition: smallhash.h:273
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:289
uint32_t Next(const uint64_t boundary)
Definition: prng.h:49
void AllocMemory()
Definition: smallhash.h:170