CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 // NOLINTNEXTLINE
10 #define __STDC_FORMAT_MACROS
11 #endif
12 
13 #include <gtest/gtest_prod.h>
14 #include <inttypes.h>
15 #include <pthread.h>
16 #include <stdint.h>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdlib>
21 #include <new>
22 
23 #include "atomic.h"
24 #include "murmur.hxx"
25 #include "prng.h"
26 #include "smalloc.h"
27 
33 template<class Key, class Value, class Derived>
35  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
36 
37  public:
38  static const double kLoadFactor; // mainly useless for the dynamic version
39  static const double kThresholdGrow; // only used for resizable version
40  static const double kThresholdShrink; // only used for resizable version
41 
43  keys_ = NULL;
44  values_ = NULL;
45  hasher_ = NULL;
46  bytes_allocated_ = 0;
47  num_collisions_ = 0;
48  max_collisions_ = 0;
49 
50  // Properly initialized by Init()
51  capacity_ = 0;
53  size_ = 0;
54  }
55 
58  }
59 
60  void Init(uint32_t expected_size, Key empty,
61  uint32_t (*hasher)(const Key &key))
62  {
63  hasher_ = hasher;
64  empty_key_ = empty;
65  capacity_ =
66  static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
68  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
69  AllocMemory();
70  this->DoClear(false);
71  }
72 
73  bool Lookup(const Key &key, Value *value) const {
74  uint32_t bucket;
75  uint32_t collisions;
76  const bool found = DoLookup(key, &bucket, &collisions);
77  if (found)
78  *value = values_[bucket];
79  return found;
80  }
81 
82  bool Contains(const Key &key) const {
83  uint32_t bucket;
84  uint32_t collisions;
85  const bool found = DoLookup(key, &bucket, &collisions);
86  return found;
87  }
88 
89  void Insert(const Key &key, const Value &value) {
90  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
91  const bool overwritten = DoInsert(key, value, true);
92  size_ += !overwritten; // size + 1 if the key was not yet in the map
93  }
94 
95  void Erase(const Key &key) {
96  uint32_t bucket;
97  uint32_t collisions;
98  const bool found = DoLookup(key, &bucket, &collisions);
99  if (found) {
100  keys_[bucket] = empty_key_;
101  size_--;
102  bucket = (bucket+1) % capacity_;
103  while (!(keys_[bucket] == empty_key_)) {
104  Key rehash = keys_[bucket];
105  keys_[bucket] = empty_key_;
106  DoInsert(rehash, values_[bucket], false);
107  bucket = (bucket+1) % capacity_;
108  }
109  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
110  }
111  }
112 
113  void Clear() {
114  DoClear(true);
115  }
116 
117  uint64_t bytes_allocated() const { return bytes_allocated_; }
118  static double GetEntrySize() {
119  const double unit = sizeof(Key) + sizeof(Value);
120  return unit/kLoadFactor;
121  }
122 
123  void GetCollisionStats(uint64_t *num_collisions,
124  uint32_t *max_collisions) const
125  {
126  *num_collisions = num_collisions_;
127  *max_collisions = max_collisions_;
128  }
129 
130  // Careful with the direct access TODO: iterator
131  uint32_t capacity() const { return capacity_; }
132  Key empty_key() const { return empty_key_; }
133  Key *keys() const { return keys_; }
134  Value *values() const { return values_; }
135 
136  // Only needed by compat
137  void SetHasher(uint32_t (*hasher)(const Key &key)) {
138  hasher_ = hasher;
139  }
140 
141  protected:
142  uint32_t ScaleHash(const Key &key) const {
143  double bucket =
144  (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
145  static_cast<double>(static_cast<uint32_t>(-1)));
146  return static_cast<uint32_t>(bucket) % capacity_;
147  }
148 
149  void AllocMemory() {
150  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
151  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
152  for (uint32_t i = 0; i < capacity_; ++i) {
153  /*keys_[i] =*/ new (keys_ + i) Key();
154  }
155  for (uint32_t i = 0; i < capacity_; ++i) {
156  /*values_[i] =*/ new (values_ + i) Value();
157  }
158  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
159  }
160 
161  void DeallocMemory(Key *k, Value *v, uint32_t c) {
162  for (uint32_t i = 0; i < c; ++i) {
163  k[i].~Key();
164  }
165  for (uint32_t i = 0; i < c; ++i) {
166  v[i].~Value();
167  }
168  smunmap(k);
169  smunmap(v);
170  k = NULL;
171  v = NULL;
172  }
173 
174  // Returns true iff the key is overwritten
175  bool DoInsert(const Key &key, const Value &value,
176  const bool count_collisions)
177  {
178  uint32_t bucket;
179  uint32_t collisions;
180  const bool overwritten = DoLookup(key, &bucket, &collisions);
181  if (count_collisions) {
182  num_collisions_ += collisions;
183  max_collisions_ = std::max(collisions, max_collisions_);
184  }
185  keys_[bucket] = key;
186  values_[bucket] = value;
187  return overwritten;
188  }
189 
190  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
191  *bucket = ScaleHash(key);
192  *collisions = 0;
193  while (!(keys_[*bucket] == empty_key_)) {
194  if (keys_[*bucket] == key)
195  return true;
196  *bucket = (*bucket+1) % capacity_;
197  (*collisions)++;
198  }
199  return false;
200  }
201 
202  void DoClear(const bool reset_capacity) {
203  if (reset_capacity)
204  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
205  for (uint32_t i = 0; i < capacity_; ++i)
206  keys_[i] = empty_key_;
207  size_ = 0;
208  }
209 
210  // Methods for resizable version
211  void SetThresholds() { }
212  void Grow() { }
213  void Shrink() { }
214  void ResetCapacity() { }
215 
216  // Separate key and value arrays for better locality
217  Key *keys_;
218  Value *values_;
219  uint32_t capacity_;
221  uint32_t size_;
222  uint32_t (*hasher_)(const Key &key);
224  uint64_t num_collisions_;
225  uint32_t max_collisions_;
227 };
228 
229 
230 template<class Key, class Value>
232  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
233 {
234  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
235  protected:
236  // No-ops
237  void SetThresholds() { }
238  void Grow() { }
239  void Shrink() { }
240  void ResetCapacity() { }
241 };
242 
243 
244 template<class Key, class Value>
246  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
247 {
248  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
249  public:
251  static const double kThresholdGrow;
252  static const double kThresholdShrink;
253 
255  num_migrates_ = 0;
256 
257  // Properly set by Init
258  threshold_grow_ = 0;
259  threshold_shrink_ = 0;
260  }
261 
263  {
264  num_migrates_ = 0;
265  CopyFrom(other);
266  }
267 
269  const SmallHashDynamic<Key, Value> &other)
270  {
271  if (&other == this)
272  return *this;
273 
274  CopyFrom(other);
275  return *this;
276  }
277 
278  uint32_t capacity() const { return Base::capacity_; }
279  uint32_t size() const { return Base::size_; }
280  uint32_t num_migrates() const { return num_migrates_; }
281 
282  protected:
283  void SetThresholds() {
285  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
287  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
288  }
289 
290  void Grow() {
291  if (size() > threshold_grow_)
292  Migrate(capacity() * 2);
293  }
294 
295  void Shrink() {
296  if (size() < threshold_shrink_) {
297  uint32_t target_capacity = capacity() / 2;
298  if (target_capacity >= Base::initial_capacity_)
299  Migrate(target_capacity);
300  }
301  }
302 
303  void ResetCapacity() {
307  SetThresholds();
308  }
309 
310  private:
311  // Returns a random permutation of indices [0..N-1] that is allocated
312  // by smmap (Knuth's shuffle algorithm)
313  uint32_t *ShuffleIndices(const uint32_t N) {
314  uint32_t *shuffled =
315  static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
316  // Init with identity
317  for (unsigned i = 0; i < N; ++i)
318  shuffled[i] = i;
319  // Shuffle (no shuffling for the last element)
320  for (unsigned i = 0; i < N-1; ++i) {
321  const uint32_t swap_idx = i + g_prng.Next(N - i);
322  uint32_t tmp = shuffled[i];
323  shuffled[i] = shuffled[swap_idx];
324  shuffled[swap_idx] = tmp;
325  }
326  return shuffled;
327  }
328 
329  void Migrate(const uint32_t new_capacity) {
330  Key *old_keys = Base::keys_;
331  Value *old_values = Base::values_;
332  uint32_t old_capacity = capacity();
333  uint32_t old_size = size();
334 
335  Base::capacity_ = new_capacity;
336  SetThresholds();
338  Base::DoClear(false);
339  if (new_capacity < old_capacity) {
340  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
341  for (uint32_t i = 0; i < old_capacity; ++i) {
342  if (old_keys[shuffled_indices[i]] != Base::empty_key_) {
343  Base::Insert(old_keys[shuffled_indices[i]],
344  old_values[shuffled_indices[i]]);
345  }
346  }
347  smunmap(shuffled_indices);
348  } else {
349  for (uint32_t i = 0; i < old_capacity; ++i) {
350  if (old_keys[i] != Base::empty_key_)
351  Base::Insert(old_keys[i], old_values[i]);
352  }
353  }
354  assert(size() == old_size);
355 
356  Base::DeallocMemory(old_keys, old_values, old_capacity);
357  num_migrates_++;
358  }
359 
361  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
362  for (uint32_t i = 0; i < other.capacity_; ++i) {
363  if (other.keys_[shuffled_indices[i]] != other.empty_key_) {
364  this->Insert(other.keys_[shuffled_indices[i]],
365  other.values_[shuffled_indices[i]]);
366  }
367  }
368  smunmap(shuffled_indices);
369  }
370 
371  uint32_t num_migrates_;
372  uint32_t threshold_grow_;
374  static Prng g_prng;
375 };
376 
377 
382 template<class Key, class Value>
383 class MultiHash {
384  public:
386  num_hashmaps_ = 0;
387  hashmaps_ = NULL;
388  locks_ = NULL;
389  }
390 
391  void Init(const uint8_t num_hashmaps, const Key &empty_key,
392  uint32_t (*hasher)(const Key &key))
393  {
394  assert(num_hashmaps > 0);
395  const uint8_t N = num_hashmaps;
396  num_hashmaps_ = N;
398  locks_ =
399  static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
400  for (uint8_t i = 0; i < N; ++i) {
401  int retval = pthread_mutex_init(&locks_[i], NULL);
402  assert(retval == 0);
403  hashmaps_[i].Init(128, empty_key, hasher);
404  }
405  }
406 
408  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
409  pthread_mutex_destroy(&locks_[i]);
410  }
411  free(locks_);
412  delete[] hashmaps_;
413  }
414 
415  bool Lookup(const Key &key, Value *value) {
416  uint8_t target = SelectHashmap(key);
417  Lock(target);
418  const bool result = hashmaps_[target].Lookup(key, value);
419  Unlock(target);
420  return result;
421  }
422 
423  void Insert(const Key &key, const Value &value) {
424  uint8_t target = SelectHashmap(key);
425  Lock(target);
426  hashmaps_[target].Insert(key, value);
427  Unlock(target);
428  }
429 
430  void Erase(const Key &key) {
431  uint8_t target = SelectHashmap(key);
432  Lock(target);
433  hashmaps_[target].Erase(key);
434  Unlock(target);
435  }
436 
437  void Clear() {
438  for (uint8_t i = 0; i < num_hashmaps_; ++i)
439  Lock(i);
440  for (uint8_t i = 0; i < num_hashmaps_; ++i)
441  hashmaps_[i].Clear();
442  for (uint8_t i = 0; i < num_hashmaps_; ++i)
443  Unlock(i);
444  }
445 
446  uint8_t num_hashmaps() const { return num_hashmaps_; }
447 
448  void GetSizes(uint32_t *sizes) {
449  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
450  Lock(i);
451  sizes[i] = hashmaps_[i].size();
452  Unlock(i);
453  }
454  }
455 
456  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
457  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
458  Lock(i);
459  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
460  Unlock(i);
461  }
462  }
463 
464  private:
465  inline uint8_t SelectHashmap(const Key &key) {
466  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
467  double bucket =
468  static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
469  static_cast<double>(static_cast<uint32_t>(-1));
470  return static_cast<uint32_t>(bucket) % num_hashmaps_;
471  }
472 
473  inline void Lock(const uint8_t target) {
474  int retval = pthread_mutex_lock(&locks_[target]);
475  assert(retval == 0);
476  }
477 
478  inline void Unlock(const uint8_t target) {
479  int retval = pthread_mutex_unlock(&locks_[target]);
480  assert(retval == 0);
481  }
482 
483  uint8_t num_hashmaps_;
485  pthread_mutex_t *locks_;
486 };
487 
488 
489 // initialize the static fields
490 template<class Key, class Value>
492 
493 template<class Key, class Value, class Derived>
495 
496 template<class Key, class Value>
498 
499 template<class Key, class Value>
501 
502 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:25
uint32_t capacity() const
Definition: smallhash.h:131
Value * values_
Definition: smallhash.h:218
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:484
void SetThresholds()
Definition: smallhash.h:237
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:123
void ResetCapacity()
Definition: smallhash.h:303
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:222
void Unlock(const uint8_t target)
Definition: smallhash.h:478
~MultiHash()
Definition: smallhash.h:407
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:391
void ResetCapacity()
Definition: smallhash.h:214
void SetThresholds()
Definition: smallhash.h:283
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:423
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:448
void Shrink()
Definition: smallhash.h:239
uint64_t bytes_allocated() const
Definition: smallhash.h:117
uint64_t num_collisions_
Definition: smallhash.h:224
void SetThresholds()
Definition: smallhash.h:211
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:142
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:313
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:360
static const double kThresholdGrow
Definition: smallhash.h:251
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:415
void ResetCapacity()
Definition: smallhash.h:240
uint32_t num_migrates_
Definition: smallhash.h:371
void Clear()
Definition: smallhash.h:437
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:465
void Lock(const uint8_t target)
Definition: smallhash.h:473
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:262
static Prng g_prng
Definition: smallhash.h:374
uint32_t threshold_shrink_
Definition: smallhash.h:373
static double GetEntrySize()
Definition: smallhash.h:118
uint32_t size_
Definition: smallhash.h:221
uint32_t capacity() const
Definition: smallhash.h:278
pthread_mutex_t * locks_
Definition: smallhash.h:485
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:89
Key * keys() const
Definition: smallhash.h:133
static const double kThresholdGrow
Definition: smallhash.h:39
uint8_t num_hashmaps_
Definition: smallhash.h:483
uint64_t bytes_allocated_
Definition: smallhash.h:223
void Erase(const Key &key)
Definition: smallhash.h:430
uint32_t threshold_grow_
Definition: smallhash.h:372
static const double kLoadFactor
Definition: smallhash.h:38
bool Contains(const Key &key) const
Definition: smallhash.h:82
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:175
void DoClear(const bool reset_capacity)
Definition: smallhash.h:202
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:456
uint32_t max_collisions_
Definition: smallhash.h:225
void Clear()
Definition: smallhash.h:113
void Shrink()
Definition: smallhash.h:213
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:137
uint32_t size() const
Definition: smallhash.h:279
uint32_t num_migrates() const
Definition: smallhash.h:280
Value * values() const
Definition: smallhash.h:134
uint8_t num_hashmaps() const
Definition: smallhash.h:446
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:250
uint32_t initial_capacity_
Definition: smallhash.h:220
static const double kThresholdShrink
Definition: smallhash.h:40
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:73
void Grow()
Definition: smallhash.h:212
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:161
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:60
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:190
Key empty_key() const
Definition: smallhash.h:132
void Erase(const Key &key)
Definition: smallhash.h:95
uint32_t capacity_
Definition: smallhash.h:219
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:329
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.hxx:23
static const double kThresholdShrink
Definition: smallhash.h:252
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:268
uint32_t Next(const uint64_t boundary)
Definition: prng.h:46
void AllocMemory()
Definition: smallhash.h:149