CernVM-FS  2.9.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 
12 #include <gtest/gtest_prod.h>
13 #include <inttypes.h>
14 #include <pthread.h>
15 #include <stdint.h>
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdlib>
20 #include <new>
21 
22 #include "atomic.h"
23 #include "murmur.h"
24 #include "prng.h"
25 #include "smalloc.h"
26 
32 template<class Key, class Value, class Derived>
34  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
35 
36  public:
37  static const double kLoadFactor; // mainly useless for the dynamic version
38  static const double kThresholdGrow; // only used for resizable version
39  static const double kThresholdShrink; // only used for resizable version
40 
42  keys_ = NULL;
43  values_ = NULL;
44  hasher_ = NULL;
45  bytes_allocated_ = 0;
46  num_collisions_ = 0;
47  max_collisions_ = 0;
48 
49  // Properly initialized by Init()
50  capacity_ = 0;
52  size_ = 0;
53  }
54 
57  }
58 
59  void Init(uint32_t expected_size, Key empty,
60  uint32_t (*hasher)(const Key &key))
61  {
62  hasher_ = hasher;
63  empty_key_ = empty;
64  capacity_ =
65  static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
67  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
68  AllocMemory();
69  this->DoClear(false);
70  }
71 
72  bool Lookup(const Key &key, Value *value) const {
73  uint32_t bucket;
74  uint32_t collisions;
75  const bool found = DoLookup(key, &bucket, &collisions);
76  if (found)
77  *value = values_[bucket];
78  return found;
79  }
80 
81  bool Contains(const Key &key) const {
82  uint32_t bucket;
83  uint32_t collisions;
84  const bool found = DoLookup(key, &bucket, &collisions);
85  return found;
86  }
87 
88  void Insert(const Key &key, const Value &value) {
89  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
90  const bool overwritten = DoInsert(key, value, true);
91  size_ += !overwritten; // size + 1 if the key was not yet in the map
92  }
93 
94  void Erase(const Key &key) {
95  uint32_t bucket;
96  uint32_t collisions;
97  const bool found = DoLookup(key, &bucket, &collisions);
98  if (found) {
99  keys_[bucket] = empty_key_;
100  size_--;
101  bucket = (bucket+1) % capacity_;
102  while (!(keys_[bucket] == empty_key_)) {
103  Key rehash = keys_[bucket];
104  keys_[bucket] = empty_key_;
105  DoInsert(rehash, values_[bucket], false);
106  bucket = (bucket+1) % capacity_;
107  }
108  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
109  }
110  }
111 
112  void Clear() {
113  DoClear(true);
114  }
115 
116  uint64_t bytes_allocated() const { return bytes_allocated_; }
117  static double GetEntrySize() {
118  const double unit = sizeof(Key) + sizeof(Value);
119  return unit/kLoadFactor;
120  }
121 
122  void GetCollisionStats(uint64_t *num_collisions,
123  uint32_t *max_collisions) const
124  {
125  *num_collisions = num_collisions_;
126  *max_collisions = max_collisions_;
127  }
128 
129  // Careful with the direct access TODO: iterator
130  uint32_t capacity() const { return capacity_; }
131  Key empty_key() const { return empty_key_; }
132  Key *keys() const { return keys_; }
133  Value *values() const { return values_; }
134 
135  // Only needed by compat
136  void SetHasher(uint32_t (*hasher)(const Key &key)) {
137  hasher_ = hasher;
138  }
139 
140  protected:
141  uint32_t ScaleHash(const Key &key) const {
142  double bucket =
143  (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
144  static_cast<double>((uint32_t)(-1)));
145  return (uint32_t)bucket % capacity_;
146  }
147 
148  void AllocMemory() {
149  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
150  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
151  for (uint32_t i = 0; i < capacity_; ++i) {
152  /*keys_[i] =*/ new (keys_ + i) Key();
153  }
154  for (uint32_t i = 0; i < capacity_; ++i) {
155  /*values_[i] =*/ new (values_ + i) Value();
156  }
157  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
158  }
159 
160  void DeallocMemory(Key *k, Value *v, uint32_t c) {
161  for (uint32_t i = 0; i < c; ++i) {
162  k[i].~Key();
163  }
164  for (uint32_t i = 0; i < c; ++i) {
165  v[i].~Value();
166  }
167  smunmap(k);
168  smunmap(v);
169  k = NULL;
170  v = NULL;
171  }
172 
173  // Returns true iff the key is overwritten
174  bool DoInsert(const Key &key, const Value &value,
175  const bool count_collisions)
176  {
177  uint32_t bucket;
178  uint32_t collisions;
179  const bool overwritten = DoLookup(key, &bucket, &collisions);
180  if (count_collisions) {
181  num_collisions_ += collisions;
182  max_collisions_ = std::max(collisions, max_collisions_);
183  }
184  keys_[bucket] = key;
185  values_[bucket] = value;
186  return overwritten;
187  }
188 
189  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
190  *bucket = ScaleHash(key);
191  *collisions = 0;
192  while (!(keys_[*bucket] == empty_key_)) {
193  if (keys_[*bucket] == key)
194  return true;
195  *bucket = (*bucket+1) % capacity_;
196  (*collisions)++;
197  }
198  return false;
199  }
200 
201  void DoClear(const bool reset_capacity) {
202  if (reset_capacity)
203  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
204  for (uint32_t i = 0; i < capacity_; ++i)
205  keys_[i] = empty_key_;
206  size_ = 0;
207  }
208 
209  // Methods for resizable version
210  void SetThresholds() { }
211  void Grow() { }
212  void Shrink() { }
213  void ResetCapacity() { }
214 
215  // Separate key and value arrays for better locality
216  Key *keys_;
217  Value *values_;
218  uint32_t capacity_;
220  uint32_t size_;
221  uint32_t (*hasher_)(const Key &key);
223  uint64_t num_collisions_;
224  uint32_t max_collisions_;
226 };
227 
228 
229 template<class Key, class Value>
231  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
232 {
233  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
234  protected:
235  // No-ops
236  void SetThresholds() { }
237  void Grow() { }
238  void Shrink() { }
239  void ResetCapacity() { }
240 };
241 
242 
243 template<class Key, class Value>
245  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
246 {
247  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
248  public:
250  static const double kThresholdGrow;
251  static const double kThresholdShrink;
252 
254  num_migrates_ = 0;
255 
256  // Properly set by Init
257  threshold_grow_ = 0;
258  threshold_shrink_ = 0;
259  }
260 
262  {
263  num_migrates_ = 0;
264  CopyFrom(other);
265  }
266 
268  const SmallHashDynamic<Key, Value> &other)
269  {
270  if (&other == this)
271  return *this;
272 
273  CopyFrom(other);
274  return *this;
275  }
276 
277  uint32_t capacity() const { return Base::capacity_; }
278  uint32_t size() const { return Base::size_; }
279  uint32_t num_migrates() const { return num_migrates_; }
280 
281  protected:
282  void SetThresholds() {
284  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
286  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
287  }
288 
289  void Grow() {
290  if (size() > threshold_grow_)
291  Migrate(capacity() * 2);
292  }
293 
294  void Shrink() {
295  if (size() < threshold_shrink_) {
296  uint32_t target_capacity = capacity() / 2;
297  if (target_capacity >= Base::initial_capacity_)
298  Migrate(target_capacity);
299  }
300  }
301 
302  void ResetCapacity() {
306  SetThresholds();
307  }
308 
309  private:
310  // Returns a random permutation of indices [0..N-1] that is allocated
311  // by smmap (Knuth's shuffle algorithm)
312  uint32_t *ShuffleIndices(const uint32_t N) {
313  uint32_t *shuffled =
314  static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
315  // Init with identity
316  for (unsigned i = 0; i < N; ++i)
317  shuffled[i] = i;
318  // Shuffle (no shuffling for the last element)
319  for (unsigned i = 0; i < N-1; ++i) {
320  const uint32_t swap_idx = i + g_prng.Next(N - i);
321  uint32_t tmp = shuffled[i];
322  shuffled[i] = shuffled[swap_idx];
323  shuffled[swap_idx] = tmp;
324  }
325  return shuffled;
326  }
327 
328  void Migrate(const uint32_t new_capacity) {
329  Key *old_keys = Base::keys_;
330  Value *old_values = Base::values_;
331  uint32_t old_capacity = capacity();
332  uint32_t old_size = size();
333 
334  Base::capacity_ = new_capacity;
335  SetThresholds();
337  Base::DoClear(false);
338  if (new_capacity < old_capacity) {
339  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
340  for (uint32_t i = 0; i < old_capacity; ++i) {
341  if (old_keys[shuffled_indices[i]] != Base::empty_key_)
342  Base::Insert(old_keys[shuffled_indices[i]],
343  old_values[shuffled_indices[i]]);
344  }
345  smunmap(shuffled_indices);
346  } else {
347  for (uint32_t i = 0; i < old_capacity; ++i) {
348  if (old_keys[i] != Base::empty_key_)
349  Base::Insert(old_keys[i], old_values[i]);
350  }
351  }
352  assert(size() == old_size);
353 
354  Base::DeallocMemory(old_keys, old_values, old_capacity);
355  num_migrates_++;
356  }
357 
359  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
360  for (uint32_t i = 0; i < other.capacity_; ++i) {
361  if (other.keys_[shuffled_indices[i]] != other.empty_key_)
362  this->Insert(other.keys_[shuffled_indices[i]],
363  other.values_[shuffled_indices[i]]);
364  }
365  smunmap(shuffled_indices);
366  }
367 
368  uint32_t num_migrates_;
369  uint32_t threshold_grow_;
371  static Prng g_prng;
372 };
373 
374 
379 template<class Key, class Value>
380 class MultiHash {
381  public:
383  num_hashmaps_ = 0;
384  hashmaps_ = NULL;
385  locks_ = NULL;
386  }
387 
388  void Init(const uint8_t num_hashmaps, const Key &empty_key,
389  uint32_t (*hasher)(const Key &key))
390  {
391  assert(num_hashmaps > 0);
392  const uint8_t N = num_hashmaps;
393  num_hashmaps_ = N;
395  locks_ =
396  static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
397  for (uint8_t i = 0; i < N; ++i) {
398  int retval = pthread_mutex_init(&locks_[i], NULL);
399  assert(retval == 0);
400  hashmaps_[i].Init(128, empty_key, hasher);
401  }
402  }
403 
405  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
406  pthread_mutex_destroy(&locks_[i]);
407  }
408  free(locks_);
409  delete[] hashmaps_;
410  }
411 
412  bool Lookup(const Key &key, Value *value) {
413  uint8_t target = SelectHashmap(key);
414  Lock(target);
415  const bool result = hashmaps_[target].Lookup(key, value);
416  Unlock(target);
417  return result;
418  }
419 
420  void Insert(const Key &key, const Value &value) {
421  uint8_t target = SelectHashmap(key);
422  Lock(target);
423  hashmaps_[target].Insert(key, value);
424  Unlock(target);
425  }
426 
427  void Erase(const Key &key) {
428  uint8_t target = SelectHashmap(key);
429  Lock(target);
430  hashmaps_[target].Erase(key);
431  Unlock(target);
432  }
433 
434  void Clear() {
435  for (uint8_t i = 0; i < num_hashmaps_; ++i)
436  Lock(i);
437  for (uint8_t i = 0; i < num_hashmaps_; ++i)
438  hashmaps_[i].Clear();
439  for (uint8_t i = 0; i < num_hashmaps_; ++i)
440  Unlock(i);
441  }
442 
443  uint8_t num_hashmaps() const { return num_hashmaps_; }
444 
445  void GetSizes(uint32_t *sizes) {
446  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
447  Lock(i);
448  sizes[i] = hashmaps_[i].size();
449  Unlock(i);
450  }
451  }
452 
453  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
454  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
455  Lock(i);
456  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
457  Unlock(i);
458  }
459  }
460 
461  private:
462  inline uint8_t SelectHashmap(const Key &key) {
463  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
464  double bucket =
465  static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
466  static_cast<double>((uint32_t)(-1));
467  return (uint32_t)bucket % num_hashmaps_;
468  }
469 
470  inline void Lock(const uint8_t target) {
471  int retval = pthread_mutex_lock(&locks_[target]);
472  assert(retval == 0);
473  }
474 
475  inline void Unlock(const uint8_t target) {
476  int retval = pthread_mutex_unlock(&locks_[target]);
477  assert(retval == 0);
478  }
479 
480  uint8_t num_hashmaps_;
482  pthread_mutex_t *locks_;
483 };
484 
485 
486 // initialize the static fields
487 template<class Key, class Value>
489 
490 template<class Key, class Value, class Derived>
492 
493 template<class Key, class Value>
495 
496 template<class Key, class Value>
498 
499 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:25
uint32_t capacity() const
Definition: smallhash.h:130
Value * values_
Definition: smallhash.h:217
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:481
void SetThresholds()
Definition: smallhash.h:236
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:122
void ResetCapacity()
Definition: smallhash.h:302
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:221
void Unlock(const uint8_t target)
Definition: smallhash.h:475
~MultiHash()
Definition: smallhash.h:404
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:388
void ResetCapacity()
Definition: smallhash.h:213
void SetThresholds()
Definition: smallhash.h:282
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:420
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:445
void Shrink()
Definition: smallhash.h:238
uint64_t bytes_allocated() const
Definition: smallhash.h:116
uint64_t num_collisions_
Definition: smallhash.h:223
void SetThresholds()
Definition: smallhash.h:210
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:141
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:312
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:358
static const double kThresholdGrow
Definition: smallhash.h:250
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:412
void ResetCapacity()
Definition: smallhash.h:239
uint32_t num_migrates_
Definition: smallhash.h:368
void Clear()
Definition: smallhash.h:434
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:462
void Lock(const uint8_t target)
Definition: smallhash.h:470
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:261
static Prng g_prng
Definition: smallhash.h:371
uint32_t threshold_shrink_
Definition: smallhash.h:370
static double GetEntrySize()
Definition: smallhash.h:117
uint32_t size_
Definition: smallhash.h:220
uint32_t capacity() const
Definition: smallhash.h:277
pthread_mutex_t * locks_
Definition: smallhash.h:482
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:88
Key * keys() const
Definition: smallhash.h:132
static const double kThresholdGrow
Definition: smallhash.h:38
uint8_t num_hashmaps_
Definition: smallhash.h:480
uint64_t bytes_allocated_
Definition: smallhash.h:222
void Erase(const Key &key)
Definition: smallhash.h:427
uint32_t threshold_grow_
Definition: smallhash.h:369
static const double kLoadFactor
Definition: smallhash.h:37
bool Contains(const Key &key) const
Definition: smallhash.h:81
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:174
void DoClear(const bool reset_capacity)
Definition: smallhash.h:201
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:453
uint32_t max_collisions_
Definition: smallhash.h:224
void Clear()
Definition: smallhash.h:112
void Shrink()
Definition: smallhash.h:212
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:136
uint32_t size() const
Definition: smallhash.h:278
uint32_t num_migrates() const
Definition: smallhash.h:279
Value * values() const
Definition: smallhash.h:133
uint8_t num_hashmaps() const
Definition: smallhash.h:443
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:249
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.h:23
uint32_t initial_capacity_
Definition: smallhash.h:219
static const double kThresholdShrink
Definition: smallhash.h:39
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:72
void Grow()
Definition: smallhash.h:211
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:160
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:59
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:189
Key empty_key() const
Definition: smallhash.h:131
void Erase(const Key &key)
Definition: smallhash.h:94
uint32_t capacity_
Definition: smallhash.h:218
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:328
static const double kThresholdShrink
Definition: smallhash.h:251
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:267
uint32_t Next(const uint64_t boundary)
Definition: prng.h:45
void AllocMemory()
Definition: smallhash.h:148