CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
smallhash.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SMALLHASH_H_
6 #define CVMFS_SMALLHASH_H_
7 
8 #ifndef __STDC_FORMAT_MACROS
9 // NOLINTNEXTLINE
10 #define __STDC_FORMAT_MACROS
11 #endif
12 
13 #include <gtest/gtest_prod.h>
14 #include <inttypes.h>
15 #include <pthread.h>
16 #include <stdint.h>
17 
18 #include <algorithm>
19 #include <cassert>
20 #include <cstdlib>
21 #include <new>
22 
23 #include "atomic.h"
24 #include "murmur.hxx"
25 #include "prng.h"
26 #include "smalloc.h"
27 
33 template<class Key, class Value, class Derived>
35  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
36 
37  public:
38  static const double kLoadFactor; // mainly useless for the dynamic version
39  static const double kThresholdGrow; // only used for resizable version
40  static const double kThresholdShrink; // only used for resizable version
41 
43  keys_ = NULL;
44  values_ = NULL;
45  hasher_ = NULL;
46  bytes_allocated_ = 0;
47  num_collisions_ = 0;
48  max_collisions_ = 0;
49 
50  // Properly initialized by Init()
51  capacity_ = 0;
53  size_ = 0;
54  }
55 
58  }
59 
60  void Init(uint32_t expected_size, Key empty,
61  uint32_t (*hasher)(const Key &key))
62  {
63  hasher_ = hasher;
64  empty_key_ = empty;
65  capacity_ =
66  static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
68  static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size
69  AllocMemory();
70  this->DoClear(false);
71  }
72 
73  bool Lookup(const Key &key, Value *value) const {
74  uint32_t bucket;
75  uint32_t collisions;
76  const bool found = DoLookup(key, &bucket, &collisions);
77  if (found)
78  *value = values_[bucket];
79  return found;
80  }
81 
82  bool Contains(const Key &key) const {
83  uint32_t bucket;
84  uint32_t collisions;
85  const bool found = DoLookup(key, &bucket, &collisions);
86  return found;
87  }
88 
89  void Insert(const Key &key, const Value &value) {
90  static_cast<Derived *>(this)->Grow(); // No-op if fixed-size
91  const bool overwritten = DoInsert(key, value, true);
92  size_ += !overwritten; // size + 1 if the key was not yet in the map
93  }
94 
95  bool Erase(const Key &key) {
96  uint32_t bucket;
97  uint32_t collisions;
98  const bool found = DoLookup(key, &bucket, &collisions);
99  if (found) {
100  keys_[bucket] = empty_key_;
101  size_--;
102  bucket = (bucket+1) % capacity_;
103  while (!(keys_[bucket] == empty_key_)) {
104  Key rehash = keys_[bucket];
105  keys_[bucket] = empty_key_;
106  DoInsert(rehash, values_[bucket], false);
107  bucket = (bucket+1) % capacity_;
108  }
109  static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size
110  }
111  return found;
112  }
113 
114  void Clear() {
115  DoClear(true);
116  }
117 
118  uint64_t bytes_allocated() const { return bytes_allocated_; }
119  static double GetEntrySize() {
120  const double unit = sizeof(Key) + sizeof(Value);
121  return unit/kLoadFactor;
122  }
123 
124  void GetCollisionStats(uint64_t *num_collisions,
125  uint32_t *max_collisions) const
126  {
127  *num_collisions = num_collisions_;
128  *max_collisions = max_collisions_;
129  }
130 
131  // Careful with the direct access TODO: iterator
132  uint32_t capacity() const { return capacity_; }
133  Key empty_key() const { return empty_key_; }
134  Key *keys() const { return keys_; }
135  Value *values() const { return values_; }
136 
137  // Only needed by compat
138  void SetHasher(uint32_t (*hasher)(const Key &key)) {
139  hasher_ = hasher;
140  }
141 
142  protected:
143  uint32_t ScaleHash(const Key &key) const {
144  double bucket =
145  (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
146  static_cast<double>(static_cast<uint32_t>(-1)));
147  return static_cast<uint32_t>(bucket) % capacity_;
148  }
149 
150  void AllocMemory() {
151  keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
152  values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
153  for (uint32_t i = 0; i < capacity_; ++i) {
154  /*keys_[i] =*/ new (keys_ + i) Key();
155  }
156  for (uint32_t i = 0; i < capacity_; ++i) {
157  /*values_[i] =*/ new (values_ + i) Value();
158  }
159  bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
160  }
161 
162  void DeallocMemory(Key *k, Value *v, uint32_t c) {
163  for (uint32_t i = 0; i < c; ++i) {
164  k[i].~Key();
165  }
166  for (uint32_t i = 0; i < c; ++i) {
167  v[i].~Value();
168  }
169  smunmap(k);
170  smunmap(v);
171  k = NULL;
172  v = NULL;
173  }
174 
175  // Returns true iff the key is overwritten
176  bool DoInsert(const Key &key, const Value &value,
177  const bool count_collisions)
178  {
179  uint32_t bucket;
180  uint32_t collisions;
181  const bool overwritten = DoLookup(key, &bucket, &collisions);
182  if (count_collisions) {
183  num_collisions_ += collisions;
184  max_collisions_ = std::max(collisions, max_collisions_);
185  }
186  keys_[bucket] = key;
187  values_[bucket] = value;
188  return overwritten;
189  }
190 
191  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
192  *bucket = ScaleHash(key);
193  *collisions = 0;
194  while (!(keys_[*bucket] == empty_key_)) {
195  if (keys_[*bucket] == key)
196  return true;
197  *bucket = (*bucket+1) % capacity_;
198  (*collisions)++;
199  }
200  return false;
201  }
202 
203  void DoClear(const bool reset_capacity) {
204  if (reset_capacity)
205  static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size
206  for (uint32_t i = 0; i < capacity_; ++i)
207  keys_[i] = empty_key_;
208  size_ = 0;
209  }
210 
211  // Methods for resizable version
212  void SetThresholds() { }
213  void Grow() { }
214  void Shrink() { }
215  void ResetCapacity() { }
216 
217  // Separate key and value arrays for better locality
218  Key *keys_;
219  Value *values_;
220  uint32_t capacity_;
222  uint32_t size_;
223  uint32_t (*hasher_)(const Key &key);
225  uint64_t num_collisions_;
226  uint32_t max_collisions_;
228 };
229 
230 
231 template<class Key, class Value>
233  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
234 {
235  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
236  protected:
237  // No-ops
238  void SetThresholds() { }
239  void Grow() { }
240  void Shrink() { }
241  void ResetCapacity() { }
242 };
243 
244 
245 template<class Key, class Value>
247  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
248 {
249  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
250  public:
252  static const double kThresholdGrow;
253  static const double kThresholdShrink;
254 
256  num_migrates_ = 0;
257 
258  // Properly set by Init
259  threshold_grow_ = 0;
260  threshold_shrink_ = 0;
261  }
262 
264  {
265  num_migrates_ = 0;
266  CopyFrom(other);
267  }
268 
270  const SmallHashDynamic<Key, Value> &other)
271  {
272  if (&other == this)
273  return *this;
274 
275  CopyFrom(other);
276  return *this;
277  }
278 
279  uint32_t capacity() const { return Base::capacity_; }
280  uint32_t size() const { return Base::size_; }
281  uint32_t num_migrates() const { return num_migrates_; }
282 
283  protected:
284  void SetThresholds() {
286  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
288  static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
289  }
290 
291  void Grow() {
292  if (size() > threshold_grow_)
293  Migrate(capacity() * 2);
294  }
295 
296  void Shrink() {
297  if (size() < threshold_shrink_) {
298  uint32_t target_capacity = capacity() / 2;
299  if (target_capacity >= Base::initial_capacity_)
300  Migrate(target_capacity);
301  }
302  }
303 
304  void ResetCapacity() {
308  SetThresholds();
309  }
310 
311  private:
312  // Returns a random permutation of indices [0..N-1] that is allocated
313  // by smmap (Knuth's shuffle algorithm)
314  uint32_t *ShuffleIndices(const uint32_t N) {
315  uint32_t *shuffled =
316  static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
317  // Init with identity
318  for (unsigned i = 0; i < N; ++i)
319  shuffled[i] = i;
320  // Shuffle (no shuffling for the last element)
321  for (unsigned i = 0; i < N-1; ++i) {
322  const uint32_t swap_idx = i + g_prng.Next(N - i);
323  uint32_t tmp = shuffled[i];
324  shuffled[i] = shuffled[swap_idx];
325  shuffled[swap_idx] = tmp;
326  }
327  return shuffled;
328  }
329 
330  void Migrate(const uint32_t new_capacity) {
331  Key *old_keys = Base::keys_;
332  Value *old_values = Base::values_;
333  uint32_t old_capacity = capacity();
334  uint32_t old_size = size();
335 
336  Base::capacity_ = new_capacity;
337  SetThresholds();
339  Base::DoClear(false);
340  if (new_capacity < old_capacity) {
341  uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
342  for (uint32_t i = 0; i < old_capacity; ++i) {
343  if (old_keys[shuffled_indices[i]] != Base::empty_key_) {
344  Base::Insert(old_keys[shuffled_indices[i]],
345  old_values[shuffled_indices[i]]);
346  }
347  }
348  smunmap(shuffled_indices);
349  } else {
350  for (uint32_t i = 0; i < old_capacity; ++i) {
351  if (old_keys[i] != Base::empty_key_)
352  Base::Insert(old_keys[i], old_values[i]);
353  }
354  }
355  assert(size() == old_size);
356 
357  Base::DeallocMemory(old_keys, old_values, old_capacity);
358  num_migrates_++;
359  }
360 
362  uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
363  for (uint32_t i = 0; i < other.capacity_; ++i) {
364  if (other.keys_[shuffled_indices[i]] != other.empty_key_) {
365  this->Insert(other.keys_[shuffled_indices[i]],
366  other.values_[shuffled_indices[i]]);
367  }
368  }
369  smunmap(shuffled_indices);
370  }
371 
372  uint32_t num_migrates_;
373  uint32_t threshold_grow_;
375  static Prng g_prng;
376 };
377 
378 
383 template<class Key, class Value>
384 class MultiHash {
385  public:
387  num_hashmaps_ = 0;
388  hashmaps_ = NULL;
389  locks_ = NULL;
390  }
391 
392  void Init(const uint8_t num_hashmaps, const Key &empty_key,
393  uint32_t (*hasher)(const Key &key))
394  {
395  assert(num_hashmaps > 0);
396  const uint8_t N = num_hashmaps;
397  num_hashmaps_ = N;
399  locks_ =
400  static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
401  for (uint8_t i = 0; i < N; ++i) {
402  int retval = pthread_mutex_init(&locks_[i], NULL);
403  assert(retval == 0);
404  hashmaps_[i].Init(128, empty_key, hasher);
405  }
406  }
407 
409  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
410  pthread_mutex_destroy(&locks_[i]);
411  }
412  free(locks_);
413  delete[] hashmaps_;
414  }
415 
416  bool Lookup(const Key &key, Value *value) {
417  uint8_t target = SelectHashmap(key);
418  Lock(target);
419  const bool result = hashmaps_[target].Lookup(key, value);
420  Unlock(target);
421  return result;
422  }
423 
424  void Insert(const Key &key, const Value &value) {
425  uint8_t target = SelectHashmap(key);
426  Lock(target);
427  hashmaps_[target].Insert(key, value);
428  Unlock(target);
429  }
430 
431  void Erase(const Key &key) {
432  uint8_t target = SelectHashmap(key);
433  Lock(target);
434  hashmaps_[target].Erase(key);
435  Unlock(target);
436  }
437 
438  void Clear() {
439  for (uint8_t i = 0; i < num_hashmaps_; ++i)
440  Lock(i);
441  for (uint8_t i = 0; i < num_hashmaps_; ++i)
442  hashmaps_[i].Clear();
443  for (uint8_t i = 0; i < num_hashmaps_; ++i)
444  Unlock(i);
445  }
446 
447  uint8_t num_hashmaps() const { return num_hashmaps_; }
448 
449  void GetSizes(uint32_t *sizes) {
450  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
451  Lock(i);
452  sizes[i] = hashmaps_[i].size();
453  Unlock(i);
454  }
455  }
456 
457  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
458  for (uint8_t i = 0; i < num_hashmaps_; ++i) {
459  Lock(i);
460  hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
461  Unlock(i);
462  }
463  }
464 
465  private:
466  inline uint8_t SelectHashmap(const Key &key) {
467  uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
468  double bucket =
469  static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
470  static_cast<double>(static_cast<uint32_t>(-1));
471  return static_cast<uint32_t>(bucket) % num_hashmaps_;
472  }
473 
474  inline void Lock(const uint8_t target) {
475  int retval = pthread_mutex_lock(&locks_[target]);
476  assert(retval == 0);
477  }
478 
479  inline void Unlock(const uint8_t target) {
480  int retval = pthread_mutex_unlock(&locks_[target]);
481  assert(retval == 0);
482  }
483 
484  uint8_t num_hashmaps_;
486  pthread_mutex_t *locks_;
487 };
488 
489 
490 // initialize the static fields
491 template<class Key, class Value>
493 
494 template<class Key, class Value, class Derived>
496 
497 template<class Key, class Value>
499 
500 template<class Key, class Value>
502 
503 #endif // CVMFS_SMALLHASH_H_
Definition: prng.h:28
uint32_t capacity() const
Definition: smallhash.h:132
Value * values_
Definition: smallhash.h:219
SmallHashDynamic< Key, Value > * hashmaps_
Definition: smallhash.h:485
void SetThresholds()
Definition: smallhash.h:238
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) const
Definition: smallhash.h:124
void ResetCapacity()
Definition: smallhash.h:304
uint32_t(* hasher_)(const Key &key)
Definition: smallhash.h:223
void Unlock(const uint8_t target)
Definition: smallhash.h:479
~MultiHash()
Definition: smallhash.h:408
void Init(const uint8_t num_hashmaps, const Key &empty_key, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:392
void ResetCapacity()
Definition: smallhash.h:215
void SetThresholds()
Definition: smallhash.h:284
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:424
void GetSizes(uint32_t *sizes)
Definition: smallhash.h:449
void Shrink()
Definition: smallhash.h:240
uint64_t bytes_allocated() const
Definition: smallhash.h:118
uint64_t num_collisions_
Definition: smallhash.h:225
void SetThresholds()
Definition: smallhash.h:212
uint32_t ScaleHash(const Key &key) const
Definition: smallhash.h:143
assert((mem||(size==0))&&"Out Of Memory")
uint32_t * ShuffleIndices(const uint32_t N)
Definition: smallhash.h:314
void CopyFrom(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:361
static const double kThresholdGrow
Definition: smallhash.h:252
bool Lookup(const Key &key, Value *value)
Definition: smallhash.h:416
void ResetCapacity()
Definition: smallhash.h:241
uint32_t num_migrates_
Definition: smallhash.h:372
void Clear()
Definition: smallhash.h:438
uint8_t SelectHashmap(const Key &key)
Definition: smallhash.h:466
void Lock(const uint8_t target)
Definition: smallhash.h:474
SmallHashDynamic(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:263
static Prng g_prng
Definition: smallhash.h:375
uint32_t threshold_shrink_
Definition: smallhash.h:374
static double GetEntrySize()
Definition: smallhash.h:119
uint32_t size_
Definition: smallhash.h:222
uint32_t capacity() const
Definition: smallhash.h:279
pthread_mutex_t * locks_
Definition: smallhash.h:486
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:89
Key * keys() const
Definition: smallhash.h:134
static const double kThresholdGrow
Definition: smallhash.h:39
uint8_t num_hashmaps_
Definition: smallhash.h:484
uint64_t bytes_allocated_
Definition: smallhash.h:224
void Erase(const Key &key)
Definition: smallhash.h:431
uint32_t threshold_grow_
Definition: smallhash.h:373
static const double kLoadFactor
Definition: smallhash.h:38
bool Contains(const Key &key) const
Definition: smallhash.h:82
bool DoInsert(const Key &key, const Value &value, const bool count_collisions)
Definition: smallhash.h:176
void DoClear(const bool reset_capacity)
Definition: smallhash.h:203
void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions)
Definition: smallhash.h:457
uint32_t max_collisions_
Definition: smallhash.h:226
void Clear()
Definition: smallhash.h:114
void Shrink()
Definition: smallhash.h:214
void SetHasher(uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:138
uint32_t size() const
Definition: smallhash.h:280
uint32_t num_migrates() const
Definition: smallhash.h:281
Value * values() const
Definition: smallhash.h:135
bool Erase(const Key &key)
Definition: smallhash.h:95
uint8_t num_hashmaps() const
Definition: smallhash.h:447
FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow)
SmallHashBase< Key, Value, SmallHashDynamic< Key, Value > > Base
Definition: smallhash.h:251
uint32_t initial_capacity_
Definition: smallhash.h:221
static const double kThresholdShrink
Definition: smallhash.h:40
bool Lookup(const Key &key, Value *value) const
Definition: smallhash.h:73
void Grow()
Definition: smallhash.h:213
void DeallocMemory(Key *k, Value *v, uint32_t c)
Definition: smallhash.h:162
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:60
bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const
Definition: smallhash.h:191
Key empty_key() const
Definition: smallhash.h:133
uint32_t capacity_
Definition: smallhash.h:220
void Migrate(const uint32_t new_capacity)
Definition: smallhash.h:330
uint32_t MurmurHash2(const void *key, int len, uint32_t seed)
Definition: murmur.hxx:23
static const double kThresholdShrink
Definition: smallhash.h:253
SmallHashDynamic< Key, Value > & operator=(const SmallHashDynamic< Key, Value > &other)
Definition: smallhash.h:269
uint32_t Next(const uint64_t boundary)
Definition: prng.h:49
void AllocMemory()
Definition: smallhash.h:150