GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/smallhash.h Lines: 249 252 98.8 %
Date: 2019-02-03 02:48:13 Branches: 187 483 38.7 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#ifndef CVMFS_SMALLHASH_H_
6
#define CVMFS_SMALLHASH_H_
7
8
#ifndef __STDC_FORMAT_MACROS
9
#define __STDC_FORMAT_MACROS
10
#endif
11
12
#include <gtest/gtest_prod.h>
13
#include <inttypes.h>
14
#include <pthread.h>
15
#include <stdint.h>
16
17
#include <algorithm>
18
#include <cassert>
19
#include <cstdlib>
20
#include <new>
21
22
#include "atomic.h"
23
#include "murmur.h"
24
#include "prng.h"
25
#include "smalloc.h"
26
27
/**
28
 * Hash table with linear probing as collision resolution.  Works only for
29
 * a fixed (maximum) number of elements, i.e. no resizing.  Load factor fixed
30
 * to 0.7.
31
 */
32
template<class Key, class Value, class Derived>
33
class SmallHashBase {
34
  FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow);
35
36
 public:
37
  static const double kLoadFactor;  // mainly useless for the dynamic version
38
  static const double kThresholdGrow;  // only used for resizable version
39
  static const double kThresholdShrink;  // only used for resizable version
40
41
2295
  SmallHashBase() {
42
2295
    keys_ = NULL;
43
2295
    values_ = NULL;
44
2295
    hasher_ = NULL;
45
2295
    bytes_allocated_ = 0;
46
2295
    num_collisions_ = 0;
47
2295
    max_collisions_ = 0;
48
49
    // Properly initialized by Init()
50
2295
    capacity_ = 0;
51
2295
    initial_capacity_ = 0;
52
2295
    size_ = 0;
53
2295
  }
54
55
2295
  ~SmallHashBase() {
56
2295
    DeallocMemory(keys_, values_, capacity_);
57
2295
  }
58
59
2295
  void Init(uint32_t expected_size, Key empty,
60
            uint32_t (*hasher)(const Key &key))
61
  {
62
2295
    hasher_ = hasher;
63

2295
    empty_key_ = empty;
64
2295
    capacity_ =
65
      static_cast<uint32_t>(static_cast<double>(expected_size)/kLoadFactor);
66
2295
    initial_capacity_ = capacity_;
67
2295
    static_cast<Derived *>(this)->SetThresholds();  // No-op for fixed size
68
2295
    AllocMemory();
69
2295
    this->DoClear(false);
70
2295
  }
71
72
8054797
  bool Lookup(const Key &key, Value *value) const {
73
    uint32_t bucket;
74
    uint32_t collisions;
75
8054797
    const bool found = DoLookup(key, &bucket, &collisions);
76



8054797
    if (found)
77
8048156
      *value = values_[bucket];
78
8054797
    return found;
79
  }
80
81
4001546
  bool Contains(const Key &key) const {
82
    uint32_t bucket;
83
    uint32_t collisions;
84
4001546
    const bool found = DoLookup(key, &bucket, &collisions);
85
4001546
    return found;
86
  }
87
88
116836055
  void Insert(const Key &key, const Value &value) {
89
116836055
    static_cast<Derived *>(this)->Grow();  // No-op if fixed-size
90
116818211
    const bool overwritten = DoInsert(key, value, true);
91
116728627
    size_ += !overwritten;  // size + 1 if the key was not yet in the map
92
116728627
  }
93
94
11983557
  void Erase(const Key &key) {
95
    uint32_t bucket;
96
    uint32_t collisions;
97
11983557
    const bool found = DoLookup(key, &bucket, &collisions);
98



11971741
    if (found) {
99

11936337
      keys_[bucket] = empty_key_;
100
11936337
      size_--;
101
11936337
      bucket = (bucket+1) % capacity_;
102



45623813
      while (!(keys_[bucket] == empty_key_)) {
103
21748703
        Key rehash = keys_[bucket];
104

21748703
        keys_[bucket] = empty_key_;
105
21748703
        DoInsert(rehash, values_[bucket], false);
106
21751139
        bucket = (bucket+1) % capacity_;
107
      }
108
11938773
      static_cast<Derived *>(this)->Shrink();  // No-op if fixed-size
109
    }
110
11966033
  }
111
112
194
  void Clear() {
113
194
    DoClear(true);
114
194
  }
115
116
215
  uint64_t bytes_allocated() const { return bytes_allocated_; }
117
96
  static double GetEntrySize() {
118
96
    const double unit = sizeof(Key) + sizeof(Value);
119
96
    return unit/kLoadFactor;
120
  }
121
122
  void GetCollisionStats(uint64_t *num_collisions,
123
                         uint32_t *max_collisions) const
124
  {
125
    *num_collisions = num_collisions_;
126
    *max_collisions = max_collisions_;
127
  }
128
129
  // Careful with the direct access TODO: iterator
130
  uint32_t capacity() const { return capacity_; }
131
3086
  Key empty_key() const { return empty_key_; }
132
13932
  Key *keys() const { return keys_; }
133
34
  Value *values() const { return values_; }
134
135
  // Only needed by compat
136
  void SetHasher(uint32_t (*hasher)(const Key &key)) {
137
    hasher_ = hasher;
138
  }
139
140
 protected:
141
162440298
  uint32_t ScaleHash(const Key &key) const {
142
    double bucket =
143
      (static_cast<double>(hasher_(key)) * static_cast<double>(capacity_) /
144
162440298
      static_cast<double>((uint32_t)(-1)));
145
162477194
    return (uint32_t)bucket % capacity_;
146
  }
147
148
11155
  void AllocMemory() {
149
11155
    keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key)));
150
11155
    values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value)));
151



219953079
    for (uint32_t i = 0; i < capacity_; ++i) {
152




219941980
      /*keys_[i] =*/ new (keys_ + i) Key();
153
    }
154



219936975
    for (uint32_t i = 0; i < capacity_; ++i) {
155





219925820
      /*values_[i] =*/ new (values_ + i) Value();
156
    }
157
11155
    bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_;
158
11155
  }
159
160
11155
  void DeallocMemory(Key *k, Value *v, uint32_t c) {
161





220065183
    for (uint32_t i = 0; i < c; ++i) {
162
220054028
      k[i].~Key();
163
    }
164





220048055
    for (uint32_t i = 0; i < c; ++i) {
165
220036900
      v[i].~Value();
166
    }
167
11155
    smunmap(k);
168
11155
    smunmap(v);
169
11155
    k = NULL;
170
11155
    v = NULL;
171
11155
  }
172
173
  // Returns true iff the key is overwritten
174
138560826
  bool DoInsert(const Key &key, const Value &value,
175
                const bool count_collisions)
176
  {
177
    uint32_t bucket;
178
    uint32_t collisions;
179
138560826
    const bool overwritten = DoLookup(key, &bucket, &collisions);
180



138578018
    if (count_collisions) {
181
116821343
      num_collisions_ += collisions;
182
116821343
      max_collisions_ = std::max(collisions, max_collisions_);
183
    }
184

138523922
    keys_[bucket] = key;
185
138523922
    values_[bucket] = value;
186
138523922
    return overwritten;
187
  }
188
189
162418850
  bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const {
190
162418850
    *bucket = ScaleHash(key);
191
162510158
    *collisions = 0;
192




1018816242
    while (!(keys_[*bucket] == empty_key_)) {
193




717752466
      if (keys_[*bucket] == key)
194
23956540
        return true;
195
693795926
      *bucket = (*bucket+1) % capacity_;
196
693795926
      (*collisions)++;
197
    }
198
138553618
    return false;
199
  }
200
201
11157
  void DoClear(const bool reset_capacity) {
202



11157
    if (reset_capacity)
203
194
      static_cast<Derived *>(this)->ResetCapacity();  // No-op if fixed-size
204



219992327
    for (uint32_t i = 0; i < capacity_; ++i)
205

219981170
      keys_[i] = empty_key_;
206
11157
    size_ = 0;
207
11157
  }
208
209
  // Methods for resizable version
210
  void SetThresholds() { }
211
  void Grow() { }
212
  void Shrink() { }
213
  void ResetCapacity() { }
214
215
  // Separate key and value arrays for better locality
216
  Key *keys_;
217
  Value *values_;
218
  uint32_t capacity_;
219
  uint32_t initial_capacity_;
220
  uint32_t size_;
221
  uint32_t (*hasher_)(const Key &key);
222
  uint64_t bytes_allocated_;
223
  uint64_t num_collisions_;
224
  uint32_t max_collisions_;  /**< maximum collisions for a single insert */
225
  Key empty_key_;
226
};
227
228
229
template<class Key, class Value>
230
class SmallHashFixed :
231
  public SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >
232
426
{
233
  friend class SmallHashBase< Key, Value, SmallHashFixed<Key, Value> >;
234
 protected:
235
  // No-ops
236
213
  void SetThresholds() { }
237
26399
  void Grow() { }
238
1138
  void Shrink() { }
239
2
  void ResetCapacity() { }
240
};
241
242
243
template<class Key, class Value>
244
class SmallHashDynamic :
245
  public SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >
246
2082
{
247
  friend class SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> >;
248
 public:
249
  typedef SmallHashBase< Key, Value, SmallHashDynamic<Key, Value> > Base;
250
  static const double kThresholdGrow;
251
  static const double kThresholdShrink;
252
253
2082
  SmallHashDynamic() : Base() {
254
2082
    num_migrates_ = 0;
255
256
    // Properly set by Init
257
2082
    threshold_grow_ = 0;
258
2082
    threshold_shrink_ = 0;
259
2082
  }
260
261
  explicit SmallHashDynamic(const SmallHashDynamic<Key, Value> &other) : Base()
262
  {
263
    num_migrates_ = 0;
264
    CopyFrom(other);
265
  }
266
267
4
  SmallHashDynamic<Key, Value> &operator= (
268
    const SmallHashDynamic<Key, Value> &other)
269
  {
270


4
    if (&other == this)
271
      return *this;
272
273
4
    CopyFrom(other);
274
4
    return *this;
275
  }
276
277
64377
  uint32_t capacity() const { return Base::capacity_; }
278
128751187
  uint32_t size() const { return Base::size_; }
279
  uint32_t num_migrates() const { return num_migrates_; }
280
281
 protected:
282
10942
  void SetThresholds() {
283
10942
    threshold_grow_ =
284
      static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdGrow);
285
10942
    threshold_shrink_ =
286
      static_cast<uint32_t>(static_cast<double>(capacity()) * kThresholdShrink);
287
10942
  }
288
289
116757440
  void Grow() {
290



116757440
    if (size() > threshold_grow_)
291
5916
      Migrate(capacity() * 2);
292
116791208
  }
293
294
11936531
  void Shrink() {
295



11936531
    if (size() < threshold_shrink_) {
296
16903
      uint32_t target_capacity = capacity() / 2;
297



16903
      if (target_capacity >= Base::initial_capacity_)
298
2752
        Migrate(target_capacity);
299
    }
300
11929791
  }
301
302
192
  void ResetCapacity() {
303
192
    Base::DeallocMemory(Base::keys_, Base::values_, Base::capacity_);
304
192
    Base::capacity_ = Base::initial_capacity_;
305
192
    Base::AllocMemory();
306
192
    SetThresholds();
307
192
  }
308
309
 private:
310
  // Returns a random permutation of indices [0..N-1] that is allocated
311
  // by smmap (Knuth's shuffle algorithm)
312
2756
  uint32_t *ShuffleIndices(const uint32_t N) {
313
    uint32_t *shuffled =
314
2756
      static_cast<uint32_t *>(smmap(N * sizeof(uint32_t)));
315
    // Init with identity
316



45529820
    for (unsigned i = 0; i < N; ++i)
317
45527072
      shuffled[i] = i;
318
    // Shuffle (no shuffling for the last element)
319



45255188
    for (unsigned i = 0; i < N-1; ++i) {
320
45252432
      const uint32_t swap_idx = i + g_prng.Next(N - i);
321
45252440
      uint32_t tmp = shuffled[i];
322
45252440
      shuffled[i] = shuffled[swap_idx];
323
45252440
      shuffled[swap_idx]  = tmp;
324
    }
325
2756
    return shuffled;
326
  }
327
328
8668
  void Migrate(const uint32_t new_capacity) {
329
8668
    Key *old_keys = Base::keys_;
330
8668
    Value *old_values = Base::values_;
331
8668
    uint32_t old_capacity = capacity();
332
8668
    uint32_t old_size = size();
333
334
8668
    Base::capacity_ = new_capacity;
335
8668
    SetThresholds();
336
8668
    Base::AllocMemory();
337
8668
    Base::DoClear(false);
338



8668
    if (new_capacity < old_capacity) {
339
2752
      uint32_t *shuffled_indices = ShuffleIndices(old_capacity);
340



40066028
      for (uint32_t i = 0; i < old_capacity; ++i) {
341




40063276
        if (old_keys[shuffled_indices[i]] != Base::empty_key_)
342
10016528
          Base::Insert(old_keys[shuffled_indices[i]],
343
                       old_values[shuffled_indices[i]]);
344
      }
345
2752
      smunmap(shuffled_indices);
346
    } else {
347



73096332
      for (uint32_t i = 0; i < old_capacity; ++i) {
348




73090588
        if (old_keys[i] != Base::empty_key_)
349
54823080
          Base::Insert(old_keys[i], old_values[i]);
350
      }
351
    }
352



8496
    assert(size() == old_size);
353
354
8668
    Base::DeallocMemory(old_keys, old_values, old_capacity);
355
8668
    num_migrates_++;
356
8668
  }
357
358
4
  void CopyFrom(const SmallHashDynamic<Key, Value> &other) {
359
4
    uint32_t *shuffled_indices = ShuffleIndices(other.capacity_);
360


5505028
    for (uint32_t i = 0; i < other.capacity_; ++i) {
361


5505024
      if (other.keys_[shuffled_indices[i]] != other.empty_key_)
362
4000000
        this->Insert(other.keys_[shuffled_indices[i]],
363
                     other.values_[shuffled_indices[i]]);
364
    }
365
4
    smunmap(shuffled_indices);
366
4
  }
367
368
  uint32_t num_migrates_;
369
  uint32_t threshold_grow_;
370
  uint32_t threshold_shrink_;
371
  static Prng g_prng;
372
};
373
374
375
/**
376
 * Distributes the key-value pairs over $n$ dynamic hash maps with individual
377
 * mutexes.  Hence low mutex contention, and benefits from multiple processors.
378
 */
379
template<class Key, class Value>
380
class MultiHash {
381
 public:
382
36
  MultiHash() {
383
36
    num_hashmaps_ = 0;
384
36
    hashmaps_ = NULL;
385
36
    locks_ = NULL;
386
36
  }
387
388
36
  void Init(const uint8_t num_hashmaps, const Key &empty_key,
389
            uint32_t (*hasher)(const Key &key))
390
  {
391
36
    assert(num_hashmaps > 0);
392
36
    const uint8_t N = num_hashmaps;
393
36
    num_hashmaps_ = N;
394

36
    hashmaps_ = new SmallHashDynamic<Key, Value>[N]();
395
36
    locks_ =
396
      static_cast<pthread_mutex_t *>(smalloc(N * sizeof(pthread_mutex_t)));
397
1548
    for (uint8_t i = 0; i < N; ++i) {
398
1512
      int retval = pthread_mutex_init(&locks_[i], NULL);
399
1512
      assert(retval == 0);
400
1512
      hashmaps_[i].Init(128, empty_key, hasher);
401
    }
402
36
  }
403
404
36
  ~MultiHash() {
405
1548
    for (uint8_t i = 0; i < num_hashmaps_; ++i) {
406
1512
      pthread_mutex_destroy(&locks_[i]);
407
    }
408
36
    free(locks_);
409

36
    delete[] hashmaps_;
410
36
  }
411
412
4000000
  bool Lookup(const Key &key, Value *value) {
413
4000000
    uint8_t target = SelectHashmap(key);
414
4000000
    Lock(target);
415
4000000
    const bool result = hashmaps_[target].Lookup(key, value);
416
4000000
    Unlock(target);
417
4000000
    return result;
418
  }
419
420
15965036
  void Insert(const Key &key, const Value &value) {
421
15965036
    uint8_t target = SelectHashmap(key);
422
15767960
    Lock(target);
423
15970872
    hashmaps_[target].Insert(key, value);
424
15857812
    Unlock(target);
425
15900260
  }
426
427
7977376
  void Erase(const Key &key) {
428
7977376
    uint8_t target = SelectHashmap(key);
429
7970228
    Lock(target);
430
7985044
    hashmaps_[target].Erase(key);
431
7929712
    Unlock(target);
432
7977524
  }
433
434
4
  void Clear() {
435
172
    for (uint8_t i = 0; i < num_hashmaps_; ++i)
436
168
      Lock(i);
437
172
    for (uint8_t i = 0; i < num_hashmaps_; ++i)
438
168
      hashmaps_[i].Clear();
439
172
    for (uint8_t i = 0; i < num_hashmaps_; ++i)
440
168
      Unlock(i);
441
4
  }
442
443
36
  uint8_t num_hashmaps() const { return num_hashmaps_; }
444
445
32
  void GetSizes(uint32_t *sizes) {
446
1376
    for (uint8_t i = 0; i < num_hashmaps_; ++i) {
447
1344
      Lock(i);
448
1344
      sizes[i] = hashmaps_[i].size();
449
1344
      Unlock(i);
450
    }
451
32
  }
452
453
  void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) {
454
    for (uint8_t i = 0; i < num_hashmaps_; ++i) {
455
      Lock(i);
456
      hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]);
457
      Unlock(i);
458
    }
459
  }
460
461
 private:
462
27765412
  inline uint8_t SelectHashmap(const Key &key) {
463
27765412
    uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37);
464
    double bucket =
465
      static_cast<double>(hash) * static_cast<double>(num_hashmaps_) /
466
27928844
      static_cast<double>((uint32_t)(-1));
467
27928844
    return (uint32_t)bucket % num_hashmaps_;
468
  }
469
470
27786460
  inline void Lock(const uint8_t target) {
471
27786460
    int retval = pthread_mutex_lock(&locks_[target]);
472
27978004
    assert(retval == 0);
473
27978004
  }
474
475
27789232
  inline void Unlock(const uint8_t target) {
476
27789232
    int retval = pthread_mutex_unlock(&locks_[target]);
477
27978808
    assert(retval == 0);
478
27978808
  }
479
480
  uint8_t num_hashmaps_;
481
  SmallHashDynamic<Key, Value> *hashmaps_;
482
  pthread_mutex_t *locks_;
483
};
484
485
486
// initialize the static fields
487
template<class Key, class Value>
488



165
Prng SmallHashDynamic<Key, Value>::g_prng;
489
490
template<class Key, class Value, class Derived>
491
const double SmallHashBase<Key, Value, Derived>::kLoadFactor = 0.75;
492
493
template<class Key, class Value>
494
const double SmallHashDynamic<Key, Value>::kThresholdGrow = 0.75;
495
496
template<class Key, class Value>
497
const double SmallHashDynamic<Key, Value>::kThresholdShrink = 0.25;
498
499
#endif  // CVMFS_SMALLHASH_H_