| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/smallhash.h |
| Date: | 2025-12-21 02:39:23 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 260 | 263 | 98.9% |
| Branches: | 89 | 110 | 80.9% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef CVMFS_SMALLHASH_H_ | ||
| 6 | #define CVMFS_SMALLHASH_H_ | ||
| 7 | |||
| 8 | #ifndef __STDC_FORMAT_MACROS | ||
| 9 | // NOLINTNEXTLINE | ||
| 10 | #define __STDC_FORMAT_MACROS | ||
| 11 | #endif | ||
| 12 | |||
| 13 | #include "duplex_testing.h" | ||
| 14 | #include <inttypes.h> | ||
| 15 | #include <pthread.h> | ||
| 16 | #include <stdint.h> | ||
| 17 | |||
| 18 | #include <algorithm> | ||
| 19 | #include <cassert> | ||
| 20 | #include <cstdlib> | ||
| 21 | #include <new> | ||
| 22 | |||
| 23 | #include "util/atomic.h" | ||
| 24 | #include "util/murmur.hxx" | ||
| 25 | #include "util/prng.h" | ||
| 26 | #include "util/smalloc.h" | ||
| 27 | |||
| 28 | /** | ||
| 29 | * Hash table with linear probing as collision resolution. Works only for | ||
| 30 | * a fixed (maximum) number of elements, i.e. no resizing. Load factor fixed | ||
| 31 | * to 0.7. | ||
| 32 | */ | ||
| 33 | template<class Key, class Value, class Derived> | ||
| 34 | class SmallHashBase { | ||
| 35 | FRIEND_TEST(T_Smallhash, InsertAndCopyMd5Slow); | ||
| 36 | |||
| 37 | public: | ||
| 38 | static const double kLoadFactor; // mainly useless for the dynamic version | ||
| 39 | static const double kThresholdGrow; // only used for resizable version | ||
| 40 | static const double kThresholdShrink; // only used for resizable version | ||
| 41 | |||
| 42 | 198625 | SmallHashBase() { | |
| 43 | 198625 | keys_ = NULL; | |
| 44 | 198625 | values_ = NULL; | |
| 45 | 198625 | hasher_ = NULL; | |
| 46 | 198625 | bytes_allocated_ = 0; | |
| 47 | 198625 | num_collisions_ = 0; | |
| 48 | 198625 | max_collisions_ = 0; | |
| 49 | |||
| 50 | // Properly initialized by Init() | ||
| 51 | 198625 | capacity_ = 0; | |
| 52 | 198625 | initial_capacity_ = 0; | |
| 53 | 198625 | size_ = 0; | |
| 54 | 198625 | } | |
| 55 | |||
| 56 | 198487 | ~SmallHashBase() { DeallocMemory(keys_, values_, capacity_); } | |
| 57 | |||
| 58 | 198541 | void Init(uint32_t expected_size, Key empty, | |
| 59 | uint32_t (*hasher)(const Key &key)) { | ||
| 60 | 198541 | hasher_ = hasher; | |
| 61 | 198541 | empty_key_ = empty; | |
| 62 | 198541 | capacity_ = static_cast<uint32_t>(static_cast<double>(expected_size) | |
| 63 | 198541 | / kLoadFactor); | |
| 64 | 198541 | initial_capacity_ = capacity_; | |
| 65 | 198541 | static_cast<Derived *>(this)->SetThresholds(); // No-op for fixed size | |
| 66 | 198541 | AllocMemory(); | |
| 67 | 198541 | this->DoClear(false); | |
| 68 | 198541 | } | |
| 69 | |||
| 70 | 130937575 | bool Lookup(const Key &key, Value *value) const { | |
| 71 | uint32_t bucket; | ||
| 72 | uint32_t collisions; | ||
| 73 |
1/2✓ Branch 1 taken 130962480 times.
✗ Branch 2 not taken.
|
130937575 | const bool found = DoLookup(key, &bucket, &collisions); |
| 74 |
2/2✓ Branch 0 taken 111438517 times.
✓ Branch 1 taken 19523963 times.
|
131036830 | if (found) |
| 75 |
1/2✓ Branch 1 taken 21631 times.
✗ Branch 2 not taken.
|
111463134 | *value = values_[bucket]; |
| 76 | 131036830 | return found; | |
| 77 | } | ||
| 78 | |||
| 79 | /** | ||
| 80 | * Returns both the key and the value. That is useful if Key's equality | ||
| 81 | * operator implements an equivalence relation on Key. In this case, LookupEx | ||
| 82 | * returns the key representing the equivalence class that has been used | ||
| 83 | * during Insert(). | ||
| 84 | * Used to return a glue::InodeEx element when looking for an inode. | ||
| 85 | */ | ||
| 86 | 20 | bool LookupEx(Key *key, Value *value) const { | |
| 87 | 20 | uint32_t bucket = ScaleHash(*key); | |
| 88 |
2/2✓ Branch 1 taken 6 times.
✓ Branch 2 taken 4 times.
|
20 | while (!(keys_[bucket] == empty_key_)) { |
| 89 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
12 | if (keys_[bucket] == *key) { |
| 90 | 12 | *key = keys_[bucket]; | |
| 91 | 12 | *value = values_[bucket]; | |
| 92 | 12 | return true; | |
| 93 | } | ||
| 94 | ✗ | bucket = (bucket + 1) % capacity_; | |
| 95 | } | ||
| 96 | 8 | return false; | |
| 97 | } | ||
| 98 | |||
| 99 | 22049996 | bool Contains(const Key &key) const { | |
| 100 | uint32_t bucket; | ||
| 101 | uint32_t collisions; | ||
| 102 |
1/2✓ Branch 1 taken 22049473 times.
✗ Branch 2 not taken.
|
22049996 | const bool found = DoLookup(key, &bucket, &collisions); |
| 103 | 22049996 | return found; | |
| 104 | } | ||
| 105 | |||
| 106 | 2454044583 | void Insert(const Key &key, const Value &value) { | |
| 107 | 2454044583 | static_cast<Derived *>(this)->Grow(); // No-op if fixed-size | |
| 108 | 2444680443 | const bool overwritten = DoInsert(key, value, true); | |
| 109 | 2453428266 | size_ += !overwritten; // size + 1 if the key was not yet in the map | |
| 110 | 2453428266 | } | |
| 111 | |||
| 112 | 141593215 | bool Erase(const Key &key) { | |
| 113 | uint32_t bucket; | ||
| 114 | uint32_t collisions; | ||
| 115 |
1/2✓ Branch 1 taken 136667402 times.
✗ Branch 2 not taken.
|
141593215 | const bool found = DoLookup(key, &bucket, &collisions); |
| 116 |
1/2✓ Branch 0 taken 136682975 times.
✗ Branch 1 not taken.
|
136669627 | if (found) { |
| 117 | 136685158 | keys_[bucket] = empty_key_; | |
| 118 | 136685158 | size_--; | |
| 119 | 136685158 | bucket = (bucket + 1) % capacity_; | |
| 120 |
3/3✓ Branch 0 taken 206212220 times.
✓ Branch 1 taken 130212926 times.
✓ Branch 2 taken 6276546 times.
|
342703882 | while (!(keys_[bucket] == empty_key_)) { |
| 121 | 207716952 | Key rehash = keys_[bucket]; | |
| 122 | 207716952 | keys_[bucket] = empty_key_; | |
| 123 |
1/2✓ Branch 1 taken 206018717 times.
✗ Branch 2 not taken.
|
207716952 | DoInsert(rehash, values_[bucket], false); |
| 124 | 206018724 | bucket = (bucket + 1) % capacity_; | |
| 125 | } | ||
| 126 |
1/2✓ Branch 1 taken 137239150 times.
✗ Branch 2 not taken.
|
134986930 | static_cast<Derived *>(this)->Shrink(); // No-op if fixed-size |
| 127 | } | ||
| 128 | 137241079 | return found; | |
| 129 | } | ||
| 130 | |||
| 131 | 2653 | void Clear() { DoClear(true); } | |
| 132 | |||
| 133 | 6502 | uint64_t bytes_allocated() const { return bytes_allocated_; } | |
| 134 | 1752 | static double GetEntrySize() { | |
| 135 | 3504 | const double unit = sizeof(Key) + sizeof(Value); | |
| 136 | 3504 | return unit / kLoadFactor; | |
| 137 | } | ||
| 138 | |||
| 139 | void GetCollisionStats(uint64_t *num_collisions, | ||
| 140 | uint32_t *max_collisions) const { | ||
| 141 | *num_collisions = num_collisions_; | ||
| 142 | *max_collisions = max_collisions_; | ||
| 143 | } | ||
| 144 | |||
| 145 | // Careful with the direct access TODO: iterator | ||
| 146 | uint32_t capacity() const { return capacity_; } | ||
| 147 | 135378 | Key empty_key() const { return empty_key_; } | |
| 148 | 330894 | Key *keys() const { return keys_; } | |
| 149 | 353 | Value *values() const { return values_; } | |
| 150 | |||
| 151 | // Only needed by compat | ||
| 152 | ✗ | void SetHasher(uint32_t (*hasher)(const Key &key)) { hasher_ = hasher; } | |
| 153 | |||
| 154 | protected: | ||
| 155 | 3330106399 | uint32_t ScaleHash(const Key &key) const { | |
| 156 | 3330106399 | const double bucket = (static_cast<double>(hasher_(key)) | |
| 157 | 3319472242 | * static_cast<double>(capacity_) | |
| 158 | / static_cast<double>(static_cast<uint32_t>(-1))); | ||
| 159 | 3319472242 | return static_cast<uint32_t>(bucket) % capacity_; | |
| 160 | } | ||
| 161 | |||
| 162 | 386258 | void AllocMemory() { | |
| 163 | 386258 | keys_ = static_cast<Key *>(smmap(capacity_ * sizeof(Key))); | |
| 164 | 386258 | values_ = static_cast<Value *>(smmap(capacity_ * sizeof(Value))); | |
| 165 |
2/2✓ Branch 0 taken 2083523035 times.
✓ Branch 1 taken 195325 times.
|
4156938466 | for (uint32_t i = 0; i < capacity_; ++i) { |
| 166 | 4156552292 | /*keys_[i] =*/new (keys_ + i) Key(); | |
| 167 | } | ||
| 168 |
2/2✓ Branch 0 taken 2089900903 times.
✓ Branch 1 taken 235939 times.
|
4169775430 | for (uint32_t i = 0; i < capacity_; ++i) { |
| 169 | 4169308028 | /*values_[i] =*/new (values_ + i) Value(); | |
| 170 | } | ||
| 171 | 467402 | bytes_allocated_ = (sizeof(Key) + sizeof(Value)) * capacity_; | |
| 172 | 467402 | } | |
| 173 | |||
| 174 | 386204 | void DeallocMemory(Key *k, Value *v, uint32_t c) { | |
| 175 |
2/2✓ Branch 0 taken 2101635157 times.
✓ Branch 1 taken 195339 times.
|
4193165470 | for (uint32_t i = 0; i < c; ++i) { |
| 176 | 4192779266 | k[i].~Key(); | |
| 177 | } | ||
| 178 |
2/2✓ Branch 0 taken 2105547793 times.
✓ Branch 1 taken 195339 times.
|
4200990742 | for (uint32_t i = 0; i < c; ++i) { |
| 179 | 4200604538 | v[i].~Value(); | |
| 180 | } | ||
| 181 |
2/2✓ Branch 0 taken 195297 times.
✓ Branch 1 taken 42 times.
|
386204 | if (k) |
| 182 | 386120 | smunmap(k); | |
| 183 |
2/2✓ Branch 0 taken 195297 times.
✓ Branch 1 taken 42 times.
|
386204 | if (v) |
| 184 | 386120 | smunmap(v); | |
| 185 | 386204 | k = NULL; | |
| 186 | 386204 | v = NULL; | |
| 187 | 386204 | } | |
| 188 | |||
| 189 | // Returns true iff the key is overwritten | ||
| 190 | 2845266119 | bool DoInsert(const Key &key, const Value &value, | |
| 191 | const bool count_collisions) { | ||
| 192 | uint32_t bucket; | ||
| 193 | uint32_t collisions; | ||
| 194 |
1/2✓ Branch 1 taken 1444811371 times.
✗ Branch 2 not taken.
|
2845266119 | const bool overwritten = DoLookup(key, &bucket, &collisions); |
| 195 |
2/2✓ Branch 0 taken 1239511358 times.
✓ Branch 1 taken 205300013 times.
|
2840855975 | if (count_collisions) { |
| 196 | 2440851153 | num_collisions_ += collisions; | |
| 197 | 2440851153 | max_collisions_ = std::max(collisions, max_collisions_); | |
| 198 | } | ||
| 199 | 2855086853 | keys_[bucket] = key; | |
| 200 |
1/2✓ Branch 1 taken 10411703 times.
✗ Branch 2 not taken.
|
2855086853 | values_[bucket] = value; |
| 201 | 2855086853 | return overwritten; | |
| 202 | } | ||
| 203 | |||
| 204 | 3332956334 | bool DoLookup(const Key &key, uint32_t *bucket, uint32_t *collisions) const { | |
| 205 | 3332956334 | *bucket = ScaleHash(key); | |
| 206 | 3318236804 | *collisions = 0; | |
| 207 |
3/3✓ Branch 0 taken 7017995354 times.
✓ Branch 1 taken 1345311904 times.
✓ Branch 2 taken 301283228 times.
|
11819718272 | while (!(keys_[*bucket] == empty_key_)) { |
| 208 |
3/3✓ Branch 0 taken 257292738 times.
✓ Branch 1 taken 6792320313 times.
✓ Branch 2 taken 187349480 times.
|
9013854156 | if (keys_[*bucket] == key) |
| 209 | 512372688 | return true; | |
| 210 | 8501481468 | *bucket = (*bucket + 1) % capacity_; | |
| 211 | 8501481468 | (*collisions)++; | |
| 212 | } | ||
| 213 | 2805864116 | return false; | |
| 214 | } | ||
| 215 | |||
| 216 | 386276 | void DoClear(const bool reset_capacity) { | |
| 217 |
2/2✓ Branch 0 taken 2653 times.
✓ Branch 1 taken 192732 times.
|
386276 | if (reset_capacity) |
| 218 | 4506 | static_cast<Derived *>(this)->ResetCapacity(); // No-op if fixed-size | |
| 219 |
2/2✓ Branch 0 taken 2099611303 times.
✓ Branch 1 taken 195385 times.
|
4189090534 | for (uint32_t i = 0; i < capacity_; ++i) |
| 220 | 4188704258 | keys_[i] = empty_key_; | |
| 221 | 386276 | size_ = 0; | |
| 222 | 386276 | } | |
| 223 | |||
| 224 | // Methods for resizable version | ||
| 225 | void SetThresholds() { } | ||
| 226 | void Grow() { } | ||
| 227 | void Shrink() { } | ||
| 228 | void ResetCapacity() { } | ||
| 229 | |||
| 230 | // Separate key and value arrays for better locality | ||
| 231 | Key *keys_; | ||
| 232 | Value *values_; | ||
| 233 | uint32_t capacity_; | ||
| 234 | uint32_t initial_capacity_; | ||
| 235 | uint32_t size_; | ||
| 236 | uint32_t (*hasher_)(const Key &key); | ||
| 237 | uint64_t bytes_allocated_; | ||
| 238 | uint64_t num_collisions_; | ||
| 239 | uint32_t max_collisions_; /**< maximum collisions for a single insert */ | ||
| 240 | Key empty_key_; | ||
| 241 | }; | ||
| 242 | |||
| 243 | |||
| 244 | template<class Key, class Value> | ||
| 245 | class SmallHashFixed | ||
| 246 | : public SmallHashBase<Key, Value, SmallHashFixed<Key, Value> > { | ||
| 247 | friend class SmallHashBase<Key, Value, SmallHashFixed<Key, Value> >; | ||
| 248 | |||
| 249 | protected: | ||
| 250 | // No-ops | ||
| 251 | 6484 | void SetThresholds() { } | |
| 252 | 1128151 | void Grow() { } | |
| 253 | 15277 | void Shrink() { } | |
| 254 | 18 | void ResetCapacity() { } | |
| 255 | }; | ||
| 256 | |||
| 257 | |||
| 258 | template<class Key, class Value> | ||
| 259 | class SmallHashDynamic | ||
| 260 | : public SmallHashBase<Key, Value, SmallHashDynamic<Key, Value> > { | ||
| 261 | friend class SmallHashBase<Key, Value, SmallHashDynamic<Key, Value> >; | ||
| 262 | |||
| 263 | public: | ||
| 264 | typedef SmallHashBase<Key, Value, SmallHashDynamic<Key, Value> > Base; | ||
| 265 | static const double kThresholdGrow; | ||
| 266 | static const double kThresholdShrink; | ||
| 267 | |||
| 268 | 192141 | SmallHashDynamic() : Base() { | |
| 269 | 192141 | num_migrates_ = 0; | |
| 270 | |||
| 271 | // Properly set by Init | ||
| 272 | 192141 | threshold_grow_ = 0; | |
| 273 | 192141 | threshold_shrink_ = 0; | |
| 274 | 192141 | } | |
| 275 | |||
| 276 | SmallHashDynamic(const SmallHashDynamic<Key, Value> &other) : Base() { | ||
| 277 | num_migrates_ = 0; | ||
| 278 | CopyFrom(other); | ||
| 279 | } | ||
| 280 | |||
| 281 | 66 | SmallHashDynamic<Key, Value> &operator=( | |
| 282 | const SmallHashDynamic<Key, Value> &other) { | ||
| 283 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 54 times.
|
66 | if (&other == this) |
| 284 | ✗ | return *this; | |
| 285 | |||
| 286 | 66 | CopyFrom(other); | |
| 287 | 66 | return *this; | |
| 288 | } | ||
| 289 | |||
| 290 | 51823682 | uint32_t capacity() const { return Base::capacity_; } | |
| 291 | 1385243989 | uint32_t size() const { return Base::size_; } | |
| 292 | uint32_t num_migrates() const { return num_migrates_; } | ||
| 293 | |||
| 294 | protected: | ||
| 295 | 379774 | void SetThresholds() { | |
| 296 | 379774 | threshold_grow_ = static_cast<uint32_t>(static_cast<double>(capacity()) | |
| 297 | 379774 | * kThresholdGrow); | |
| 298 | 379774 | threshold_shrink_ = static_cast<uint32_t>(static_cast<double>(capacity()) | |
| 299 | 379774 | * kThresholdShrink); | |
| 300 | 379774 | } | |
| 301 | |||
| 302 | 2448336208 | void Grow() { | |
| 303 |
2/2✓ Branch 1 taken 62718 times.
✓ Branch 2 taken 1240591705 times.
|
2448336208 | if (size() > threshold_grow_) |
| 304 | 125436 | Migrate(capacity() * 2); | |
| 305 | 2444295118 | } | |
| 306 | |||
| 307 | 136859280 | void Shrink() { | |
| 308 |
2/2✓ Branch 1 taken 25158102 times.
✓ Branch 2 taken 112099951 times.
|
136859280 | if (size() < threshold_shrink_) { |
| 309 | 25160285 | const uint32_t target_capacity = capacity() / 2; | |
| 310 |
2/2✓ Branch 0 taken 28896 times.
✓ Branch 1 taken 25125735 times.
|
25156814 | if (target_capacity >= Base::initial_capacity_) |
| 311 | 28896 | Migrate(target_capacity); | |
| 312 | } | ||
| 313 | 137256765 | } | |
| 314 | |||
| 315 | 4488 | void ResetCapacity() { | |
| 316 | 4488 | Base::DeallocMemory(Base::keys_, Base::values_, Base::capacity_); | |
| 317 | 4488 | Base::capacity_ = Base::initial_capacity_; | |
| 318 | 4488 | Base::AllocMemory(); | |
| 319 | 4488 | SetThresholds(); | |
| 320 | 4488 | } | |
| 321 | |||
| 322 | private: | ||
| 323 | // Returns a random permutation of indices [0..N-1] that is allocated | ||
| 324 | // by smmap (Knuth's shuffle algorithm) | ||
| 325 | 57816 | uint32_t *ShuffleIndices(const uint32_t N) { | |
| 326 | 57816 | uint32_t *shuffled = static_cast<uint32_t *>(smmap(N * sizeof(uint32_t))); | |
| 327 | // Init with identity | ||
| 328 |
2/2✓ Branch 0 taken 472211670 times.
✓ Branch 1 taken 28950 times.
|
944481240 | for (unsigned i = 0; i < N; ++i) |
| 329 | 944423340 | shuffled[i] = i; | |
| 330 | // Shuffle (no shuffling for the last element) | ||
| 331 |
1/2✓ Branch 0 taken 449889498 times.
✗ Branch 1 not taken.
|
898651152 | for (unsigned i = 0; i < N - 1; ++i) { |
| 332 | 899778996 | const uint32_t swap_idx = i + g_prng.Next(N - i); | |
| 333 | 898593252 | const uint32_t tmp = shuffled[i]; | |
| 334 | 898593252 | shuffled[i] = shuffled[swap_idx]; | |
| 335 | 898593252 | shuffled[swap_idx] = tmp; | |
| 336 | } | ||
| 337 | 66 | return shuffled; | |
| 338 | } | ||
| 339 | |||
| 340 | 183228 | void Migrate(const uint32_t new_capacity) { | |
| 341 | 183228 | Key *old_keys = Base::keys_; | |
| 342 | 183228 | Value *old_values = Base::values_; | |
| 343 | 183228 | const uint32_t old_capacity = capacity(); | |
| 344 | 183228 | const uint32_t old_size = size(); | |
| 345 | |||
| 346 | 183228 | Base::capacity_ = new_capacity; | |
| 347 | 183228 | SetThresholds(); | |
| 348 | 183228 | Base::AllocMemory(); | |
| 349 | 183228 | Base::DoClear(false); | |
| 350 |
2/2✓ Branch 0 taken 28854 times.
✓ Branch 1 taken 62718 times.
|
183144 | if (new_capacity < old_capacity) { |
| 351 | 57708 | uint32_t *shuffled_indices = ShuffleIndices(old_capacity); | |
| 352 |
2/2✓ Branch 0 taken 411404952 times.
✓ Branch 1 taken 4536 times.
|
822818976 | for (uint32_t i = 0; i < old_capacity; ++i) { |
| 353 |
2/3✓ Branch 0 taken 102927090 times.
✓ Branch 1 taken 308477862 times.
✗ Branch 2 not taken.
|
822809904 | if (old_keys[shuffled_indices[i]] != Base::empty_key_) { |
| 354 | 205854180 | Base::Insert(old_keys[shuffled_indices[i]], | |
| 355 | 205854180 | old_values[shuffled_indices[i]]); | |
| 356 | } | ||
| 357 | } | ||
| 358 | 9072 | smunmap(shuffled_indices); | |
| 359 | } else { | ||
| 360 |
2/2✓ Branch 0 taken 773481261 times.
✓ Branch 1 taken 45036 times.
|
1547052594 | for (uint32_t i = 0; i < old_capacity; ++i) { |
| 361 |
3/3✓ Branch 0 taken 441252872 times.
✓ Branch 1 taken 285876485 times.
✓ Branch 2 taken 46351904 times.
|
1546962522 | if (old_keys[i] != Base::empty_key_) |
| 362 | 1160636946 | Base::Insert(old_keys[i], old_values[i]); | |
| 363 | } | ||
| 364 | } | ||
| 365 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 91614 times.
|
147864 | assert(size() == old_size); |
| 366 | |||
| 367 | 183228 | Base::DeallocMemory(old_keys, old_values, old_capacity); | |
| 368 | 183228 | num_migrates_++; | |
| 369 | 183228 | } | |
| 370 | |||
| 371 | 66 | void CopyFrom(const SmallHashDynamic<Key, Value> &other) { | |
| 372 | 66 | uint32_t *shuffled_indices = ShuffleIndices(other.capacity_); | |
| 373 |
2/2✓ Branch 0 taken 57803004 times.
✓ Branch 1 taken 54 times.
|
57803322 | for (uint32_t i = 0; i < other.capacity_; ++i) { |
| 374 |
2/3✗ Branch 0 not taken.
✓ Branch 1 taken 42000126 times.
✓ Branch 2 taken 15802878 times.
|
57803256 | if (other.keys_[shuffled_indices[i]] != other.empty_key_) { |
| 375 | 42000000 | this->Insert(other.keys_[shuffled_indices[i]], | |
| 376 | 42000000 | other.values_[shuffled_indices[i]]); | |
| 377 | } | ||
| 378 | } | ||
| 379 | 66 | smunmap(shuffled_indices); | |
| 380 | 66 | } | |
| 381 | |||
| 382 | uint32_t num_migrates_; | ||
| 383 | uint32_t threshold_grow_; | ||
| 384 | uint32_t threshold_shrink_; | ||
| 385 | static Prng g_prng; | ||
| 386 | }; | ||
| 387 | |||
| 388 | |||
| 389 | /** | ||
| 390 | * Distributes the key-value pairs over $n$ dynamic hash maps with individual | ||
| 391 | * mutexes. Hence low mutex contention, and benefits from multiple processors. | ||
| 392 | */ | ||
| 393 | template<class Key, class Value> | ||
| 394 | class MultiHash { | ||
| 395 | public: | ||
| 396 | 420 | MultiHash() { | |
| 397 | 420 | num_hashmaps_ = 0; | |
| 398 | 420 | hashmaps_ = NULL; | |
| 399 | 420 | locks_ = NULL; | |
| 400 | 420 | } | |
| 401 | |||
| 402 | 420 | void Init(const uint8_t num_hashmaps, const Key &empty_key, | |
| 403 | uint32_t (*hasher)(const Key &key)) { | ||
| 404 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 420 times.
|
420 | assert(num_hashmaps > 0); |
| 405 | 420 | const uint8_t N = num_hashmaps; | |
| 406 | 420 | num_hashmaps_ = N; | |
| 407 |
3/4✓ Branch 2 taken 17640 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 17640 times.
✓ Branch 5 taken 420 times.
|
18060 | hashmaps_ = new SmallHashDynamic<Key, Value>[N](); |
| 408 | 420 | locks_ = static_cast<pthread_mutex_t *>( | |
| 409 | 420 | smalloc(N * sizeof(pthread_mutex_t))); | |
| 410 |
2/2✓ Branch 0 taken 17640 times.
✓ Branch 1 taken 420 times.
|
18060 | for (uint8_t i = 0; i < N; ++i) { |
| 411 | 17640 | int retval = pthread_mutex_init(&locks_[i], NULL); | |
| 412 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17640 times.
|
17640 | assert(retval == 0); |
| 413 | 17640 | hashmaps_[i].Init(128, empty_key, hasher); | |
| 414 | } | ||
| 415 | 420 | } | |
| 416 | |||
| 417 | 420 | ~MultiHash() { | |
| 418 |
2/2✓ Branch 0 taken 17640 times.
✓ Branch 1 taken 420 times.
|
18060 | for (uint8_t i = 0; i < num_hashmaps_; ++i) { |
| 419 | 17640 | pthread_mutex_destroy(&locks_[i]); | |
| 420 | } | ||
| 421 | 420 | free(locks_); | |
| 422 |
3/4✓ Branch 0 taken 420 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 17640 times.
✓ Branch 3 taken 420 times.
|
18060 | delete[] hashmaps_; |
| 423 | 420 | } | |
| 424 | |||
| 425 | 42000000 | bool Lookup(const Key &key, Value *value) { | |
| 426 | 42000000 | uint8_t target = SelectHashmap(key); | |
| 427 | 42000000 | Lock(target); | |
| 428 | 42000000 | const bool result = hashmaps_[target].Lookup(key, value); | |
| 429 | 42000000 | Unlock(target); | |
| 430 | 42000000 | return result; | |
| 431 | } | ||
| 432 | |||
| 433 | 157655442 | void Insert(const Key &key, const Value &value) { | |
| 434 | 157655442 | uint8_t target = SelectHashmap(key); | |
| 435 | 152775000 | Lock(target); | |
| 436 | 157849230 | hashmaps_[target].Insert(key, value); | |
| 437 | 153000246 | Unlock(target); | |
| 438 | 160362804 | } | |
| 439 | |||
| 440 | 75905466 | void Erase(const Key &key) { | |
| 441 | 75905466 | uint8_t target = SelectHashmap(key); | |
| 442 | 72156714 | Lock(target); | |
| 443 | 76943286 | hashmaps_[target].Erase(key); | |
| 444 | 69621510 | Unlock(target); | |
| 445 | 78305472 | } | |
| 446 | |||
| 447 | 42 | void Clear() { | |
| 448 |
2/2✓ Branch 0 taken 1764 times.
✓ Branch 1 taken 42 times.
|
1806 | for (uint8_t i = 0; i < num_hashmaps_; ++i) |
| 449 | 1764 | Lock(i); | |
| 450 |
2/2✓ Branch 0 taken 1764 times.
✓ Branch 1 taken 42 times.
|
1806 | for (uint8_t i = 0; i < num_hashmaps_; ++i) |
| 451 | 1764 | hashmaps_[i].Clear(); | |
| 452 |
2/2✓ Branch 0 taken 1764 times.
✓ Branch 1 taken 42 times.
|
1806 | for (uint8_t i = 0; i < num_hashmaps_; ++i) |
| 453 | 1764 | Unlock(i); | |
| 454 | 42 | } | |
| 455 | |||
| 456 | 420 | uint8_t num_hashmaps() const { return num_hashmaps_; } | |
| 457 | |||
| 458 | 336 | void GetSizes(uint32_t *sizes) { | |
| 459 |
2/2✓ Branch 0 taken 14112 times.
✓ Branch 1 taken 336 times.
|
14448 | for (uint8_t i = 0; i < num_hashmaps_; ++i) { |
| 460 | 14112 | Lock(i); | |
| 461 | 14112 | sizes[i] = hashmaps_[i].size(); | |
| 462 | 14112 | Unlock(i); | |
| 463 | } | ||
| 464 | 336 | } | |
| 465 | |||
| 466 | void GetCollisionStats(uint64_t *num_collisions, uint32_t *max_collisions) { | ||
| 467 | for (uint8_t i = 0; i < num_hashmaps_; ++i) { | ||
| 468 | Lock(i); | ||
| 469 | hashmaps_[i].GetCollisionStats(&num_collisions[i], &max_collisions[i]); | ||
| 470 | Unlock(i); | ||
| 471 | } | ||
| 472 | } | ||
| 473 | |||
| 474 | private: | ||
| 475 | 273850080 | inline uint8_t SelectHashmap(const Key &key) { | |
| 476 | 273850080 | uint32_t hash = MurmurHash2(&key, sizeof(key), 0x37); | |
| 477 | 268509024 | double bucket = static_cast<double>(hash) | |
| 478 | 268509024 | * static_cast<double>(num_hashmaps_) | |
| 479 | / static_cast<double>(static_cast<uint32_t>(-1)); | ||
| 480 | 268509024 | return static_cast<uint32_t>(bucket) % num_hashmaps_; | |
| 481 | } | ||
| 482 | |||
| 483 | 265994316 | inline void Lock(const uint8_t target) { | |
| 484 | 265994316 | int retval = pthread_mutex_lock(&locks_[target]); | |
| 485 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 287487774 times.
|
287487774 | assert(retval == 0); |
| 486 | 287487774 | } | |
| 487 | |||
| 488 | 264258078 | inline void Unlock(const uint8_t target) { | |
| 489 | 264258078 | int retval = pthread_mutex_unlock(&locks_[target]); | |
| 490 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 289475970 times.
|
289475970 | assert(retval == 0); |
| 491 | 289475970 | } | |
| 492 | |||
| 493 | uint8_t num_hashmaps_; | ||
| 494 | SmallHashDynamic<Key, Value> *hashmaps_; | ||
| 495 | pthread_mutex_t *locks_; | ||
| 496 | }; | ||
| 497 | |||
| 498 | |||
| 499 | // initialize the static fields | ||
| 500 | template<class Key, class Value> | ||
| 501 | Prng SmallHashDynamic<Key, Value>::g_prng; | ||
| 502 | |||
| 503 | template<class Key, class Value, class Derived> | ||
| 504 | const double SmallHashBase<Key, Value, Derived>::kLoadFactor = 0.75; | ||
| 505 | |||
| 506 | template<class Key, class Value> | ||
| 507 | const double SmallHashDynamic<Key, Value>::kThresholdGrow = 0.75; | ||
| 508 | |||
| 509 | template<class Key, class Value> | ||
| 510 | const double SmallHashDynamic<Key, Value>::kThresholdShrink = 0.25; | ||
| 511 | |||
| 512 | #endif // CVMFS_SMALLHASH_H_ | ||
| 513 |