Directory: | cvmfs/ |
---|---|
File: | cvmfs/garbage_collection/hash_filter.h |
Date: | 2025-04-20 02:34:28 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 24 | 25 | 96.0% |
Branches: | 8 | 16 | 50.0% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | * | ||
4 | * HashFilters are a container classes that get initialized with a number of | ||
5 | * hashes. Later they can serve queries for other hashes and decide if they are | ||
6 | * contained in the filter or not. | ||
7 | */ | ||
8 | |||
9 | #ifndef CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ | ||
10 | #define CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ | ||
11 | |||
12 | #include <set> | ||
13 | |||
14 | #include "crypto/hash.h" | ||
15 | #include "smallhash.h" | ||
16 | |||
17 | /** | ||
18 | * Abstract base class of a HashFilter to define the common interface. | ||
19 | */ | ||
20 | class AbstractHashFilter { | ||
21 | public: | ||
22 | 212 | virtual ~AbstractHashFilter() {} | |
23 | |||
24 | /** | ||
25 | * Adds the given hash to the filter | ||
26 | * | ||
27 | * @param hash the hash to be added to the HashFilter | ||
28 | */ | ||
29 | virtual void Fill(const shash::Any &hash) = 0; | ||
30 | |||
31 | /** | ||
32 | * Decides if a presented hash is in the filter or not | ||
33 | * Depending on the concrete implementation of this method it could be a prob- | ||
34 | * abilistic answer. However, implementations should ensure a recall rate of | ||
35 | * 100%, say: never produce false negatives. | ||
36 | * | ||
37 | * @param hash the hash to be queried | ||
38 | * @return true if the hash is (probably) contained in the set | ||
39 | * false if it is definitely not in the set | ||
40 | */ | ||
41 | virtual bool Contains(const shash::Any &hash) const = 0; | ||
42 | |||
43 | /** | ||
44 | * Freezes the filter after filling it with all values. This is not necessary | ||
45 | * but could be used for certain optimizations depending on the implementation | ||
46 | * of the AbstractHashFilter. | ||
47 | * Note: After Freeze() has been called, Fill() should fail! | ||
48 | */ | ||
49 | ✗ | virtual void Freeze() {} | |
50 | |||
51 | /** | ||
52 | * Returns the number of objects already inserted into the filter. | ||
53 | * @return number of objects in the filter | ||
54 | */ | ||
55 | virtual size_t Count() const = 0; | ||
56 | }; | ||
57 | |||
58 | |||
59 | //------------------------------------------------------------------------------ | ||
60 | |||
61 | |||
62 | /** | ||
63 | * This is a simplistic implementation of AbstractHashFilter mainly used for | ||
64 | * testing purposes. It uses an std::set and thus is highly suboptimal. | ||
65 | */ | ||
66 | class SimpleHashFilter : public AbstractHashFilter { | ||
67 | public: | ||
68 | 97 | SimpleHashFilter() : frozen_(false) {} | |
69 | |||
70 | 1003276 | void Fill(const shash::Any &hash) { | |
71 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1003276 times.
|
1003276 | assert(!frozen_); |
72 | 1003276 | hashes_.insert(hash); | |
73 | 1003276 | } | |
74 | |||
75 | 1001659 | bool Contains(const shash::Any &hash) const { | |
76 |
1/2✓ Branch 2 taken 1001659 times.
✗ Branch 3 not taken.
|
1001659 | return hashes_.find(hash) != hashes_.end(); |
77 | } | ||
78 | |||
79 | 8 | void Freeze() { frozen_ = true; } | |
80 | 15 | size_t Count() const { return hashes_.size(); } | |
81 | |||
82 | private: | ||
83 | std::set<shash::Any> hashes_; | ||
84 | bool frozen_; | ||
85 | }; | ||
86 | |||
87 | |||
88 | //------------------------------------------------------------------------------ | ||
89 | |||
90 | |||
91 | /** | ||
92 | * This is an implementation of AbstractHashFilter using the SmallHash structure | ||
93 | * for internal storage. | ||
94 | */ | ||
95 | class SmallhashFilter : public AbstractHashFilter { | ||
96 | protected: | ||
97 | 2000179 | static uint32_t hasher(const shash::Any &key) { | |
98 | // Don't start with the first bytes, because == is using them as well | ||
99 | 2000179 | return (uint32_t) *(reinterpret_cast<const uint32_t *>(key.digest) + 1); | |
100 | } | ||
101 | |||
102 | public: | ||
103 |
1/2✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
|
9 | SmallhashFilter() : frozen_(false) { |
104 | // zero_element is MD5("unobtanium") | ||
105 | shash::Any zero_element(shash::kMd5, | ||
106 |
2/4✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 9 times.
✗ Branch 7 not taken.
|
9 | shash::HexPtr("d61f853acc5a39e01f3906f73e31d256")); |
107 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | hashmap_.Init(1048576, zero_element, &SmallhashFilter::hasher); |
108 | 9 | } | |
109 | |||
110 | 1000062 | void Fill(const shash::Any &hash) { | |
111 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1000062 times.
|
1000062 | assert(!frozen_); |
112 |
1/2✓ Branch 1 taken 1000062 times.
✗ Branch 2 not taken.
|
1000062 | hashmap_.Insert(hash, true); |
113 | 1000062 | } | |
114 | |||
115 | 1000117 | bool Contains(const shash::Any &hash) const { | |
116 | 1000117 | return hashmap_.Contains(hash); | |
117 | } | ||
118 | |||
119 | 8 | void Freeze() { frozen_ = true; } | |
120 | 15 | size_t Count() const { return hashmap_.size(); } | |
121 | |||
122 | private: | ||
123 | SmallHashDynamic<shash::Any, bool> hashmap_; | ||
124 | bool frozen_; | ||
125 | }; | ||
126 | |||
127 | #endif // CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ | ||
128 |