GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/hash_filter.h
Date: 2025-07-13 02:35:07
Exec Total Coverage
Lines: 24 25 96.0%
Branches: 8 16 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * HashFilters are a container classes that get initialized with a number of
5 * hashes. Later they can serve queries for other hashes and decide if they are
6 * contained in the filter or not.
7 */
8
9 #ifndef CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
10 #define CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
11
12 #include <set>
13
14 #include "crypto/hash.h"
15 #include "smallhash.h"
16
17 /**
18 * Abstract base class of a HashFilter to define the common interface.
19 */
20 class AbstractHashFilter {
21 public:
22 6056 virtual ~AbstractHashFilter() { }
23
24 /**
25 * Adds the given hash to the filter
26 *
27 * @param hash the hash to be added to the HashFilter
28 */
29 virtual void Fill(const shash::Any &hash) = 0;
30
31 /**
32 * Decides if a presented hash is in the filter or not
33 * Depending on the concrete implementation of this method it could be a prob-
34 * abilistic answer. However, implementations should ensure a recall rate of
35 * 100%, say: never produce false negatives.
36 *
37 * @param hash the hash to be queried
38 * @return true if the hash is (probably) contained in the set
39 * false if it is definitely not in the set
40 */
41 virtual bool Contains(const shash::Any &hash) const = 0;
42
43 /**
44 * Freezes the filter after filling it with all values. This is not necessary
45 * but could be used for certain optimizations depending on the implementation
46 * of the AbstractHashFilter.
47 * Note: After Freeze() has been called, Fill() should fail!
48 */
49 virtual void Freeze() { }
50
51 /**
52 * Returns the number of objects already inserted into the filter.
53 * @return number of objects in the filter
54 */
55 virtual size_t Count() const = 0;
56 };
57
58
59 //------------------------------------------------------------------------------
60
61
62 /**
63 * This is a simplistic implementation of AbstractHashFilter mainly used for
64 * testing purposes. It uses an std::set and thus is highly suboptimal.
65 */
66 class SimpleHashFilter : public AbstractHashFilter {
67 public:
68 2938 SimpleHashFilter() : frozen_(false) { }
69
70 38097169 void Fill(const shash::Any &hash) {
71
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 38097169 times.
38097169 assert(!frozen_);
72 38097169 hashes_.insert(hash);
73 38097169 }
74
75 38049935 bool Contains(const shash::Any &hash) const {
76
1/2
✓ Branch 2 taken 38049935 times.
✗ Branch 3 not taken.
38049935 return hashes_.find(hash) != hashes_.end();
77 }
78
79 304 void Freeze() { frozen_ = true; }
80 570 size_t Count() const { return hashes_.size(); }
81
82 private:
83 std::set<shash::Any> hashes_;
84 bool frozen_;
85 };
86
87
88 //------------------------------------------------------------------------------
89
90
91 /**
92 * This is an implementation of AbstractHashFilter using the SmallHash structure
93 * for internal storage.
94 */
95 class SmallhashFilter : public AbstractHashFilter {
96 protected:
97 20001790 static uint32_t hasher(const shash::Any &key) {
98 // Don't start with the first bytes, because == is using them as well
99 return static_cast<uint32_t>(
100 20001790 *(reinterpret_cast<const uint32_t *>(key.digest) + 1));
101 }
102
103 public:
104
1/2
✓ Branch 2 taken 90 times.
✗ Branch 3 not taken.
90 SmallhashFilter() : frozen_(false) {
105 // zero_element is MD5("unobtanium")
106 const shash::Any zero_element(
107
2/4
✓ Branch 2 taken 90 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 90 times.
✗ Branch 7 not taken.
90 shash::kMd5, shash::HexPtr("d61f853acc5a39e01f3906f73e31d256"));
108
1/2
✓ Branch 1 taken 90 times.
✗ Branch 2 not taken.
90 hashmap_.Init(1048576, zero_element, &SmallhashFilter::hasher);
109 90 }
110
111 10000620 void Fill(const shash::Any &hash) {
112
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10000620 times.
10000620 assert(!frozen_);
113
1/2
✓ Branch 1 taken 10000620 times.
✗ Branch 2 not taken.
10000620 hashmap_.Insert(hash, true);
114 10000620 }
115
116 10001170 bool Contains(const shash::Any &hash) const {
117 10001170 return hashmap_.Contains(hash);
118 }
119
120 80 void Freeze() { frozen_ = true; }
121 150 size_t Count() const { return hashmap_.size(); }
122
123 private:
124 SmallHashDynamic<shash::Any, bool> hashmap_;
125 bool frozen_;
126 };
127
128 #endif // CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
129