GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/hash_filter.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 24 25 96.0%
Branches: 8 16 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * HashFilters are a container classes that get initialized with a number of
5 * hashes. Later they can serve queries for other hashes and decide if they are
6 * contained in the filter or not.
7 */
8
9 #ifndef CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
10 #define CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
11
12 #include <set>
13
14 #include "crypto/hash.h"
15 #include "smallhash.h"
16
17 /**
18 * Abstract base class of a HashFilter to define the common interface.
19 */
20 class AbstractHashFilter {
21 public:
22 4540 virtual ~AbstractHashFilter() { }
23
24 /**
25 * Adds the given hash to the filter
26 *
27 * @param hash the hash to be added to the HashFilter
28 */
29 virtual void Fill(const shash::Any &hash) = 0;
30
31 /**
32 * Decides if a presented hash is in the filter or not
33 * Depending on the concrete implementation of this method it could be a prob-
34 * abilistic answer. However, implementations should ensure a recall rate of
35 * 100%, say: never produce false negatives.
36 *
37 * @param hash the hash to be queried
38 * @return true if the hash is (probably) contained in the set
39 * false if it is definitely not in the set
40 */
41 virtual bool Contains(const shash::Any &hash) const = 0;
42
43 /**
44 * Freezes the filter after filling it with all values. This is not necessary
45 * but could be used for certain optimizations depending on the implementation
46 * of the AbstractHashFilter.
47 * Note: After Freeze() has been called, Fill() should fail!
48 */
49 virtual void Freeze() { }
50
51 /**
52 * Returns the number of objects already inserted into the filter.
53 * @return number of objects in the filter
54 */
55 virtual size_t Count() const = 0;
56 };
57
58
59 //------------------------------------------------------------------------------
60
61
62 /**
63 * This is a simplistic implementation of AbstractHashFilter mainly used for
64 * testing purposes. It uses an std::set and thus is highly suboptimal.
65 */
66 class SimpleHashFilter : public AbstractHashFilter {
67 public:
68 2072 SimpleHashFilter() : frozen_(false) { }
69
70 20070341 void Fill(const shash::Any &hash) {
71
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20070341 times.
20070341 assert(!frozen_);
72 20070341 hashes_.insert(hash);
73 20070341 }
74
75 20035493 bool Contains(const shash::Any &hash) const {
76
1/2
✓ Branch 2 taken 20035493 times.
✗ Branch 3 not taken.
20035493 return hashes_.find(hash) != hashes_.end();
77 }
78
79 160 void Freeze() { frozen_ = true; }
80 300 size_t Count() const { return hashes_.size(); }
81
82 private:
83 std::set<shash::Any> hashes_;
84 bool frozen_;
85 };
86
87
88 //------------------------------------------------------------------------------
89
90
91 /**
92 * This is an implementation of AbstractHashFilter using the SmallHash structure
93 * for internal storage.
94 */
95 class SmallhashFilter : public AbstractHashFilter {
96 protected:
97 44003938 static uint32_t hasher(const shash::Any &key) {
98 // Don't start with the first bytes, because == is using them as well
99 44003938 return (uint32_t) * (reinterpret_cast<const uint32_t *>(key.digest) + 1);
100 }
101
102 public:
103
1/2
✓ Branch 2 taken 198 times.
✗ Branch 3 not taken.
198 SmallhashFilter() : frozen_(false) {
104 // zero_element is MD5("unobtanium")
105 const shash::Any zero_element(
106
2/4
✓ Branch 2 taken 198 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 198 times.
✗ Branch 7 not taken.
198 shash::kMd5, shash::HexPtr("d61f853acc5a39e01f3906f73e31d256"));
107
1/2
✓ Branch 1 taken 198 times.
✗ Branch 2 not taken.
198 hashmap_.Init(1048576, zero_element, &SmallhashFilter::hasher);
108 198 }
109
110 22001364 void Fill(const shash::Any &hash) {
111
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 22001364 times.
22001364 assert(!frozen_);
112
1/2
✓ Branch 1 taken 22001364 times.
✗ Branch 2 not taken.
22001364 hashmap_.Insert(hash, true);
113 22001364 }
114
115 22002574 bool Contains(const shash::Any &hash) const {
116 22002574 return hashmap_.Contains(hash);
117 }
118
119 176 void Freeze() { frozen_ = true; }
120 330 size_t Count() const { return hashmap_.size(); }
121
122 private:
123 SmallHashDynamic<shash::Any, bool> hashmap_;
124 bool frozen_;
125 };
126
127 #endif // CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
128