1 |
|
|
/** |
2 |
|
|
* This file is part of the CernVM File System. |
3 |
|
|
* |
4 |
|
|
* HashFilters are a container classes that get initialized with a number of |
5 |
|
|
* hashes. Later they can serve queries for other hashes and decide if they are |
6 |
|
|
* contained in the filter or not. |
7 |
|
|
*/ |
8 |
|
|
|
9 |
|
|
#ifndef CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ |
10 |
|
|
#define CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ |
11 |
|
|
|
12 |
|
|
#include <set> |
13 |
|
|
|
14 |
|
|
#include "hash.h" |
15 |
|
|
#include "smallhash.h" |
16 |
|
|
|
17 |
|
|
/** |
18 |
|
|
* Abstract base class of a HashFilter to define the common interface. |
19 |
|
|
*/ |
20 |
|
67 |
class AbstractHashFilter { |
21 |
|
|
public: |
22 |
✗✓ |
67 |
virtual ~AbstractHashFilter() {} |
23 |
|
|
|
24 |
|
|
/** |
25 |
|
|
* Adds the given hash to the filter |
26 |
|
|
* |
27 |
|
|
* @param hash the hash to be added to the HashFilter |
28 |
|
|
*/ |
29 |
|
|
virtual void Fill(const shash::Any &hash) = 0; |
30 |
|
|
|
31 |
|
|
/** |
32 |
|
|
* Decides if a presented hash is in the filter or not |
33 |
|
|
* Depending on the concrete implementation of this method it could be a prob- |
34 |
|
|
* abilistic answer. However, implementations should ensure a recall rate of |
35 |
|
|
* 100%, say: never produce false negatives. |
36 |
|
|
* |
37 |
|
|
* @param hash the hash to be queried |
38 |
|
|
* @return true if the hash is (probably) contained in the set |
39 |
|
|
* false if it is definitely not in the set |
40 |
|
|
*/ |
41 |
|
|
virtual bool Contains(const shash::Any &hash) const = 0; |
42 |
|
|
|
43 |
|
|
/** |
44 |
|
|
* Freezes the filter after filling it with all values. This is not necessary |
45 |
|
|
* but could be used for certain optimizations depending on the implementation |
46 |
|
|
* of the AbstractHashFilter. |
47 |
|
|
* Note: After Freeze() has been called, Fill() should fail! |
48 |
|
|
*/ |
49 |
|
|
virtual void Freeze() {} |
50 |
|
|
|
51 |
|
|
/** |
52 |
|
|
* Returns the number of objects already inserted into the filter. |
53 |
|
|
* @return number of objects in the filter |
54 |
|
|
*/ |
55 |
|
|
virtual size_t Count() const = 0; |
56 |
|
|
}; |
57 |
|
|
|
58 |
|
|
|
59 |
|
|
//------------------------------------------------------------------------------ |
60 |
|
|
|
61 |
|
|
|
62 |
|
|
/** |
63 |
|
|
* This is a simplistic implementation of AbstractHashFilter mainly used for |
64 |
|
|
* testing purposes. It uses an std::set and thus is highly suboptimal. |
65 |
|
|
*/ |
66 |
✗✓ |
31 |
class SimpleHashFilter : public AbstractHashFilter { |
67 |
|
|
public: |
68 |
|
31 |
SimpleHashFilter() : frozen_(false) {} |
69 |
|
|
|
70 |
|
1001488 |
void Fill(const shash::Any &hash) { |
71 |
✗✓ |
1001488 |
assert(!frozen_); |
72 |
|
1001488 |
hashes_.insert(hash); |
73 |
|
1001488 |
} |
74 |
|
|
|
75 |
|
1000674 |
bool Contains(const shash::Any &hash) const { |
76 |
|
1000674 |
return hashes_.find(hash) != hashes_.end(); |
77 |
|
|
} |
78 |
|
|
|
79 |
|
8 |
void Freeze() { frozen_ = true; } |
80 |
|
15 |
size_t Count() const { return hashes_.size(); } |
81 |
|
|
|
82 |
|
|
private: |
83 |
|
|
std::set<shash::Any> hashes_; |
84 |
|
|
bool frozen_; |
85 |
|
|
}; |
86 |
|
|
|
87 |
|
|
|
88 |
|
|
//------------------------------------------------------------------------------ |
89 |
|
|
|
90 |
|
|
|
91 |
|
|
/** |
92 |
|
|
* This is an implementation of AbstractHashFilter using the SmallHash structure |
93 |
|
|
* for internal storage. |
94 |
|
|
*/ |
95 |
✗✗✓ |
36 |
class SmallhashFilter : public AbstractHashFilter { |
96 |
|
|
protected: |
97 |
|
8000716 |
static uint32_t hasher(const shash::Any &key) { |
98 |
|
|
// Don't start with the first bytes, because == is using them as well |
99 |
|
8000716 |
return (uint32_t) *(reinterpret_cast<const uint32_t *>(key.digest) + 1); |
100 |
|
|
} |
101 |
|
|
|
102 |
|
|
public: |
103 |
|
36 |
SmallhashFilter() : frozen_(false) { |
104 |
|
|
// zero_element is MD5("unobtanium") |
105 |
|
|
shash::Any zero_element(shash::kMd5, |
106 |
|
36 |
shash::HexPtr("d61f853acc5a39e01f3906f73e31d256")); |
107 |
|
36 |
hashmap_.Init(1048576, zero_element, &SmallhashFilter::hasher); |
108 |
|
|
} |
109 |
|
|
|
110 |
|
4000248 |
void Fill(const shash::Any &hash) { |
111 |
✗✓ |
4000248 |
assert(!frozen_); |
112 |
|
4000248 |
hashmap_.Insert(hash, true); |
113 |
|
4000248 |
} |
114 |
|
|
|
115 |
|
4000468 |
bool Contains(const shash::Any &hash) const { |
116 |
|
4000468 |
return hashmap_.Contains(hash); |
117 |
|
|
} |
118 |
|
|
|
119 |
|
32 |
void Freeze() { frozen_ = true; } |
120 |
|
60 |
size_t Count() const { return hashmap_.size(); } |
121 |
|
|
|
122 |
|
|
private: |
123 |
|
|
SmallHashDynamic<shash::Any, bool> hashmap_; |
124 |
|
|
bool frozen_; |
125 |
|
|
}; |
126 |
|
|
|
127 |
|
|
#endif // CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_ |