GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/garbage_collection/hash_filter.h Lines: 26 27 96.3 %
Date: 2019-02-03 02:48:13 Branches: 5 11 45.5 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 *
4
 * HashFilters are a container classes that get initialized with a number of
5
 * hashes. Later they can serve queries for other hashes and decide if they are
6
 * contained in the filter or not.
7
 */
8
9
#ifndef CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
10
#define CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_
11
12
#include <set>
13
14
#include "hash.h"
15
#include "smallhash.h"
16
17
/**
18
 * Abstract base class of a HashFilter to define the common interface.
19
 */
20
67
class AbstractHashFilter {
21
 public:
22
67
  virtual ~AbstractHashFilter() {}
23
24
  /**
25
   * Adds the given hash to the filter
26
   *
27
   * @param hash  the hash to be added to the HashFilter
28
   */
29
  virtual void Fill(const shash::Any &hash) = 0;
30
31
  /**
32
   * Decides if a presented hash is in the filter or not
33
   * Depending on the concrete implementation of this method it could be a prob-
34
   * abilistic answer. However, implementations should ensure a recall rate of
35
   * 100%, say: never produce false negatives.
36
   *
37
   * @param hash  the hash to be queried
38
   * @return  true   if the hash is (probably) contained in the set
39
   *          false  if it is definitely not in the set
40
   */
41
  virtual bool Contains(const shash::Any &hash) const = 0;
42
43
  /**
44
   * Freezes the filter after filling it with all values. This is not necessary
45
   * but could be used for certain optimizations depending on the implementation
46
   * of the AbstractHashFilter.
47
   * Note: After Freeze() has been called, Fill() should fail!
48
   */
49
  virtual void Freeze() {}
50
51
  /**
52
   * Returns the number of objects already inserted into the filter.
53
   * @return number of objects in the filter
54
   */
55
  virtual size_t Count() const = 0;
56
};
57
58
59
//------------------------------------------------------------------------------
60
61
62
/**
63
 * This is a simplistic implementation of AbstractHashFilter mainly used for
64
 * testing purposes. It uses an std::set and thus is highly suboptimal.
65
 */
66
31
class SimpleHashFilter : public AbstractHashFilter {
67
 public:
68
31
  SimpleHashFilter() : frozen_(false) {}
69
70
1001488
  void Fill(const shash::Any &hash) {
71
1001488
    assert(!frozen_);
72
1001488
    hashes_.insert(hash);
73
1001488
  }
74
75
1000674
  bool Contains(const shash::Any &hash) const {
76
1000674
    return hashes_.find(hash) != hashes_.end();
77
  }
78
79
8
  void Freeze() { frozen_ = true; }
80
15
  size_t Count() const { return hashes_.size(); }
81
82
 private:
83
  std::set<shash::Any>  hashes_;
84
  bool                  frozen_;
85
};
86
87
88
//------------------------------------------------------------------------------
89
90
91
/**
92
 * This is an implementation of AbstractHashFilter using the SmallHash structure
93
 * for internal storage.
94
 */
95
36
class SmallhashFilter : public AbstractHashFilter {
96
 protected:
97
8000716
  static uint32_t hasher(const shash::Any &key) {
98
    // Don't start with the first bytes, because == is using them as well
99
8000716
    return (uint32_t) *(reinterpret_cast<const uint32_t *>(key.digest) + 1);
100
  }
101
102
 public:
103
36
  SmallhashFilter() : frozen_(false) {
104
    // zero_element is MD5("unobtanium")
105
    shash::Any zero_element(shash::kMd5,
106
36
                            shash::HexPtr("d61f853acc5a39e01f3906f73e31d256"));
107
36
    hashmap_.Init(1048576, zero_element, &SmallhashFilter::hasher);
108
  }
109
110
4000248
  void Fill(const shash::Any &hash) {
111
4000248
    assert(!frozen_);
112
4000248
    hashmap_.Insert(hash, true);
113
4000248
  }
114
115
4000468
  bool Contains(const shash::Any &hash) const {
116
4000468
    return hashmap_.Contains(hash);
117
  }
118
119
32
  void   Freeze()      { frozen_ = true;         }
120
60
  size_t Count() const { return hashmap_.size(); }
121
122
 private:
123
  SmallHashDynamic<shash::Any, bool>  hashmap_;
124
  bool                                frozen_;
125
};
126
127
#endif  // CVMFS_GARBAGE_COLLECTION_HASH_FILTER_H_