GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/garbage_collector.h
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 27 30 90.0%
Branches: 2 4 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * The GarbageCollector class is figuring out which data objects (represented by
5 * their content hashes) can be deleted as outdated garbage.
6 * Garbage collection is performed on the granularity of catalog revisions, thus
7 * a complete repository revision is either considered to be outdated or active.
8 * This way, a mountable repository revision stays completely usable (no nested
9 * catalogs or data objects become unavailable). A revision is defined by its
10 * root catalog; the revision numbers of nested catalogs are irrelevant, since
11 * they might be referenced by newer (preserved) repository revisions.
12 * Thus, garbage objects are those that are _not_ referenced by any of the pre-
13 * served root catalogs or their direct subordinate nested catalog tree.
14 *
15 * We use a two-stage approach:
16 *
17 * 1st Stage - Initialization
18 * The GarbageCollector is reading all the catalogs that are meant
19 * to be preserved. It builds up a filter (HashFilterT) containing
20 * all content hashes that are _not_ to be deleted
21 *
22 * 2nd Stage - Sweeping
23 * The initialized HashFilterT is presented with all content
24 * hashes found in condemned catalogs and decides if they are
25 * referenced by the preserved catalog revisions or not.
26 *
27 * The GarbageCollector is templated with CatalogTraversalT mainly for
28 * testability and with HashFilterT as an instance of the Strategy Pattern to
29 * abstract from the actual hash filtering method to be used.
30 */
31
32 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
33 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
34
35 #include <inttypes.h>
36
37 #include <vector>
38
39 #include "catalog_traversal_parallel.h"
40 #include "garbage_collection/hash_filter.h"
41 #include "statistics.h"
42 #include "upload_facility.h"
43
44 template<class CatalogTraversalT, class HashFilterT>
45 class GarbageCollector {
46 protected:
47 typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN;
48 typedef typename ObjectFetcherTN::HistoryTN HistoryTN;
49 typedef typename ObjectFetcherTN::ReflogTN ReflogTN;
50 typedef typename CatalogTraversalT::CatalogTN CatalogTN;
51 typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN;
52 typedef typename CatalogTraversalT::Parameters TraversalParameters;
53 typedef std::vector<shash::Any> HashVector;
54
55 public:
56 struct Configuration {
57 static const uint64_t kFullHistory;
58 static const uint64_t kNoHistory;
59 static const time_t kNoTimestamp;
60 static const shash::Any kLatestHistoryDatabase;
61
62 56 Configuration()
63 56 : uploader(NULL)
64 56 , object_fetcher(NULL)
65 56 , reflog(NULL)
66 56 , keep_history_depth(kFullHistory)
67 56 , keep_history_timestamp(kNoTimestamp)
68 56 , dry_run(false)
69 56 , verbose(false)
70 56 , deleted_objects_logfile(NULL)
71 56 , statistics(NULL)
72 56 , extended_stats(false)
73 56 , num_threads(8) {}
74
75 724 bool has_deletion_log() const { return deleted_objects_logfile != NULL; }
76
77 upload::AbstractUploader *uploader;
78 ObjectFetcherTN *object_fetcher;
79 ReflogTN *reflog;
80 uint64_t keep_history_depth;
81 time_t keep_history_timestamp;
82 bool dry_run;
83 bool verbose;
84 FILE *deleted_objects_logfile;
85 perf::Statistics *statistics;
86 bool extended_stats;
87 unsigned int num_threads;
88 };
89
90 public:
91 explicit GarbageCollector(const Configuration &configuration);
92
93 void UseReflogTimestamps();
94 bool Collect();
95
96 168 uint64_t preserved_catalog_count() const { return preserved_catalogs_; }
97 38 uint64_t condemned_catalog_count() const { return condemned_catalogs_; }
98 2 uint64_t condemned_objects_count() const { return condemned_objects_; }
99 uint64_t duplicate_delete_requests() const {
100 return duplicate_delete_requests_; }
101 uint64_t condemned_bytes_count() const { return condemned_bytes_; }
102 42 uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; }
103
104 protected:
105 TraversalParameters GetTraversalParams(const Configuration &configuration);
106
107 void PreserveDataObjects(const TraversalCallbackDataTN &data);
108 void SweepDataObjects(const TraversalCallbackDataTN &data);
109
110 bool AnalyzePreservedCatalogTree();
111 bool CheckPreservedRevisions();
112 bool SweepReflog();
113
114 void CheckAndSweep(const shash::Any &hash);
115 void Sweep(const shash::Any &hash);
116 bool RemoveCatalogFromReflog(const shash::Any &catalog);
117
118 void PrintCatalogTreeEntry(const unsigned int tree_level,
119 const CatalogTN *catalog) const;
120 void LogDeletion(const shash::Any &hash) const;
121
122 private:
123 class ReflogBasedInfoShim :
124 public swissknife::CatalogTraversalInfoShim<CatalogTN>
125 {
126 public:
127 88 explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) {
128 88 pthread_mutex_init(&reflog_mutex_, NULL);
129 88 }
130 44 virtual ~ReflogBasedInfoShim() {
131 88 pthread_mutex_destroy(&reflog_mutex_);
132 }
133 18 virtual uint64_t GetLastModified(const CatalogTN *catalog) {
134 uint64_t timestamp;
135 36 MutexLockGuard m(&reflog_mutex_);
136
1/2
✓ Branch 2 taken 18 times.
✗ Branch 3 not taken.
36 bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp);
137
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
72 return retval ? timestamp : catalog->GetLastModified();
138 36 }
139
140 private:
141 ReflogTN *reflog_;
142 pthread_mutex_t reflog_mutex_;
143 };
144
145 const Configuration configuration_;
146 ReflogBasedInfoShim catalog_info_shim_;
147 CatalogTraversalT traversal_;
148 HashFilterT hash_filter_;
149 HashFilterT hash_map_delete_requests_;
150
151
152 bool use_reflog_timestamps_;
153 /**
154 * A marker for the garbage collection grace period, the time span that is
155 * walked back from the current head catalog. There can be named snapshots
156 * older than this snapshot. The oldest_trunk_catalog_ is used as a marker
157 * for when to remove auxiliary files (meta info, history, ...).
158 */
159 uint64_t oldest_trunk_catalog_;
160 bool oldest_trunk_catalog_found_;
161 uint64_t preserved_catalogs_;
162 /**
163 * Number of catalogs in the reflog that are to be deleted (in fact, some of
164 * them might not exist anymore).
165 */
166 uint64_t unreferenced_trees_;
167 /**
168 * Number of root catalogs garbage collected, count grows as GC progresses
169 */
170 uint64_t condemned_trees_;
171 /**
172 * Number of catalogs garbage collected, count grows as GC progresses
173 */
174 uint64_t condemned_catalogs_;
175 /**
176 * Keeps track of the last status report issued, a value between 0 and 1
177 */
178 float last_reported_status_;
179
180 uint64_t condemned_objects_;
181 uint64_t condemned_bytes_;
182 uint64_t duplicate_delete_requests_;
183 };
184
185 #include "garbage_collector_impl.h"
186
187 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
188