GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/garbage_collector_impl.h
Date: 2026-06-21 02:37:04
Exec Total Coverage
Lines: 134 175 76.6%
Branches: 78 238 32.8%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7
8 #include <algorithm>
9 #include <limits>
10 #include <string>
11 #include <vector>
12
13 #include "garbage_collection/garbage_collector.h"
14 #include "util/logging.h"
15 #include "util/string.h"
16
17 template<class CatalogTraversalT, class HashFilterT>
18 const uint64_t GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration::
19 kFullHistory = std::numeric_limits<uint64_t>::max();
20
21 template<class CatalogTraversalT, class HashFilterT>
22 const uint64_t GarbageCollector<CatalogTraversalT,
23 HashFilterT>::Configuration::kNoHistory = 0;
24
25 template<class CatalogTraversalT, class HashFilterT>
26 const time_t GarbageCollector<CatalogTraversalT,
27 HashFilterT>::Configuration::kNoTimestamp = 0;
28
29
30 template<class CatalogTraversalT, class HashFilterT>
31 1100 GarbageCollector<CatalogTraversalT, HashFilterT>::GarbageCollector(
32 const Configuration &configuration)
33 1100 : configuration_(configuration)
34 1100 , catalog_info_shim_(configuration.reflog)
35
1/2
✓ Branch 1 taken 550 times.
✗ Branch 2 not taken.
1100 , traversal_(
36
1/2
✓ Branch 1 taken 550 times.
✗ Branch 2 not taken.
1100 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37 configuration))
38 1100 , hash_filter_()
39 1100 , hash_map_delete_requests_()
40 1100 , use_reflog_timestamps_(false)
41 1100 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42 1100 , oldest_trunk_catalog_found_(false)
43 1100 , preserved_catalogs_(0)
44 1100 , unreferenced_trees_(0)
45 1100 , condemned_trees_(0)
46 1100 , condemned_catalogs_(0)
47 1100 , last_reported_status_(0.0)
48 1100 , condemned_objects_(0)
49 1100 , condemned_bytes_(0)
50 1100 , duplicate_delete_requests_(0) {
51
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 550 times.
1100 assert(configuration_.uploader != NULL);
52 1100 }
53
54
55 template<class CatalogTraversalT, class HashFilterT>
56 50 void GarbageCollector<CatalogTraversalT, HashFilterT>::UseReflogTimestamps() {
57 50 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
58 50 use_reflog_timestamps_ = true;
59 50 }
60
61
62 template<class CatalogTraversalT, class HashFilterT>
63 typename GarbageCollector<CatalogTraversalT, HashFilterT>::TraversalParameters
64 1100 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
65 const GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration
66 &config) {
67 1100 TraversalParameters params;
68 1100 params.object_fetcher = config.object_fetcher;
69 1100 params.history = config.keep_history_depth;
70 1100 params.timestamp = config.keep_history_timestamp;
71 1100 params.no_repeat_history = true;
72 1100 params.ignore_load_failure = true;
73 1100 params.quiet = !config.verbose;
74 1100 params.num_threads = config.num_threads;
75 1100 return params;
76 }
77
78
79 template<class CatalogTraversalT, class HashFilterT>
80 11650 void GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects(
81 const GarbageCollector<CatalogTraversalT,
82 HashFilterT>::TraversalCallbackDataTN
83 &data // NOLINT(runtime/references)
84 ) {
85 11650 ++preserved_catalogs_;
86
87
2/2
✓ Branch 1 taken 1625 times.
✓ Branch 2 taken 4200 times.
11650 if (data.catalog->IsRoot()) {
88 6500 const uint64_t mtime = use_reflog_timestamps_
89
2/2
✓ Branch 0 taken 125 times.
✓ Branch 1 taken 1500 times.
3500 ? catalog_info_shim_.GetLastModified(
90
1/2
✓ Branch 1 taken 125 times.
✗ Branch 2 not taken.
250 data.catalog)
91 3000 : data.catalog->GetLastModified();
92
2/2
✓ Branch 0 taken 1150 times.
✓ Branch 1 taken 475 times.
3250 if (!oldest_trunk_catalog_found_)
93 2300 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
94
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1625 times.
3250 if (configuration_.verbose) {
95 const uint64_t rev = data.catalog->revision();
96 LogCvmfs(
97 kLogGc, kLogStdout | kLogDebug,
98 "Preserving Revision %" PRIu64 " (%s / added @ %s)", rev,
99 StringifyTime(data.catalog->GetLastModified(), true).c_str(),
100 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog), true)
101 .c_str());
102 PrintCatalogTreeEntry(data.tree_level, data.catalog);
103 }
104
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1625 times.
3250 if (data.catalog->schema() < 0.99) {
105 LogCvmfs(
106 kLogGc, kLogStdout | kLogDebug,
107 "Warning: "
108 "legacy catalog does not provide access to nested catalog "
109 "hierarchy.\n"
110 " Some unreferenced objects may remain in the repository.");
111 }
112 }
113
114 // the hash of the actual catalog needs to be preserved
115
1/2
✓ Branch 2 taken 5825 times.
✗ Branch 3 not taken.
11650 hash_filter_.Fill(data.catalog->hash());
116
117 // all the objects referenced from this catalog need to be preserved
118
1/2
✓ Branch 1 taken 5825 times.
✗ Branch 2 not taken.
11650 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
119 11650 typename HashVector::const_iterator i = referenced_hashes.begin();
120 11650 const typename HashVector::const_iterator iend = referenced_hashes.end();
121
2/2
✓ Branch 2 taken 29825 times.
✓ Branch 3 taken 5825 times.
71300 for (; i != iend; ++i) {
122
1/2
✓ Branch 2 taken 29825 times.
✗ Branch 3 not taken.
59650 hash_filter_.Fill(*i);
123 }
124 11650 }
125
126
127 template<class CatalogTraversalT, class HashFilterT>
128 3300 void GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects(
129 const GarbageCollector<CatalogTraversalT,
130 HashFilterT>::TraversalCallbackDataTN
131 &data // NOLINT(runtime/references)
132 ) {
133 3300 ++condemned_catalogs_;
134
2/2
✓ Branch 1 taken 625 times.
✓ Branch 2 taken 1025 times.
3300 if (data.catalog->IsRoot())
135 1250 ++condemned_trees_;
136
137
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1650 times.
3300 if (configuration_.verbose) {
138 if (data.catalog->IsRoot()) {
139 const uint64_t rev = data.catalog->revision();
140 const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
141 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
142 "Sweeping Revision %" PRIu64 " (%s)", rev,
143 StringifyTime(mtime, true).c_str());
144 }
145 PrintCatalogTreeEntry(data.tree_level, data.catalog);
146 }
147
148 // all the objects referenced from this catalog need to be checked against the
149 // preserved hashes in the hash_filter_ and possibly deleted
150
1/2
✓ Branch 1 taken 1650 times.
✗ Branch 2 not taken.
3300 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
151 3300 typename HashVector::const_iterator i = referenced_hashes.begin();
152 3300 const typename HashVector::const_iterator iend = referenced_hashes.end();
153
2/2
✓ Branch 2 taken 9975 times.
✓ Branch 3 taken 1650 times.
23250 for (; i != iend; ++i) {
154
1/2
✓ Branch 2 taken 9975 times.
✗ Branch 3 not taken.
19950 CheckAndSweep(*i);
155 }
156
157 // the catalog itself is also condemned and needs to be removed
158
1/2
✓ Branch 2 taken 1650 times.
✗ Branch 3 not taken.
3300 CheckAndSweep(data.catalog->hash());
159
160 3300 float threshold = static_cast<float>(condemned_trees_)
161 3300 / static_cast<float>(unreferenced_trees_);
162
2/2
✓ Branch 0 taken 625 times.
✓ Branch 1 taken 1025 times.
3300 if (threshold > last_reported_status_ + 0.1) {
163
1/4
✓ Branch 1 taken 625 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
2500 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
164 " - %02.0f%% %" PRIu64 " / %" PRIu64
165 " unreferenced revisions removed [%s]",
166
1/2
✓ Branch 1 taken 625 times.
✗ Branch 2 not taken.
1250 100.0 * threshold, condemned_trees_, unreferenced_trees_,
167 RfcTimestamp().c_str());
168 1250 last_reported_status_ = threshold;
169 }
170 3300 }
171
172
173 template<class CatalogTraversalT, class HashFilterT>
174 23250 void GarbageCollector<CatalogTraversalT, HashFilterT>::CheckAndSweep(
175 const shash::Any &hash) {
176
2/2
✓ Branch 1 taken 5350 times.
✓ Branch 2 taken 6275 times.
23250 if (!hash_filter_.Contains(hash)) {
177
2/2
✓ Branch 1 taken 4525 times.
✓ Branch 2 taken 825 times.
10700 if (!hash_map_delete_requests_.Contains(hash)) {
178 9050 hash_map_delete_requests_.Fill(hash);
179 9050 Sweep(hash);
180 } else {
181 1650 ++duplicate_delete_requests_;
182
1/2
✓ Branch 3 taken 825 times.
✗ Branch 4 not taken.
1650 LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
183 hash.ToString().c_str());
184 }
185 }
186 23250 }
187
188
189 template<class CatalogTraversalT, class HashFilterT>
190 9050 void GarbageCollector<CatalogTraversalT, HashFilterT>::Sweep(
191 const shash::Any &hash) {
192 9050 ++condemned_objects_;
193
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4525 times.
9050 if (configuration_.extended_stats) {
194 if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
195 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
196 if (condemned_bytes > 0) {
197 condemned_bytes_ += condemned_bytes;
198 }
199 }
200 }
201
202 9050 LogDeletion(hash);
203
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4525 times.
9050 if (configuration_.dry_run) {
204 return;
205 }
206
207 9050 configuration_.uploader->RemoveAsync(hash);
208 }
209
210
211 template<class CatalogTraversalT, class HashFilterT>
212 1350 bool GarbageCollector<CatalogTraversalT, HashFilterT>::RemoveCatalogFromReflog(
213 const shash::Any &catalog) {
214
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 675 times.
1350 assert(catalog.suffix == shash::kSuffixCatalog);
215
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 675 times.
1350 return (configuration_.dry_run) ? true
216 1350 : configuration_.reflog->Remove(catalog);
217 }
218
219
220 template<class CatalogTraversalT, class HashFilterT>
221 1050 bool GarbageCollector<CatalogTraversalT, HashFilterT>::Collect() {
222
1/2
✓ Branch 2 taken 525 times.
✗ Branch 3 not taken.
2100 return AnalyzePreservedCatalogTree() && CheckPreservedRevisions()
223
2/4
✓ Branch 0 taken 525 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 525 times.
✗ Branch 4 not taken.
2100 && SweepReflog();
224 }
225
226
227 template<class CatalogTraversalT, class HashFilterT>
228 1050 bool GarbageCollector<CatalogTraversalT,
229 HashFilterT>::AnalyzePreservedCatalogTree() {
230
1/2
✓ Branch 3 taken 525 times.
✗ Branch 4 not taken.
1050 LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
231 RfcTimestamp().c_str());
232
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 525 times.
1050 if (configuration_.verbose) {
233 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
234 "Preserving data objects in latest revision");
235 }
236
237 typename CatalogTraversalT::CallbackTN
238 1050 *callback = traversal_.RegisterListener(
239 &GarbageCollector<CatalogTraversalT,
240 HashFilterT>::PreserveDataObjects,
241 this);
242
243 1050 bool success = traversal_.Traverse();
244 1050 oldest_trunk_catalog_found_ = true;
245
2/4
✓ Branch 0 taken 525 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 525 times.
✗ Branch 4 not taken.
1050 success = success && traversal_.TraverseNamedSnapshots();
246 1050 traversal_.UnregisterListener(callback);
247
248 1050 return success;
249 }
250
251
252 template<class CatalogTraversalT, class HashFilterT>
253 1050 bool GarbageCollector<CatalogTraversalT,
254 HashFilterT>::CheckPreservedRevisions() {
255 1050 const bool keeps_revisions = (preserved_catalog_count() > 0);
256
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1050 if (!keeps_revisions && configuration_.verbose) {
257 LogCvmfs(kLogGc, kLogStderr | kLogDebug,
258 "This would delete everything! Abort.");
259 }
260
261 1050 return keeps_revisions;
262 }
263
264
265 template<class CatalogTraversalT, class HashFilterT>
266 1050 bool GarbageCollector<CatalogTraversalT, HashFilterT>::SweepReflog() {
267
2/4
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 525 times.
✗ Branch 6 not taken.
1050 LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
268 RfcTimestamp().c_str());
269
270 1050 const ReflogTN *reflog = configuration_.reflog;
271 1050 std::vector<shash::Any> catalogs;
272
4/8
✓ Branch 0 taken 525 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 525 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 525 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 525 times.
1050 if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
273 LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
274 return false;
275 }
276
277 typename CatalogTraversalT::CallbackTN
278
1/2
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
1050 *callback = traversal_.RegisterListener(
279 &GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects,
280 this);
281
282 1050 std::vector<shash::Any> to_sweep;
283 1050 std::vector<shash::Any>::const_iterator i = catalogs.begin();
284 1050 std::vector<shash::Any>::const_iterator iend = catalogs.end();
285
2/2
✓ Branch 2 taken 2300 times.
✓ Branch 3 taken 525 times.
5650 for (; i != iend; ++i) {
286
3/4
✓ Branch 2 taken 2300 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 675 times.
✓ Branch 5 taken 1625 times.
4600 if (!hash_filter_.Contains(*i)) {
287
1/2
✓ Branch 2 taken 675 times.
✗ Branch 3 not taken.
1350 to_sweep.push_back(*i);
288 }
289 }
290 1050 unreferenced_trees_ = to_sweep.size();
291
1/2
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
1050 bool success = traversal_.TraverseList(to_sweep,
292 CatalogTraversalT::kDepthFirst);
293
1/2
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
1050 traversal_.UnregisterListener(callback);
294
295 1050 i = to_sweep.begin();
296 1050 iend = to_sweep.end();
297
2/2
✓ Branch 1 taken 675 times.
✓ Branch 2 taken 525 times.
2400 for (; i != iend; ++i) {
298
3/6
✓ Branch 0 taken 675 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 675 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 675 times.
✗ Branch 7 not taken.
1350 success = success && RemoveCatalogFromReflog(*i);
299 }
300
301 // TODO(jblomer): turn current counters into perf::Counters
302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 525 times.
1050 if (configuration_.statistics) {
303 perf::Counter *ctr_preserved_catalogs = configuration_.statistics->Register(
304 "gc.n_preserved_catalogs", "number of live catalogs");
305 perf::Counter *ctr_condemned_catalogs = configuration_.statistics->Register(
306 "gc.n_condemned_catalogs", "number of dead catalogs");
307 perf::Counter *ctr_condemned_objects = configuration_.statistics->Register(
308 "gc.n_condemned_objects", "number of deleted objects");
309 perf::Counter *ctr_condemned_bytes = configuration_.statistics->Register(
310 "gc.sz_condemned_bytes", "number of deleted bytes");
311 perf::Counter
312 *ctr_duplicate_delete_requests = configuration_.statistics->Register(
313 "gc.n_duplicate_delete_requests",
314 "number of duplicated delete requests");
315 ctr_preserved_catalogs->Set(preserved_catalog_count());
316 ctr_condemned_catalogs->Set(condemned_catalog_count());
317 ctr_condemned_objects->Set(condemned_objects_count());
318 ctr_condemned_bytes->Set(condemned_bytes_count());
319 ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
320 }
321
322
1/2
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
1050 configuration_.uploader->WaitForUpload();
323
2/4
✓ Branch 1 taken 525 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 525 times.
✗ Branch 6 not taken.
1050 LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
324 RfcTimestamp().c_str());
325
3/6
✓ Branch 0 taken 525 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 525 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 525 times.
✗ Branch 6 not taken.
1050 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
326 1050 }
327
328
329 template<class CatalogTraversalT, class HashFilterT>
330 void GarbageCollector<CatalogTraversalT, HashFilterT>::PrintCatalogTreeEntry(
331 const unsigned int tree_level, const CatalogTN *catalog) const {
332 std::string tree_indent;
333 for (unsigned int i = 0; i < tree_level; ++i) {
334 tree_indent += "\u2502 ";
335 }
336 tree_indent += "\u251C\u2500 ";
337
338 const std::string hash_string = catalog->hash().ToString();
339 const std::string path = (catalog->mountpoint().IsEmpty())
340 ? "/"
341 : catalog->mountpoint().ToString();
342
343 LogCvmfs(kLogGc, kLogStdout, "%s%s %s", tree_indent.c_str(),
344 hash_string.c_str(), path.c_str());
345 LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s", hash_string.c_str(),
346 path.c_str());
347 }
348
349
350 template<class CatalogTraversalT, class HashFilterT>
351 9050 void GarbageCollector<CatalogTraversalT, HashFilterT>::LogDeletion(
352 const shash::Any &hash) const {
353
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4525 times.
9050 if (configuration_.verbose) {
354 LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
355 hash.ToStringWithSuffix().c_str());
356 }
357
358
2/2
✓ Branch 1 taken 275 times.
✓ Branch 2 taken 4250 times.
9050 if (configuration_.has_deletion_log()) {
359
1/2
✓ Branch 3 taken 275 times.
✗ Branch 4 not taken.
550 const int written = fprintf(configuration_.deleted_objects_logfile, "%s\n",
360 hash.ToStringWithSuffix().c_str());
361
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 275 times.
550 if (written < 0) {
362 LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
363 }
364 }
365 9050 }
366
367 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
368