GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/garbage_collector_impl.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 134 175 76.6%
Branches: 78 238 32.8%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7
8 #include <algorithm>
9 #include <limits>
10 #include <string>
11 #include <vector>
12
13 #include "garbage_collection/garbage_collector.h"
14 #include "util/logging.h"
15 #include "util/string.h"
16
17 template<class CatalogTraversalT, class HashFilterT>
18 const uint64_t GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration::
19 kFullHistory = std::numeric_limits<uint64_t>::max();
20
21 template<class CatalogTraversalT, class HashFilterT>
22 const uint64_t GarbageCollector<CatalogTraversalT,
23 HashFilterT>::Configuration::kNoHistory = 0;
24
25 template<class CatalogTraversalT, class HashFilterT>
26 const time_t GarbageCollector<CatalogTraversalT,
27 HashFilterT>::Configuration::kNoTimestamp = 0;
28
29
30 template<class CatalogTraversalT, class HashFilterT>
31 1892 GarbageCollector<CatalogTraversalT, HashFilterT>::GarbageCollector(
32 const Configuration &configuration)
33 1892 : configuration_(configuration)
34 1892 , catalog_info_shim_(configuration.reflog)
35
1/2
✓ Branch 1 taken 946 times.
✗ Branch 2 not taken.
1892 , traversal_(
36
1/2
✓ Branch 1 taken 946 times.
✗ Branch 2 not taken.
1892 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37 configuration))
38 1892 , hash_filter_()
39 1892 , hash_map_delete_requests_()
40 1892 , use_reflog_timestamps_(false)
41 1892 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42 1892 , oldest_trunk_catalog_found_(false)
43 1892 , preserved_catalogs_(0)
44 1892 , unreferenced_trees_(0)
45 1892 , condemned_trees_(0)
46 1892 , condemned_catalogs_(0)
47 1892 , last_reported_status_(0.0)
48 1892 , condemned_objects_(0)
49 1892 , condemned_bytes_(0)
50 1892 , duplicate_delete_requests_(0) {
51
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 946 times.
1892 assert(configuration_.uploader != NULL);
52 1892 }
53
54
55 template<class CatalogTraversalT, class HashFilterT>
56 86 void GarbageCollector<CatalogTraversalT, HashFilterT>::UseReflogTimestamps() {
57 86 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
58 86 use_reflog_timestamps_ = true;
59 86 }
60
61
62 template<class CatalogTraversalT, class HashFilterT>
63 typename GarbageCollector<CatalogTraversalT, HashFilterT>::TraversalParameters
64 1892 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
65 const GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration
66 &config) {
67 1892 TraversalParameters params;
68 1892 params.object_fetcher = config.object_fetcher;
69 1892 params.history = config.keep_history_depth;
70 1892 params.timestamp = config.keep_history_timestamp;
71 1892 params.no_repeat_history = true;
72 1892 params.ignore_load_failure = true;
73 1892 params.quiet = !config.verbose;
74 1892 params.num_threads = config.num_threads;
75 1892 return params;
76 }
77
78
79 template<class CatalogTraversalT, class HashFilterT>
80 20038 void GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects(
81 const GarbageCollector<CatalogTraversalT,
82 HashFilterT>::TraversalCallbackDataTN
83 &data // NOLINT(runtime/references)
84 ) {
85 20038 ++preserved_catalogs_;
86
87
2/2
✓ Branch 1 taken 2795 times.
✓ Branch 2 taken 7224 times.
20038 if (data.catalog->IsRoot()) {
88 11180 const uint64_t mtime = use_reflog_timestamps_
89
2/2
✓ Branch 0 taken 215 times.
✓ Branch 1 taken 2580 times.
6020 ? catalog_info_shim_.GetLastModified(
90
1/2
✓ Branch 1 taken 215 times.
✗ Branch 2 not taken.
430 data.catalog)
91 5160 : data.catalog->GetLastModified();
92
2/2
✓ Branch 0 taken 1978 times.
✓ Branch 1 taken 817 times.
5590 if (!oldest_trunk_catalog_found_)
93 3956 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
94
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2795 times.
5590 if (configuration_.verbose) {
95 const uint64_t rev = data.catalog->revision();
96 LogCvmfs(
97 kLogGc, kLogStdout | kLogDebug,
98 "Preserving Revision %" PRIu64 " (%s / added @ %s)", rev,
99 StringifyTime(data.catalog->GetLastModified(), true).c_str(),
100 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog), true)
101 .c_str());
102 PrintCatalogTreeEntry(data.tree_level, data.catalog);
103 }
104
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2795 times.
5590 if (data.catalog->schema() < 0.99) {
105 LogCvmfs(
106 kLogGc, kLogStdout | kLogDebug,
107 "Warning: "
108 "legacy catalog does not provide access to nested catalog "
109 "hierarchy.\n"
110 " Some unreferenced objects may remain in the repository.");
111 }
112 }
113
114 // the hash of the actual catalog needs to be preserved
115
1/2
✓ Branch 2 taken 10019 times.
✗ Branch 3 not taken.
20038 hash_filter_.Fill(data.catalog->hash());
116
117 // all the objects referenced from this catalog need to be preserved
118
1/2
✓ Branch 1 taken 10019 times.
✗ Branch 2 not taken.
20038 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
119 20038 typename HashVector::const_iterator i = referenced_hashes.begin();
120 20038 const typename HashVector::const_iterator iend = referenced_hashes.end();
121
2/2
✓ Branch 2 taken 51299 times.
✓ Branch 3 taken 10019 times.
122636 for (; i != iend; ++i) {
122
1/2
✓ Branch 2 taken 51299 times.
✗ Branch 3 not taken.
102598 hash_filter_.Fill(*i);
123 }
124 20038 }
125
126
127 template<class CatalogTraversalT, class HashFilterT>
128 5676 void GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects(
129 const GarbageCollector<CatalogTraversalT,
130 HashFilterT>::TraversalCallbackDataTN
131 &data // NOLINT(runtime/references)
132 ) {
133 5676 ++condemned_catalogs_;
134
2/2
✓ Branch 1 taken 1075 times.
✓ Branch 2 taken 1763 times.
5676 if (data.catalog->IsRoot())
135 2150 ++condemned_trees_;
136
137
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2838 times.
5676 if (configuration_.verbose) {
138 if (data.catalog->IsRoot()) {
139 const uint64_t rev = data.catalog->revision();
140 const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
141 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
142 "Sweeping Revision %" PRIu64 " (%s)", rev,
143 StringifyTime(mtime, true).c_str());
144 }
145 PrintCatalogTreeEntry(data.tree_level, data.catalog);
146 }
147
148 // all the objects referenced from this catalog need to be checked against the
149 // preserved hashes in the hash_filter_ and possibly deleted
150
1/2
✓ Branch 1 taken 2838 times.
✗ Branch 2 not taken.
5676 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
151 5676 typename HashVector::const_iterator i = referenced_hashes.begin();
152 5676 const typename HashVector::const_iterator iend = referenced_hashes.end();
153
2/2
✓ Branch 2 taken 17157 times.
✓ Branch 3 taken 2838 times.
39990 for (; i != iend; ++i) {
154
1/2
✓ Branch 2 taken 17157 times.
✗ Branch 3 not taken.
34314 CheckAndSweep(*i);
155 }
156
157 // the catalog itself is also condemned and needs to be removed
158
1/2
✓ Branch 2 taken 2838 times.
✗ Branch 3 not taken.
5676 CheckAndSweep(data.catalog->hash());
159
160 5676 float threshold = static_cast<float>(condemned_trees_)
161 5676 / static_cast<float>(unreferenced_trees_);
162
2/2
✓ Branch 0 taken 1075 times.
✓ Branch 1 taken 1763 times.
5676 if (threshold > last_reported_status_ + 0.1) {
163
1/4
✓ Branch 1 taken 1075 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
4300 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
164 " - %02.0f%% %" PRIu64 " / %" PRIu64
165 " unreferenced revisions removed [%s]",
166
1/2
✓ Branch 1 taken 1075 times.
✗ Branch 2 not taken.
2150 100.0 * threshold, condemned_trees_, unreferenced_trees_,
167 RfcTimestamp().c_str());
168 2150 last_reported_status_ = threshold;
169 }
170 5676 }
171
172
173 template<class CatalogTraversalT, class HashFilterT>
174 39990 void GarbageCollector<CatalogTraversalT, HashFilterT>::CheckAndSweep(
175 const shash::Any &hash) {
176
2/2
✓ Branch 1 taken 9202 times.
✓ Branch 2 taken 10793 times.
39990 if (!hash_filter_.Contains(hash)) {
177
2/2
✓ Branch 1 taken 7783 times.
✓ Branch 2 taken 1419 times.
18404 if (!hash_map_delete_requests_.Contains(hash)) {
178 15566 hash_map_delete_requests_.Fill(hash);
179 15566 Sweep(hash);
180 } else {
181 2838 ++duplicate_delete_requests_;
182
1/2
✓ Branch 3 taken 1419 times.
✗ Branch 4 not taken.
2838 LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
183 hash.ToString().c_str());
184 }
185 }
186 39990 }
187
188
189 template<class CatalogTraversalT, class HashFilterT>
190 15566 void GarbageCollector<CatalogTraversalT, HashFilterT>::Sweep(
191 const shash::Any &hash) {
192 15566 ++condemned_objects_;
193
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7783 times.
15566 if (configuration_.extended_stats) {
194 if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
195 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
196 if (condemned_bytes > 0) {
197 condemned_bytes_ += condemned_bytes;
198 }
199 }
200 }
201
202 15566 LogDeletion(hash);
203
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7783 times.
15566 if (configuration_.dry_run) {
204 return;
205 }
206
207 15566 configuration_.uploader->RemoveAsync(hash);
208 }
209
210
211 template<class CatalogTraversalT, class HashFilterT>
212 2322 bool GarbageCollector<CatalogTraversalT, HashFilterT>::RemoveCatalogFromReflog(
213 const shash::Any &catalog) {
214
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1161 times.
2322 assert(catalog.suffix == shash::kSuffixCatalog);
215
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1161 times.
2322 return (configuration_.dry_run) ? true
216 2322 : configuration_.reflog->Remove(catalog);
217 }
218
219
220 template<class CatalogTraversalT, class HashFilterT>
221 1806 bool GarbageCollector<CatalogTraversalT, HashFilterT>::Collect() {
222
1/2
✓ Branch 2 taken 903 times.
✗ Branch 3 not taken.
3612 return AnalyzePreservedCatalogTree() && CheckPreservedRevisions()
223
2/4
✓ Branch 0 taken 903 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 903 times.
✗ Branch 4 not taken.
3612 && SweepReflog();
224 }
225
226
227 template<class CatalogTraversalT, class HashFilterT>
228 1806 bool GarbageCollector<CatalogTraversalT,
229 HashFilterT>::AnalyzePreservedCatalogTree() {
230
1/2
✓ Branch 3 taken 903 times.
✗ Branch 4 not taken.
1806 LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
231 RfcTimestamp().c_str());
232
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 903 times.
1806 if (configuration_.verbose) {
233 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
234 "Preserving data objects in latest revision");
235 }
236
237 typename CatalogTraversalT::CallbackTN
238 1806 *callback = traversal_.RegisterListener(
239 &GarbageCollector<CatalogTraversalT,
240 HashFilterT>::PreserveDataObjects,
241 this);
242
243 1806 bool success = traversal_.Traverse();
244 1806 oldest_trunk_catalog_found_ = true;
245
2/4
✓ Branch 0 taken 903 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 903 times.
✗ Branch 4 not taken.
1806 success = success && traversal_.TraverseNamedSnapshots();
246 1806 traversal_.UnregisterListener(callback);
247
248 1806 return success;
249 }
250
251
252 template<class CatalogTraversalT, class HashFilterT>
253 1806 bool GarbageCollector<CatalogTraversalT,
254 HashFilterT>::CheckPreservedRevisions() {
255 1806 const bool keeps_revisions = (preserved_catalog_count() > 0);
256
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1806 if (!keeps_revisions && configuration_.verbose) {
257 LogCvmfs(kLogGc, kLogStderr | kLogDebug,
258 "This would delete everything! Abort.");
259 }
260
261 1806 return keeps_revisions;
262 }
263
264
265 template<class CatalogTraversalT, class HashFilterT>
266 1806 bool GarbageCollector<CatalogTraversalT, HashFilterT>::SweepReflog() {
267
2/4
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 903 times.
✗ Branch 6 not taken.
1806 LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
268 RfcTimestamp().c_str());
269
270 1806 const ReflogTN *reflog = configuration_.reflog;
271 1806 std::vector<shash::Any> catalogs;
272
4/8
✓ Branch 0 taken 903 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 903 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 903 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 903 times.
1806 if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
273 LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
274 return false;
275 }
276
277 typename CatalogTraversalT::CallbackTN
278
1/2
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
1806 *callback = traversal_.RegisterListener(
279 &GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects,
280 this);
281
282 1806 std::vector<shash::Any> to_sweep;
283 1806 std::vector<shash::Any>::const_iterator i = catalogs.begin();
284 1806 std::vector<shash::Any>::const_iterator iend = catalogs.end();
285
2/2
✓ Branch 2 taken 3956 times.
✓ Branch 3 taken 903 times.
9718 for (; i != iend; ++i) {
286
3/4
✓ Branch 2 taken 3956 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1161 times.
✓ Branch 5 taken 2795 times.
7912 if (!hash_filter_.Contains(*i)) {
287
1/2
✓ Branch 2 taken 1161 times.
✗ Branch 3 not taken.
2322 to_sweep.push_back(*i);
288 }
289 }
290 1806 unreferenced_trees_ = to_sweep.size();
291
1/2
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
1806 bool success = traversal_.TraverseList(to_sweep,
292 CatalogTraversalT::kDepthFirst);
293
1/2
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
1806 traversal_.UnregisterListener(callback);
294
295 1806 i = to_sweep.begin();
296 1806 iend = to_sweep.end();
297
2/2
✓ Branch 1 taken 1161 times.
✓ Branch 2 taken 903 times.
4128 for (; i != iend; ++i) {
298
3/6
✓ Branch 0 taken 1161 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 1161 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1161 times.
✗ Branch 7 not taken.
2322 success = success && RemoveCatalogFromReflog(*i);
299 }
300
301 // TODO(jblomer): turn current counters into perf::Counters
302
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 903 times.
1806 if (configuration_.statistics) {
303 perf::Counter *ctr_preserved_catalogs = configuration_.statistics->Register(
304 "gc.n_preserved_catalogs", "number of live catalogs");
305 perf::Counter *ctr_condemned_catalogs = configuration_.statistics->Register(
306 "gc.n_condemned_catalogs", "number of dead catalogs");
307 perf::Counter *ctr_condemned_objects = configuration_.statistics->Register(
308 "gc.n_condemned_objects", "number of deleted objects");
309 perf::Counter *ctr_condemned_bytes = configuration_.statistics->Register(
310 "gc.sz_condemned_bytes", "number of deleted bytes");
311 perf::Counter
312 *ctr_duplicate_delete_requests = configuration_.statistics->Register(
313 "gc.n_duplicate_delete_requests",
314 "number of duplicated delete requests");
315 ctr_preserved_catalogs->Set(preserved_catalog_count());
316 ctr_condemned_catalogs->Set(condemned_catalog_count());
317 ctr_condemned_objects->Set(condemned_objects_count());
318 ctr_condemned_bytes->Set(condemned_bytes_count());
319 ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
320 }
321
322
1/2
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
1806 configuration_.uploader->WaitForUpload();
323
2/4
✓ Branch 1 taken 903 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 903 times.
✗ Branch 6 not taken.
1806 LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
324 RfcTimestamp().c_str());
325
3/6
✓ Branch 0 taken 903 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 903 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 903 times.
✗ Branch 6 not taken.
1806 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
326 1806 }
327
328
329 template<class CatalogTraversalT, class HashFilterT>
330 void GarbageCollector<CatalogTraversalT, HashFilterT>::PrintCatalogTreeEntry(
331 const unsigned int tree_level, const CatalogTN *catalog) const {
332 std::string tree_indent;
333 for (unsigned int i = 0; i < tree_level; ++i) {
334 tree_indent += "\u2502 ";
335 }
336 tree_indent += "\u251C\u2500 ";
337
338 const std::string hash_string = catalog->hash().ToString();
339 const std::string path = (catalog->mountpoint().IsEmpty())
340 ? "/"
341 : catalog->mountpoint().ToString();
342
343 LogCvmfs(kLogGc, kLogStdout, "%s%s %s", tree_indent.c_str(),
344 hash_string.c_str(), path.c_str());
345 LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s", hash_string.c_str(),
346 path.c_str());
347 }
348
349
350 template<class CatalogTraversalT, class HashFilterT>
351 15566 void GarbageCollector<CatalogTraversalT, HashFilterT>::LogDeletion(
352 const shash::Any &hash) const {
353
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7783 times.
15566 if (configuration_.verbose) {
354 LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
355 hash.ToStringWithSuffix().c_str());
356 }
357
358
2/2
✓ Branch 1 taken 473 times.
✓ Branch 2 taken 7310 times.
15566 if (configuration_.has_deletion_log()) {
359
1/2
✓ Branch 3 taken 473 times.
✗ Branch 4 not taken.
946 const int written = fprintf(configuration_.deleted_objects_logfile, "%s\n",
360 hash.ToStringWithSuffix().c_str());
361
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 473 times.
946 if (written < 0) {
362 LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
363 }
364 }
365 15566 }
366
367 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
368