GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/garbage_collector_impl.h
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 136 178 76.4%
Branches: 78 240 32.5%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7
8 #include "garbage_collection/garbage_collector.h"
9
10 #include <algorithm>
11 #include <limits>
12 #include <string>
13 #include <vector>
14
15 #include "util/logging.h"
16 #include "util/string.h"
17
18 template<class CatalogTraversalT, class HashFilterT>
19 const uint64_t GarbageCollector<CatalogTraversalT,
20 HashFilterT>::Configuration::kFullHistory =
21 std::numeric_limits<uint64_t>::max();
22
23 template<class CatalogTraversalT, class HashFilterT>
24 const uint64_t GarbageCollector<CatalogTraversalT,
25 HashFilterT>::Configuration::kNoHistory = 0;
26
27 template<class CatalogTraversalT, class HashFilterT>
28 const time_t GarbageCollector<CatalogTraversalT,
29 HashFilterT>::Configuration::kNoTimestamp = 0;
30
31
32 template <class CatalogTraversalT, class HashFilterT>
33 88 GarbageCollector<CatalogTraversalT, HashFilterT>::GarbageCollector(
34 const Configuration &configuration)
35 88 : configuration_(configuration)
36 88 , catalog_info_shim_(configuration.reflog)
37
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
88 , traversal_(
38
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
88 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
39 configuration))
40 88 , hash_filter_()
41 88 , hash_map_delete_requests_()
42 88 , use_reflog_timestamps_(false)
43 88 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
44 88 , oldest_trunk_catalog_found_(false)
45 88 , preserved_catalogs_(0)
46 88 , unreferenced_trees_(0)
47 88 , condemned_trees_(0)
48 88 , condemned_catalogs_(0)
49 88 , last_reported_status_(0.0)
50 88 , condemned_objects_(0)
51 88 , condemned_bytes_(0)
52 88 , duplicate_delete_requests_(0)
53 {
54
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44 times.
88 assert(configuration_.uploader != NULL);
55 88 }
56
57
58 template <class CatalogTraversalT, class HashFilterT>
59 4 void GarbageCollector<CatalogTraversalT, HashFilterT>::UseReflogTimestamps() {
60 4 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
61 4 use_reflog_timestamps_ = true;
62 4 }
63
64
65 template <class CatalogTraversalT, class HashFilterT>
66 typename GarbageCollector<CatalogTraversalT, HashFilterT>::TraversalParameters
67 88 GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
68 const GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration &config)
69 {
70 88 TraversalParameters params;
71 88 params.object_fetcher = config.object_fetcher;
72 88 params.history = config.keep_history_depth;
73 88 params.timestamp = config.keep_history_timestamp;
74 88 params.no_repeat_history = true;
75 88 params.ignore_load_failure = true;
76 88 params.quiet = !config.verbose;
77 88 params.num_threads = config.num_threads;
78 88 return params;
79 }
80
81
82 template <class CatalogTraversalT, class HashFilterT>
83 932 void GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects(
84 const GarbageCollector<CatalogTraversalT, HashFilterT>::
85 TraversalCallbackDataTN &data // NOLINT(runtime/references)
86 ) {
87 932 ++preserved_catalogs_;
88
89
2/2
✓ Branch 1 taken 130 times.
✓ Branch 2 taken 336 times.
932 if (data.catalog->IsRoot()) {
90 520 const uint64_t mtime = use_reflog_timestamps_
91
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 120 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
260 ? catalog_info_shim_.GetLastModified(data.catalog)
92 240 : data.catalog->GetLastModified();
93
2/2
✓ Branch 0 taken 92 times.
✓ Branch 1 taken 38 times.
260 if (!oldest_trunk_catalog_found_)
94 184 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
95
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 130 times.
260 if (configuration_.verbose) {
96 const uint64_t rev = data.catalog->revision();
97 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
98 "Preserving Revision %" PRIu64 " (%s / added @ %s)",
99 rev,
100 StringifyTime(data.catalog->GetLastModified(), true).c_str(),
101 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
102 true).c_str());
103 PrintCatalogTreeEntry(data.tree_level, data.catalog);
104 }
105
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 130 times.
260 if (data.catalog->schema() < 0.99) {
106 LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Warning: "
107 "legacy catalog does not provide access to nested catalog hierarchy.\n"
108 " Some unreferenced objects may remain in the repository.");
109 }
110 }
111
112 // the hash of the actual catalog needs to be preserved
113
1/2
✓ Branch 2 taken 466 times.
✗ Branch 3 not taken.
932 hash_filter_.Fill(data.catalog->hash());
114
115 // all the objects referenced from this catalog need to be preserved
116
1/2
✓ Branch 1 taken 466 times.
✗ Branch 2 not taken.
932 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
117 932 typename HashVector::const_iterator i = referenced_hashes.begin();
118 932 const typename HashVector::const_iterator iend = referenced_hashes.end();
119
2/2
✓ Branch 2 taken 2386 times.
✓ Branch 3 taken 466 times.
5704 for (; i != iend; ++i) {
120
1/2
✓ Branch 2 taken 2386 times.
✗ Branch 3 not taken.
4772 hash_filter_.Fill(*i);
121 }
122 932 }
123
124
125 template <class CatalogTraversalT, class HashFilterT>
126 264 void GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects(
127 const GarbageCollector<CatalogTraversalT, HashFilterT>::
128 TraversalCallbackDataTN &data // NOLINT(runtime/references)
129 ) {
130 264 ++condemned_catalogs_;
131
2/2
✓ Branch 1 taken 50 times.
✓ Branch 2 taken 82 times.
264 if (data.catalog->IsRoot())
132 100 ++condemned_trees_;
133
134
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 132 times.
264 if (configuration_.verbose) {
135 if (data.catalog->IsRoot()) {
136 const uint64_t rev = data.catalog->revision();
137 const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
138 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
139 "Sweeping Revision %" PRIu64 " (%s)",
140 rev, StringifyTime(mtime, true).c_str());
141 }
142 PrintCatalogTreeEntry(data.tree_level, data.catalog);
143 }
144
145 // all the objects referenced from this catalog need to be checked against the
146 // preserved hashes in the hash_filter_ and possibly deleted
147
1/2
✓ Branch 1 taken 132 times.
✗ Branch 2 not taken.
264 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
148 264 typename HashVector::const_iterator i = referenced_hashes.begin();
149 264 const typename HashVector::const_iterator iend = referenced_hashes.end();
150
2/2
✓ Branch 2 taken 798 times.
✓ Branch 3 taken 132 times.
1860 for (; i != iend; ++i) {
151
1/2
✓ Branch 2 taken 798 times.
✗ Branch 3 not taken.
1596 CheckAndSweep(*i);
152 }
153
154 // the catalog itself is also condemned and needs to be removed
155
1/2
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
264 CheckAndSweep(data.catalog->hash());
156
157 264 float threshold =
158 264 static_cast<float>(condemned_trees_) /
159 264 static_cast<float>(unreferenced_trees_);
160
2/2
✓ Branch 0 taken 50 times.
✓ Branch 1 taken 82 times.
264 if (threshold > last_reported_status_ + 0.1) {
161
1/4
✓ Branch 1 taken 50 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
200 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
162 " - %02.0f%% %" PRIu64 " / %" PRIu64
163 " unreferenced revisions removed [%s]",
164
1/2
✓ Branch 1 taken 50 times.
✗ Branch 2 not taken.
100 100.0 * threshold, condemned_trees_, unreferenced_trees_,
165 RfcTimestamp().c_str());
166 100 last_reported_status_ = threshold;
167 }
168 264 }
169
170
171 template <class CatalogTraversalT, class HashFilterT>
172 1860 void GarbageCollector<CatalogTraversalT, HashFilterT>::CheckAndSweep(
173 const shash::Any &hash)
174 {
175
2/2
✓ Branch 1 taken 428 times.
✓ Branch 2 taken 502 times.
1860 if (!hash_filter_.Contains(hash)) {
176
2/2
✓ Branch 1 taken 362 times.
✓ Branch 2 taken 66 times.
856 if (!hash_map_delete_requests_.Contains(hash)) {
177 724 hash_map_delete_requests_.Fill(hash);
178 724 Sweep(hash);
179 } else {
180 132 ++duplicate_delete_requests_;
181
1/2
✓ Branch 3 taken 66 times.
✗ Branch 4 not taken.
132 LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
182 hash.ToString().c_str());
183 }
184 }
185 1860 }
186
187
188 template <class CatalogTraversalT, class HashFilterT>
189 724 void GarbageCollector<CatalogTraversalT, HashFilterT>::Sweep(
190 const shash::Any &hash) {
191 724 ++condemned_objects_;
192
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 362 times.
724 if (configuration_.extended_stats) {
193 if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
194 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
195 if (condemned_bytes > 0) {
196 condemned_bytes_ += condemned_bytes;
197 }
198 }
199 }
200
201 724 LogDeletion(hash);
202
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 362 times.
724 if (configuration_.dry_run) {
203 return;
204 }
205
206 724 configuration_.uploader->RemoveAsync(hash);
207 }
208
209
210 template <class CatalogTraversalT, class HashFilterT>
211 108 bool GarbageCollector<CatalogTraversalT, HashFilterT>::
212 RemoveCatalogFromReflog(const shash::Any &catalog)
213 {
214
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 54 times.
108 assert(catalog.suffix == shash::kSuffixCatalog);
215 108 return (configuration_.dry_run)
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 54 times.
108 ? true
217 108 : configuration_.reflog->Remove(catalog);
218 }
219
220
221 template <class CatalogTraversalT, class HashFilterT>
222 84 bool GarbageCollector<CatalogTraversalT, HashFilterT>::Collect() {
223
0/2
✗ Branch 1 not taken.
✗ Branch 2 not taken.
84 return AnalyzePreservedCatalogTree() &&
224
2/4
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
168 CheckPreservedRevisions() &&
225
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
168 SweepReflog();
226 }
227
228
229 template <class CatalogTraversalT, class HashFilterT>
230 84 bool GarbageCollector<CatalogTraversalT, HashFilterT>::
231 AnalyzePreservedCatalogTree()
232 {
233
1/2
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
84 LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
234 RfcTimestamp().c_str());
235
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
84 if (configuration_.verbose) {
236 LogCvmfs(kLogGc, kLogStdout | kLogDebug,
237 "Preserving data objects in latest revision");
238 }
239
240 typename CatalogTraversalT::CallbackTN *callback =
241 84 traversal_.RegisterListener(
242 &GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects,
243 this);
244
245 84 bool success = traversal_.Traverse();
246 84 oldest_trunk_catalog_found_ = true;
247
2/4
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
84 success = success && traversal_.TraverseNamedSnapshots();
248 84 traversal_.UnregisterListener(callback);
249
250 84 return success;
251 }
252
253
254 template <class CatalogTraversalT, class HashFilterT>
255 84 bool GarbageCollector<CatalogTraversalT, HashFilterT>::CheckPreservedRevisions()
256 {
257 84 const bool keeps_revisions = (preserved_catalog_count() > 0);
258
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
84 if (!keeps_revisions && configuration_.verbose) {
259 LogCvmfs(kLogGc, kLogStderr | kLogDebug,
260 "This would delete everything! Abort.");
261 }
262
263 84 return keeps_revisions;
264 }
265
266
267 template <class CatalogTraversalT, class HashFilterT>
268 84 bool GarbageCollector<CatalogTraversalT, HashFilterT>::SweepReflog() {
269
2/4
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 42 times.
✗ Branch 6 not taken.
84 LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
270 RfcTimestamp().c_str());
271
272 84 const ReflogTN *reflog = configuration_.reflog;
273 84 std::vector<shash::Any> catalogs;
274
4/8
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 42 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 42 times.
84 if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
275 LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
276 return false;
277 }
278
279 typename CatalogTraversalT::CallbackTN *callback =
280
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
84 traversal_.RegisterListener(
281 &GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects,
282 this);
283
284 84 std::vector<shash::Any> to_sweep;
285 84 std::vector<shash::Any>::const_iterator i = catalogs.begin();
286 84 std::vector<shash::Any>::const_iterator iend = catalogs.end();
287
2/2
✓ Branch 2 taken 184 times.
✓ Branch 3 taken 42 times.
452 for (; i != iend; ++i) {
288
3/4
✓ Branch 2 taken 184 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 54 times.
✓ Branch 5 taken 130 times.
368 if (!hash_filter_.Contains(*i)) {
289
1/2
✓ Branch 2 taken 54 times.
✗ Branch 3 not taken.
108 to_sweep.push_back(*i);
290 }
291 }
292 84 unreferenced_trees_ = to_sweep.size();
293
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
84 bool success = traversal_.TraverseList(to_sweep,
294 CatalogTraversalT::kDepthFirst);
295
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
84 traversal_.UnregisterListener(callback);
296
297 84 i = to_sweep.begin();
298 84 iend = to_sweep.end();
299
2/2
✓ Branch 1 taken 54 times.
✓ Branch 2 taken 42 times.
192 for (; i != iend; ++i) {
300
3/6
✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 54 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 54 times.
✗ Branch 7 not taken.
108 success = success && RemoveCatalogFromReflog(*i);
301 }
302
303 // TODO(jblomer): turn current counters into perf::Counters
304
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
84 if (configuration_.statistics) {
305 perf::Counter *ctr_preserved_catalogs =
306 configuration_.statistics->Register(
307 "gc.n_preserved_catalogs", "number of live catalogs");
308 perf::Counter *ctr_condemned_catalogs =
309 configuration_.statistics->Register(
310 "gc.n_condemned_catalogs", "number of dead catalogs");
311 perf::Counter *ctr_condemned_objects =
312 configuration_.statistics->Register(
313 "gc.n_condemned_objects", "number of deleted objects");
314 perf::Counter *ctr_condemned_bytes =
315 configuration_.statistics->Register(
316 "gc.sz_condemned_bytes", "number of deleted bytes");
317 perf::Counter *ctr_duplicate_delete_requests =
318 configuration_.statistics->Register(
319 "gc.n_duplicate_delete_requests", "number of duplicated delete requests");
320 ctr_preserved_catalogs->Set(preserved_catalog_count());
321 ctr_condemned_catalogs->Set(condemned_catalog_count());
322 ctr_condemned_objects->Set(condemned_objects_count());
323 ctr_condemned_bytes->Set(condemned_bytes_count());
324 ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
325 }
326
327
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
84 configuration_.uploader->WaitForUpload();
328
2/4
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 42 times.
✗ Branch 6 not taken.
84 LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
329 RfcTimestamp().c_str());
330
3/6
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 42 times.
✗ Branch 6 not taken.
84 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
331 84 }
332
333
334 template <class CatalogTraversalT, class HashFilterT>
335 void GarbageCollector<CatalogTraversalT, HashFilterT>::PrintCatalogTreeEntry(
336 const unsigned int tree_level,
337 const CatalogTN *catalog) const
338 {
339 std::string tree_indent;
340 for (unsigned int i = 0; i < tree_level; ++i) {
341 tree_indent += "\u2502 ";
342 }
343 tree_indent += "\u251C\u2500 ";
344
345 const std::string hash_string = catalog->hash().ToString();
346 const std::string path =
347 (catalog->mountpoint().IsEmpty()) ? "/" : catalog->mountpoint().ToString();
348
349 LogCvmfs(kLogGc, kLogStdout, "%s%s %s",
350 tree_indent.c_str(),
351 hash_string.c_str(),
352 path.c_str());
353 LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s",
354 hash_string.c_str(), path.c_str());
355 }
356
357
358 template <class CatalogTraversalT, class HashFilterT>
359 724 void GarbageCollector<CatalogTraversalT, HashFilterT>::LogDeletion(
360 const shash::Any &hash) const {
361
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 362 times.
724 if (configuration_.verbose) {
362 LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
363 hash.ToStringWithSuffix().c_str());
364 }
365
366
2/2
✓ Branch 1 taken 22 times.
✓ Branch 2 taken 340 times.
724 if (configuration_.has_deletion_log()) {
367
1/2
✓ Branch 3 taken 22 times.
✗ Branch 4 not taken.
44 const int written = fprintf(configuration_.deleted_objects_logfile,
368 "%s\n", hash.ToStringWithSuffix().c_str());
369
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
44 if (written < 0) {
370 LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
371 }
372 }
373 724 }
374
375 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
376