GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/garbage_collection/garbage_collector.h
Date: 2025-06-29 02:35:41
Exec Total Coverage
Lines: 26 29 89.7%
Branches: 2 4 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * The GarbageCollector class determines which data objects (represented by
5 * their content hashes) can be deleted as outdated garbage.
6 * Garbage collection is performed on the granularity of catalog revisions, thus
7 * a complete repository revision is either considered to be outdated or active.
8 * This way, a mountable repository revision stays completely usable (no nested
9 * catalogs or data objects become unavailable). A revision is defined by its
10 * root catalog; the revision numbers of nested catalogs are irrelevant, since
11 * they might be referenced by newer (preserved) repository revisions.
12 * Thus, garbage objects are those that are _not_ referenced by any of the pre-
13 * served root catalogs or their direct subordinate nested catalog tree.
14 *
15 * We use a two-stage approach:
16 *
17 * 1st Stage - Initialization
18 * The GarbageCollector is reading all the catalogs that are meant
19 * to be preserved. It builds up a filter (HashFilterT) containing
20 * all content hashes that are _not_ to be deleted.
21 *
22 * 2nd Stage - Sweeping
23 * The initialized HashFilterT is presented with all content
24 * hashes found in condemned catalogs and decides if they are
25 * referenced by the preserved catalog revisions or not.
26 *
27 * The GarbageCollector is templated with CatalogTraversalT mainly for
28 * testability and with HashFilterT as an instance of the Strategy Pattern to
29 * abstract from the actual hash filtering method to be used.
30 */
31
32 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
33 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
34
35 #include <inttypes.h>
36
37 #include <vector>
38
39 #include "catalog_traversal_parallel.h"
40 #include "garbage_collection/hash_filter.h"
41 #include "statistics.h"
42 #include "upload_facility.h"
43
44 template<class CatalogTraversalT, class HashFilterT>
45 class GarbageCollector {
46 protected:
47 typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN;
48 typedef typename ObjectFetcherTN::HistoryTN HistoryTN;
49 typedef typename ObjectFetcherTN::ReflogTN ReflogTN;
50 typedef typename CatalogTraversalT::CatalogTN CatalogTN;
51 typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN;
52 typedef typename CatalogTraversalT::Parameters TraversalParameters;
53 typedef std::vector<shash::Any> HashVector;
54
55 public:
56 struct Configuration {
57 static const uint64_t kFullHistory;
58 static const uint64_t kNoHistory;
59 static const time_t kNoTimestamp;
60 static const shash::Any kLatestHistoryDatabase;
61
62 1568 Configuration()
63 1568 : uploader(NULL)
64 1568 , object_fetcher(NULL)
65 1568 , reflog(NULL)
66 1568 , keep_history_depth(kFullHistory)
67 1568 , keep_history_timestamp(kNoTimestamp)
68 1568 , dry_run(false)
69 1568 , verbose(false)
70 1568 , deleted_objects_logfile(NULL)
71 1568 , statistics(NULL)
72 1568 , extended_stats(false)
73 1568 , num_threads(8) { }
74
75 20272 bool has_deletion_log() const { return deleted_objects_logfile != NULL; }
76
77 upload::AbstractUploader *uploader;
78 ObjectFetcherTN *object_fetcher;
79 ReflogTN *reflog;
80 uint64_t keep_history_depth;
81 time_t keep_history_timestamp;
82 bool dry_run;
83 bool verbose;
84 FILE *deleted_objects_logfile;
85 perf::Statistics *statistics;
86 bool extended_stats;
87 unsigned int num_threads;
88 };
89
90 public:
91 explicit GarbageCollector(const Configuration &configuration);
92
93 void UseReflogTimestamps();
94 bool Collect();
95
96 4704 uint64_t preserved_catalog_count() const { return preserved_catalogs_; }
97 1064 uint64_t condemned_catalog_count() const { return condemned_catalogs_; }
98 56 uint64_t condemned_objects_count() const { return condemned_objects_; }
99 uint64_t duplicate_delete_requests() const {
100 return duplicate_delete_requests_;
101 }
102 uint64_t condemned_bytes_count() const { return condemned_bytes_; }
103 1176 uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; }
104
105 protected:
106 TraversalParameters GetTraversalParams(const Configuration &configuration);
107
108 void PreserveDataObjects(const TraversalCallbackDataTN &data);
109 void SweepDataObjects(const TraversalCallbackDataTN &data);
110
111 bool AnalyzePreservedCatalogTree();
112 bool CheckPreservedRevisions();
113 bool SweepReflog();
114
115 void CheckAndSweep(const shash::Any &hash);
116 void Sweep(const shash::Any &hash);
117 bool RemoveCatalogFromReflog(const shash::Any &catalog);
118
119 void PrintCatalogTreeEntry(const unsigned int tree_level,
120 const CatalogTN *catalog) const;
121 void LogDeletion(const shash::Any &hash) const;
122
123 private:
124 class ReflogBasedInfoShim
125 : public swissknife::CatalogTraversalInfoShim<CatalogTN> {
126 public:
127 2464 explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) {
128 2464 pthread_mutex_init(&reflog_mutex_, NULL);
129 2464 }
130 1232 virtual ~ReflogBasedInfoShim() { pthread_mutex_destroy(&reflog_mutex_); }
131 504 virtual uint64_t GetLastModified(const CatalogTN *catalog) {
132 uint64_t timestamp;
133 1008 MutexLockGuard m(&reflog_mutex_);
134
1/2
✓ Branch 2 taken 504 times.
✗ Branch 3 not taken.
1008 bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp);
135
1/2
✓ Branch 0 taken 504 times.
✗ Branch 1 not taken.
2016 return retval ? timestamp : catalog->GetLastModified();
136 1008 }
137
138 private:
139 ReflogTN *reflog_;
140 pthread_mutex_t reflog_mutex_;
141 };
142
143 const Configuration configuration_;
144 ReflogBasedInfoShim catalog_info_shim_;
145 CatalogTraversalT traversal_;
146 HashFilterT hash_filter_;
147 HashFilterT hash_map_delete_requests_;
148
149
150 bool use_reflog_timestamps_;
151 /**
152 * A marker for the garbage collection grace period, the time span that is
153 * walked back from the current head catalog. There can be named snapshots
154 * older than this snapshot. The oldest_trunk_catalog_ is used as a marker
155 * for when to remove auxiliary files (meta info, history, ...).
156 */
157 uint64_t oldest_trunk_catalog_;
158 bool oldest_trunk_catalog_found_;
159 uint64_t preserved_catalogs_;
160 /**
161 * Number of catalogs in the reflog that are to be deleted (in fact, some of
162 * them might not exist anymore).
163 */
164 uint64_t unreferenced_trees_;
165 /**
166 * Number of root catalogs garbage collected, count grows as GC progresses
167 */
168 uint64_t condemned_trees_;
169 /**
170 * Number of catalogs garbage collected, count grows as GC progresses
171 */
172 uint64_t condemned_catalogs_;
173 /**
174 * Keeps track of the last status report issued, a value between 0 and 1
175 */
176 float last_reported_status_;
177
178 uint64_t condemned_objects_;
179 uint64_t condemned_bytes_;
180 uint64_t duplicate_delete_requests_;
181 };
182
183 #include "garbage_collector_impl.h"
184
185 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
186