Directory: | cvmfs/ |
---|---|
File: | cvmfs/garbage_collection/garbage_collector.h |
Date: | 2025-06-29 02:35:41 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 26 | 29 | 89.7% |
Branches: | 2 | 4 | 50.0% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | * | ||
4 | * The GarbageCollector class is figuring out which data objects (represented by | ||
5 | * their content hashes) can be deleted as outdated garbage. | ||
6 | * Garbage collection is performed on the granularity of catalog revisions, thus | ||
7 | * a complete repository revision is either considered to be outdated or active. | ||
8 | * This way, a mountable repository revision stays completely usable (no nested | ||
9 | * catalogs or data objects become unavailable). A revision is defined by its | ||
10 | * root catalog; the revision numbers of nested catalogs are irrelevant, since | ||
11 | * they might be referenced by newer (preserved) repository revisions. | ||
12 | * Thus, garbage objects are those that are _not_ referenced by any of the pre- | ||
13 | * served root catalogs or their direct subordinate nested catalog tree. | ||
14 | * | ||
15 | * We use a two-stage approach: | ||
16 | * | ||
17 | * 1st Stage - Initialization | ||
18 | * The GarbageCollector is reading all the catalogs that are meant | ||
19 | * to be preserved. It builds up a filter (HashFilterT) containing | ||
20 | * all content hashes that are _not_ to be deleted | ||
21 | * | ||
22 | * 2nd Stage - Sweeping | ||
23 | * The initialized HashFilterT is presented with all content | ||
24 | * hashes found in condemned catalogs and decides if they are | ||
25 | * referenced by the preserved catalog revisions or not. | ||
26 | * | ||
27 | * The GarbageCollector is templated with CatalogTraversalT mainly for | ||
28 | * testability and with HashFilterT as an instance of the Strategy Pattern to | ||
29 | * abstract from the actual hash filtering method to be used. | ||
30 | */ | ||
31 | |||
32 | #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
33 | #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
34 | |||
35 | #include <inttypes.h> | ||
36 | |||
37 | #include <vector> | ||
38 | |||
39 | #include "catalog_traversal_parallel.h" | ||
40 | #include "garbage_collection/hash_filter.h" | ||
41 | #include "statistics.h" | ||
42 | #include "upload_facility.h" | ||
43 | |||
44 | template<class CatalogTraversalT, class HashFilterT> | ||
45 | class GarbageCollector { | ||
46 | protected: | ||
47 | typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN; | ||
48 | typedef typename ObjectFetcherTN::HistoryTN HistoryTN; | ||
49 | typedef typename ObjectFetcherTN::ReflogTN ReflogTN; | ||
50 | typedef typename CatalogTraversalT::CatalogTN CatalogTN; | ||
51 | typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN; | ||
52 | typedef typename CatalogTraversalT::Parameters TraversalParameters; | ||
53 | typedef std::vector<shash::Any> HashVector; | ||
54 | |||
55 | public: | ||
56 | struct Configuration { | ||
57 | static const uint64_t kFullHistory; | ||
58 | static const uint64_t kNoHistory; | ||
59 | static const time_t kNoTimestamp; | ||
60 | static const shash::Any kLatestHistoryDatabase; | ||
61 | |||
62 | 1568 | Configuration() | |
63 | 1568 | : uploader(NULL) | |
64 | 1568 | , object_fetcher(NULL) | |
65 | 1568 | , reflog(NULL) | |
66 | 1568 | , keep_history_depth(kFullHistory) | |
67 | 1568 | , keep_history_timestamp(kNoTimestamp) | |
68 | 1568 | , dry_run(false) | |
69 | 1568 | , verbose(false) | |
70 | 1568 | , deleted_objects_logfile(NULL) | |
71 | 1568 | , statistics(NULL) | |
72 | 1568 | , extended_stats(false) | |
73 | 1568 | , num_threads(8) { } | |
74 | |||
75 | 20272 | bool has_deletion_log() const { return deleted_objects_logfile != NULL; } | |
76 | |||
77 | upload::AbstractUploader *uploader; | ||
78 | ObjectFetcherTN *object_fetcher; | ||
79 | ReflogTN *reflog; | ||
80 | uint64_t keep_history_depth; | ||
81 | time_t keep_history_timestamp; | ||
82 | bool dry_run; | ||
83 | bool verbose; | ||
84 | FILE *deleted_objects_logfile; | ||
85 | perf::Statistics *statistics; | ||
86 | bool extended_stats; | ||
87 | unsigned int num_threads; | ||
88 | }; | ||
89 | |||
90 | public: | ||
91 | explicit GarbageCollector(const Configuration &configuration); | ||
92 | |||
93 | void UseReflogTimestamps(); | ||
94 | bool Collect(); | ||
95 | |||
96 | 4704 | uint64_t preserved_catalog_count() const { return preserved_catalogs_; } | |
97 | 1064 | uint64_t condemned_catalog_count() const { return condemned_catalogs_; } | |
98 | 56 | uint64_t condemned_objects_count() const { return condemned_objects_; } | |
99 | ✗ | uint64_t duplicate_delete_requests() const { | |
100 | ✗ | return duplicate_delete_requests_; | |
101 | } | ||
102 | ✗ | uint64_t condemned_bytes_count() const { return condemned_bytes_; } | |
103 | 1176 | uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; } | |
104 | |||
105 | protected: | ||
106 | TraversalParameters GetTraversalParams(const Configuration &configuration); | ||
107 | |||
108 | void PreserveDataObjects(const TraversalCallbackDataTN &data); | ||
109 | void SweepDataObjects(const TraversalCallbackDataTN &data); | ||
110 | |||
111 | bool AnalyzePreservedCatalogTree(); | ||
112 | bool CheckPreservedRevisions(); | ||
113 | bool SweepReflog(); | ||
114 | |||
115 | void CheckAndSweep(const shash::Any &hash); | ||
116 | void Sweep(const shash::Any &hash); | ||
117 | bool RemoveCatalogFromReflog(const shash::Any &catalog); | ||
118 | |||
119 | void PrintCatalogTreeEntry(const unsigned int tree_level, | ||
120 | const CatalogTN *catalog) const; | ||
121 | void LogDeletion(const shash::Any &hash) const; | ||
122 | |||
123 | private: | ||
124 | class ReflogBasedInfoShim | ||
125 | : public swissknife::CatalogTraversalInfoShim<CatalogTN> { | ||
126 | public: | ||
127 | 2464 | explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) { | |
128 | 2464 | pthread_mutex_init(&reflog_mutex_, NULL); | |
129 | 2464 | } | |
130 | 1232 | virtual ~ReflogBasedInfoShim() { pthread_mutex_destroy(&reflog_mutex_); } | |
131 | 504 | virtual uint64_t GetLastModified(const CatalogTN *catalog) { | |
132 | uint64_t timestamp; | ||
133 | 1008 | MutexLockGuard m(&reflog_mutex_); | |
134 |
1/2✓ Branch 2 taken 504 times.
✗ Branch 3 not taken.
|
1008 | bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp); |
135 |
1/2✓ Branch 0 taken 504 times.
✗ Branch 1 not taken.
|
2016 | return retval ? timestamp : catalog->GetLastModified(); |
136 | 1008 | } | |
137 | |||
138 | private: | ||
139 | ReflogTN *reflog_; | ||
140 | pthread_mutex_t reflog_mutex_; | ||
141 | }; | ||
142 | |||
143 | const Configuration configuration_; | ||
144 | ReflogBasedInfoShim catalog_info_shim_; | ||
145 | CatalogTraversalT traversal_; | ||
146 | HashFilterT hash_filter_; | ||
147 | HashFilterT hash_map_delete_requests_; | ||
148 | |||
149 | |||
150 | bool use_reflog_timestamps_; | ||
151 | /** | ||
152 | * A marker for the garbage collection grace period, the time span that is | ||
153 | * walked back from the current head catalog. There can be named snapshots | ||
154 | * older than this snapshot. The oldest_trunk_catalog_ is used as a marker | ||
155 | * for when to remove auxiliary files (meta info, history, ...). | ||
156 | */ | ||
157 | uint64_t oldest_trunk_catalog_; | ||
158 | bool oldest_trunk_catalog_found_; | ||
159 | uint64_t preserved_catalogs_; | ||
160 | /** | ||
161 | * Number of catalogs in the reflog that are to be deleted (in fact, some of | ||
162 | * them might not exist anymore). | ||
163 | */ | ||
164 | uint64_t unreferenced_trees_; | ||
165 | /** | ||
166 | * Number of root catalogs garbage collected, count grows as GC progresses | ||
167 | */ | ||
168 | uint64_t condemned_trees_; | ||
169 | /** | ||
170 | * Number of catalogs garbage collected, count grows as GC progresses | ||
171 | */ | ||
172 | uint64_t condemned_catalogs_; | ||
173 | /** | ||
174 | * Keeps track of the last status report issued, between 0 and 1 | ||
175 | */ | ||
176 | float last_reported_status_; | ||
177 | |||
178 | uint64_t condemned_objects_; | ||
179 | uint64_t condemned_bytes_; | ||
180 | uint64_t duplicate_delete_requests_; | ||
181 | }; | ||
182 | |||
183 | #include "garbage_collector_impl.h" | ||
184 | |||
185 | #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
186 |