Directory: | cvmfs/ |
---|---|
File: | cvmfs/garbage_collection/garbage_collector.h |
Date: | 2025-03-09 02:34:28 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 27 | 30 | 90.0% |
Branches: | 2 | 4 | 50.0% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | * | ||
4 | * The GarbageCollector class is figuring out which data objects (represented by | ||
5 | * their content hashes) can be deleted as outdated garbage. | ||
6 | * Garbage collection is performed on the granularity of catalog revisions, thus | ||
7 | * a complete repository revision is either considered to be outdated or active. | ||
8 | * This way, a mountable repository revision stays completely usable (no nested | ||
9 | * catalogs or data objects become unavailable). A revision is defined by its | ||
10 | * root catalog; the revision numbers of nested catalogs are irrelevant, since | ||
11 | * they might be referenced by newer (preserved) repository revisions. | ||
12 | * Thus, garbage objects are those that are _not_ referenced by any of the pre- | ||
13 | * served root catalogs or their direct subordinate nested catalog tree. | ||
14 | * | ||
15 | * We use a two-stage approach: | ||
16 | * | ||
17 | * 1st Stage - Initialization | ||
18 | * The GarbageCollector is reading all the catalogs that are meant | ||
19 | * to be preserved. It builds up a filter (HashFilterT) containing | ||
20 | * all content hashes that are _not_ to be deleted | ||
21 | * | ||
22 | * 2nd Stage - Sweeping | ||
23 | * The initialized HashFilterT is presented with all content | ||
24 | * hashes found in condemned catalogs and decides if they are | ||
25 | * referenced by the preserved catalog revisions or not. | ||
26 | * | ||
27 | * The GarbageCollector is templated with CatalogTraversalT mainly for | ||
28 | * testability and with HashFilterT as an instance of the Strategy Pattern to | ||
29 | * abstract from the actual hash filtering method to be used. | ||
30 | */ | ||
31 | |||
32 | #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
33 | #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
34 | |||
35 | #include <inttypes.h> | ||
36 | |||
37 | #include <vector> | ||
38 | |||
39 | #include "catalog_traversal_parallel.h" | ||
40 | #include "garbage_collection/hash_filter.h" | ||
41 | #include "statistics.h" | ||
42 | #include "upload_facility.h" | ||
43 | |||
44 | template<class CatalogTraversalT, class HashFilterT> | ||
45 | class GarbageCollector { | ||
46 | protected: | ||
47 | typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN; | ||
48 | typedef typename ObjectFetcherTN::HistoryTN HistoryTN; | ||
49 | typedef typename ObjectFetcherTN::ReflogTN ReflogTN; | ||
50 | typedef typename CatalogTraversalT::CatalogTN CatalogTN; | ||
51 | typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN; | ||
52 | typedef typename CatalogTraversalT::Parameters TraversalParameters; | ||
53 | typedef std::vector<shash::Any> HashVector; | ||
54 | |||
55 | public: | ||
56 | struct Configuration { | ||
57 | static const uint64_t kFullHistory; | ||
58 | static const uint64_t kNoHistory; | ||
59 | static const time_t kNoTimestamp; | ||
60 | static const shash::Any kLatestHistoryDatabase; | ||
61 | |||
62 | 56 | Configuration() | |
63 | 56 | : uploader(NULL) | |
64 | 56 | , object_fetcher(NULL) | |
65 | 56 | , reflog(NULL) | |
66 | 56 | , keep_history_depth(kFullHistory) | |
67 | 56 | , keep_history_timestamp(kNoTimestamp) | |
68 | 56 | , dry_run(false) | |
69 | 56 | , verbose(false) | |
70 | 56 | , deleted_objects_logfile(NULL) | |
71 | 56 | , statistics(NULL) | |
72 | 56 | , extended_stats(false) | |
73 | 56 | , num_threads(8) {} | |
74 | |||
75 | 724 | bool has_deletion_log() const { return deleted_objects_logfile != NULL; } | |
76 | |||
77 | upload::AbstractUploader *uploader; | ||
78 | ObjectFetcherTN *object_fetcher; | ||
79 | ReflogTN *reflog; | ||
80 | uint64_t keep_history_depth; | ||
81 | time_t keep_history_timestamp; | ||
82 | bool dry_run; | ||
83 | bool verbose; | ||
84 | FILE *deleted_objects_logfile; | ||
85 | perf::Statistics *statistics; | ||
86 | bool extended_stats; | ||
87 | unsigned int num_threads; | ||
88 | }; | ||
89 | |||
90 | public: | ||
91 | explicit GarbageCollector(const Configuration &configuration); | ||
92 | |||
93 | void UseReflogTimestamps(); | ||
94 | bool Collect(); | ||
95 | |||
96 | 168 | uint64_t preserved_catalog_count() const { return preserved_catalogs_; } | |
97 | 38 | uint64_t condemned_catalog_count() const { return condemned_catalogs_; } | |
98 | 2 | uint64_t condemned_objects_count() const { return condemned_objects_; } | |
99 | ✗ | uint64_t duplicate_delete_requests() const { | |
100 | ✗ | return duplicate_delete_requests_; } | |
101 | ✗ | uint64_t condemned_bytes_count() const { return condemned_bytes_; } | |
102 | 42 | uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; } | |
103 | |||
104 | protected: | ||
105 | TraversalParameters GetTraversalParams(const Configuration &configuration); | ||
106 | |||
107 | void PreserveDataObjects(const TraversalCallbackDataTN &data); | ||
108 | void SweepDataObjects(const TraversalCallbackDataTN &data); | ||
109 | |||
110 | bool AnalyzePreservedCatalogTree(); | ||
111 | bool CheckPreservedRevisions(); | ||
112 | bool SweepReflog(); | ||
113 | |||
114 | void CheckAndSweep(const shash::Any &hash); | ||
115 | void Sweep(const shash::Any &hash); | ||
116 | bool RemoveCatalogFromReflog(const shash::Any &catalog); | ||
117 | |||
118 | void PrintCatalogTreeEntry(const unsigned int tree_level, | ||
119 | const CatalogTN *catalog) const; | ||
120 | void LogDeletion(const shash::Any &hash) const; | ||
121 | |||
122 | private: | ||
123 | class ReflogBasedInfoShim : | ||
124 | public swissknife::CatalogTraversalInfoShim<CatalogTN> | ||
125 | { | ||
126 | public: | ||
127 | 88 | explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) { | |
128 | 88 | pthread_mutex_init(&reflog_mutex_, NULL); | |
129 | 88 | } | |
130 | 44 | virtual ~ReflogBasedInfoShim() { | |
131 | 88 | pthread_mutex_destroy(&reflog_mutex_); | |
132 | } | ||
133 | 18 | virtual uint64_t GetLastModified(const CatalogTN *catalog) { | |
134 | uint64_t timestamp; | ||
135 | 36 | MutexLockGuard m(&reflog_mutex_); | |
136 | 1/2 ✓ Branch 2 taken 18 times. ✗ Branch 3 not taken. | 36 | bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp); |
137 | 1/2 ✓ Branch 0 taken 18 times. ✗ Branch 1 not taken. | 72 | return retval ? timestamp : catalog->GetLastModified(); |
138 | 36 | } | |
139 | |||
140 | private: | ||
141 | ReflogTN *reflog_; | ||
142 | pthread_mutex_t reflog_mutex_; | ||
143 | }; | ||
144 | |||
145 | const Configuration configuration_; | ||
146 | ReflogBasedInfoShim catalog_info_shim_; | ||
147 | CatalogTraversalT traversal_; | ||
148 | HashFilterT hash_filter_; | ||
149 | HashFilterT hash_map_delete_requests_; | ||
150 | |||
151 | |||
152 | bool use_reflog_timestamps_; | ||
153 | /** | ||
154 | * A marker for the garbage collection grace period, the time span that is | ||
155 | * walked back from the current head catalog. There can be named snapshots | ||
156 | * older than this snapshot. The oldest_trunk_catalog_ is used as a marker | ||
157 | * for when to remove auxiliary files (meta info, history, ...). | ||
158 | */ | ||
159 | uint64_t oldest_trunk_catalog_; | ||
160 | bool oldest_trunk_catalog_found_; | ||
161 | uint64_t preserved_catalogs_; | ||
162 | /** | ||
163 | * Number of catalogs in the reflog that are to be deleted (in fact, some of | ||
164 | * them might not exist anymore). | ||
165 | */ | ||
166 | uint64_t unreferenced_trees_; | ||
167 | /** | ||
168 | * Number of root catalogs garbage collected, count grows as GC progresses | ||
169 | */ | ||
170 | uint64_t condemned_trees_; | ||
171 | /** | ||
172 | * Number of catalogs garbage collected, count grows as GC progresses | ||
173 | */ | ||
174 | uint64_t condemned_catalogs_; | ||
175 | /** | ||
176 | * Keeps track of the last status report issued, between 0 and 1 | ||
177 | */ | ||
178 | float last_reported_status_; | ||
179 | |||
180 | uint64_t condemned_objects_; | ||
181 | uint64_t condemned_bytes_; | ||
182 | uint64_t duplicate_delete_requests_; | ||
183 | }; | ||
184 | |||
185 | #include "garbage_collector_impl.h" | ||
186 | |||
187 | #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
188 |