5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
// Out-of-class definition of the constant kFullHistory, initialised to the
// largest value a uint64_t can hold.
// NOTE(review): the declarator line (type and qualified class name) is not
// visible here; presumably it is a Configuration member like kNoHistory and
// kNoTimestamp -- confirm against the class declaration.
17 template<
class CatalogTraversalT,
class HashFilterT>
19 kFullHistory = std::numeric_limits<uint64_t>::max();
// Out-of-class definition of Configuration::kNoHistory with the value 0.
// NOTE(review): presumably the sentinel for "keep no historic revisions";
// the meaning is not established by the visible code -- confirm.
21 template<
class CatalogTraversalT,
class HashFilterT>
23 HashFilterT>::Configuration::kNoHistory = 0;
// Out-of-class definition of Configuration::kNoTimestamp with the value 0.
// NOTE(review): presumably the sentinel for "no timestamp threshold set";
// the meaning is not established by the visible code -- confirm.
25 template<
class CatalogTraversalT,
class HashFilterT>
27 HashFilterT>::Configuration::kNoTimestamp = 0;
// Constructor: member initialiser list.
// Stores the passed configuration, hands configuration.reflog to the catalog
// info shim, and resets all flags and counters visible below.
// NOTE(review): the signature line and a few initialisers are not visible
// here; only the members listed below are documented.
30 template<
class CatalogTraversalT,
class HashFilterT>
33 : configuration_(configuration)
34 , catalog_info_shim_(configuration.
reflog)
39 , hash_map_delete_requests_()
// Reflog timestamps are off until explicitly switched on.
40 , use_reflog_timestamps_(false)
// Start at the largest uint64_t value so that the std::min() update in the
// preserve callback adopts the first observed modification time.
41 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42 , oldest_trunk_catalog_found_(false)
43 , preserved_catalogs_(0)
44 , unreferenced_trees_(0)
46 , condemned_catalogs_(0)
// Fraction (0.0 .. 1.0 scale, see the progress report in the sweep callback)
// at which progress was last printed.
47 , last_reported_status_(0.0)
48 , condemned_objects_(0)
50 , duplicate_delete_requests_(0) {
// Switches the collector to reflog-based timestamps: installs the catalog info
// shim on the traversal object and sets use_reflog_timestamps_, which makes the
// preserve callback ask the shim (instead of the catalog) for the modification
// time of root catalogs.
// NOTE(review): the signature line is not visible here; presumably this is
// UseReflogTimestamps() -- confirm.
55 template<
class CatalogTraversalT,
class HashFilterT>
57 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
58 use_reflog_timestamps_ =
true;
// Fills in traversal parameters. The two visible assignments enable
// no_repeat_history and ignore_load_failure on `params`.
// NOTE(review): the signature and the remaining parameter assignments are not
// visible here; presumably this is GetTraversalParams(configuration) and
// `params` is its return value -- confirm.
62 template<
class CatalogTraversalT,
class HashFilterT>
71 params.no_repeat_history =
true;
72 params.ignore_load_failure =
true;
// Traversal callback for catalogs that are kept: counts the catalog, tracks the
// oldest root-catalog modification time, optionally logs, and marks the catalog
// hash plus every object it references in hash_filter_.
// NOTE(review): the signature line, the logging calls and the closing braces
// are not visible here; presumably this is PreserveDataObjects(data) -- confirm.
79 template<
class CatalogTraversalT,
class HashFilterT>
85 ++preserved_catalogs_;
87 if (data.catalog->IsRoot()) {
// Modification time comes from the catalog info shim when reflog timestamps
// are enabled, otherwise from the catalog itself.
88 const uint64_t mtime = use_reflog_timestamps_
89 ? catalog_info_shim_.GetLastModified(
91 : data.catalog->GetLastModified();
// Only lower the recorded minimum while the "found" flag is still unset.
92 if (!oldest_trunk_catalog_found_)
93 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
94 if (configuration_.verbose) {
95 const uint64_t rev = data.catalog->revision();
98 "Preserving Revision %" PRIu64
" (%s / added @ %s)", rev,
// The "added @" time is always taken from the shim and formatted with the
// second StringifyTime argument set to true.
100 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
true)
102 PrintCatalogTreeEntry(data.tree_level, data.catalog);
// Catalogs with a schema below 0.99 trigger the warning text below.
104 if (data.catalog->schema() < 0.99) {
108 "legacy catalog does not provide access to nested catalog "
110 " Some unreferenced objects may remain in the repository.");
// Mark the catalog object itself as referenced.
115 hash_filter_.Fill(data.catalog->hash());
// Mark every object referenced by this catalog as referenced.
118 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
119 typename HashVector::const_iterator i = referenced_hashes.begin();
120 const typename HashVector::const_iterator iend = referenced_hashes.end();
121 for (; i != iend; ++i) {
122 hash_filter_.Fill(*i);
// Traversal callback for catalogs that are removed: counts the catalog,
// optionally logs it, walks its referenced objects, passes the catalog's own
// hash to CheckAndSweep(), and prints a progress line.
// NOTE(review): the signature line, the statement guarded by the first
// IsRoot() check, the loop body and the logging calls are not visible here;
// presumably this is SweepDataObjects(data) -- confirm.
127 template<
class CatalogTraversalT,
class HashFilterT>
133 ++condemned_catalogs_;
134 if (data.catalog->IsRoot())
137 if (configuration_.verbose) {
138 if (data.catalog->IsRoot()) {
139 const uint64_t rev = data.catalog->revision();
140 const time_t mtime =
static_cast<time_t
>(data.catalog->GetLastModified());
142 "Sweeping Revision %" PRIu64
" (%s)", rev,
145 PrintCatalogTreeEntry(data.tree_level, data.catalog);
// Iterate over all objects referenced by the condemned catalog.
150 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
151 typename HashVector::const_iterator i = referenced_hashes.begin();
152 const typename HashVector::const_iterator iend = referenced_hashes.end();
153 for (; i != iend; ++i) {
// The catalog object itself is a sweep candidate as well.
158 CheckAndSweep(data.catalog->hash());
// Progress: fraction of condemned trees over all unreferenced trees.
160 float threshold =
static_cast<float>(condemned_trees_)
161 / static_cast<float>(unreferenced_trees_);
// Report only after the fraction advanced by more than 0.1 since the last
// report.
162 if (threshold > last_reported_status_ + 0.1) {
164 " - %02.0f%% %" PRIu64
" / %" PRIu64
165 " unreferenced revisions removed [%s]",
166 100.0 * threshold, condemned_trees_, unreferenced_trees_,
168 last_reported_status_ = threshold;
// Considers one hash for deletion. Hashes present in hash_filter_ (filled by
// the preserve callback) are left alone; other hashes are recorded in
// hash_map_delete_requests_ the first time they are seen.
// NOTE(review): the signature line, the statement following the Fill() call
// and the branch structure around the duplicate counter are not visible here;
// presumably this is CheckAndSweep(hash), which calls Sweep(hash) once per
// hash and counts repeated requests in the else branch -- confirm.
173 template<
class CatalogTraversalT,
class HashFilterT>
176 if (!hash_filter_.Contains(hash)) {
177 if (!hash_map_delete_requests_.Contains(hash)) {
178 hash_map_delete_requests_.Fill(hash);
181 ++duplicate_delete_requests_;
// Condemns a single object: bumps the object counter, optionally accounts its
// size, and asks the uploader to remove it asynchronously.
// NOTE(review): the signature line and the body of the dry_run branch are not
// visible here; presumably this is Sweep(hash) and dry_run returns before the
// RemoveAsync() call -- confirm.
189 template<
class CatalogTraversalT,
class HashFilterT>
192 ++condemned_objects_;
// Size accounting is only done when extended statistics are requested.
193 if (configuration_.extended_stats) {
195 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
// Non-positive sizes are not added to the byte total.
196 if (condemned_bytes > 0) {
197 condemned_bytes_ += condemned_bytes;
203 if (configuration_.dry_run) {
207 configuration_.uploader->RemoveAsync(hash);
// Removes `catalog` from the configured reflog and returns the result of
// reflog->Remove(). In dry-run mode the reflog is not touched and true is
// returned.
// NOTE(review): the signature line is not visible here; presumably this is
// RemoveCatalogFromReflog(catalog) -- confirm.
211 template<
class CatalogTraversalT,
class HashFilterT>
215 return (configuration_.dry_run) ?
true
216 : configuration_.reflog->Remove(catalog);
// Runs the collection phases in order, short-circuiting on the first failure:
// first AnalyzePreservedCatalogTree(), then CheckPreservedRevisions().
// NOTE(review): the signature line and the remainder of the return expression
// are not visible here; further phases presumably follow -- confirm.
220 template<
class CatalogTraversalT,
class HashFilterT>
222 return AnalyzePreservedCatalogTree() && CheckPreservedRevisions()
// Walks all catalogs that are to be preserved, with PreserveDataObjects
// registered as the traversal listener for the duration of the walk.
// NOTE(review): the return type, parts of the RegisterListener() call, the
// logging call and the return statement are not visible here.
227 template<
class CatalogTraversalT,
class HashFilterT>
229 HashFilterT>::AnalyzePreservedCatalogTree() {
232 if (configuration_.verbose) {
234 "Preserving data objects in latest revision");
237 typename CatalogTraversalT::CallbackTN
238 *callback = traversal_.RegisterListener(
240 HashFilterT>::PreserveDataObjects,
243 bool success = traversal_.Traverse();
// Set after Traverse() and before the named-snapshot pass: from here on the
// preserve callback no longer updates oldest_trunk_catalog_.
244 oldest_trunk_catalog_found_ =
true;
// Named snapshots are only traversed if the first traversal succeeded.
245 success = success && traversal_.TraverseNamedSnapshots();
246 traversal_.UnregisterListener(callback);
// Sanity check: returns true only if at least one catalog was counted as
// preserved. When nothing is preserved and verbose output is enabled, the
// "This would delete everything! Abort." message is logged.
// NOTE(review): the return type and the logging call are not visible here.
252 template<
class CatalogTraversalT,
class HashFilterT>
254 HashFilterT>::CheckPreservedRevisions() {
255 const bool keeps_revisions = (preserved_catalog_count() > 0);
256 if (!keeps_revisions && configuration_.verbose) {
258 "This would delete everything! Abort.");
261 return keeps_revisions;
// Sweep phase: selects the catalogs not marked in hash_filter_, traverses them
// depth-first with a listener registered, removes them from the reflog,
// publishes statistics counters, waits for the uploader, and returns overall
// success.
// NOTE(review): the signature line, the statement that fills `catalogs`, the
// listener arguments and several closing braces are not visible here;
// `catalogs` is presumably populated from the reflog and the listener is
// presumably SweepDataObjects -- confirm.
265 template<
class CatalogTraversalT,
class HashFilterT>
271 std::vector<shash::Any> catalogs;
277 typename CatalogTraversalT::CallbackTN
278 *callback = traversal_.RegisterListener(
// Keep only catalogs that were not marked as preserved.
282 std::vector<shash::Any> to_sweep;
283 std::vector<shash::Any>::const_iterator i = catalogs.begin();
284 std::vector<shash::Any>::const_iterator iend = catalogs.end();
285 for (; i != iend; ++i) {
286 if (!hash_filter_.Contains(*i)) {
287 to_sweep.push_back(*i);
// Denominator of the progress fraction reported by the sweep callback.
290 unreferenced_trees_ = to_sweep.size();
291 bool success = traversal_.TraverseList(to_sweep,
292 CatalogTraversalT::kDepthFirst);
293 traversal_.UnregisterListener(callback);
// Drop every swept catalog from the reflog; && short-circuits, so removal
// calls stop once `success` is false.
295 i = to_sweep.begin();
296 iend = to_sweep.end();
297 for (; i != iend; ++i) {
298 success = success && RemoveCatalogFromReflog(*i);
// Statistics are optional: counters are only registered and set when a
// statistics object is configured.
302 if (configuration_.statistics) {
303 perf::Counter *ctr_preserved_catalogs = configuration_.statistics->Register(
304 "gc.n_preserved_catalogs",
"number of live catalogs");
305 perf::Counter *ctr_condemned_catalogs = configuration_.statistics->Register(
306 "gc.n_condemned_catalogs",
"number of dead catalogs");
307 perf::Counter *ctr_condemned_objects = configuration_.statistics->Register(
308 "gc.n_condemned_objects",
"number of deleted objects");
309 perf::Counter *ctr_condemned_bytes = configuration_.statistics->Register(
310 "gc.sz_condemned_bytes",
"number of deleted bytes");
312 *ctr_duplicate_delete_requests = configuration_.statistics->Register(
313 "gc.n_duplicate_delete_requests",
314 "number of duplicated delete requests");
315 ctr_preserved_catalogs->
Set(preserved_catalog_count());
316 ctr_condemned_catalogs->
Set(condemned_catalog_count());
317 ctr_condemned_objects->
Set(condemned_objects_count());
318 ctr_condemned_bytes->
Set(condemned_bytes_count());
319 ctr_duplicate_delete_requests->
Set(duplicate_delete_requests());
// Removals were issued with RemoveAsync(); wait for the uploader before
// reading its error count.
322 configuration_.uploader->WaitForUpload();
// Success additionally requires that the uploader reported no errors.
325 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
// Prints one line of the catalog tree for verbose output: an indentation
// prefix built from box-drawing glyphs, followed by the catalog hash and path.
//   tree_level  nesting depth; drives the number of indent segments
//   catalog     catalog whose hash() and mountpoint() are printed
// NOTE(review): the function-name line, the loop's closing brace, the value
// used for an empty mountpoint and the logging call are not visible here; the
// second glyph append is presumably executed once after the loop -- confirm.
329 template<
class CatalogTraversalT,
class HashFilterT>
331 const unsigned int tree_level,
const CatalogTN *catalog)
const {
332 std::string tree_indent;
333 for (
unsigned int i = 0; i < tree_level; ++i) {
334 tree_indent +=
"\u2502 ";
336 tree_indent +=
"\u251C\u2500 ";
338 const std::string hash_string = catalog->hash().ToString();
// An empty mountpoint gets a substitute string (not visible here); otherwise
// the mountpoint itself is printed.
339 const std::string path = (catalog->mountpoint().IsEmpty())
341 : catalog->mountpoint().ToString();
344 hash_string.c_str(), path.c_str());
// Reports the deletion of an object: a verbose-mode branch (body not visible
// here) and, when a deletion log is configured, one "%s\n" line written to
// configuration_.deleted_objects_logfile via fprintf().
// NOTE(review): the signature line, the fprintf argument and the handling of
// `written` are not visible here; presumably this is LogDeletion(hash) and the
// written string is the hash -- confirm.
350 template<
class CatalogTraversalT,
class HashFilterT>
353 if (configuration_.verbose) {
358 if (configuration_.has_deletion_log()) {
359 const int written = fprintf(configuration_.deleted_objects_logfile,
"%s\n",
367 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
std::string ToString(const bool with_suffix=false) const
void UseReflogTimestamps()
std::string ToStringWithSuffix() const
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
ObjectFetcherTN * object_fetcher
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
void Set(const int64_t val)
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
const char kSuffixCatalog
CatalogTraversalT::CatalogTN CatalogTN
manifest::Reflog * reflog
upload::AbstractUploader * uploader
uint64_t keep_history_depth
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
time_t keep_history_timestamp
std::vector< shash::Any > HashVector
GarbageCollector(const Configuration &configuration)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)