5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
16 template<
class CatalogTraversalT,
class HashFilterT>
18 HashFilterT>::Configuration::kFullHistory =
19 std::numeric_limits<unsigned int>::max();
21 template<
class CatalogTraversalT,
class HashFilterT>
23 HashFilterT>::Configuration::kNoHistory = 0;
25 template<
class CatalogTraversalT,
class HashFilterT>
27 HashFilterT>::Configuration::kNoTimestamp = 0;
30 template <
class CatalogTraversalT,
class HashFilterT>
33 : configuration_(configuration)
34 , catalog_info_shim_(configuration.
reflog)
39 , hash_map_delete_requests_()
40 , use_reflog_timestamps_(false)
41 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42 , oldest_trunk_catalog_found_(false)
43 , preserved_catalogs_(0)
44 , unreferenced_trees_(0)
46 , condemned_catalogs_(0)
47 , last_reported_status_(0.0)
48 , condemned_objects_(0)
50 , duplicate_delete_requests_(0)
56 template <
class CatalogTraversalT,
class HashFilterT>
58 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
59 use_reflog_timestamps_ =
true;
63 template <
class CatalogTraversalT,
class HashFilterT>
72 params.no_repeat_history =
true;
73 params.ignore_load_failure =
true;
80 template <
class CatalogTraversalT,
class HashFilterT>
85 ++preserved_catalogs_;
87 if (data.catalog->IsRoot()) {
88 const uint64_t mtime = use_reflog_timestamps_
89 ? catalog_info_shim_.GetLastModified(data.catalog)
90 : data.catalog->GetLastModified();
91 if (!oldest_trunk_catalog_found_)
92 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
93 if (configuration_.verbose) {
94 const int rev = data.catalog->revision();
96 "Preserving Revision %d (%s / added @ %s)",
99 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
101 PrintCatalogTreeEntry(data.tree_level, data.catalog);
103 if (data.catalog->schema() < 0.99) {
105 "legacy catalog does not provide access to nested catalog hierarchy.\n"
106 " Some unreferenced objects may remain in the repository.");
111 hash_filter_.Fill(data.catalog->hash());
114 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
115 typename HashVector::const_iterator i = referenced_hashes.begin();
116 const typename HashVector::const_iterator iend = referenced_hashes.end();
117 for (; i != iend; ++i) {
118 hash_filter_.Fill(*i);
123 template <
class CatalogTraversalT,
class HashFilterT>
128 ++condemned_catalogs_;
129 if (data.catalog->IsRoot())
132 if (configuration_.verbose) {
133 if (data.catalog->IsRoot()) {
134 const int rev = data.catalog->revision();
135 const time_t mtime =
static_cast<time_t
>(data.catalog->GetLastModified());
139 PrintCatalogTreeEntry(data.tree_level, data.catalog);
144 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
145 typename HashVector::const_iterator i = referenced_hashes.begin();
146 const typename HashVector::const_iterator iend = referenced_hashes.end();
147 for (; i != iend; ++i) {
152 CheckAndSweep(data.catalog->hash());
155 static_cast<float>(condemned_trees_) /
156 static_cast<float>(unreferenced_trees_);
157 if (threshold > last_reported_status_ + 0.1) {
159 " - %02.0f%% %u / %u unreferenced revisions removed [%s]",
160 100.0 * threshold, condemned_trees_, unreferenced_trees_,
162 last_reported_status_ = threshold;
167 template <
class CatalogTraversalT,
class HashFilterT>
171 if (!hash_filter_.Contains(hash)) {
172 if (!hash_map_delete_requests_.Contains(hash)) {
173 hash_map_delete_requests_.Fill(hash);
176 ++duplicate_delete_requests_;
184 template <
class CatalogTraversalT,
class HashFilterT>
187 ++condemned_objects_;
188 if (configuration_.extended_stats) {
190 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
191 if (condemned_bytes > 0) {
192 condemned_bytes_ += condemned_bytes;
198 if (configuration_.dry_run) {
202 configuration_.uploader->RemoveAsync(hash);
206 template <
class CatalogTraversalT,
class HashFilterT>
211 return (configuration_.dry_run)
213 : configuration_.reflog->Remove(catalog);
217 template <
class CatalogTraversalT,
class HashFilterT>
219 return AnalyzePreservedCatalogTree() &&
220 CheckPreservedRevisions() &&
225 template <
class CatalogTraversalT,
class HashFilterT>
231 if (configuration_.verbose) {
233 "Preserving data objects in latest revision");
236 typename CatalogTraversalT::CallbackTN *callback =
237 traversal_.RegisterListener(
241 bool success = traversal_.Traverse();
242 oldest_trunk_catalog_found_ =
true;
243 success = success && traversal_.TraverseNamedSnapshots();
244 traversal_.UnregisterListener(callback);
250 template <
class CatalogTraversalT,
class HashFilterT>
253 const bool keeps_revisions = (preserved_catalog_count() > 0);
254 if (!keeps_revisions && configuration_.verbose) {
256 "This would delete everything! Abort.");
259 return keeps_revisions;
263 template <
class CatalogTraversalT,
class HashFilterT>
269 std::vector<shash::Any> catalogs;
275 typename CatalogTraversalT::CallbackTN *callback =
276 traversal_.RegisterListener(
280 std::vector<shash::Any> to_sweep;
281 std::vector<shash::Any>::const_iterator i = catalogs.begin();
282 std::vector<shash::Any>::const_iterator iend = catalogs.end();
283 for (; i != iend; ++i) {
284 if (!hash_filter_.Contains(*i)) {
285 to_sweep.push_back(*i);
288 unreferenced_trees_ = to_sweep.size();
289 bool success = traversal_.TraverseList(to_sweep,
290 CatalogTraversalT::kDepthFirst);
291 traversal_.UnregisterListener(callback);
293 i = to_sweep.begin();
294 iend = to_sweep.end();
295 for (; i != iend; ++i) {
296 success = success && RemoveCatalogFromReflog(*i);
300 if (configuration_.statistics) {
302 configuration_.statistics->Register(
303 "gc.n_preserved_catalogs",
"number of live catalogs");
305 configuration_.statistics->Register(
306 "gc.n_condemned_catalogs",
"number of dead catalogs");
308 configuration_.statistics->Register(
309 "gc.n_condemned_objects",
"number of deleted objects");
311 configuration_.statistics->Register(
312 "gc.sz_condemned_bytes",
"number of deleted bytes");
314 configuration_.statistics->Register(
315 "gc.n_duplicate_delete_requests",
"number of duplicated delete requests");
316 ctr_preserved_catalogs->
Set(preserved_catalog_count());
317 ctr_condemned_catalogs->
Set(condemned_catalog_count());
318 ctr_condemned_objects->
Set(condemned_objects_count());
319 ctr_condemned_bytes->
Set(condemned_bytes_count());
320 ctr_duplicate_delete_requests->
Set(duplicate_delete_requests());
323 configuration_.uploader->WaitForUpload();
326 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
330 template <
class CatalogTraversalT,
class HashFilterT>
332 const unsigned int tree_level,
335 std::string tree_indent;
336 for (
unsigned int i = 0; i < tree_level; ++i) {
337 tree_indent +=
"\u2502 ";
339 tree_indent +=
"\u251C\u2500 ";
341 const std::string hash_string = catalog->hash().ToString();
342 const std::string path =
343 (catalog->mountpoint().IsEmpty()) ?
"/" : catalog->mountpoint().ToString();
350 hash_string.c_str(), path.c_str());
354 template <
class CatalogTraversalT,
class HashFilterT>
357 if (configuration_.verbose) {
362 if (configuration_.has_deletion_log()) {
363 const int written = fprintf(configuration_.deleted_objects_logfile,
371 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
#define LogCvmfs(source, mask,...)
bool AnalyzePreservedCatalogTree()
std::string ToString(const bool with_suffix=false) const
void UseReflogTimestamps()
std::string ToStringWithSuffix() const
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
ObjectFetcherTN * object_fetcher
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
void Set(const int64_t val)
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
const char kSuffixCatalog
CatalogTraversalT::CatalogTN CatalogTN
manifest::Reflog * reflog
upload::AbstractUploader * uploader
unsigned int keep_history_depth
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
time_t keep_history_timestamp
std::vector< shash::Any > HashVector
bool CheckPreservedRevisions()
GarbageCollector(const Configuration &configuration)