5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
// Out-of-class definition of the Configuration::kFullHistory member for the
// class template parameterised on CatalogTraversalT and HashFilterT. It is
// initialised to the largest value representable in a uint64_t.
// NOTE(review): the line carrying the member's type and the enclosing class
// name is not visible in this excerpt; presumably this is the sentinel for
// "keep the whole history" -- confirm against the class declaration.
18 template<
class CatalogTraversalT,
class HashFilterT>
20 HashFilterT>::Configuration::kFullHistory =
21 std::numeric_limits<uint64_t>::max();
// Out-of-class definition of the Configuration::kNoHistory member,
// initialised to 0.
// NOTE(review): the member's type line is not visible in this excerpt;
// presumably 0 means "keep no historic revisions" -- confirm against the
// class declaration.
23 template<
class CatalogTraversalT,
class HashFilterT>
25 HashFilterT>::Configuration::kNoHistory = 0;
// Out-of-class definition of the Configuration::kNoTimestamp member,
// initialised to 0.
// NOTE(review): the member's type line is not visible in this excerpt;
// presumably 0 marks "no timestamp threshold configured" -- confirm against
// the class declaration.
27 template<
class CatalogTraversalT,
class HashFilterT>
29 HashFilterT>::Configuration::kNoTimestamp = 0;
// Member-initializer list of a constructor taking `configuration`.
// NOTE(review): the constructor's signature line, some initializers and the
// body are not visible in this excerpt.
32 template <
class CatalogTraversalT,
class HashFilterT>
// Keep a copy of the caller's configuration and hand its reflog to the
// catalog info shim.
35 : configuration_(configuration)
36 , catalog_info_shim_(configuration.
reflog)
41 , hash_map_delete_requests_()
// Reflog timestamps are off by default; they are switched on elsewhere in
// this file by assigning `true` to use_reflog_timestamps_.
42 , use_reflog_timestamps_(false)
// static_cast<uint64_t>(-1) is the largest uint64_t value, so the std::min()
// applied to this member in the preserve callback can only lower it.
43 , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
44 , oldest_trunk_catalog_found_(false)
// All counters and the progress marker start at zero.
45 , preserved_catalogs_(0)
46 , unreferenced_trees_(0)
48 , condemned_catalogs_(0)
49 , last_reported_status_(0.0)
50 , condemned_objects_(0)
52 , duplicate_delete_requests_(0)
// Installs catalog_info_shim_ on the traversal object and sets
// use_reflog_timestamps_ to true.
// NOTE(review): the signature line is not visible in this excerpt; presumably
// this is `void UseReflogTimestamps()` -- confirm.
58 template <
class CatalogTraversalT,
class HashFilterT>
60 traversal_.SetCatalogInfoShim(&catalog_info_shim_);
// From here on the preserve callback reads the modification time through
// catalog_info_shim_ instead of from the catalog itself.
61 use_reflog_timestamps_ =
true;
// Fills in a traversal parameter object `params`. The two assignments visible
// here always set no_repeat_history and ignore_load_failure to true.
// NOTE(review): the signature, the declaration of `params`, the remaining
// assignments and the return statement are not visible in this excerpt;
// presumably this is GetTraversalParams(const Configuration &) -- confirm.
65 template <
class CatalogTraversalT,
class HashFilterT>
74 params.no_repeat_history =
true;
75 params.ignore_load_failure =
true;
// Traversal callback body for a catalog that is to be preserved: counts the
// catalog, tracks the modification time of the oldest root catalog seen so
// far, optionally logs, and puts the catalog's own hash plus every hash it
// references into hash_filter_.
// NOTE(review): the signature line, the LogCvmfs call heads and the closing
// braces are not visible in this excerpt; presumably this is
// PreserveDataObjects(const TraversalCallbackDataTN &data) -- confirm.
82 template <
class CatalogTraversalT,
class HashFilterT>
87 ++preserved_catalogs_;
89 if (data.catalog->IsRoot()) {
// Take the timestamp from catalog_info_shim_ when reflog timestamps are
// enabled, otherwise from the catalog itself.
90 const uint64_t mtime = use_reflog_timestamps_
91 ? catalog_info_shim_.GetLastModified(data.catalog)
92 : data.catalog->GetLastModified();
// oldest_trunk_catalog_ is only lowered while oldest_trunk_catalog_found_ is
// still false; the flag is set to true elsewhere in this file right after
// traversal_.Traverse() returns.
93 if (!oldest_trunk_catalog_found_)
94 oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
95 if (configuration_.verbose) {
96 const uint64_t rev = data.catalog->revision();
// The verbose message shows both the catalog's own modification time and the
// one reported by catalog_info_shim_ ("added @").
98 "Preserving Revision %" PRIu64
" (%s / added @ %s)",
100 StringifyTime(data.catalog->GetLastModified(),
true).c_str(),
101 StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
103 PrintCatalogTreeEntry(data.tree_level, data.catalog);
// Catalogs with a schema below 0.99 get a warning that unreferenced objects
// may remain in the repository.
105 if (data.catalog->schema() < 0.99) {
107 "legacy catalog does not provide access to nested catalog hierarchy.\n"
108 " Some unreferenced objects may remain in the repository.");
// Mark the catalog object itself as preserved.
113 hash_filter_.Fill(data.catalog->hash());
// Mark every object referenced by this catalog as preserved.
116 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
117 typename HashVector::const_iterator i = referenced_hashes.begin();
118 const typename HashVector::const_iterator iend = referenced_hashes.end();
119 for (; i != iend; ++i) {
120 hash_filter_.Fill(*i);
// Traversal callback body for a condemned catalog: counts it, optionally
// logs it, walks the hashes it references, passes the catalog's own hash to
// CheckAndSweep, and reports progress.
// NOTE(review): the signature line, the statement guarded by the first
// IsRoot() check, the loop body and the LogCvmfs call heads are not visible
// in this excerpt; presumably this is
// SweepDataObjects(const TraversalCallbackDataTN &data) -- confirm.
125 template <
class CatalogTraversalT,
class HashFilterT>
130 ++condemned_catalogs_;
131 if (data.catalog->IsRoot())
134 if (configuration_.verbose) {
135 if (data.catalog->IsRoot()) {
136 const uint64_t rev = data.catalog->revision();
137 const time_t mtime =
static_cast<time_t
>(data.catalog->GetLastModified());
139 "Sweeping Revision %" PRIu64
" (%s)",
142 PrintCatalogTreeEntry(data.tree_level, data.catalog);
// Iterate over every object referenced by the condemned catalog.
147 const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
148 typename HashVector::const_iterator i = referenced_hashes.begin();
149 const typename HashVector::const_iterator iend = referenced_hashes.end();
150 for (; i != iend; ++i) {
// The catalog object itself goes through the same check.
155 CheckAndSweep(data.catalog->hash());
// Progress is the fraction condemned_trees_ / unreferenced_trees_.
// NOTE(review): this division relies on unreferenced_trees_ being non-zero;
// elsewhere in this file it is assigned to_sweep.size() just before
// TraverseList(to_sweep, ...) is called -- confirm no other caller exists.
158 static_cast<float>(condemned_trees_) /
159 static_cast<float>(unreferenced_trees_);
// Only report when progress advanced by more than 0.1 since the last report.
160 if (threshold > last_reported_status_ + 0.1) {
162 " - %02.0f%% %" PRIu64
" / %" PRIu64
163 " unreferenced revisions removed [%s]",
164 100.0 * threshold, condemned_trees_, unreferenced_trees_,
166 last_reported_status_ = threshold;
// Handles a single candidate hash: hashes contained in hash_filter_ are left
// alone; for the others, hash_map_delete_requests_ is consulted so that each
// hash is recorded as a delete request only once.
// NOTE(review): the signature line and the lines between the visible
// statements are not visible in this excerpt; presumably this is
// CheckAndSweep(const shash::Any &hash), with the actual sweep call sitting
// next to the Fill() -- confirm.
171 template <
class CatalogTraversalT,
class HashFilterT>
175 if (!hash_filter_.Contains(hash)) {
176 if (!hash_map_delete_requests_.Contains(hash)) {
// First request for this hash: remember it.
177 hash_map_delete_requests_.Fill(hash);
// Presumably the else-branch (hash was already requested): only the
// duplicate counter is bumped -- TODO confirm the branch structure.
180 ++duplicate_delete_requests_;
// Counts one condemned object, optionally accounts for its size, and asks the
// uploader to remove it asynchronously.
// NOTE(review): the signature line, the body of the dry_run branch and the
// closing braces are not visible in this excerpt; presumably this is
// Sweep(const shash::Any &hash) and the dry_run branch skips the removal --
// confirm.
188 template <
class CatalogTraversalT,
class HashFilterT>
191 ++condemned_objects_;
// The object size is only queried when extended statistics are requested;
// only positive results are added to condemned_bytes_.
192 if (configuration_.extended_stats) {
194 int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
195 if (condemned_bytes > 0) {
196 condemned_bytes_ += condemned_bytes;
202 if (configuration_.dry_run) {
206 configuration_.uploader->RemoveAsync(hash);
// Returns the value of a conditional on configuration_.dry_run: when dry_run
// is false, the result of configuration_.reflog->Remove(catalog).
// NOTE(review): the signature line and the dry_run alternative of the
// conditional are not visible in this excerpt; presumably this is
// RemoveCatalogFromReflog(const shash::Any &catalog) -- confirm.
210 template <
class CatalogTraversalT,
class HashFilterT>
215 return (configuration_.dry_run)
217 : configuration_.reflog->Remove(catalog);
// Chains the collection steps with &&, so a step only runs when every step
// before it returned true: first AnalyzePreservedCatalogTree(), then
// CheckPreservedRevisions(), then at least one further operand.
// NOTE(review): the signature line and the final operand(s) of the &&-chain
// are not visible in this excerpt.
221 template <
class CatalogTraversalT,
class HashFilterT>
223 return AnalyzePreservedCatalogTree() &&
224 CheckPreservedRevisions() &&
// Runs the traversal with a listener registered, first via Traverse() and
// then via TraverseNamedSnapshots().
// NOTE(review): the signature line, the listener arguments, the LogCvmfs call
// head and the return statement are not visible in this excerpt; presumably
// this is AnalyzePreservedCatalogTree() -- confirm.
229 template <
class CatalogTraversalT,
class HashFilterT>
235 if (configuration_.verbose) {
237 "Preserving data objects in latest revision");
240 typename CatalogTraversalT::CallbackTN *callback =
241 traversal_.RegisterListener(
245 bool success = traversal_.Traverse();
// Setting the flag after the first traversal stops the preserve callback
// from updating oldest_trunk_catalog_ during the named-snapshot traversal
// (the callback only updates it while the flag is false).
246 oldest_trunk_catalog_found_ =
true;
// Short-circuit: named snapshots are only traversed if Traverse() succeeded.
247 success = success && traversal_.TraverseNamedSnapshots();
248 traversal_.UnregisterListener(callback);
// Returns true iff preserved_catalog_count() is greater than zero. When it is
// not and verbose output is enabled, the message "This would delete
// everything! Abort." is logged.
// NOTE(review): the signature line and the LogCvmfs call head are not visible
// in this excerpt; presumably this is CheckPreservedRevisions() -- confirm.
254 template <
class CatalogTraversalT,
class HashFilterT>
257 const bool keeps_revisions = (preserved_catalog_count() > 0);
258 if (!keeps_revisions && configuration_.verbose) {
260 "This would delete everything! Abort.");
263 return keeps_revisions;
// Sweeps the catalogs that are not in hash_filter_: traverses them
// depth-first with a listener registered, removes them from the reflog,
// publishes counters to the optional statistics object, waits for the
// uploader, and returns whether everything succeeded.
// NOTE(review): the signature line, the code that fills `catalogs`, the
// listener arguments and the declarations of the ctr_* counters are not
// visible in this excerpt.
267 template <
class CatalogTraversalT,
class HashFilterT>
273 std::vector<shash::Any> catalogs;
279 typename CatalogTraversalT::CallbackTN *callback =
280 traversal_.RegisterListener(
// Keep only the catalogs whose hash is absent from hash_filter_.
284 std::vector<shash::Any> to_sweep;
285 std::vector<shash::Any>::const_iterator i = catalogs.begin();
286 std::vector<shash::Any>::const_iterator iend = catalogs.end();
287 for (; i != iend; ++i) {
288 if (!hash_filter_.Contains(*i)) {
289 to_sweep.push_back(*i);
// unreferenced_trees_ is the denominator of the progress fraction computed in
// the sweep callback.
292 unreferenced_trees_ = to_sweep.size();
293 bool success = traversal_.TraverseList(to_sweep,
294 CatalogTraversalT::kDepthFirst);
295 traversal_.UnregisterListener(callback);
// Remove each swept catalog from the reflog. Because of the && short-circuit,
// no further removal is attempted once `success` has become false.
297 i = to_sweep.begin();
298 iend = to_sweep.end();
299 for (; i != iend; ++i) {
300 success = success && RemoveCatalogFromReflog(*i);
// Statistics are optional: counters are only registered and set when a
// statistics object is configured.
304 if (configuration_.statistics) {
306 configuration_.statistics->Register(
307 "gc.n_preserved_catalogs",
"number of live catalogs");
309 configuration_.statistics->Register(
310 "gc.n_condemned_catalogs",
"number of dead catalogs");
312 configuration_.statistics->Register(
313 "gc.n_condemned_objects",
"number of deleted objects");
315 configuration_.statistics->Register(
316 "gc.sz_condemned_bytes",
"number of deleted bytes");
318 configuration_.statistics->Register(
319 "gc.n_duplicate_delete_requests",
"number of duplicated delete requests");
320 ctr_preserved_catalogs->
Set(preserved_catalog_count());
321 ctr_condemned_catalogs->
Set(condemned_catalog_count());
322 ctr_condemned_objects->
Set(condemned_objects_count());
323 ctr_condemned_bytes->
Set(condemned_bytes_count());
324 ctr_duplicate_delete_requests->
Set(duplicate_delete_requests());
// WaitForUpload() is called before the uploader's error count is read for the
// return value.
327 configuration_.uploader->WaitForUpload();
330 return success && (configuration_.uploader->GetNumberOfErrors() == 0);
// Builds an indentation prefix from box-drawing characters for a catalog at
// depth `tree_level`, then formats the catalog's hash and mount point.
// NOTE(review): most of the signature, the loop's closing brace and the
// LogCvmfs call head are not visible in this excerpt; presumably this is
// PrintCatalogTreeEntry(const unsigned int tree_level,
// const CatalogTN *catalog) -- confirm.
334 template <
class CatalogTraversalT,
class HashFilterT>
336 const unsigned int tree_level,
339 std::string tree_indent;
// One "\u2502 " segment is appended per tree level.
340 for (
unsigned int i = 0; i < tree_level; ++i) {
341 tree_indent +=
"\u2502 ";
// Presumably appended once after the loop as the branch marker -- the loop's
// closing brace is not visible here, TODO confirm.
343 tree_indent +=
"\u251C\u2500 ";
345 const std::string hash_string = catalog->hash().ToString();
// An empty mount point is shown as "/".
346 const std::string path =
347 (catalog->mountpoint().IsEmpty()) ?
"/" : catalog->mountpoint().ToString();
354 hash_string.c_str(), path.c_str());
// Reports a deletion on two independent channels: something guarded by
// configuration_.verbose, and an fprintf() into
// configuration_.deleted_objects_logfile when a deletion log is configured.
// NOTE(review): the signature line, the verbose branch body, the fprintf
// arguments and any check of `written` are not visible in this excerpt;
// presumably this is LogDeletion(const shash::Any &hash) const -- confirm.
358 template <
class CatalogTraversalT,
class HashFilterT>
361 if (configuration_.verbose) {
366 if (configuration_.has_deletion_log()) {
// fprintf's return value is captured in `written`.
367 const int written = fprintf(configuration_.deleted_objects_logfile,
375 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
bool AnalyzePreservedCatalogTree()
std::string ToString(const bool with_suffix=false) const
void UseReflogTimestamps()
std::string ToStringWithSuffix() const
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
ObjectFetcherTN * object_fetcher
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
void Set(const int64_t val)
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
const char kSuffixCatalog
CatalogTraversalT::CatalogTN CatalogTN
manifest::Reflog * reflog
upload::AbstractUploader * uploader
uint64_t keep_history_depth
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
time_t keep_history_timestamp
std::vector< shash::Any > HashVector
bool CheckPreservedRevisions()
GarbageCollector(const Configuration &configuration)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)