CernVM-FS  2.13.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector_impl.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7 
8 #include <algorithm>
9 #include <limits>
10 #include <string>
11 #include <vector>
12 
14 #include "util/logging.h"
15 #include "util/string.h"
16 
17 template<class CatalogTraversalT, class HashFilterT>
19  kFullHistory = std::numeric_limits<uint64_t>::max();
20 
21 template<class CatalogTraversalT, class HashFilterT>
22 const uint64_t GarbageCollector<CatalogTraversalT,
23  HashFilterT>::Configuration::kNoHistory = 0;
24 
25 template<class CatalogTraversalT, class HashFilterT>
26 const time_t GarbageCollector<CatalogTraversalT,
27  HashFilterT>::Configuration::kNoTimestamp = 0;
28 
29 
30 template<class CatalogTraversalT, class HashFilterT>
32  const Configuration &configuration)
33  : configuration_(configuration)
34  , catalog_info_shim_(configuration.reflog)
35  , traversal_(
36  GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37  configuration))
38  , hash_filter_()
39  , hash_map_delete_requests_()
40  , use_reflog_timestamps_(false)
41  , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42  , oldest_trunk_catalog_found_(false)
43  , preserved_catalogs_(0)
44  , unreferenced_trees_(0)
45  , condemned_trees_(0)
46  , condemned_catalogs_(0)
47  , last_reported_status_(0.0)
48  , condemned_objects_(0)
49  , condemned_bytes_(0)
50  , duplicate_delete_requests_(0) {
52 }
53 
54 
55 template<class CatalogTraversalT, class HashFilterT>
57  traversal_.SetCatalogInfoShim(&catalog_info_shim_);
58  use_reflog_timestamps_ = true;
59 }
60 
61 
62 template<class CatalogTraversalT, class HashFilterT>
66  &config) {
67  TraversalParameters params;
68  params.object_fetcher = config.object_fetcher;
69  params.history = config.keep_history_depth;
70  params.timestamp = config.keep_history_timestamp;
71  params.no_repeat_history = true;
72  params.ignore_load_failure = true;
73  params.quiet = !config.verbose;
74  params.num_threads = config.num_threads;
75  return params;
76 }
77 
78 
79 template<class CatalogTraversalT, class HashFilterT>
81  const GarbageCollector<CatalogTraversalT,
82  HashFilterT>::TraversalCallbackDataTN
83  &data // NOLINT(runtime/references)
84 ) {
85  ++preserved_catalogs_;
86 
87  if (data.catalog->IsRoot()) {
88  const uint64_t mtime = use_reflog_timestamps_
89  ? catalog_info_shim_.GetLastModified(
90  data.catalog)
91  : data.catalog->GetLastModified();
92  if (!oldest_trunk_catalog_found_)
93  oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
94  if (configuration_.verbose) {
95  const uint64_t rev = data.catalog->revision();
96  LogCvmfs(
98  "Preserving Revision %" PRIu64 " (%s / added @ %s)", rev,
99  StringifyTime(data.catalog->GetLastModified(), true).c_str(),
100  StringifyTime(catalog_info_shim_.GetLastModified(data.catalog), true)
101  .c_str());
102  PrintCatalogTreeEntry(data.tree_level, data.catalog);
103  }
104  if (data.catalog->schema() < 0.99) {
105  LogCvmfs(
107  "Warning: "
108  "legacy catalog does not provide access to nested catalog "
109  "hierarchy.\n"
110  " Some unreferenced objects may remain in the repository.");
111  }
112  }
113 
114  // the hash of the actual catalog needs to be preserved
115  hash_filter_.Fill(data.catalog->hash());
116 
117  // all the objects referenced from this catalog need to be preserved
118  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
119  typename HashVector::const_iterator i = referenced_hashes.begin();
120  const typename HashVector::const_iterator iend = referenced_hashes.end();
121  for (; i != iend; ++i) {
122  hash_filter_.Fill(*i);
123  }
124 }
125 
126 
127 template<class CatalogTraversalT, class HashFilterT>
129  const GarbageCollector<CatalogTraversalT,
130  HashFilterT>::TraversalCallbackDataTN
131  &data // NOLINT(runtime/references)
132 ) {
133  ++condemned_catalogs_;
134  if (data.catalog->IsRoot())
135  ++condemned_trees_;
136 
137  if (configuration_.verbose) {
138  if (data.catalog->IsRoot()) {
139  const uint64_t rev = data.catalog->revision();
140  const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
142  "Sweeping Revision %" PRIu64 " (%s)", rev,
143  StringifyTime(mtime, true).c_str());
144  }
145  PrintCatalogTreeEntry(data.tree_level, data.catalog);
146  }
147 
148  // all the objects referenced from this catalog need to be checked against the
149  // preserved hashes in the hash_filter_ and possibly deleted
150  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
151  typename HashVector::const_iterator i = referenced_hashes.begin();
152  const typename HashVector::const_iterator iend = referenced_hashes.end();
153  for (; i != iend; ++i) {
154  CheckAndSweep(*i);
155  }
156 
157  // the catalog itself is also condemned and needs to be removed
158  CheckAndSweep(data.catalog->hash());
159 
160  float threshold = static_cast<float>(condemned_trees_)
161  / static_cast<float>(unreferenced_trees_);
162  if (threshold > last_reported_status_ + 0.1) {
164  " - %02.0f%% %" PRIu64 " / %" PRIu64
165  " unreferenced revisions removed [%s]",
166  100.0 * threshold, condemned_trees_, unreferenced_trees_,
167  RfcTimestamp().c_str());
168  last_reported_status_ = threshold;
169  }
170 }
171 
172 
173 template<class CatalogTraversalT, class HashFilterT>
175  const shash::Any &hash) {
176  if (!hash_filter_.Contains(hash)) {
177  if (!hash_map_delete_requests_.Contains(hash)) {
178  hash_map_delete_requests_.Fill(hash);
179  Sweep(hash);
180  } else {
181  ++duplicate_delete_requests_;
182  LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
183  hash.ToString().c_str());
184  }
185  }
186 }
187 
188 
189 template<class CatalogTraversalT, class HashFilterT>
191  const shash::Any &hash) {
192  ++condemned_objects_;
193  if (configuration_.extended_stats) {
194  if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
195  int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
196  if (condemned_bytes > 0) {
197  condemned_bytes_ += condemned_bytes;
198  }
199  }
200  }
201 
202  LogDeletion(hash);
203  if (configuration_.dry_run) {
204  return;
205  }
206 
207  configuration_.uploader->RemoveAsync(hash);
208 }
209 
210 
211 template<class CatalogTraversalT, class HashFilterT>
213  const shash::Any &catalog) {
214  assert(catalog.suffix == shash::kSuffixCatalog);
215  return (configuration_.dry_run) ? true
216  : configuration_.reflog->Remove(catalog);
217 }
218 
219 
220 template<class CatalogTraversalT, class HashFilterT>
222  return AnalyzePreservedCatalogTree() && CheckPreservedRevisions()
223  && SweepReflog();
224 }
225 
226 
227 template<class CatalogTraversalT, class HashFilterT>
228 bool GarbageCollector<CatalogTraversalT,
229  HashFilterT>::AnalyzePreservedCatalogTree() {
230  LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
231  RfcTimestamp().c_str());
232  if (configuration_.verbose) {
234  "Preserving data objects in latest revision");
235  }
236 
237  typename CatalogTraversalT::CallbackTN
238  *callback = traversal_.RegisterListener(
239  &GarbageCollector<CatalogTraversalT,
240  HashFilterT>::PreserveDataObjects,
241  this);
242 
243  bool success = traversal_.Traverse();
244  oldest_trunk_catalog_found_ = true;
245  success = success && traversal_.TraverseNamedSnapshots();
246  traversal_.UnregisterListener(callback);
247 
248  return success;
249 }
250 
251 
252 template<class CatalogTraversalT, class HashFilterT>
253 bool GarbageCollector<CatalogTraversalT,
254  HashFilterT>::CheckPreservedRevisions() {
255  const bool keeps_revisions = (preserved_catalog_count() > 0);
256  if (!keeps_revisions && configuration_.verbose) {
258  "This would delete everything! Abort.");
259  }
260 
261  return keeps_revisions;
262 }
263 
264 
265 template<class CatalogTraversalT, class HashFilterT>
267  LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
268  RfcTimestamp().c_str());
269 
270  const ReflogTN *reflog = configuration_.reflog;
271  std::vector<shash::Any> catalogs;
272  if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
273  LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
274  return false;
275  }
276 
277  typename CatalogTraversalT::CallbackTN
278  *callback = traversal_.RegisterListener(
280  this);
281 
282  std::vector<shash::Any> to_sweep;
283  std::vector<shash::Any>::const_iterator i = catalogs.begin();
284  std::vector<shash::Any>::const_iterator iend = catalogs.end();
285  for (; i != iend; ++i) {
286  if (!hash_filter_.Contains(*i)) {
287  to_sweep.push_back(*i);
288  }
289  }
290  unreferenced_trees_ = to_sweep.size();
291  bool success = traversal_.TraverseList(to_sweep,
292  CatalogTraversalT::kDepthFirst);
293  traversal_.UnregisterListener(callback);
294 
295  i = to_sweep.begin();
296  iend = to_sweep.end();
297  for (; i != iend; ++i) {
298  success = success && RemoveCatalogFromReflog(*i);
299  }
300 
301  // TODO(jblomer): turn current counters into perf::Counters
302  if (configuration_.statistics) {
303  perf::Counter *ctr_preserved_catalogs = configuration_.statistics->Register(
304  "gc.n_preserved_catalogs", "number of live catalogs");
305  perf::Counter *ctr_condemned_catalogs = configuration_.statistics->Register(
306  "gc.n_condemned_catalogs", "number of dead catalogs");
307  perf::Counter *ctr_condemned_objects = configuration_.statistics->Register(
308  "gc.n_condemned_objects", "number of deleted objects");
309  perf::Counter *ctr_condemned_bytes = configuration_.statistics->Register(
310  "gc.sz_condemned_bytes", "number of deleted bytes");
312  *ctr_duplicate_delete_requests = configuration_.statistics->Register(
313  "gc.n_duplicate_delete_requests",
314  "number of duplicated delete requests");
315  ctr_preserved_catalogs->Set(preserved_catalog_count());
316  ctr_condemned_catalogs->Set(condemned_catalog_count());
317  ctr_condemned_objects->Set(condemned_objects_count());
318  ctr_condemned_bytes->Set(condemned_bytes_count());
319  ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
320  }
321 
322  configuration_.uploader->WaitForUpload();
323  LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
324  RfcTimestamp().c_str());
325  return success && (configuration_.uploader->GetNumberOfErrors() == 0);
326 }
327 
328 
329 template<class CatalogTraversalT, class HashFilterT>
331  const unsigned int tree_level, const CatalogTN *catalog) const {
332  std::string tree_indent;
333  for (unsigned int i = 0; i < tree_level; ++i) {
334  tree_indent += "\u2502 ";
335  }
336  tree_indent += "\u251C\u2500 ";
337 
338  const std::string hash_string = catalog->hash().ToString();
339  const std::string path = (catalog->mountpoint().IsEmpty())
340  ? "/"
341  : catalog->mountpoint().ToString();
342 
343  LogCvmfs(kLogGc, kLogStdout, "%s%s %s", tree_indent.c_str(),
344  hash_string.c_str(), path.c_str());
345  LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s", hash_string.c_str(),
346  path.c_str());
347 }
348 
349 
350 template<class CatalogTraversalT, class HashFilterT>
352  const shash::Any &hash) const {
353  if (configuration_.verbose) {
354  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
355  hash.ToStringWithSuffix().c_str());
356  }
357 
358  if (configuration_.has_deletion_log()) {
359  const int written = fprintf(configuration_.deleted_objects_logfile, "%s\n",
360  hash.ToStringWithSuffix().c_str());
361  if (written < 0) {
362  LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
363  }
364  }
365 }
366 
367 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:241
std::string ToStringWithSuffix() const
Definition: hash.h:296
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
bool HasSuffix() const
Definition: hash.h:231
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:104
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
Definition: string.cc:145
void Set(const int64_t val)
Definition: statistics.h:33
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
Definition: hash.h:57
const char kSuffixCatalog
Definition: hash.h:54
CatalogTraversalT::CatalogTN CatalogTN
upload::AbstractUploader * uploader
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
std::vector< shash::Any > HashVector
Suffix suffix
Definition: hash.h:123
GarbageCollector(const Configuration &configuration)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545