CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector_impl.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7 
9 
10 #include <algorithm>
11 #include <limits>
12 #include <string>
13 #include <vector>
14 
15 #include "util/logging.h"
16 #include "util/string.h"
17 
18 template<class CatalogTraversalT, class HashFilterT>
19 const uint64_t GarbageCollector<CatalogTraversalT,
20  HashFilterT>::Configuration::kFullHistory =
21  std::numeric_limits<uint64_t>::max();
22 
23 template<class CatalogTraversalT, class HashFilterT>
24 const uint64_t GarbageCollector<CatalogTraversalT,
25  HashFilterT>::Configuration::kNoHistory = 0;
26 
27 template<class CatalogTraversalT, class HashFilterT>
28 const time_t GarbageCollector<CatalogTraversalT,
29  HashFilterT>::Configuration::kNoTimestamp = 0;
30 
31 
32 template <class CatalogTraversalT, class HashFilterT>
34  const Configuration &configuration)
35  : configuration_(configuration)
36  , catalog_info_shim_(configuration.reflog)
37  , traversal_(
38  GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
39  configuration))
40  , hash_filter_()
41  , hash_map_delete_requests_()
42  , use_reflog_timestamps_(false)
43  , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
44  , oldest_trunk_catalog_found_(false)
45  , preserved_catalogs_(0)
46  , unreferenced_trees_(0)
47  , condemned_trees_(0)
48  , condemned_catalogs_(0)
49  , last_reported_status_(0.0)
50  , condemned_objects_(0)
51  , condemned_bytes_(0)
52  , duplicate_delete_requests_(0)
53 {
55 }
56 
57 
58 template <class CatalogTraversalT, class HashFilterT>
60  traversal_.SetCatalogInfoShim(&catalog_info_shim_);
61  use_reflog_timestamps_ = true;
62 }
63 
64 
65 template <class CatalogTraversalT, class HashFilterT>
69 {
70  TraversalParameters params;
71  params.object_fetcher = config.object_fetcher;
72  params.history = config.keep_history_depth;
73  params.timestamp = config.keep_history_timestamp;
74  params.no_repeat_history = true;
75  params.ignore_load_failure = true;
76  params.quiet = !config.verbose;
77  params.num_threads = config.num_threads;
78  return params;
79 }
80 
81 
82 template <class CatalogTraversalT, class HashFilterT>
85  TraversalCallbackDataTN &data // NOLINT(runtime/references)
86 ) {
87  ++preserved_catalogs_;
88 
89  if (data.catalog->IsRoot()) {
90  const uint64_t mtime = use_reflog_timestamps_
91  ? catalog_info_shim_.GetLastModified(data.catalog)
92  : data.catalog->GetLastModified();
93  if (!oldest_trunk_catalog_found_)
94  oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
95  if (configuration_.verbose) {
96  const uint64_t rev = data.catalog->revision();
98  "Preserving Revision %" PRIu64 " (%s / added @ %s)",
99  rev,
100  StringifyTime(data.catalog->GetLastModified(), true).c_str(),
101  StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
102  true).c_str());
103  PrintCatalogTreeEntry(data.tree_level, data.catalog);
104  }
105  if (data.catalog->schema() < 0.99) {
106  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Warning: "
107  "legacy catalog does not provide access to nested catalog hierarchy.\n"
108  " Some unreferenced objects may remain in the repository.");
109  }
110  }
111 
112  // the hash of the actual catalog needs to be preserved
113  hash_filter_.Fill(data.catalog->hash());
114 
115  // all the objects referenced from this catalog need to be preserved
116  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
117  typename HashVector::const_iterator i = referenced_hashes.begin();
118  const typename HashVector::const_iterator iend = referenced_hashes.end();
119  for (; i != iend; ++i) {
120  hash_filter_.Fill(*i);
121  }
122 }
123 
124 
125 template <class CatalogTraversalT, class HashFilterT>
128  TraversalCallbackDataTN &data // NOLINT(runtime/references)
129 ) {
130  ++condemned_catalogs_;
131  if (data.catalog->IsRoot())
132  ++condemned_trees_;
133 
134  if (configuration_.verbose) {
135  if (data.catalog->IsRoot()) {
136  const uint64_t rev = data.catalog->revision();
137  const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
139  "Sweeping Revision %" PRIu64 " (%s)",
140  rev, StringifyTime(mtime, true).c_str());
141  }
142  PrintCatalogTreeEntry(data.tree_level, data.catalog);
143  }
144 
145  // all the objects referenced from this catalog need to be checked against the
146  // preserved hashes in the hash_filter_ and possibly deleted
147  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
148  typename HashVector::const_iterator i = referenced_hashes.begin();
149  const typename HashVector::const_iterator iend = referenced_hashes.end();
150  for (; i != iend; ++i) {
151  CheckAndSweep(*i);
152  }
153 
154  // the catalog itself is also condemned and needs to be removed
155  CheckAndSweep(data.catalog->hash());
156 
157  float threshold =
158  static_cast<float>(condemned_trees_) /
159  static_cast<float>(unreferenced_trees_);
160  if (threshold > last_reported_status_ + 0.1) {
162  " - %02.0f%% %" PRIu64 " / %" PRIu64
163  " unreferenced revisions removed [%s]",
164  100.0 * threshold, condemned_trees_, unreferenced_trees_,
165  RfcTimestamp().c_str());
166  last_reported_status_ = threshold;
167  }
168 }
169 
170 
171 template <class CatalogTraversalT, class HashFilterT>
173  const shash::Any &hash)
174 {
175  if (!hash_filter_.Contains(hash)) {
176  if (!hash_map_delete_requests_.Contains(hash)) {
177  hash_map_delete_requests_.Fill(hash);
178  Sweep(hash);
179  } else {
180  ++duplicate_delete_requests_;
181  LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
182  hash.ToString().c_str());
183  }
184  }
185 }
186 
187 
188 template <class CatalogTraversalT, class HashFilterT>
190  const shash::Any &hash) {
191  ++condemned_objects_;
192  if (configuration_.extended_stats) {
193  if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
194  int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
195  if (condemned_bytes > 0) {
196  condemned_bytes_ += condemned_bytes;
197  }
198  }
199  }
200 
201  LogDeletion(hash);
202  if (configuration_.dry_run) {
203  return;
204  }
205 
206  configuration_.uploader->RemoveAsync(hash);
207 }
208 
209 
210 template <class CatalogTraversalT, class HashFilterT>
213 {
214  assert(catalog.suffix == shash::kSuffixCatalog);
215  return (configuration_.dry_run)
216  ? true
217  : configuration_.reflog->Remove(catalog);
218 }
219 
220 
221 template <class CatalogTraversalT, class HashFilterT>
223  return AnalyzePreservedCatalogTree() &&
224  CheckPreservedRevisions() &&
225  SweepReflog();
226 }
227 
228 
229 template <class CatalogTraversalT, class HashFilterT>
232 {
233  LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
234  RfcTimestamp().c_str());
235  if (configuration_.verbose) {
237  "Preserving data objects in latest revision");
238  }
239 
240  typename CatalogTraversalT::CallbackTN *callback =
241  traversal_.RegisterListener(
243  this);
244 
245  bool success = traversal_.Traverse();
246  oldest_trunk_catalog_found_ = true;
247  success = success && traversal_.TraverseNamedSnapshots();
248  traversal_.UnregisterListener(callback);
249 
250  return success;
251 }
252 
253 
254 template <class CatalogTraversalT, class HashFilterT>
256 {
257  const bool keeps_revisions = (preserved_catalog_count() > 0);
258  if (!keeps_revisions && configuration_.verbose) {
260  "This would delete everything! Abort.");
261  }
262 
263  return keeps_revisions;
264 }
265 
266 
267 template <class CatalogTraversalT, class HashFilterT>
269  LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
270  RfcTimestamp().c_str());
271 
272  const ReflogTN *reflog = configuration_.reflog;
273  std::vector<shash::Any> catalogs;
274  if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
275  LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
276  return false;
277  }
278 
279  typename CatalogTraversalT::CallbackTN *callback =
280  traversal_.RegisterListener(
282  this);
283 
284  std::vector<shash::Any> to_sweep;
285  std::vector<shash::Any>::const_iterator i = catalogs.begin();
286  std::vector<shash::Any>::const_iterator iend = catalogs.end();
287  for (; i != iend; ++i) {
288  if (!hash_filter_.Contains(*i)) {
289  to_sweep.push_back(*i);
290  }
291  }
292  unreferenced_trees_ = to_sweep.size();
293  bool success = traversal_.TraverseList(to_sweep,
294  CatalogTraversalT::kDepthFirst);
295  traversal_.UnregisterListener(callback);
296 
297  i = to_sweep.begin();
298  iend = to_sweep.end();
299  for (; i != iend; ++i) {
300  success = success && RemoveCatalogFromReflog(*i);
301  }
302 
303  // TODO(jblomer): turn current counters into perf::Counters
304  if (configuration_.statistics) {
305  perf::Counter *ctr_preserved_catalogs =
306  configuration_.statistics->Register(
307  "gc.n_preserved_catalogs", "number of live catalogs");
308  perf::Counter *ctr_condemned_catalogs =
309  configuration_.statistics->Register(
310  "gc.n_condemned_catalogs", "number of dead catalogs");
311  perf::Counter *ctr_condemned_objects =
312  configuration_.statistics->Register(
313  "gc.n_condemned_objects", "number of deleted objects");
314  perf::Counter *ctr_condemned_bytes =
315  configuration_.statistics->Register(
316  "gc.sz_condemned_bytes", "number of deleted bytes");
317  perf::Counter *ctr_duplicate_delete_requests =
318  configuration_.statistics->Register(
319  "gc.n_duplicate_delete_requests", "number of duplicated delete requests");
320  ctr_preserved_catalogs->Set(preserved_catalog_count());
321  ctr_condemned_catalogs->Set(condemned_catalog_count());
322  ctr_condemned_objects->Set(condemned_objects_count());
323  ctr_condemned_bytes->Set(condemned_bytes_count());
324  ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
325  }
326 
327  configuration_.uploader->WaitForUpload();
328  LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
329  RfcTimestamp().c_str());
330  return success && (configuration_.uploader->GetNumberOfErrors() == 0);
331 }
332 
333 
334 template <class CatalogTraversalT, class HashFilterT>
336  const unsigned int tree_level,
337  const CatalogTN *catalog) const
338 {
339  std::string tree_indent;
340  for (unsigned int i = 0; i < tree_level; ++i) {
341  tree_indent += "\u2502 ";
342  }
343  tree_indent += "\u251C\u2500 ";
344 
345  const std::string hash_string = catalog->hash().ToString();
346  const std::string path =
347  (catalog->mountpoint().IsEmpty()) ? "/" : catalog->mountpoint().ToString();
348 
349  LogCvmfs(kLogGc, kLogStdout, "%s%s %s",
350  tree_indent.c_str(),
351  hash_string.c_str(),
352  path.c_str());
353  LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s",
354  hash_string.c_str(), path.c_str());
355 }
356 
357 
358 template <class CatalogTraversalT, class HashFilterT>
360  const shash::Any &hash) const {
361  if (configuration_.verbose) {
362  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
363  hash.ToStringWithSuffix().c_str());
364  }
365 
366  if (configuration_.has_deletion_log()) {
367  const int written = fprintf(configuration_.deleted_objects_logfile,
368  "%s\n", hash.ToStringWithSuffix().c_str());
369  if (written < 0) {
370  LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
371  }
372  }
373 }
374 
375 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:249
std::string ToStringWithSuffix() const
Definition: hash.h:304
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
bool HasSuffix() const
Definition: hash.h:239
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:105
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
Definition: string.cc:127
void Set(const int64_t val)
Definition: statistics.h:33
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
Definition: hash.h:57
const char kSuffixCatalog
Definition: hash.h:54
CatalogTraversalT::CatalogTN CatalogTN
upload::AbstractUploader * uploader
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
std::vector< shash::Any > HashVector
Suffix suffix
Definition: hash.h:126
GarbageCollector(const Configuration &configuration)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:528