CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector_impl.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7 
8 #include <algorithm>
9 #include <limits>
10 #include <string>
11 #include <vector>
12 
13 #include "util/logging.h"
14 #include "util/string.h"
15 
// Out-of-class definition of the static constant Configuration::kFullHistory.
// Its value is the largest number an unsigned int can hold.
// NOTE(review): presumably the "keep every revision" setting for
// keep_history_depth -- confirm against the Configuration declaration.
16 template<class CatalogTraversalT, class HashFilterT>
17 const unsigned int GarbageCollector<CatalogTraversalT,
18  HashFilterT>::Configuration::kFullHistory =
19  std::numeric_limits<unsigned int>::max();
20 
// Out-of-class definition of the static constant Configuration::kNoHistory,
// which is zero.
// NOTE(review): presumably the "keep no historic revision" setting for
// keep_history_depth -- confirm against the Configuration declaration.
21 template<class CatalogTraversalT, class HashFilterT>
22 const unsigned int GarbageCollector<CatalogTraversalT,
23  HashFilterT>::Configuration::kNoHistory = 0;
24 
// Out-of-class definition of the static constant Configuration::kNoTimestamp,
// a time_t with value zero.
// NOTE(review): presumably means "no timestamp threshold" for
// keep_history_timestamp -- confirm against the Configuration declaration.
25 template<class CatalogTraversalT, class HashFilterT>
26 const time_t GarbageCollector<CatalogTraversalT,
27  HashFilterT>::Configuration::kNoTimestamp = 0;
28 
29 
// Constructor taking the collector configuration. The line that names the
// function (listing line 31) and one line of the body (listing line 52) are
// not shown in this listing.
//
// The initializer list
//  - copies `configuration` into configuration_,
//  - hands configuration.reflog to catalog_info_shim_,
//  - builds traversal_ from GetTraversalParams(configuration),
//  - default-constructs hash_filter_ and hash_map_delete_requests_,
//  - starts with reflog timestamps switched off and all counters at zero.
30 template <class CatalogTraversalT, class HashFilterT>
32  const Configuration &configuration)
33  : configuration_(configuration)
34  , catalog_info_shim_(configuration.reflog)
35  , traversal_(
36  GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37  configuration))
38  , hash_filter_()
39  , hash_map_delete_requests_()
40  , use_reflog_timestamps_(false)
// static_cast<uint64_t>(-1) is the largest uint64_t value, so the first
// std::min() against a real catalog timestamp replaces it.
41  , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
42  , oldest_trunk_catalog_found_(false)
43  , preserved_catalogs_(0)
44  , unreferenced_trees_(0)
45  , condemned_trees_(0)
46  , condemned_catalogs_(0)
47  , last_reported_status_(0.0)
48  , condemned_objects_(0)
49  , condemned_bytes_(0)
50  , duplicate_delete_requests_(0)
51 {
53 }
54 
55 
// Switches the collector to reflog-based timestamps: the traversal is told to
// use catalog_info_shim_, and use_reflog_timestamps_ is raised so that code
// checking this flag reads root catalog modification times from the shim
// instead of from the catalog itself.
// The line carrying the function name (listing line 57) is not shown in this
// listing.
56 template <class CatalogTraversalT, class HashFilterT>
58  traversal_.SetCatalogInfoShim(&catalog_info_shim_);
59  use_reflog_timestamps_ = true;
60 }
61 
62 
// Translates the collector configuration (`config`) into the parameter set of
// the catalog traversal and returns it by value. The signature lines (listing
// lines 64-66) are not shown in this listing; the body matches the
// GetTraversalParams entry of the reference list.
63 template <class CatalogTraversalT, class HashFilterT>
67 {
68  TraversalParameters params;
// Copied straight from the configuration.
69  params.object_fetcher = config.object_fetcher;
70  params.history = config.keep_history_depth;
71  params.timestamp = config.keep_history_timestamp;
// Always enabled for garbage collection, independent of the configuration.
72  params.no_repeat_history = true;
73  params.ignore_load_failure = true;
// The traversal stays quiet unless verbose output was requested.
74  params.quiet = !config.verbose;
75  params.num_threads = config.num_threads;
76  return params;
77 }
78 
79 
// Traversal callback for a catalog that is kept. It counts the catalog and
// records the catalog's own hash plus every hash returned by
// GetReferencedObjects() in hash_filter_.
//
// For root catalogs it additionally
//  - tracks the smallest modification time seen, as long as
//    oldest_trunk_catalog_found_ is still false,
//  - prints the revision and the catalog tree entry in verbose mode,
//  - warns when the catalog schema is below 0.99.
//
// The signature lines (listing lines 81-82) are not shown in this listing.
80 template <class CatalogTraversalT, class HashFilterT>
83  TraversalCallbackDataTN &data // NOLINT(runtime/references)
84 ) {
85  ++preserved_catalogs_;
86 
87  if (data.catalog->IsRoot()) {
// The timestamp comes from the shim when reflog timestamps are enabled,
// otherwise from the catalog itself.
88  const uint64_t mtime = use_reflog_timestamps_
89  ? catalog_info_shim_.GetLastModified(data.catalog)
90  : data.catalog->GetLastModified();
91  if (!oldest_trunk_catalog_found_)
92  oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
93  if (configuration_.verbose) {
94  const int rev = data.catalog->revision();
// The opening line of this logging call (listing line 95) is not shown in
// this listing.
// NOTE(review): the "added @" value is always read from catalog_info_shim_,
// also when use_reflog_timestamps_ is false -- confirm this is intended.
96  "Preserving Revision %d (%s / added @ %s)",
97  rev,
98  StringifyTime(data.catalog->GetLastModified(), true).c_str(),
99  StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
100  true).c_str());
101  PrintCatalogTreeEntry(data.tree_level, data.catalog);
102  }
103  if (data.catalog->schema() < 0.99) {
104  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Warning: "
105  "legacy catalog does not provide access to nested catalog hierarchy.\n"
106  " Some unreferenced objects may remain in the repository.");
107  }
108  }
109 
110  // the hash of the actual catalog needs to be preserved
111  hash_filter_.Fill(data.catalog->hash());
112 
113  // all the objects referenced from this catalog need to be preserved
114  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
115  typename HashVector::const_iterator i = referenced_hashes.begin();
116  const typename HashVector::const_iterator iend = referenced_hashes.end();
117  for (; i != iend; ++i) {
118  hash_filter_.Fill(*i);
119  }
120 }
121 
122 
// Traversal callback for a catalog that is being removed. It counts the
// catalog (and one condemned tree per root catalog), prints it in verbose
// mode, passes every referenced hash and finally the catalog's own hash to
// CheckAndSweep(), and emits a progress line whenever the share of removed
// trees has grown by more than 0.1 since the last report.
//
// The signature lines (listing lines 124-125) are not shown in this listing.
123 template <class CatalogTraversalT, class HashFilterT>
126  TraversalCallbackDataTN &data // NOLINT(runtime/references)
127 ) {
128  ++condemned_catalogs_;
129  if (data.catalog->IsRoot())
130  ++condemned_trees_;
131 
132  if (configuration_.verbose) {
133  if (data.catalog->IsRoot()) {
134  const int rev = data.catalog->revision();
135  const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
136  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweeping Revision %d (%s)",
137  rev, StringifyTime(mtime, true).c_str());
138  }
139  PrintCatalogTreeEntry(data.tree_level, data.catalog);
140  }
141 
142  // all the objects referenced from this catalog need to be checked against
143  // the preserved hashes in the hash_filter_ and possibly deleted
144  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
145  typename HashVector::const_iterator i = referenced_hashes.begin();
146  const typename HashVector::const_iterator iend = referenced_hashes.end();
147  for (; i != iend; ++i) {
148  CheckAndSweep(*i);
149  }
150 
151  // the catalog itself is also condemned and needs to be removed
152  CheckAndSweep(data.catalog->hash());
153 
// Progress is the share of condemned trees among all unreferenced trees.
// NOTE(review): unreferenced_trees_ is not checked for zero here; the sweeping
// pass assigns it to_sweep.size() before it starts the traversal.
154  float threshold =
155  static_cast<float>(condemned_trees_) /
156  static_cast<float>(unreferenced_trees_);
157  if (threshold > last_reported_status_ + 0.1) {
// The opening line of this logging call (listing line 158) is not shown in
// this listing.
159  " - %02.0f%% %u / %u unreferenced revisions removed [%s]",
160  100.0 * threshold, condemned_trees_, unreferenced_trees_,
161  RfcTimestamp().c_str());
162  last_reported_status_ = threshold;
163  }
164 }
165 
166 
// Sweeps `hash` unless it is preserved.
//  - A hash contained in hash_filter_ is left alone.
//  - A hash seen here for the first time is recorded in
//    hash_map_delete_requests_ and handed to Sweep().
//  - A hash that was already recorded only increments
//    duplicate_delete_requests_ and is logged at debug level.
// The line carrying the function name (listing line 168) is not shown in this
// listing.
167 template <class CatalogTraversalT, class HashFilterT>
169  const shash::Any &hash)
170 {
171  if (!hash_filter_.Contains(hash)) {
172  if (!hash_map_delete_requests_.Contains(hash)) {
173  hash_map_delete_requests_.Fill(hash);
174  Sweep(hash);
175  } else {
176  ++duplicate_delete_requests_;
177  LogCvmfs(kLogGc, kLogDebug, "Hash %s already marked as to delete",
178  hash.ToString().c_str());
179  }
180  }
181 }
182 
183 
// Processes one condemned object: counts it, optionally accounts its size,
// logs it through LogDeletion() and, unless dry_run is set, queues its removal
// with uploader->RemoveAsync().
// The line carrying the function name (listing line 185) is not shown in this
// listing.
184 template <class CatalogTraversalT, class HashFilterT>
186  const shash::Any &hash) {
187  ++condemned_objects_;
// Size accounting happens only with extended_stats, and only for hashes that
// carry no suffix or the partial suffix. Results of GetObjectSize() that are
// not positive are ignored.
188  if (configuration_.extended_stats) {
189  if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
190  int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
191  if (condemned_bytes > 0) {
192  condemned_bytes_ += condemned_bytes;
193  }
194  }
195  }
196 
// The deletion is logged even in a dry run; only the actual removal is skipped.
197  LogDeletion(hash);
198  if (configuration_.dry_run) {
199  return;
200  }
201 
202  configuration_.uploader->RemoveAsync(hash);
203 }
204 
205 
// Removes `catalog` from the reflog and returns the result of
// reflog->Remove(). In a dry run nothing is removed and true is returned.
// Asserts that the hash carries the catalog suffix.
// The signature lines (listing lines 207-208) are not shown in this listing.
206 template <class CatalogTraversalT, class HashFilterT>
209 {
210  assert(catalog.suffix == shash::kSuffixCatalog);
211  return (configuration_.dry_run)
212  ? true
213  : configuration_.reflog->Remove(catalog);
214 }
215 
216 
// Runs the three phases in order: analyze the preserved catalog tree, check
// the preserved revisions, sweep the reflog. The && chain short-circuits, so a
// failing phase prevents the later ones from running. The result is true only
// if all three phases succeed.
// The line carrying the function name (listing line 218) is not shown in this
// listing.
217 template <class CatalogTraversalT, class HashFilterT>
219  return AnalyzePreservedCatalogTree() &&
220  CheckPreservedRevisions() &&
221  SweepReflog();
222 }
223 
224 
// Marking pass: registers this object as traversal listener, runs Traverse()
// followed by TraverseNamedSnapshots(), unregisters the listener and returns
// whether both traversals succeeded.
// The signature lines (listing lines 226-227), the opening line of the verbose
// log call (232) and the line naming the registered callback (238) are not
// shown in this listing.
225 template <class CatalogTraversalT, class HashFilterT>
228 {
229  LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
230  RfcTimestamp().c_str());
231  if (configuration_.verbose) {
233  "Preserving data objects in latest revision");
234  }
235 
236  typename CatalogTraversalT::CallbackTN *callback =
237  traversal_.RegisterListener(
239  this);
240 
241  bool success = traversal_.Traverse();
// Raised after Traverse() regardless of its result, so catalogs visited by the
// named snapshot traversal no longer lower oldest_trunk_catalog_.
242  oldest_trunk_catalog_found_ = true;
// Short-circuit: named snapshots are not traversed if Traverse() failed.
243  success = success && traversal_.TraverseNamedSnapshots();
244  traversal_.UnregisterListener(callback);
245 
246  return success;
247 }
248 
249 
// Sanity check before sweeping: returns true if at least one catalog was
// preserved (preserved_catalog_count() > 0). When nothing was preserved, a
// message is logged, but only in verbose mode.
// The line carrying the function name (listing line 251) and the opening line
// of the log call (255) are not shown in this listing.
250 template <class CatalogTraversalT, class HashFilterT>
252 {
253  const bool keeps_revisions = (preserved_catalog_count() > 0);
254  if (!keeps_revisions && configuration_.verbose) {
256  "This would delete everything! Abort.");
257  }
258 
259  return keeps_revisions;
260 }
261 
262 
// Sweeping pass.
//  1. Lists all catalog references from the reflog; a NULL reflog or a failed
//     List() call is reported on stderr and returns false.
//  2. Selects every listed catalog whose hash is not in hash_filter_ and
//     traverses these catalogs depth-first with this object registered as
//     listener.
//  3. Removes the selected catalogs from the reflog.
//  4. Publishes the counters to configuration_.statistics if it is set.
//  5. Waits for the uploader and returns true only if everything above
//     succeeded and the uploader reports zero errors.
// The line carrying the function name (listing line 264) and the line naming
// the registered callback (277) are not shown in this listing.
263 template <class CatalogTraversalT, class HashFilterT>
265  LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
266  RfcTimestamp().c_str());
267 
268  const ReflogTN *reflog = configuration_.reflog;
269  std::vector<shash::Any> catalogs;
270  if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
271  LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
272  return false;
273  }
274 
275  typename CatalogTraversalT::CallbackTN *callback =
276  traversal_.RegisterListener(
278  this);
279 
// Only catalogs that the marking pass did not put into hash_filter_ are swept.
280  std::vector<shash::Any> to_sweep;
281  std::vector<shash::Any>::const_iterator i = catalogs.begin();
282  std::vector<shash::Any>::const_iterator iend = catalogs.end();
283  for (; i != iend; ++i) {
284  if (!hash_filter_.Contains(*i)) {
285  to_sweep.push_back(*i);
286  }
287  }
// Denominator of the progress ratio; it has to be set before the traversal.
288  unreferenced_trees_ = to_sweep.size();
289  bool success = traversal_.TraverseList(to_sweep,
290  CatalogTraversalT::kDepthFirst);
291  traversal_.UnregisterListener(callback);
292 
// `success && ...` short-circuits: once success is false (failed traversal or
// a failed removal), no further catalog is removed from the reflog.
293  i = to_sweep.begin();
294  iend = to_sweep.end();
295  for (; i != iend; ++i) {
296  success = success && RemoveCatalogFromReflog(*i);
297  }
298 
299  // TODO(jblomer): turn current counters into perf::Counters
300  if (configuration_.statistics) {
301  perf::Counter *ctr_preserved_catalogs =
302  configuration_.statistics->Register(
303  "gc.n_preserved_catalogs", "number of live catalogs");
304  perf::Counter *ctr_condemned_catalogs =
305  configuration_.statistics->Register(
306  "gc.n_condemned_catalogs", "number of dead catalogs");
307  perf::Counter *ctr_condemned_objects =
308  configuration_.statistics->Register(
309  "gc.n_condemned_objects", "number of deleted objects");
310  perf::Counter *ctr_condemned_bytes =
311  configuration_.statistics->Register(
312  "gc.sz_condemned_bytes", "number of deleted bytes");
313  perf::Counter *ctr_duplicate_delete_requests =
314  configuration_.statistics->Register(
315  "gc.n_duplicate_delete_requests", "number of duplicated delete requests");
316  ctr_preserved_catalogs->Set(preserved_catalog_count());
317  ctr_condemned_catalogs->Set(condemned_catalog_count());
318  ctr_condemned_objects->Set(condemned_objects_count());
319  ctr_condemned_bytes->Set(condemned_bytes_count());
320  ctr_duplicate_delete_requests->Set(duplicate_delete_requests());
321  }
322 
// NOTE(review): reflog entries are removed above, before WaitForUpload() is
// called here -- confirm this ordering is intended.
323  configuration_.uploader->WaitForUpload();
324  LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
325  RfcTimestamp().c_str());
326  return success && (configuration_.uploader->GetNumberOfErrors() == 0);
327 }
328 
329 
// Prints one line of the catalog tree for `catalog`. The indentation consists
// of one "\u2502 " per tree level followed by "\u251C\u2500 "; the line then
// shows the catalog hash and its mountpoint ("/" when the mountpoint is
// empty). The same hash and path are written a second time to the debug log.
// The line carrying the function name (listing line 331) is not shown in this
// listing.
330 template <class CatalogTraversalT, class HashFilterT>
332  const unsigned int tree_level,
333  const CatalogTN *catalog) const
334 {
335  std::string tree_indent;
336  for (unsigned int i = 0; i < tree_level; ++i) {
337  tree_indent += "\u2502 ";
338  }
339  tree_indent += "\u251C\u2500 ";
340 
341  const std::string hash_string = catalog->hash().ToString();
342  const std::string path =
343  (catalog->mountpoint().IsEmpty()) ? "/" : catalog->mountpoint().ToString();
344 
345  LogCvmfs(kLogGc, kLogStdout, "%s%s %s",
346  tree_indent.c_str(),
347  hash_string.c_str(),
348  path.c_str());
349  LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s",
350  hash_string.c_str(), path.c_str());
351 }
352 
353 
// Reports the deletion of `hash`.
//  - In verbose mode the hash (with suffix) goes to stdout and the debug log.
//  - If the configuration has a deletion log, the hash (with suffix) is
//    written to deleted_objects_logfile as one line; a negative fprintf()
//    result is reported on stderr and otherwise ignored.
// The line carrying the function name (listing line 355) is not shown in this
// listing.
354 template <class CatalogTraversalT, class HashFilterT>
356  const shash::Any &hash) const {
357  if (configuration_.verbose) {
358  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
359  hash.ToStringWithSuffix().c_str());
360  }
361 
362  if (configuration_.has_deletion_log()) {
363  const int written = fprintf(configuration_.deleted_objects_logfile,
364  "%s\n", hash.ToStringWithSuffix().c_str());
365  if (written < 0) {
366  LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
367  }
368  }
369 }
370 
371 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
#define LogCvmfs(source, mask,...)
Definition: logging.h:25
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:249
std::string ToStringWithSuffix() const
Definition: hash.h:304
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
bool HasSuffix() const
Definition: hash.h:239
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:105
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
Definition: string.cc:127
void Set(const int64_t val)
Definition: statistics.h:33
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
Definition: hash.h:57
const char kSuffixCatalog
Definition: hash.h:54
CatalogTraversalT::CatalogTN CatalogTN
upload::AbstractUploader * uploader
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
std::vector< shash::Any > HashVector
Suffix suffix
Definition: hash.h:126
GarbageCollector(const Configuration &configuration)