CernVM-FS  2.9.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector_impl.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6 #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7 
8 #include <algorithm>
9 #include <limits>
10 #include <string>
11 #include <vector>
12 
13 #include "logging.h"
14 #include "util/string.h"
15 
// Out-of-class definition of the Configuration::kFullHistory constant: the
// largest value an unsigned int can hold.
// NOTE(review): presumably the keep_history_depth value meaning "keep every
// revision" -- confirm against the Configuration declaration.
16 template<class CatalogTraversalT, class HashFilterT>
17 const unsigned int GarbageCollector<CatalogTraversalT,
18  HashFilterT>::Configuration::kFullHistory =
19  std::numeric_limits<unsigned int>::max();
20 
// Out-of-class definition of the Configuration::kNoHistory constant (zero).
// NOTE(review): presumably the keep_history_depth value meaning "keep no
// historic revisions" -- confirm against the Configuration declaration.
21 template<class CatalogTraversalT, class HashFilterT>
22 const unsigned int GarbageCollector<CatalogTraversalT,
23  HashFilterT>::Configuration::kNoHistory = 0;
24 
// Out-of-class definition of the Configuration::kNoTimestamp constant: a
// time_t of zero.
// NOTE(review): presumably the keep_history_timestamp value meaning "no
// timestamp threshold" -- confirm against the Configuration declaration.
25 template<class CatalogTraversalT, class HashFilterT>
26 const time_t GarbageCollector<CatalogTraversalT,
27  HashFilterT>::Configuration::kNoTimestamp = 0;
28 
29 
// Constructor taking the garbage collection Configuration.
// NOTE(review): the declarator line carrying the constructor name is absent
// from this listing (numbering jumps from 30 to 32).
//
// Keeps a copy of the configuration, builds the catalog info shim from
// configuration.reflog, creates the catalog traversal from the parameters
// derived by GetTraversalParams() and starts all counters at zero.
30 template <class CatalogTraversalT, class HashFilterT>
32  const Configuration &configuration)
33  : configuration_(configuration)
34  , catalog_info_shim_(configuration.reflog)
35  , traversal_(
36  GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37  configuration))
38  , hash_filter_()
39  , use_reflog_timestamps_(false)
// -1 cast to uint64_t is the largest representable value; it is only ever
// lowered through std::min() while root catalogs are being preserved
40  , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
41  , oldest_trunk_catalog_found_(false)
42  , preserved_catalogs_(0)
43  , unreferenced_trees_(0)
44  , condemned_trees_(0)
45  , condemned_catalogs_(0)
46  , last_reported_status_(0.0)
47  , condemned_objects_(0)
48  , condemned_bytes_(0)
49 {
// NOTE(review): listing line 50 (the only statement of the constructor body)
// is not present in this extract.
51 }
52 
53 
// Switches the collector over to timestamps provided by the catalog info
// shim: the shim is handed to the traversal and use_reflog_timestamps_ is
// raised, which makes the preserve callback read a root catalog's
// modification time from catalog_info_shim_ instead of from the catalog.
// NOTE(review): the declarator line with the function name is absent from
// this listing (numbering jumps from 54 to 56).
54 template <class CatalogTraversalT, class HashFilterT>
56  traversal_.SetCatalogInfoShim(&catalog_info_shim_);
57  use_reflog_timestamps_ = true;
58 }
59 
60 
// Translates a garbage collector configuration (`config`) into the parameter
// structure of the catalog traversal and returns it by value.
// NOTE(review): the declarator lines (listing lines 62-64) are absent from
// this extract.
61 template <class CatalogTraversalT, class HashFilterT>
65 {
66  TraversalParameters params;
// values copied straight from the configuration
67  params.object_fetcher = config.object_fetcher;
68  params.history = config.keep_history_depth;
69  params.timestamp = config.keep_history_timestamp;
// fixed settings, independent of the configuration
70  params.no_repeat_history = true;
71  params.ignore_load_failure = true;
// the traversal is quiet unless the collector runs verbosely
72  params.quiet = !config.verbose;
73  params.num_threads = config.num_threads;
74  return params;
75 }
76 
77 
// Traversal callback for a catalog that must be kept: counts the catalog and
// puts its own hash plus every object hash it references into hash_filter_.
// For root catalogs it additionally tracks the oldest modification time seen
// and, depending on the configuration, prints diagnostics.
//
// data  callback payload; data.catalog and data.tree_level are read here
//
// NOTE(review): the declarator lines (listing lines 79-80) are absent from
// this extract.
78 template <class CatalogTraversalT, class HashFilterT>
81  TraversalCallbackDataTN &data // NOLINT(runtime/references)
82 ) {
83  ++preserved_catalogs_;
84 
85  if (data.catalog->IsRoot()) {
// modification time comes from the shim once reflog timestamps were enabled,
// otherwise from the catalog itself
86  const uint64_t mtime = use_reflog_timestamps_
87  ? catalog_info_shim_.GetLastModified(data.catalog)
88  : data.catalog->GetLastModified();
// the minimum is only updated until oldest_trunk_catalog_found_ is raised
// (done in this file right after traversal_.Traverse() returns)
89  if (!oldest_trunk_catalog_found_)
90  oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
91  if (configuration_.verbose) {
92  const int rev = data.catalog->revision();
// NOTE(review): listing line 93 (start of the logging call) is absent from
// this extract. The call prints both the catalog's own timestamp and the shim
// timestamp, regardless of use_reflog_timestamps_ -- confirm this is intended.
94  "Preserving Revision %d (%s / added @ %s)",
95  rev,
96  StringifyTime(data.catalog->GetLastModified(), true).c_str(),
97  StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
98  true).c_str());
99  PrintCatalogTreeEntry(data.tree_level, data.catalog);
100  }
// root catalogs with a schema below 0.99 only trigger a warning; they are
// still processed below
101  if (data.catalog->schema() < 0.99) {
102  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Warning: "
103  "legacy catalog does not provide access to nested catalog hierarchy.\n"
104  " Some unreferenced objects may remain in the repository.");
105  }
106  }
107 
108  // the hash of the actual catalog needs to be preserved
109  hash_filter_.Fill(data.catalog->hash());
110 
111  // all the objects referenced from this catalog need to be preserved
112  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
113  typename HashVector::const_iterator i = referenced_hashes.begin();
114  const typename HashVector::const_iterator iend = referenced_hashes.end();
115  for (; i != iend; ++i) {
116  hash_filter_.Fill(*i);
117  }
118 }
119 
120 
// Traversal callback for a condemned catalog: counts it, passes every object
// hash it references and finally its own hash to CheckAndSweep(), and prints
// a progress line whenever the share of processed root catalogs has grown by
// more than 0.1 since the last report.
//
// data  callback payload; data.catalog and data.tree_level are read here
//
// NOTE(review): the declarator lines (listing lines 122-123) are absent from
// this extract.
121 template <class CatalogTraversalT, class HashFilterT>
124  TraversalCallbackDataTN &data // NOLINT(runtime/references)
125 ) {
126  ++condemned_catalogs_;
// each root catalog stands for one condemned revision tree
127  if (data.catalog->IsRoot())
128  ++condemned_trees_;
129 
130  if (configuration_.verbose) {
131  if (data.catalog->IsRoot()) {
132  const int rev = data.catalog->revision();
133  const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
134  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweeping Revision %d (%s)",
135  rev, StringifyTime(mtime, true).c_str());
136  }
137  PrintCatalogTreeEntry(data.tree_level, data.catalog);
138  }
139 
140  // all the objects referenced from this catalog need to be checked against
141  // the preserved hashes in the hash_filter_ and possibly deleted
142  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
143  typename HashVector::const_iterator i = referenced_hashes.begin();
144  const typename HashVector::const_iterator iend = referenced_hashes.end();
145  for (; i != iend; ++i) {
146  CheckAndSweep(*i);
147  }
148 
149  // the catalog itself is also condemned and needs to be removed
150  CheckAndSweep(data.catalog->hash());
151 
// fraction of condemned root catalogs processed so far; the divisor
// unreferenced_trees_ is assigned from to_sweep.size() in this file before
// the sweeping traversal is started
152  float threshold =
153  static_cast<float>(condemned_trees_) /
154  static_cast<float>(unreferenced_trees_);
155  if (threshold > last_reported_status_ + 0.1) {
// NOTE(review): listing line 156 (start of the logging call) is absent from
// this extract.
157  " - %02.0f%% %u / %u unreferenced revisions removed [%s]",
158  100.0 * threshold, condemned_trees_, unreferenced_trees_,
159  RfcTimestamp().c_str());
160  last_reported_status_ = threshold;
161  }
162 }
163 
164 
// Sweeps the object identified by `hash` unless hash_filter_ contains it,
// i.e. unless the hash was filled in while catalogs were being preserved.
// NOTE(review): the declarator line (listing line 166) is absent from this
// extract.
165 template <class CatalogTraversalT, class HashFilterT>
167  const shash::Any &hash)
168 {
169  if (!hash_filter_.Contains(hash))
170  Sweep(hash);
171 }
172 
173 
// Accounts for one condemned object, records it through LogDeletion() and,
// unless this is a dry run, asks the uploader to remove it asynchronously.
// The counters and the deletion log are updated in dry-run mode as well.
//
// hash  content hash of the object to be removed
//
// NOTE(review): the declarator line (listing line 175) is absent from this
// extract.
174 template <class CatalogTraversalT, class HashFilterT>
176  const shash::Any &hash) {
177  ++condemned_objects_;
178  if (configuration_.extended_stats) {
// sizes are only looked up for hashes without a suffix or with the partial
// suffix
179  if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
180  int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
// non-positive results are not added to the byte counter
// NOTE(review): presumably they signal a failed size lookup -- confirm
// against the uploader interface
181  if (condemned_bytes > 0) {
182  condemned_bytes_ += condemned_bytes;
183  }
184  }
185  }
186 
187  LogDeletion(hash);
// dry run: stop before anything is actually removed
188  if (configuration_.dry_run) {
189  return;
190  }
191 
192  configuration_.uploader->RemoveAsync(hash);
193 }
194 
195 
// Removes the given catalog hash from the reference log and returns the
// result of reflog->Remove(). In dry-run mode the reflog is left untouched
// and true is returned. The hash must carry the catalog suffix (asserted).
// NOTE(review): the declarator lines (listing lines 197-198) are absent from
// this extract.
196 template <class CatalogTraversalT, class HashFilterT>
199 {
200  assert(catalog.suffix == shash::kSuffixCatalog);
201  return (configuration_.dry_run)
202  ? true
203  : configuration_.reflog->Remove(catalog);
204 }
205 
206 
// Runs the three phases one after the other: analysis of the preserved
// catalog tree, the check that something is preserved at all, and the sweep
// of the reference log. Because of the short-circuiting &&, a failing phase
// prevents the following ones from running. Returns true only if all
// three phases succeed.
// NOTE(review): the declarator line with the function name (listing line 208)
// is absent from this extract.
207 template <class CatalogTraversalT, class HashFilterT>
209  return AnalyzePreservedCatalogTree() &&
210  CheckPreservedRevisions() &&
211  SweepReflog();
212 }
213 
214 
// Marking phase: registers a listener on the traversal, runs the regular
// traversal followed by the traversal of named snapshots, and unregisters
// the listener again. Returns true only if both traversals succeeded.
// NOTE(review): the declarator lines (listing lines 216-217) are absent from
// this extract.
215 template <class CatalogTraversalT, class HashFilterT>
218 {
219  LogCvmfs(kLogGc, kLogStdout, " --> marking unreferenced objects [%s]",
220  RfcTimestamp().c_str());
221  if (configuration_.verbose) {
// NOTE(review): listing line 222 (start of the logging call) is absent from
// this extract.
223  "Preserving data objects in latest revision");
224  }
225 
226  typename CatalogTraversalT::CallbackTN *callback =
227  traversal_.RegisterListener(
// NOTE(review): listing line 228 (the callback argument) is absent from this
// extract; presumably the preserving callback of this class -- confirm.
229  this);
230 
231  bool success = traversal_.Traverse();
// from here on the oldest trunk catalog timestamp is frozen, so the named
// snapshot traversal below no longer changes it
232  oldest_trunk_catalog_found_ = true;
// named snapshots are only traversed if the first traversal succeeded
233  success = success && traversal_.TraverseNamedSnapshots();
234  traversal_.UnregisterListener(callback);
235 
236  return success;
237 }
238 
239 
// Safety check after the marking phase: returns true if at least one catalog
// was preserved, false otherwise. The explanatory message is only emitted in
// verbose mode; the return value does not depend on verbosity.
// NOTE(review): the declarator line (listing line 241) is absent from this
// extract.
240 template <class CatalogTraversalT, class HashFilterT>
242 {
243  const bool keeps_revisions = (preserved_catalog_count() > 0);
244  if (!keeps_revisions && configuration_.verbose) {
// NOTE(review): listing line 245 (start of the logging call) is absent from
// this extract.
246  "This would delete everything! Abort.");
247  }
248 
249  return keeps_revisions;
250 }
251 
252 
// Sweeping phase: lists all catalogs recorded in the reference log, traverses
// those whose hash is not in hash_filter_, removes them from the reference
// log, publishes the counters to the statistics object (if configured) and
// waits for the uploader. Returns false if the reflog cannot be listed, if
// the traversal or a reflog removal failed, or if the uploader reports
// errors.
// NOTE(review): the declarator line (listing line 254) is absent from this
// extract.
253 template <class CatalogTraversalT, class HashFilterT>
255  LogCvmfs(kLogGc, kLogStdout, " --> sweeping unreferenced objects [%s]",
256  RfcTimestamp().c_str());
257 
258  const ReflogTN *reflog = configuration_.reflog;
259  std::vector<shash::Any> catalogs;
// a missing reflog is treated the same as a failed listing
260  if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
261  LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
262  return false;
263  }
264 
265  typename CatalogTraversalT::CallbackTN *callback =
266  traversal_.RegisterListener(
// NOTE(review): listing line 267 (the callback argument) is absent from this
// extract; presumably the sweeping callback of this class -- confirm.
268  this);
269 
// keep only the reflog catalogs that were not marked as preserved
270  std::vector<shash::Any> to_sweep;
271  std::vector<shash::Any>::const_iterator i = catalogs.begin();
272  std::vector<shash::Any>::const_iterator iend = catalogs.end();
273  for (; i != iend; ++i) {
274  if (!hash_filter_.Contains(*i)) {
275  to_sweep.push_back(*i);
276  }
277  }
// must be set before the traversal: the sweeping callback divides by it when
// computing its progress ratio
278  unreferenced_trees_ = to_sweep.size();
279  bool success = traversal_.TraverseList(to_sweep,
280  CatalogTraversalT::kDepthFirst);
281  traversal_.UnregisterListener(callback);
282 
// the && short-circuits: once success is false (failed traversal or failed
// removal) no further catalogs are removed from the reflog
283  i = to_sweep.begin();
284  iend = to_sweep.end();
285  for (; i != iend; ++i) {
286  success = success && RemoveCatalogFromReflog(*i);
287  }
288 
289  // TODO(jblomer): turn current counters into perf::Counters
290  if (configuration_.statistics) {
291  perf::Counter *ctr_preserved_catalogs =
292  configuration_.statistics->Register(
293  "gc.n_preserved_catalogs", "number of live catalogs");
294  perf::Counter *ctr_condemned_catalogs =
295  configuration_.statistics->Register(
296  "gc.n_condemned_catalogs", "number of dead catalogs");
297  perf::Counter *ctr_condemned_objects =
298  configuration_.statistics->Register(
299  "gc.n_condemned_objects", "number of deleted objects");
300  perf::Counter *ctr_condemned_bytes =
301  configuration_.statistics->Register(
302  "gc.sz_condemned_bytes", "number of deleted bytes");
303  ctr_preserved_catalogs->Set(preserved_catalog_count());
304  ctr_condemned_catalogs->Set(condemned_catalog_count());
305  ctr_condemned_objects->Set(condemned_objects_count());
306  ctr_condemned_bytes->Set(condemned_bytes_count());
307  }
308 
// removals were queued with RemoveAsync(); wait before reading the uploader's
// error count
309  configuration_.uploader->WaitForUpload();
310  LogCvmfs(kLogGc, kLogStdout, " --> done garbage collecting [%s]",
311  RfcTimestamp().c_str());
312  return success && (configuration_.uploader->GetNumberOfErrors() == 0);
313 }
314 
315 
// Prints one line of the catalog tree: an indentation prefix built from box
// drawing characters, the catalog hash and the catalog's mount point. The
// same hash/path pair is logged a second time to the debug log without the
// indentation.
//
// tree_level  number of vertical-bar segments put in front of the branch
// catalog     catalog whose hash() and mountpoint() are printed
//
// NOTE(review): the declarator line (listing line 317) is absent from this
// extract.
316 template <class CatalogTraversalT, class HashFilterT>
318  const unsigned int tree_level,
319  const CatalogTN *catalog) const
320 {
321  std::string tree_indent;
// U+2502 is a vertical bar, one per tree level
322  for (unsigned int i = 0; i < tree_level; ++i) {
323  tree_indent += "\u2502 ";
324  }
// U+251C U+2500 form the branch in front of the entry
325  tree_indent += "\u251C\u2500 ";
326 
327  const std::string hash_string = catalog->hash().ToString();
// an empty mount point is shown as "/"
328  const std::string path =
329  (catalog->mountpoint().IsEmpty()) ? "/" : catalog->mountpoint().ToString();
330 
331  LogCvmfs(kLogGc, kLogStdout, "%s%s %s",
332  tree_indent.c_str(),
333  hash_string.c_str(),
334  path.c_str());
335  LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s",
336  hash_string.c_str(), path.c_str());
337 }
338 
339 
// Records the removal of one object: in verbose mode a "Sweep:" line is
// logged, and if the configuration has a deletion log the hash (with suffix)
// is appended to it as one line. A failing write is reported on stderr but
// does not abort anything.
//
// hash  content hash of the object being removed
//
// NOTE(review): the declarator line (listing line 341) is absent from this
// extract.
340 template <class CatalogTraversalT, class HashFilterT>
342  const shash::Any &hash) const {
343  if (configuration_.verbose) {
344  LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
345  hash.ToStringWithSuffix().c_str());
346  }
347 
348  if (configuration_.has_deletion_log()) {
// fprintf() returns a negative value if the write failed
349  const int written = fprintf(configuration_.deleted_objects_logfile,
350  "%s\n", hash.ToStringWithSuffix().c_str());
351  if (written < 0) {
352  LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
353  }
354  }
355 }
356 
357 #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
std::string ToStringWithSuffix() const
Definition: hash.h:300
void LogDeletion(const shash::Any &hash) const
TraversalParameters GetTraversalParams(const Configuration &configuration)
bool HasSuffix() const
Definition: hash.h:235
assert((mem||(size==0))&&"Out Of Memory")
void Sweep(const shash::Any &hash)
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:104
CatalogTraversalT::Parameters TraversalParameters
CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN
void PrintCatalogTreeEntry(const unsigned int tree_level, const CatalogTN *catalog) const
std::string RfcTimestamp()
Definition: string.cc:126
void Set(const int64_t val)
Definition: statistics.h:33
ObjectFetcherTN::ReflogTN ReflogTN
const char kSuffixPartial
Definition: hash.h:55
const char kSuffixCatalog
Definition: hash.h:52
CatalogTraversalT::CatalogTN CatalogTN
upload::AbstractUploader * uploader
bool RemoveCatalogFromReflog(const shash::Any &catalog)
void PreserveDataObjects(const TraversalCallbackDataTN &data)
void SweepDataObjects(const TraversalCallbackDataTN &data)
const Configuration configuration_
void CheckAndSweep(const shash::Any &hash)
std::vector< shash::Any > HashVector
Suffix suffix
Definition: hash.h:124
GarbageCollector(const Configuration &configuration)