GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/garbage_collection/garbage_collector_impl.h Lines: 94 130 72.3 %
Date: 2019-02-03 02:48:13 Branches: 47 100 47.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
6
#define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_
7
8
#include <algorithm>
9
#include <limits>
10
#include <string>
11
#include <vector>
12
13
#include "logging.h"
14
#include "util/string.h"
15
16
template<class CatalogTraversalT, class HashFilterT>
17
const unsigned int GarbageCollector<CatalogTraversalT,
18
15
                                    HashFilterT>::Configuration::kFullHistory =
19
  std::numeric_limits<unsigned int>::max();
20
21
template<class CatalogTraversalT, class HashFilterT>
22
const unsigned int GarbageCollector<CatalogTraversalT,
23
                                    HashFilterT>::Configuration::kNoHistory = 0;
24
25
template<class CatalogTraversalT, class HashFilterT>
26
const time_t GarbageCollector<CatalogTraversalT,
27
                              HashFilterT>::Configuration::kNoTimestamp = 0;
28
29
30
template <class CatalogTraversalT, class HashFilterT>
31
22
GarbageCollector<CatalogTraversalT, HashFilterT>::GarbageCollector(
32
                                             const Configuration &configuration)
33
  : configuration_(configuration)
34
  , catalog_info_shim_(configuration.reflog)
35
  , traversal_(
36
      GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
37
                                                                configuration))
38
  , hash_filter_()
39
  , use_reflog_timestamps_(false)
40
  , oldest_trunk_catalog_(static_cast<uint64_t>(-1))
41
  , oldest_trunk_catalog_found_(false)
42
  , preserved_catalogs_(0)
43
  , condemned_catalogs_(0)
44
  , condemned_objects_(0)
45
22
  , condemned_bytes_(0)
46
{
47
22
  assert(configuration_.uploader != NULL);
48
22
}
49
50
51
template <class CatalogTraversalT, class HashFilterT>
52
1
void GarbageCollector<CatalogTraversalT, HashFilterT>::UseReflogTimestamps() {
53
1
  traversal_.SetCatalogInfoShim(&catalog_info_shim_);
54
1
  use_reflog_timestamps_ = true;
55
1
}
56
57
58
template <class CatalogTraversalT, class HashFilterT>
59
typename GarbageCollector<CatalogTraversalT, HashFilterT>::TraversalParameters
60
22
  GarbageCollector<CatalogTraversalT, HashFilterT>::GetTraversalParams(
61
  const GarbageCollector<CatalogTraversalT, HashFilterT>::Configuration &config)
62
{
63
22
  TraversalParameters params;
64
22
  params.object_fetcher      = config.object_fetcher;
65
22
  params.history             = config.keep_history_depth;
66
22
  params.timestamp           = config.keep_history_timestamp;
67
22
  params.no_repeat_history   = true;
68
22
  params.ignore_load_failure = true;
69
22
  params.quiet               = !config.verbose;
70
22
  return params;
71
}
72
73
74
template <class CatalogTraversalT, class HashFilterT>
75
233
void GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects(
76
  const GarbageCollector<CatalogTraversalT, HashFilterT>::
77
    TraversalCallbackDataTN &data  // NOLINT(runtime/references)
78
) {
79
233
  ++preserved_catalogs_;
80
81
233
  if (data.catalog->IsRoot()) {
82
    const uint64_t mtime = use_reflog_timestamps_
83
      ? catalog_info_shim_.GetLastModified(data.catalog)
84
65
      : data.catalog->GetLastModified();
85
65
    if (!oldest_trunk_catalog_found_)
86
46
      oldest_trunk_catalog_ = std::min(oldest_trunk_catalog_, mtime);
87
65
    if (configuration_.verbose) {
88
      const int    rev   = data.catalog->revision();
89
      LogCvmfs(kLogGc, kLogStdout | kLogDebug,
90
               "Preserving Revision %d (%s / added @ %s)",
91
               rev,
92
               StringifyTime(data.catalog->GetLastModified(), true).c_str(),
93
               StringifyTime(catalog_info_shim_.GetLastModified(data.catalog),
94
                             true).c_str());
95
      PrintCatalogTreeEntry(data.tree_level, data.catalog);
96
    }
97
  }
98
99
  // the hash of the actual catalog needs to preserved
100
233
  hash_filter_.Fill(data.catalog->hash());
101
102
  // all the objects referenced from this catalog need to be preserved
103
233
  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
104
233
        typename HashVector::const_iterator i    = referenced_hashes.begin();
105
233
  const typename HashVector::const_iterator iend = referenced_hashes.end();
106
1426
  for (; i != iend; ++i) {
107
1193
    hash_filter_.Fill(*i);
108
  }
109
233
}
110
111
112
template <class CatalogTraversalT, class HashFilterT>
113
66
void GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects(
114
  const GarbageCollector<CatalogTraversalT, HashFilterT>::
115
    TraversalCallbackDataTN &data  // NOLINT(runtime/references)
116
) {
117
66
  ++condemned_catalogs_;
118
119
66
  if (configuration_.verbose) {
120
    if (data.catalog->IsRoot()) {
121
      const int    rev   = data.catalog->revision();
122
      const time_t mtime = static_cast<time_t>(data.catalog->GetLastModified());
123
      LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweeping Revision %d (%s)",
124
               rev, StringifyTime(mtime, true).c_str());
125
    }
126
    PrintCatalogTreeEntry(data.tree_level, data.catalog);
127
  }
128
129
  // all the objects referenced from this catalog need to be checked against the
130
  // the preserved hashes in the hash_filter_ and possibly deleted
131
66
  const HashVector &referenced_hashes = data.catalog->GetReferencedObjects();
132
66
        typename HashVector::const_iterator i    = referenced_hashes.begin();
133
66
  const typename HashVector::const_iterator iend = referenced_hashes.end();
134
465
  for (; i != iend; ++i) {
135
399
    CheckAndSweep(*i);
136
  }
137
138
  // the catalog itself is also condemned and needs to be removed
139
66
  CheckAndSweep(data.catalog->hash());
140
66
}
141
142
143
template <class CatalogTraversalT, class HashFilterT>
144
465
void GarbageCollector<CatalogTraversalT, HashFilterT>::CheckAndSweep(
145
  const shash::Any &hash)
146
{
147
465
  if (!hash_filter_.Contains(hash))
148
214
    Sweep(hash);
149
465
}
150
151
152
template <class CatalogTraversalT, class HashFilterT>
153
214
void GarbageCollector<CatalogTraversalT, HashFilterT>::Sweep(
154
                                                       const shash::Any &hash) {
155
214
  ++condemned_objects_;
156
214
  if (configuration_.extended_stats) {
157
    if (!hash.HasSuffix() || hash.suffix == shash::kSuffixPartial) {
158
      int64_t condemned_bytes = configuration_.uploader->GetObjectSize(hash);
159
      if (condemned_bytes > 0) {
160
        condemned_bytes_ += condemned_bytes;
161
      }
162
    }
163
  }
164
165
214
  LogDeletion(hash);
166
214
  if (configuration_.dry_run) {
167
    return;
168
  }
169
170
214
  configuration_.uploader->RemoveAsync(hash);
171
}
172
173
174
template <class CatalogTraversalT, class HashFilterT>
175
27
bool GarbageCollector<CatalogTraversalT, HashFilterT>::
176
  RemoveCatalogFromReflog(const shash::Any &catalog)
177
{
178
27
  assert(catalog.suffix == shash::kSuffixCatalog);
179
  return (configuration_.dry_run)
180
    ? true
181
27
    : configuration_.reflog->Remove(catalog);
182
}
183
184
185
template <class CatalogTraversalT, class HashFilterT>
186
21
bool GarbageCollector<CatalogTraversalT, HashFilterT>::Collect() {
187
  return AnalyzePreservedCatalogTree() &&
188
         CheckPreservedRevisions()     &&
189

21
         SweepReflog();
190
}
191
192
193
template <class CatalogTraversalT, class HashFilterT>
194
21
bool GarbageCollector<CatalogTraversalT, HashFilterT>::
195
  AnalyzePreservedCatalogTree()
196
{
197
21
  LogCvmfs(kLogGc, kLogStdout, "  --> marking unreferenced objects [%s]",
198
           RfcTimestamp().c_str());
199
21
  if (configuration_.verbose) {
200
    LogCvmfs(kLogGc, kLogStdout | kLogDebug,
201
             "Preserving data objects in latest revision");
202
  }
203
204
  typename CatalogTraversalT::CallbackTN *callback =
205
    traversal_.RegisterListener(
206
       &GarbageCollector<CatalogTraversalT, HashFilterT>::PreserveDataObjects,
207
21
        this);
208
209
21
  bool success = traversal_.Traverse();
210
21
  oldest_trunk_catalog_found_ = true;
211

21
  success = success && traversal_.TraverseNamedSnapshots();
212
21
  traversal_.UnregisterListener(callback);
213
214
21
  return success;
215
}
216
217
218
template <class CatalogTraversalT, class HashFilterT>
219
21
bool GarbageCollector<CatalogTraversalT, HashFilterT>::CheckPreservedRevisions()
220
{
221
21
  const bool keeps_revisions = (preserved_catalog_count() > 0);
222

21
  if (!keeps_revisions && configuration_.verbose) {
223
    LogCvmfs(kLogGc, kLogStderr | kLogDebug,
224
             "This would delete everything! Abort.");
225
  }
226
227
21
  return keeps_revisions;
228
}
229
230
231
template <class CatalogTraversalT, class HashFilterT>
232
21
bool GarbageCollector<CatalogTraversalT, HashFilterT>::SweepReflog() {
233
21
  LogCvmfs(kLogGc, kLogStdout, "  --> sweeping unreferenced objects [%s]",
234
           RfcTimestamp().c_str());
235
236
21
  const ReflogTN *reflog = configuration_.reflog;
237
21
  std::vector<shash::Any> catalogs;
238

21
  if (NULL == reflog || !reflog->List(SqlReflog::kRefCatalog, &catalogs)) {
239
    LogCvmfs(kLogGc, kLogStderr, "Failed to list catalog reference log");
240
    return false;
241
  }
242
243
  typename CatalogTraversalT::CallbackTN *callback =
244
    traversal_.RegisterListener(
245
       &GarbageCollector<CatalogTraversalT, HashFilterT>::SweepDataObjects,
246
21
        this);
247
248
21
  bool success = true;
249
  const typename CatalogTraversalT::TraversalType traversal_type =
250
21
                                        CatalogTraversalT::kDepthFirstTraversal;
251
21
        std::vector<shash::Any>::const_iterator i    = catalogs.begin();
252
21
  const std::vector<shash::Any>::const_iterator iend = catalogs.end();
253

113
  for (; i != iend && success; ++i) {
254
92
    if (!hash_filter_.Contains(*i)) {
255

27
      success =
256
        success                                         &&
257
        traversal_.TraverseRevision(*i, traversal_type) &&
258
        RemoveCatalogFromReflog(*i);
259
    }
260
  }
261
262
21
  traversal_.UnregisterListener(callback);
263
264
  // TODO(jblomer): turn current counters into perf::Counters
265
21
  if (configuration_.statistics) {
266
    perf::Counter *ctr_preserved_catalogs =
267
      configuration_.statistics->Register(
268
        "gc.n_preserved_catalogs", "number of live catalogs");
269
    perf::Counter *ctr_condemned_catalogs =
270
      configuration_.statistics->Register(
271
        "gc.n_condemned_catalogs", "number of dead catalogs");
272
    perf::Counter *ctr_condemned_objects =
273
      configuration_.statistics->Register(
274
        "gc.n_condemned_objects", "number of deleted objects");
275
    perf::Counter *ctr_condemned_bytes =
276
      configuration_.statistics->Register(
277
        "gc.sz_condemned_bytes", "number of deleted bytes");
278
    ctr_preserved_catalogs->Set(preserved_catalog_count());
279
    ctr_condemned_catalogs->Set(condemned_catalog_count());
280
    ctr_condemned_objects->Set(condemned_objects_count());
281
    ctr_condemned_bytes->Set(condemned_bytes_count());
282
  }
283
284
21
  configuration_.uploader->WaitForUpload();
285
21
  LogCvmfs(kLogGc, kLogStdout, "  --> done garbage collecting [%s]",
286
           RfcTimestamp().c_str());
287

21
  return success && (configuration_.uploader->GetNumberOfErrors() == 0);
288
}
289
290
291
template <class CatalogTraversalT, class HashFilterT>
292
void GarbageCollector<CatalogTraversalT, HashFilterT>::PrintCatalogTreeEntry(
293
                                              const unsigned int  tree_level,
294
                                              const CatalogTN    *catalog) const
295
{
296
  std::string tree_indent;
297
  for (unsigned int i = 0; i < tree_level; ++i) {
298
    tree_indent += "\u2502  ";
299
  }
300
  tree_indent += "\u251C\u2500 ";
301
302
  const std::string hash_string = catalog->hash().ToString();
303
  const std::string path =
304
    (catalog->mountpoint().IsEmpty()) ? "/" : catalog->mountpoint().ToString();
305
306
  LogCvmfs(kLogGc, kLogStdout, "%s%s %s",
307
    tree_indent.c_str(),
308
    hash_string.c_str(),
309
    path.c_str());
310
  LogCvmfs(kLogGc, kLogDebug, "catalog tree entry: %s %s",
311
           hash_string.c_str(), path.c_str());
312
}
313
314
315
template <class CatalogTraversalT, class HashFilterT>
316
214
void GarbageCollector<CatalogTraversalT, HashFilterT>::LogDeletion(
317
                                                 const shash::Any &hash) const {
318
214
  if (configuration_.verbose) {
319
    LogCvmfs(kLogGc, kLogStdout | kLogDebug, "Sweep: %s",
320
             hash.ToStringWithSuffix().c_str());
321
  }
322
323
214
  if (configuration_.has_deletion_log()) {
324
    const int written = fprintf(configuration_.deleted_objects_logfile,
325
11
                                "%s\n", hash.ToStringWithSuffix().c_str());
326
11
    if (written < 0) {
327
      LogCvmfs(kLogGc, kLogStderr, "failed to write to deleted objects log");
328
    }
329
  }
330
214
}
331
332
#endif  // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_IMPL_H_