GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/garbage_collection/garbage_collector.h Lines: 13 14 92.9 %
Date: 2019-02-03 02:48:13 Branches: 2 4 50.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 *
4
 * The GarbageCollector class is figuring out which data objects (represented by
5
 * their content hashes) can be deleted as outdated garbage.
6
 * Garbage collection is performed on the granularity of catalog revisions, thus
7
 * a complete repository revision is either considered to be outdated or active.
8
 * This way, a mountable repository revision stays completely usable (no nested
9
 * catalogs or data objects become unavailable). A revision is defined by its
10
 * root catalog; the revision numbers of nested catalogs are irrelevant, since
11
 * they might be referenced by newer (preserved) repository revisions.
12
 * Thus, garbage objects are those that are _not_ referenced by any of the pre-
13
 * served root catalogs or their direct subordinate nested catalog tree.
14
 *
15
 * We use a two-stage approach:
16
 *
17
 *   1st Stage - Initialization
18
 *               The GarbageCollector is reading all the catalogs that are meant
19
 *               to be preserved. It builds up a filter (HashFilterT) containing
20
 *               all content hashes that are _not_ to be deleted
21
 *
22
 *   2nd Stage - Sweeping
23
 *               The initialized HashFilterT is presented with all content
24
 *               hashes found in condemned catalogs and decides if they are
25
 *               referenced by the preserved catalog revisions or not.
26
 *
27
 * The GarbageCollector is templated with CatalogTraversalT mainly for
28
 * testability and with HashFilterT as an instance of the Strategy Pattern to
29
 * abstract from the actual hash filtering method to be used.
30
 */
31
32
#ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
33
#define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_
34
35
#include <inttypes.h>
36
37
#include <vector>
38
39
#include "catalog_traversal.h"
40
#include "garbage_collection/hash_filter.h"
41
#include "statistics.h"
42
#include "upload_facility.h"
43
44
template<class CatalogTraversalT, class HashFilterT>
45
22
class GarbageCollector {
46
 protected:
47
  typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN;
48
  typedef typename ObjectFetcherTN::HistoryTN         HistoryTN;
49
  typedef typename ObjectFetcherTN::ReflogTN          ReflogTN;
50
  typedef typename CatalogTraversalT::CatalogTN       CatalogTN;
51
  typedef typename CatalogTraversalT::CallbackDataTN  TraversalCallbackDataTN;
52
  typedef typename CatalogTraversalT::Parameters      TraversalParameters;
53
  typedef std::vector<shash::Any>                     HashVector;
54
55
 public:
56
  struct Configuration {
57
    static const unsigned int kFullHistory;
58
    static const unsigned int kNoHistory;
59
    static const time_t       kNoTimestamp;
60
    static const shash::Any   kLatestHistoryDatabase;
61
62
14
    Configuration()
63
      : uploader(NULL)
64
      , object_fetcher(NULL)
65
      , reflog(NULL)
66
      , keep_history_depth(kFullHistory)
67
      , keep_history_timestamp(kNoTimestamp)
68
      , dry_run(false)
69
      , verbose(false)
70
      , deleted_objects_logfile(NULL)
71
      , statistics(NULL)
72
14
      , extended_stats(false) {}
73
74
214
    bool has_deletion_log() const { return deleted_objects_logfile != NULL; }
75
76
    upload::AbstractUploader  *uploader;
77
    ObjectFetcherTN           *object_fetcher;
78
    ReflogTN                  *reflog;
79
    unsigned int               keep_history_depth;
80
    time_t                     keep_history_timestamp;
81
    bool                       dry_run;
82
    bool                       verbose;
83
    FILE                      *deleted_objects_logfile;
84
    perf::Statistics          *statistics;
85
    bool                       extended_stats;
86
  };
87
88
 public:
89
  explicit GarbageCollector(const Configuration &configuration);
90
91
  void UseReflogTimestamps();
92
  bool Collect();
93
94
42
  unsigned int preserved_catalog_count() const { return preserved_catalogs_; }
95
19
  unsigned int condemned_catalog_count() const { return condemned_catalogs_; }
96
1
  unsigned int condemned_objects_count() const { return condemned_objects_;  }
97
  uint64_t condemned_bytes_count() const { return condemned_bytes_;  }
98
21
  uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; }
99
100
 protected:
101
  TraversalParameters GetTraversalParams(const Configuration &configuration);
102
103
  void PreserveDataObjects(const TraversalCallbackDataTN &data);
104
  void SweepDataObjects(const TraversalCallbackDataTN &data);
105
106
  bool AnalyzePreservedCatalogTree();
107
  bool CheckPreservedRevisions();
108
  bool SweepReflog();
109
110
  void CheckAndSweep(const shash::Any &hash);
111
  void Sweep(const shash::Any &hash);
112
  bool RemoveCatalogFromReflog(const shash::Any &catalog);
113
114
  void PrintCatalogTreeEntry(const unsigned int  tree_level,
115
                             const CatalogTN    *catalog) const;
116
  void LogDeletion(const shash::Any &hash) const;
117
118
 private:
119
  class ReflogBasedInfoShim :
120
    public swissknife::CatalogTraversalInfoShim<CatalogTN>
121
  {
122
   public:
123
22
    explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) { }
124
22
    virtual ~ReflogBasedInfoShim() { }
125
9
    virtual uint64_t GetLastModified(const CatalogTN *catalog) {
126
      uint64_t timestamp;
127
9
      bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp);
128
9
      return retval ? timestamp : catalog->GetLastModified();
129
    }
130
131
   private:
132
    ReflogTN *reflog_;
133
  };
134
135
  const Configuration  configuration_;
136
  ReflogBasedInfoShim  catalog_info_shim_;
137
  CatalogTraversalT    traversal_;
138
  HashFilterT          hash_filter_;
139
140
  bool use_reflog_timestamps_;
141
  /**
142
   * A marker for the garbage collection grace period, the time span that is
143
   * walked back from the current head catalog.  There can be named snapshots
144
   * older than this snapshot.  The oldest_trunk_catalog_ is used as a marker
145
   * for when to remove auxiliary files (meta info, history, ...).
146
   */
147
  uint64_t              oldest_trunk_catalog_;
148
  bool                  oldest_trunk_catalog_found_;
149
  unsigned int          preserved_catalogs_;
150
  unsigned int          condemned_catalogs_;
151
152
  unsigned int          condemned_objects_;
153
  uint64_t              condemned_bytes_;
154
};
155
156
#include "garbage_collector_impl.h"
157
158
#endif  // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_