1 |
|
|
/** |
2 |
|
|
* This file is part of the CernVM File System. |
3 |
|
|
* |
4 |
|
|
* The GarbageCollector class is figuring out which data objects (represented by |
5 |
|
|
* their content hashes) can be deleted as outdated garbage. |
6 |
|
|
* Garbage collection is performed on the granularity of catalog revisions, thus |
7 |
|
|
* a complete repository revision is either considered to be outdated or active. |
8 |
|
|
* This way, a mountable repository revision stays completely usable (no nested |
9 |
|
|
* catalogs or data objects become unavailable). A revision is defined by its |
10 |
|
|
* root catalog; the revision numbers of nested catalogs are irrelevant, since |
11 |
|
|
* they might be referenced by newer (preserved) repository revisions. |
12 |
|
|
* Thus, garbage objects are those that are _not_ referenced by any of the pre- |
13 |
|
|
* served root catalogs or their direct subordinate nested catalog tree. |
14 |
|
|
* |
15 |
|
|
* We use a two-stage approach: |
16 |
|
|
* |
17 |
|
|
* 1st Stage - Initialization |
18 |
|
|
* The GarbageCollector is reading all the catalogs that are meant |
19 |
|
|
* to be preserved. It builds up a filter (HashFilterT) containing |
20 |
|
|
* all content hashes that are _not_ to be deleted |
21 |
|
|
* |
22 |
|
|
* 2nd Stage - Sweeping |
23 |
|
|
* The initialized HashFilterT is presented with all content |
24 |
|
|
* hashes found in condemned catalogs and decides if they are |
25 |
|
|
* referenced by the preserved catalog revisions or not. |
26 |
|
|
* |
27 |
|
|
* The GarbageCollector is templated with CatalogTraversalT mainly for |
28 |
|
|
* testability and with HashFilterT as an instance of the Strategy Pattern to |
29 |
|
|
* abstract from the actual hash filtering method to be used. |
30 |
|
|
*/ |
31 |
|
|
|
32 |
|
|
#ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ |
33 |
|
|
#define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ |
34 |
|
|
|
35 |
|
|
#include <inttypes.h> |
36 |
|
|
|
37 |
|
|
#include <vector> |
38 |
|
|
|
39 |
|
|
#include "catalog_traversal.h" |
40 |
|
|
#include "garbage_collection/hash_filter.h" |
41 |
|
|
#include "statistics.h" |
42 |
|
|
#include "upload_facility.h" |
43 |
|
|
|
44 |
|
|
template<class CatalogTraversalT, class HashFilterT> |
45 |
|
22 |
class GarbageCollector { |
46 |
|
|
protected: |
47 |
|
|
typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN; |
48 |
|
|
typedef typename ObjectFetcherTN::HistoryTN HistoryTN; |
49 |
|
|
typedef typename ObjectFetcherTN::ReflogTN ReflogTN; |
50 |
|
|
typedef typename CatalogTraversalT::CatalogTN CatalogTN; |
51 |
|
|
typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN; |
52 |
|
|
typedef typename CatalogTraversalT::Parameters TraversalParameters; |
53 |
|
|
typedef std::vector<shash::Any> HashVector; |
54 |
|
|
|
55 |
|
|
public: |
56 |
|
|
struct Configuration { |
57 |
|
|
static const unsigned int kFullHistory; |
58 |
|
|
static const unsigned int kNoHistory; |
59 |
|
|
static const time_t kNoTimestamp; |
60 |
|
|
static const shash::Any kLatestHistoryDatabase; |
61 |
|
|
|
62 |
|
14 |
Configuration() |
63 |
|
|
: uploader(NULL) |
64 |
|
|
, object_fetcher(NULL) |
65 |
|
|
, reflog(NULL) |
66 |
|
|
, keep_history_depth(kFullHistory) |
67 |
|
|
, keep_history_timestamp(kNoTimestamp) |
68 |
|
|
, dry_run(false) |
69 |
|
|
, verbose(false) |
70 |
|
|
, deleted_objects_logfile(NULL) |
71 |
|
|
, statistics(NULL) |
72 |
|
14 |
, extended_stats(false) {} |
73 |
|
|
|
74 |
|
214 |
bool has_deletion_log() const { return deleted_objects_logfile != NULL; } |
75 |
|
|
|
76 |
|
|
upload::AbstractUploader *uploader; |
77 |
|
|
ObjectFetcherTN *object_fetcher; |
78 |
|
|
ReflogTN *reflog; |
79 |
|
|
unsigned int keep_history_depth; |
80 |
|
|
time_t keep_history_timestamp; |
81 |
|
|
bool dry_run; |
82 |
|
|
bool verbose; |
83 |
|
|
FILE *deleted_objects_logfile; |
84 |
|
|
perf::Statistics *statistics; |
85 |
|
|
bool extended_stats; |
86 |
|
|
}; |
87 |
|
|
|
88 |
|
|
public: |
89 |
|
|
explicit GarbageCollector(const Configuration &configuration); |
90 |
|
|
|
91 |
|
|
void UseReflogTimestamps(); |
92 |
|
|
bool Collect(); |
93 |
|
|
|
94 |
|
42 |
unsigned int preserved_catalog_count() const { return preserved_catalogs_; } |
95 |
|
19 |
unsigned int condemned_catalog_count() const { return condemned_catalogs_; } |
96 |
|
1 |
unsigned int condemned_objects_count() const { return condemned_objects_; } |
97 |
|
|
uint64_t condemned_bytes_count() const { return condemned_bytes_; } |
98 |
|
21 |
uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; } |
99 |
|
|
|
100 |
|
|
protected: |
101 |
|
|
TraversalParameters GetTraversalParams(const Configuration &configuration); |
102 |
|
|
|
103 |
|
|
void PreserveDataObjects(const TraversalCallbackDataTN &data); |
104 |
|
|
void SweepDataObjects(const TraversalCallbackDataTN &data); |
105 |
|
|
|
106 |
|
|
bool AnalyzePreservedCatalogTree(); |
107 |
|
|
bool CheckPreservedRevisions(); |
108 |
|
|
bool SweepReflog(); |
109 |
|
|
|
110 |
|
|
void CheckAndSweep(const shash::Any &hash); |
111 |
|
|
void Sweep(const shash::Any &hash); |
112 |
|
|
bool RemoveCatalogFromReflog(const shash::Any &catalog); |
113 |
|
|
|
114 |
|
|
void PrintCatalogTreeEntry(const unsigned int tree_level, |
115 |
|
|
const CatalogTN *catalog) const; |
116 |
|
|
void LogDeletion(const shash::Any &hash) const; |
117 |
|
|
|
118 |
|
|
private: |
119 |
|
|
class ReflogBasedInfoShim : |
120 |
|
|
public swissknife::CatalogTraversalInfoShim<CatalogTN> |
121 |
|
|
{ |
122 |
|
|
public: |
123 |
|
22 |
explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) { } |
124 |
✗✓ |
22 |
virtual ~ReflogBasedInfoShim() { } |
125 |
|
9 |
virtual uint64_t GetLastModified(const CatalogTN *catalog) { |
126 |
|
|
uint64_t timestamp; |
127 |
|
9 |
bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), ×tamp); |
128 |
✓✗ |
9 |
return retval ? timestamp : catalog->GetLastModified(); |
129 |
|
|
} |
130 |
|
|
|
131 |
|
|
private: |
132 |
|
|
ReflogTN *reflog_; |
133 |
|
|
}; |
134 |
|
|
|
135 |
|
|
const Configuration configuration_; |
136 |
|
|
ReflogBasedInfoShim catalog_info_shim_; |
137 |
|
|
CatalogTraversalT traversal_; |
138 |
|
|
HashFilterT hash_filter_; |
139 |
|
|
|
140 |
|
|
bool use_reflog_timestamps_; |
141 |
|
|
/** |
142 |
|
|
* A marker for the garbage collection grace period, the time span that is |
143 |
|
|
* walked back from the current head catalog. There can be named snapshots |
144 |
|
|
* older than this snapshot. The oldest_trunk_catalog_ is used as a marker |
145 |
|
|
* for when to remove auxiliary files (meta info, history, ...). |
146 |
|
|
*/ |
147 |
|
|
uint64_t oldest_trunk_catalog_; |
148 |
|
|
bool oldest_trunk_catalog_found_; |
149 |
|
|
unsigned int preserved_catalogs_; |
150 |
|
|
unsigned int condemned_catalogs_; |
151 |
|
|
|
152 |
|
|
unsigned int condemned_objects_; |
153 |
|
|
uint64_t condemned_bytes_; |
154 |
|
|
}; |
155 |
|
|
|
156 |
|
|
#include "garbage_collector_impl.h" |
157 |
|
|
|
158 |
|
|
#endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ |