| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/garbage_collection/garbage_collector.h |
| Date: | 2025-10-19 02:35:28 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 26 | 29 | 89.7% |
| Branches: | 2 | 4 | 50.0% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | * | ||
| 4 | * The GarbageCollector class is figuring out which data objects (represented by | ||
| 5 | * their content hashes) can be deleted as outdated garbage. | ||
| 6 | * Garbage collection is performed on the granularity of catalog revisions, thus | ||
| 7 | * a complete repository revision is either considered to be outdated or active. | ||
| 8 | * This way, a mountable repository revision stays completely usable (no nested | ||
| 9 | * catalogs or data objects become unavailable). A revision is defined by its | ||
| 10 | * root catalog; the revision numbers of nested catalogs are irrelevant, since | ||
| 11 | * they might be referenced by newer (preserved) repository revisions. | ||
| 12 | * Thus, garbage objects are those that are _not_ referenced by any of the pre- | ||
| 13 | * served root catalogs or their direct subordinate nested catalog tree. | ||
| 14 | * | ||
| 15 | * We use a two-stage approach: | ||
| 16 | * | ||
| 17 | * 1st Stage - Initialization | ||
| 18 | * The GarbageCollector is reading all the catalogs that are meant | ||
| 19 | * to be preserved. It builds up a filter (HashFilterT) containing | ||
| 20 | * all content hashes that are _not_ to be deleted | ||
| 21 | * | ||
| 22 | * 2nd Stage - Sweeping | ||
| 23 | * The initialized HashFilterT is presented with all content | ||
| 24 | * hashes found in condemned catalogs and decides if they are | ||
| 25 | * referenced by the preserved catalog revisions or not. | ||
| 26 | * | ||
| 27 | * The GarbageCollector is templated with CatalogTraversalT mainly for | ||
| 28 | * testability and with HashFilterT as an instance of the Strategy Pattern to | ||
| 29 | * abstract from the actual hash filtering method to be used. | ||
| 30 | */ | ||
| 31 | |||
| 32 | #ifndef CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
| 33 | #define CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
| 34 | |||
| 35 | #include <inttypes.h> | ||
| 36 | |||
| 37 | #include <vector> | ||
| 38 | |||
| 39 | #include "catalog_traversal_parallel.h" | ||
| 40 | #include "garbage_collection/hash_filter.h" | ||
| 41 | #include "statistics.h" | ||
| 42 | #include "upload_facility.h" | ||
| 43 | |||
| 44 | template<class CatalogTraversalT, class HashFilterT> | ||
| 45 | class GarbageCollector { | ||
| 46 | protected: | ||
| 47 | typedef typename CatalogTraversalT::ObjectFetcherTN ObjectFetcherTN; | ||
| 48 | typedef typename ObjectFetcherTN::HistoryTN HistoryTN; | ||
| 49 | typedef typename ObjectFetcherTN::ReflogTN ReflogTN; | ||
| 50 | typedef typename CatalogTraversalT::CatalogTN CatalogTN; | ||
| 51 | typedef typename CatalogTraversalT::CallbackDataTN TraversalCallbackDataTN; | ||
| 52 | typedef typename CatalogTraversalT::Parameters TraversalParameters; | ||
| 53 | typedef std::vector<shash::Any> HashVector; | ||
| 54 | |||
| 55 | public: | ||
| 56 | struct Configuration { | ||
| 57 | static const uint64_t kFullHistory; | ||
| 58 | static const uint64_t kNoHistory; | ||
| 59 | static const time_t kNoTimestamp; | ||
| 60 | static const shash::Any kLatestHistoryDatabase; | ||
| 61 | |||
| 62 | 1176 | Configuration() | |
| 63 | 1176 | : uploader(NULL) | |
| 64 | 1176 | , object_fetcher(NULL) | |
| 65 | 1176 | , reflog(NULL) | |
| 66 | 1176 | , keep_history_depth(kFullHistory) | |
| 67 | 1176 | , keep_history_timestamp(kNoTimestamp) | |
| 68 | 1176 | , dry_run(false) | |
| 69 | 1176 | , verbose(false) | |
| 70 | 1176 | , deleted_objects_logfile(NULL) | |
| 71 | 1176 | , statistics(NULL) | |
| 72 | 1176 | , extended_stats(false) | |
| 73 | 1176 | , num_threads(8) { } | |
| 74 | |||
| 75 | 15204 | bool has_deletion_log() const { return deleted_objects_logfile != NULL; } | |
| 76 | |||
| 77 | upload::AbstractUploader *uploader; | ||
| 78 | ObjectFetcherTN *object_fetcher; | ||
| 79 | ReflogTN *reflog; | ||
| 80 | uint64_t keep_history_depth; | ||
| 81 | time_t keep_history_timestamp; | ||
| 82 | bool dry_run; | ||
| 83 | bool verbose; | ||
| 84 | FILE *deleted_objects_logfile; | ||
| 85 | perf::Statistics *statistics; | ||
| 86 | bool extended_stats; | ||
| 87 | unsigned int num_threads; | ||
| 88 | }; | ||
| 89 | |||
| 90 | public: | ||
| 91 | explicit GarbageCollector(const Configuration &configuration); | ||
| 92 | |||
| 93 | void UseReflogTimestamps(); | ||
| 94 | bool Collect(); | ||
| 95 | |||
| 96 | 3528 | uint64_t preserved_catalog_count() const { return preserved_catalogs_; } | |
| 97 | 798 | uint64_t condemned_catalog_count() const { return condemned_catalogs_; } | |
| 98 | 42 | uint64_t condemned_objects_count() const { return condemned_objects_; } | |
| 99 | ✗ | uint64_t duplicate_delete_requests() const { | |
| 100 | ✗ | return duplicate_delete_requests_; | |
| 101 | } | ||
| 102 | ✗ | uint64_t condemned_bytes_count() const { return condemned_bytes_; } | |
| 103 | 882 | uint64_t oldest_trunk_catalog() const { return oldest_trunk_catalog_; } | |
| 104 | |||
| 105 | protected: | ||
| 106 | TraversalParameters GetTraversalParams(const Configuration &configuration); | ||
| 107 | |||
| 108 | void PreserveDataObjects(const TraversalCallbackDataTN &data); | ||
| 109 | void SweepDataObjects(const TraversalCallbackDataTN &data); | ||
| 110 | |||
| 111 | bool AnalyzePreservedCatalogTree(); | ||
| 112 | bool CheckPreservedRevisions(); | ||
| 113 | bool SweepReflog(); | ||
| 114 | |||
| 115 | void CheckAndSweep(const shash::Any &hash); | ||
| 116 | void Sweep(const shash::Any &hash); | ||
| 117 | bool RemoveCatalogFromReflog(const shash::Any &catalog); | ||
| 118 | |||
| 119 | void PrintCatalogTreeEntry(const unsigned int tree_level, | ||
| 120 | const CatalogTN *catalog) const; | ||
| 121 | void LogDeletion(const shash::Any &hash) const; | ||
| 122 | |||
| 123 | private: | ||
| 124 | class ReflogBasedInfoShim | ||
| 125 | : public swissknife::CatalogTraversalInfoShim<CatalogTN> { | ||
| 126 | public: | ||
| 127 | 1848 | explicit ReflogBasedInfoShim(ReflogTN *reflog) : reflog_(reflog) { | |
| 128 | 1848 | pthread_mutex_init(&reflog_mutex_, NULL); | |
| 129 | 1848 | } | |
| 130 | 924 | virtual ~ReflogBasedInfoShim() { pthread_mutex_destroy(&reflog_mutex_); } | |
| 131 | 378 | virtual uint64_t GetLastModified(const CatalogTN *catalog) { | |
| 132 | uint64_t timestamp; | ||
| 133 | 756 | MutexLockGuard m(&reflog_mutex_); | |
| 134 |
1/2✓ Branch 2 taken 378 times.
✗ Branch 3 not taken.
|
756 | bool retval = reflog_->GetCatalogTimestamp(catalog->hash(), &timestamp); |
| 135 |
1/2✓ Branch 0 taken 378 times.
✗ Branch 1 not taken.
|
1512 | return retval ? timestamp : catalog->GetLastModified(); |
| 136 | 756 | } | |
| 137 | |||
| 138 | private: | ||
| 139 | ReflogTN *reflog_; | ||
| 140 | pthread_mutex_t reflog_mutex_; | ||
| 141 | }; | ||
| 142 | |||
| 143 | const Configuration configuration_; | ||
| 144 | ReflogBasedInfoShim catalog_info_shim_; | ||
| 145 | CatalogTraversalT traversal_; | ||
| 146 | HashFilterT hash_filter_; | ||
| 147 | HashFilterT hash_map_delete_requests_; | ||
| 148 | |||
| 149 | |||
| 150 | bool use_reflog_timestamps_; | ||
| 151 | /** | ||
| 152 | * A marker for the garbage collection grace period, the time span that is | ||
| 153 | * walked back from the current head catalog. There can be named snapshots | ||
| 154 | * older than this snapshot. The oldest_trunk_catalog_ is used as a marker | ||
| 155 | * for when to remove auxiliary files (meta info, history, ...). | ||
| 156 | */ | ||
| 157 | uint64_t oldest_trunk_catalog_; | ||
| 158 | bool oldest_trunk_catalog_found_; | ||
| 159 | uint64_t preserved_catalogs_; | ||
| 160 | /** | ||
| 161 | * Number of catalogs in the reflog that are to be deleted (in fact, some of | ||
| 162 | * them might not exist anymore). | ||
| 163 | */ | ||
| 164 | uint64_t unreferenced_trees_; | ||
| 165 | /** | ||
| 166 | * Number of root catalogs garbage collected, count grows as GC progresses | ||
| 167 | */ | ||
| 168 | uint64_t condemned_trees_; | ||
| 169 | /** | ||
| 170 | * Number of catalogs garbage collected, count grows as GC progresses | ||
| 171 | */ | ||
| 172 | uint64_t condemned_catalogs_; | ||
| 173 | /** | ||
| 174 | * Keeps track of the last status report issued, between 0 and 1 | ||
| 175 | */ | ||
| 176 | float last_reported_status_; | ||
| 177 | |||
| 178 | uint64_t condemned_objects_; | ||
| 179 | uint64_t condemned_bytes_; | ||
| 180 | uint64_t duplicate_delete_requests_; | ||
| 181 | }; | ||
| 182 | |||
| 183 | #include "garbage_collector_impl.h" | ||
| 184 | |||
| 185 | #endif // CVMFS_GARBAGE_COLLECTION_GARBAGE_COLLECTOR_H_ | ||
| 186 |