GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_gc.cc
Date: 2026-06-28 02:36:10
Exec Total Coverage
Lines: 0 168 0.0%
Branches: 0 102 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This command processes a repository's catalog structure to detect and remove
5 * outdated and/or unneeded data objects.
6 */
7
8
9 #include "swissknife_gc.h"
10
11 #include <string>
12
13 #include "garbage_collection/garbage_collector.h"
14 #include "path_filters/inclusion_spec.h"
15 #include "garbage_collection/gc_aux.h"
16 #include "garbage_collection/hash_filter.h"
17 #include "manifest.h"
18 #include "reflog.h"
19 #include "statistics_database.h"
20 #include "upload_facility.h"
21 #include "util/posix.h"
22 #include "util/string.h"
23
24 namespace swissknife {
25
26 typedef HttpObjectFetcher<> ObjectFetcher;
27 typedef CatalogTraversalParallel<ObjectFetcher> ReadonlyCatalogTraversal;
28 typedef SmallhashFilter HashFilter;
29 typedef GarbageCollector<ReadonlyCatalogTraversal, HashFilter> GC;
30 typedef GarbageCollectorAux<ReadonlyCatalogTraversal, HashFilter> GCAux;
31 typedef GC::Configuration GcConfig;
32
33
34 ParameterList CommandGc::GetParams() const {
35 ParameterList r;
36 r.push_back(Parameter::Mandatory('r', "repository url"));
37 r.push_back(Parameter::Mandatory('u', "spooler definition string"));
38 r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
39 r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
40 r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
41 r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
42 r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
43 r.push_back(Parameter::Optional('t', "temporary directory"));
44 r.push_back(Parameter::Optional('L', "path to deletion log file"));
45 r.push_back(Parameter::Optional('N', "number of threads to use"));
46 r.push_back(Parameter::Optional('@', "proxy url"));
47 r.push_back(Parameter::Switch('d', "dry run"));
48 r.push_back(Parameter::Switch('l', "list objects to be removed"));
49 r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
50 r.push_back(Parameter::Optional('E',
51 "inclusion spec for partial replication"));
52 return r;
53 }
54
55
56 int CommandGc::Main(const ArgumentList &args) {
57 const std::string start_time = GetGMTimestamp();
58
59 const std::string &repo_url = *args.find('r')->second;
60 const std::string &spooler = *args.find('u')->second;
61 const std::string &repo_name = *args.find('n')->second;
62 const std::string &reflog_chksum_path = *args.find('R')->second;
63 shash::Any reflog_hash;
64 if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
65 LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
66 return 1;
67 }
68
69 const uint64_t revisions = (args.count('h') > 0)
70 ? String2Int64(*args.find('h')->second)
71 : GcConfig::kFullHistory;
72 const time_t timestamp = (args.count('z') > 0)
73 ? static_cast<time_t>(
74 String2Int64(*args.find('z')->second))
75 : GcConfig::kNoTimestamp;
76 std::string repo_keys = (args.count('k') > 0) ? *args.find('k')->second : "";
77 if (DirectoryExists(repo_keys))
78 repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
79 const bool dry_run = (args.count('d') > 0);
80 const bool list_condemned_objects = (args.count('l') > 0);
81 const std::string temp_directory = (args.count('t') > 0)
82 ? *args.find('t')->second
83 : "/tmp";
84 const std::string deletion_log_path = (args.count('L') > 0)
85 ? *args.find('L')->second
86 : "";
87 const bool upload_statsdb = (args.count('I') > 0);
88 const unsigned int num_threads = (args.count('N') > 0)
89 ? String2Uint64(*args.find('N')->second)
90 : 8;
91
92 // Partial replication: no functional changes needed. Excluded subtrees are
93 // pruned during snapshot, so both their catalogs and their objects are
94 // absent. The catalog traversal already runs with ignore_load_failure=true
95 // (see GarbageCollector::GetTraversalParams), so pruned nested catalogs are
96 // silently skipped along with their entire subtree; and the sweep phase is a
97 // no-op for objects that were never downloaded.
98 if (args.count('E') > 0) {
99 catalog::InclusionSpec *inclusion_spec =
100 catalog::InclusionSpec::Create(*args.find('E')->second);
101 if (inclusion_spec != NULL && inclusion_spec->IsValid()) {
102 LogCvmfs(kLogCvmfs, kLogStdout,
103 "Partial replication mode: GC will skip pruned (excluded) "
104 "catalogs and their objects");
105 } else {
106 LogCvmfs(kLogCvmfs, kLogStderr,
107 "Warning: could not parse inclusion spec '%s', "
108 "continuing without partial replication awareness",
109 args.find('E')->second->c_str());
110 }
111 delete inclusion_spec;
112 }
113
114 if (timestamp == GcConfig::kNoTimestamp
115 && revisions == GcConfig::kFullHistory) {
116 LogCvmfs(kLogCvmfs, kLogStderr,
117 "neither a timestamp nor history threshold given");
118 return 1;
119 }
120
121 const bool follow_redirects = false;
122 const std::string proxy = ((args.count('@') > 0) ? *args.find('@')->second
123 : "");
124 if (!this->InitDownloadManager(follow_redirects, proxy)
125 || !this->InitSignatureManager(repo_keys)) {
126 LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
127 return 1;
128 }
129
130 ObjectFetcher object_fetcher(repo_name,
131 repo_url,
132 temp_directory,
133 download_manager(),
134 signature_manager());
135
136 UniquePtr<manifest::Manifest> manifest;
137 const ObjectFetcher::Failures retval = object_fetcher.FetchManifest(
138 &manifest);
139 if (retval != ObjectFetcher::kFailOk) {
140 LogCvmfs(kLogCvmfs, kLogStderr,
141 "failed to load repository manifest "
142 "(%d - %s)",
143 retval, Code2Ascii(retval));
144 return 1;
145 }
146
147 if (!manifest->garbage_collectable()) {
148 LogCvmfs(kLogCvmfs, kLogStderr,
149 "repository does not allow garbage collection "
150 "(manifest does not have the garbage collection flag set; "
151 "ensure CVMFS_GARBAGE_COLLECTION=true in server.conf and "
152 "run a transaction + publish to update the manifest)");
153 return 1;
154 }
155
156 UniquePtr<manifest::Reflog> reflog;
157 reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
158 assert(reflog.IsValid());
159
160 const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
161 const UniquePtr<upload::AbstractUploader> uploader(
162 upload::AbstractUploader::Construct(spooler_definition));
163
164 if (!uploader.IsValid()) {
165 LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
166 spooler.c_str());
167 return 1;
168 }
169
170 FILE *deletion_log_file = NULL;
171 if (!deletion_log_path.empty()) {
172 deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
173 if (NULL == deletion_log_file) {
174 LogCvmfs(kLogCvmfs, kLogStderr,
175 "failed to open deletion log file "
176 "(errno: %d)",
177 errno);
178 uploader->TearDown();
179 return 1;
180 }
181 }
182
183 const bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
184
185 reflog->BeginTransaction();
186
187 GcConfig config;
188 config.uploader = uploader.weak_ref();
189 config.keep_history_depth = revisions;
190 config.keep_history_timestamp = timestamp;
191 config.dry_run = dry_run;
192 config.verbose = list_condemned_objects;
193 config.object_fetcher = &object_fetcher;
194 config.reflog = reflog.weak_ref();
195 config.deleted_objects_logfile = deletion_log_file;
196 config.statistics = statistics();
197 config.extended_stats = extended_stats;
198 config.num_threads = num_threads;
199
200 if (deletion_log_file != NULL) {
201 const int bytes_written = fprintf(deletion_log_file,
202 "# Garbage Collection started at %s\n",
203 StringifyTime(time(NULL), true).c_str());
204 if (bytes_written < 0) {
205 LogCvmfs(kLogCvmfs, kLogStderr,
206 "failed to write to deletion log '%s' "
207 "(errno: %d)",
208 deletion_log_path.c_str(), errno);
209 uploader->TearDown();
210 return 1;
211 }
212 }
213
214 StatisticsDatabase *stats_db = StatisticsDatabase::OpenStandardDB(repo_name);
215
216 // File catalogs
217 GC collector(config);
218 collector.UseReflogTimestamps();
219 bool success = collector.Collect();
220
221 if (!success) {
222 LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
223 if (!dry_run) {
224 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
225 if (upload_statsdb) {
226 stats_db->UploadStatistics(uploader.weak_ref());
227 }
228 }
229 uploader->TearDown();
230 return 1;
231 }
232
233 // Tag databases, meta infos, certificates
234 HashFilter preserved_objects;
235 preserved_objects.Fill(manifest->certificate());
236 preserved_objects.Fill(manifest->history());
237 preserved_objects.Fill(manifest->meta_info());
238 GCAux collector_aux(config);
239 success = collector_aux.CollectOlderThan(collector.oldest_trunk_catalog(),
240 preserved_objects);
241 if (!success) {
242 LogCvmfs(kLogCvmfs, kLogStderr,
243 "garbage collection of auxiliary files failed");
244 if (!dry_run) {
245 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
246 if (upload_statsdb) {
247 stats_db->UploadStatistics(uploader.weak_ref());
248 }
249 }
250 uploader->TearDown();
251 return 1;
252 }
253
254 // As of here: garbage collection succeeded, cleanup & commit
255
256 if (deletion_log_file != NULL) {
257 const int bytes_written = fprintf(deletion_log_file,
258 "# Garbage Collection finished at %s\n\n",
259 StringifyTime(time(NULL), true).c_str());
260 assert(bytes_written >= 0);
261 fclose(deletion_log_file);
262 }
263
264 reflog->CommitTransaction();
265 // Has to be outside the transaction
266 success = reflog->Vacuum();
267 assert(success);
268 reflog->DropDatabaseFileOwnership();
269 const std::string reflog_db = reflog->database_file();
270 reflog.Destroy();
271
272 if (!dry_run) {
273 uploader->UploadFile(reflog_db, ".cvmfsreflog");
274 manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
275 uploader->WaitForUpload();
276 manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
277 }
278
279 unlink(reflog_db.c_str());
280
281 if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
282 LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
283
284 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
285 if (upload_statsdb) {
286 stats_db->UploadStatistics(uploader.weak_ref());
287 }
288
289 uploader->TearDown();
290 return 1;
291 }
292
293 if (!dry_run) {
294 stats_db->StoreGCStatistics(this->statistics(), start_time, true);
295 if (upload_statsdb) {
296 stats_db->UploadStatistics(uploader.weak_ref());
297 }
298 }
299 uploader->TearDown();
300 return 0;
301 }
302
303 } // namespace swissknife
304