GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_gc.cc
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 0 161 0.0%
Branches: 0 92 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This command processes a repository's catalog structure to detect and remove
5 * outdated and/or unneeded data objects.
6 */
7
8
9 #include "swissknife_gc.h"
10
11 #include <string>
12
13 #include "garbage_collection/garbage_collector.h"
14 #include "garbage_collection/gc_aux.h"
15 #include "garbage_collection/hash_filter.h"
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22
23 namespace swissknife {
24
25 typedef HttpObjectFetcher<> ObjectFetcher;
26 typedef CatalogTraversalParallel<ObjectFetcher> ReadonlyCatalogTraversal;
27 typedef SmallhashFilter HashFilter;
28 typedef GarbageCollector<ReadonlyCatalogTraversal, HashFilter> GC;
29 typedef GarbageCollectorAux<ReadonlyCatalogTraversal, HashFilter> GCAux;
30 typedef GC::Configuration GcConfig;
31
32
33 ParameterList CommandGc::GetParams() const {
34 ParameterList r;
35 r.push_back(Parameter::Mandatory('r', "repository url"));
36 r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37 r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38 r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
39 r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40 r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41 r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42 r.push_back(Parameter::Optional('t', "temporary directory"));
43 r.push_back(Parameter::Optional('L', "path to deletion log file"));
44 r.push_back(Parameter::Optional('N', "number of threads to use"));
45 r.push_back(Parameter::Optional('@', "proxy url"));
46 r.push_back(Parameter::Switch('d', "dry run"));
47 r.push_back(Parameter::Switch('l', "list objects to be removed"));
48 r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
49 return r;
50 }
51
52
53 int CommandGc::Main(const ArgumentList &args) {
54 const std::string start_time = GetGMTimestamp();
55
56 const std::string &repo_url = *args.find('r')->second;
57 const std::string &spooler = *args.find('u')->second;
58 const std::string &repo_name = *args.find('n')->second;
59 const std::string &reflog_chksum_path = *args.find('R')->second;
60 shash::Any reflog_hash;
61 if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
62 LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
63 return 1;
64 }
65
66 const uint64_t revisions = (args.count('h') > 0)
67 ? String2Int64(*args.find('h')->second)
68 : GcConfig::kFullHistory;
69 const time_t timestamp = (args.count('z') > 0)
70 ? static_cast<time_t>(
71 String2Int64(*args.find('z')->second))
72 : GcConfig::kNoTimestamp;
73 std::string repo_keys = (args.count('k') > 0) ? *args.find('k')->second : "";
74 if (DirectoryExists(repo_keys))
75 repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
76 const bool dry_run = (args.count('d') > 0);
77 const bool list_condemned_objects = (args.count('l') > 0);
78 const std::string temp_directory = (args.count('t') > 0)
79 ? *args.find('t')->second
80 : "/tmp";
81 const std::string deletion_log_path = (args.count('L') > 0)
82 ? *args.find('L')->second
83 : "";
84 const bool upload_statsdb = (args.count('I') > 0);
85 const unsigned int num_threads = (args.count('N') > 0)
86 ? String2Uint64(*args.find('N')->second)
87 : 8;
88
89 if (timestamp == GcConfig::kNoTimestamp
90 && revisions == GcConfig::kFullHistory) {
91 LogCvmfs(kLogCvmfs, kLogStderr,
92 "neither a timestamp nor history threshold given");
93 return 1;
94 }
95
96 const bool follow_redirects = false;
97 const std::string proxy = ((args.count('@') > 0) ? *args.find('@')->second
98 : "");
99 if (!this->InitDownloadManager(follow_redirects, proxy)
100 || !this->InitSignatureManager(repo_keys)) {
101 LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
102 return 1;
103 }
104
105 ObjectFetcher object_fetcher(repo_name,
106 repo_url,
107 temp_directory,
108 download_manager(),
109 signature_manager());
110
111 UniquePtr<manifest::Manifest> manifest;
112 const ObjectFetcher::Failures retval =
113 object_fetcher.FetchManifest(&manifest);
114 if (retval != ObjectFetcher::kFailOk) {
115 LogCvmfs(kLogCvmfs, kLogStderr,
116 "failed to load repository manifest "
117 "(%d - %s)",
118 retval, Code2Ascii(retval));
119 return 1;
120 }
121
122 if (!manifest->garbage_collectable()) {
123 LogCvmfs(kLogCvmfs, kLogStderr,
124 "repository does not allow garbage collection");
125 return 1;
126 }
127
128 UniquePtr<manifest::Reflog> reflog;
129 reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
130 assert(reflog.IsValid());
131
132 const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
133 const UniquePtr<upload::AbstractUploader> uploader(
134 upload::AbstractUploader::Construct(spooler_definition));
135
136 if (!uploader.IsValid()) {
137 LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
138 spooler.c_str());
139 return 1;
140 }
141
142 FILE *deletion_log_file = NULL;
143 if (!deletion_log_path.empty()) {
144 deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
145 if (NULL == deletion_log_file) {
146 LogCvmfs(kLogCvmfs, kLogStderr,
147 "failed to open deletion log file "
148 "(errno: %d)",
149 errno);
150 uploader->TearDown();
151 return 1;
152 }
153 }
154
155 const bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
156
157 reflog->BeginTransaction();
158
159 GcConfig config;
160 config.uploader = uploader.weak_ref();
161 config.keep_history_depth = revisions;
162 config.keep_history_timestamp = timestamp;
163 config.dry_run = dry_run;
164 config.verbose = list_condemned_objects;
165 config.object_fetcher = &object_fetcher;
166 config.reflog = reflog.weak_ref();
167 config.deleted_objects_logfile = deletion_log_file;
168 config.statistics = statistics();
169 config.extended_stats = extended_stats;
170 config.num_threads = num_threads;
171
172 if (deletion_log_file != NULL) {
173 const int bytes_written = fprintf(deletion_log_file,
174 "# Garbage Collection started at %s\n",
175 StringifyTime(time(NULL), true).c_str());
176 if (bytes_written < 0) {
177 LogCvmfs(kLogCvmfs, kLogStderr,
178 "failed to write to deletion log '%s' "
179 "(errno: %d)",
180 deletion_log_path.c_str(), errno);
181 uploader->TearDown();
182 return 1;
183 }
184 }
185
186 StatisticsDatabase *stats_db = StatisticsDatabase::OpenStandardDB(repo_name);
187
188 // File catalogs
189 GC collector(config);
190 collector.UseReflogTimestamps();
191 bool success = collector.Collect();
192
193 if (!success) {
194 LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
195 if (!dry_run) {
196 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
197 if (upload_statsdb) {
198 stats_db->UploadStatistics(uploader.weak_ref());
199 }
200 }
201 uploader->TearDown();
202 return 1;
203 }
204
205 // Tag databases, meta infos, certificates
206 HashFilter preserved_objects;
207 preserved_objects.Fill(manifest->certificate());
208 preserved_objects.Fill(manifest->history());
209 preserved_objects.Fill(manifest->meta_info());
210 GCAux collector_aux(config);
211 success = collector_aux.CollectOlderThan(collector.oldest_trunk_catalog(),
212 preserved_objects);
213 if (!success) {
214 LogCvmfs(kLogCvmfs, kLogStderr,
215 "garbage collection of auxiliary files failed");
216 if (!dry_run) {
217 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
218 if (upload_statsdb) {
219 stats_db->UploadStatistics(uploader.weak_ref());
220 }
221 }
222 uploader->TearDown();
223 return 1;
224 }
225
226 // As of here: garbage collection succeeded, cleanup & commit
227
228 if (deletion_log_file != NULL) {
229 const int bytes_written = fprintf(deletion_log_file,
230 "# Garbage Collection finished at %s\n\n",
231 StringifyTime(time(NULL), true).c_str());
232 assert(bytes_written >= 0);
233 fclose(deletion_log_file);
234 }
235
236 reflog->CommitTransaction();
237 // Has to be outside the transaction
238 success = reflog->Vacuum();
239 assert(success);
240 reflog->DropDatabaseFileOwnership();
241 const std::string reflog_db = reflog->database_file();
242 reflog.Destroy();
243
244 if (!dry_run) {
245 uploader->UploadFile(reflog_db, ".cvmfsreflog");
246 manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
247 uploader->WaitForUpload();
248 manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
249 }
250
251 unlink(reflog_db.c_str());
252
253 if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
254 LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
255
256 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
257 if (upload_statsdb) {
258 stats_db->UploadStatistics(uploader.weak_ref());
259 }
260
261 uploader->TearDown();
262 return 1;
263 }
264
265 if (!dry_run) {
266 stats_db->StoreGCStatistics(this->statistics(), start_time, true);
267 if (upload_statsdb) {
268 stats_db->UploadStatistics(uploader.weak_ref());
269 }
270 }
271 uploader->TearDown();
272 return 0;
273 }
274
275 } // namespace swissknife
276