GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_gc.cc
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 0 156 0.0%
Branches: 0 84 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This command processes a repository's catalog structure to detect and remove
5 * outdated and/or unneeded data objects.
6 */
7
8 #include "cvmfs_config.h"
9 #include "swissknife_gc.h"
10
11 #include <string>
12
13 #include "garbage_collection/garbage_collector.h"
14 #include "garbage_collection/gc_aux.h"
15 #include "garbage_collection/hash_filter.h"
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22
23 namespace swissknife {
24
25 typedef HttpObjectFetcher<> ObjectFetcher;
26 typedef CatalogTraversalParallel<ObjectFetcher> ReadonlyCatalogTraversal;
27 typedef SmallhashFilter HashFilter;
28 typedef GarbageCollector<ReadonlyCatalogTraversal, HashFilter> GC;
29 typedef GarbageCollectorAux<ReadonlyCatalogTraversal, HashFilter> GCAux;
30 typedef GC::Configuration GcConfig;
31
32
33 ParameterList CommandGc::GetParams() const {
34 ParameterList r;
35 r.push_back(Parameter::Mandatory('r', "repository url"));
36 r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37 r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38 r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
39 r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40 r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41 r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42 r.push_back(Parameter::Optional('t', "temporary directory"));
43 r.push_back(Parameter::Optional('L', "path to deletion log file"));
44 r.push_back(Parameter::Optional('N', "number of threads to use"));
45 r.push_back(Parameter::Optional('@', "proxy url"));
46 r.push_back(Parameter::Switch('d', "dry run"));
47 r.push_back(Parameter::Switch('l', "list objects to be removed"));
48 r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
49 return r;
50 }
51
52
53 int CommandGc::Main(const ArgumentList &args) {
54 std::string start_time = GetGMTimestamp();
55
56 const std::string &repo_url = *args.find('r')->second;
57 const std::string &spooler = *args.find('u')->second;
58 const std::string &repo_name = *args.find('n')->second;
59 const std::string &reflog_chksum_path = *args.find('R')->second;
60 shash::Any reflog_hash;
61 if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
62 LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
63 return 1;
64 }
65
66 const uint64_t revisions = (args.count('h') > 0) ?
67 String2Int64(*args.find('h')->second) : GcConfig::kFullHistory;
68 const time_t timestamp = (args.count('z') > 0)
69 ? static_cast<time_t>(String2Int64(*args.find('z')->second))
70 : GcConfig::kNoTimestamp;
71 std::string repo_keys = (args.count('k') > 0) ?
72 *args.find('k')->second : "";
73 if (DirectoryExists(repo_keys))
74 repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
75 const bool dry_run = (args.count('d') > 0);
76 const bool list_condemned_objects = (args.count('l') > 0);
77 const std::string temp_directory = (args.count('t') > 0) ?
78 *args.find('t')->second : "/tmp";
79 const std::string deletion_log_path = (args.count('L') > 0) ?
80 *args.find('L')->second : "";
81 const bool upload_statsdb = (args.count('I') > 0);
82 const unsigned int num_threads = (args.count('N') > 0) ?
83 String2Uint64(*args.find('N')->second) : 8;
84
85 if (timestamp == GcConfig::kNoTimestamp &&
86 revisions == GcConfig::kFullHistory) {
87 LogCvmfs(kLogCvmfs, kLogStderr,
88 "neither a timestamp nor history threshold given");
89 return 1;
90 }
91
92 const bool follow_redirects = false;
93 const std::string proxy = ((args.count('@') > 0) ?
94 *args.find('@')->second : "");
95 if (!this->InitDownloadManager(follow_redirects, proxy) ||
96 !this->InitVerifyingSignatureManager(repo_keys)) {
97 LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
98 return 1;
99 }
100
101 ObjectFetcher object_fetcher(repo_name,
102 repo_url,
103 temp_directory,
104 download_manager(),
105 signature_manager());
106
107 UniquePtr<manifest::Manifest> manifest;
108 ObjectFetcher::Failures retval = object_fetcher.FetchManifest(&manifest);
109 if (retval != ObjectFetcher::kFailOk) {
110 LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest "
111 "(%d - %s)",
112 retval, Code2Ascii(retval));
113 return 1;
114 }
115
116 if (!manifest->garbage_collectable()) {
117 LogCvmfs(kLogCvmfs, kLogStderr,
118 "repository does not allow garbage collection");
119 return 1;
120 }
121
122 UniquePtr<manifest::Reflog> reflog;
123 reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
124 assert(reflog.IsValid());
125
126 const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
127 UniquePtr<upload::AbstractUploader> uploader(
128 upload::AbstractUploader::Construct(spooler_definition));
129
130 if (!uploader.IsValid()) {
131 LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
132 spooler.c_str());
133 return 1;
134 }
135
136 FILE *deletion_log_file = NULL;
137 if (!deletion_log_path.empty()) {
138 deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
139 if (NULL == deletion_log_file) {
140 LogCvmfs(kLogCvmfs, kLogStderr, "failed to open deletion log file "
141 "(errno: %d)", errno);
142 uploader->TearDown();
143 return 1;
144 }
145 }
146
147 bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
148
149 reflog->BeginTransaction();
150
151 GcConfig config;
152 config.uploader = uploader.weak_ref();
153 config.keep_history_depth = revisions;
154 config.keep_history_timestamp = timestamp;
155 config.dry_run = dry_run;
156 config.verbose = list_condemned_objects;
157 config.object_fetcher = &object_fetcher;
158 config.reflog = reflog.weak_ref();
159 config.deleted_objects_logfile = deletion_log_file;
160 config.statistics = statistics();
161 config.extended_stats = extended_stats;
162 config.num_threads = num_threads;
163
164 if (deletion_log_file != NULL) {
165 const int bytes_written = fprintf(deletion_log_file,
166 "# Garbage Collection started at %s\n",
167 StringifyTime(time(NULL), true).c_str());
168 if (bytes_written < 0) {
169 LogCvmfs(kLogCvmfs, kLogStderr, "failed to write to deletion log '%s' "
170 "(errno: %d)",
171 deletion_log_path.c_str(), errno);
172 uploader->TearDown();
173 return 1;
174 }
175 }
176
177 StatisticsDatabase *stats_db = StatisticsDatabase::OpenStandardDB(repo_name);
178
179 // File catalogs
180 GC collector(config);
181 collector.UseReflogTimestamps();
182 bool success = collector.Collect();
183
184 if (!success) {
185 LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
186 if (!dry_run) {
187 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
188 if (upload_statsdb) {
189 stats_db->UploadStatistics(uploader.weak_ref());
190 }
191 }
192 uploader->TearDown();
193 return 1;
194 }
195
196 // Tag databases, meta infos, certificates
197 HashFilter preserved_objects;
198 preserved_objects.Fill(manifest->certificate());
199 preserved_objects.Fill(manifest->history());
200 preserved_objects.Fill(manifest->meta_info());
201 GCAux collector_aux(config);
202 success = collector_aux.CollectOlderThan(
203 collector.oldest_trunk_catalog(), preserved_objects);
204 if (!success) {
205 LogCvmfs(kLogCvmfs, kLogStderr,
206 "garbage collection of auxiliary files failed");
207 if (!dry_run) {
208 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
209 if (upload_statsdb) {
210 stats_db->UploadStatistics(uploader.weak_ref());
211 }
212 }
213 uploader->TearDown();
214 return 1;
215 }
216
217 // As of here: garbage collection succeeded, cleanup & commit
218
219 if (deletion_log_file != NULL) {
220 const int bytes_written = fprintf(deletion_log_file,
221 "# Garbage Collection finished at %s\n\n",
222 StringifyTime(time(NULL), true).c_str());
223 assert(bytes_written >= 0);
224 fclose(deletion_log_file);
225 }
226
227 reflog->CommitTransaction();
228 // Has to be outside the transaction
229 success = reflog->Vacuum();
230 assert(success);
231 reflog->DropDatabaseFileOwnership();
232 const std::string reflog_db = reflog->database_file();
233 reflog.Destroy();
234
235 if (!dry_run) {
236 uploader->UploadFile(reflog_db, ".cvmfsreflog");
237 manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
238 uploader->WaitForUpload();
239 manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
240 }
241
242 unlink(reflog_db.c_str());
243
244 if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
245 LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
246
247 stats_db->StoreGCStatistics(this->statistics(), start_time, false);
248 if (upload_statsdb) {
249 stats_db->UploadStatistics(uploader.weak_ref());
250 }
251
252 uploader->TearDown();
253 return 1;
254 }
255
256 if (!dry_run) {
257 stats_db->StoreGCStatistics(this->statistics(), start_time, true);
258 if (upload_statsdb) {
259 stats_db->UploadStatistics(uploader.weak_ref());
260 }
261 }
262 uploader->TearDown();
263 return 0;
264 }
265
266 } // namespace swissknife
267