CernVM-FS  2.9.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_gc.cc
Go to the documentation of this file.
1 
8 #include "cvmfs_config.h"
9 #include "swissknife_gc.h"
10 
11 #include <string>
12 
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22 
23 namespace swissknife {
24 
31 
32 
// Builds and returns the list of command line parameters of this command:
// four mandatory options (r, u, n, R), six optional ones (h, z, k, t, L, N)
// and three switches (d, l, I).
// NOTE(review): the listing jumps from line 32 to 34, so the function
// signature is not visible here; the cross-reference section lists
// `virtual ParameterList GetParams() const` -- confirm against the real file.
34  ParameterList r;
35  r.push_back(Parameter::Mandatory('r', "repository url"));
36  r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37  r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38  r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
// 'h' and 'z' are the two retention thresholds; the entry point below rejects
// a call where neither of them is given.
39  r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40  r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41  r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42  r.push_back(Parameter::Optional('t', "temporary directory"));
43  r.push_back(Parameter::Optional('L', "path to deletion log file"));
44  r.push_back(Parameter::Optional('N', "number of threads to use"));
45  r.push_back(Parameter::Switch('d', "dry run"));
46  r.push_back(Parameter::Switch('l', "list objects to be removed"));
47  r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
48  return r;
49 }
50 
51 
// Body of the garbage collection entry point. It parses the arguments,
// fetches the repository manifest and the reflog, runs the collector over the
// catalogs and then over auxiliary objects, and finally uploads the updated
// reflog and records statistics. Every failure path visible here returns 1;
// the success path returns 0.
// NOTE(review): listing line 52 (the signature) is missing from this dump;
// the cross-reference section lists `int Main(const ArgumentList &args)` --
// confirm against the real file. Several other listing lines are missing as
// well and are flagged individually below.
53  std::string start_time = GetGMTimestamp();
54 
// Mandatory arguments: the find() results are dereferenced without comparing
// against end(). Presumably the argument parser guarantees their presence
// because they are declared Mandatory -- verify.
55  const std::string &repo_url = *args.find('r')->second;
56  const std::string &spooler = *args.find('u')->second;
57  const std::string &repo_name = *args.find('n')->second;
58  const std::string &reflog_chksum_path = *args.find('R')->second;
59  shash::Any reflog_hash;
60  if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
61  LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
62  return 1;
63  }
64 
// Optional thresholds: number of revisions to keep ('h') and cut-off
// timestamp ('z').
65  const int64_t revisions = (args.count('h') > 0) ?
66  String2Int64(*args.find('h')->second) : GcConfig::kFullHistory;
// NOTE(review): listing line 69 is missing, so the fallback branch of this
// conditional is not visible; presumably it is GcConfig::kNoTimestamp, which
// `timestamp` is compared against further down -- confirm.
67  const time_t timestamp = (args.count('z') > 0)
68  ? static_cast<time_t>(String2Int64(*args.find('z')->second))
70  std::string repo_keys = (args.count('k') > 0) ?
71  *args.find('k')->second : "";
// If 'k' names a directory, replace it by the ':'-joined list of the *.pub
// files found in it.
72  if (DirectoryExists(repo_keys))
73  repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
74  const bool dry_run = (args.count('d') > 0);
75  const bool list_condemned_objects = (args.count('l') > 0);
76  const std::string temp_directory = (args.count('t') > 0) ?
77  *args.find('t')->second : "/tmp";
78  const std::string deletion_log_path = (args.count('L') > 0) ?
79  *args.find('L')->second : "";
80  const bool upload_statsdb = (args.count('I') > 0);
// Defaults to 8 threads. String2Uint64 returns uint64_t (see the
// cross-reference section), so the value is narrowed to unsigned int here.
81  const unsigned int num_threads = (args.count('N') > 0) ?
82  String2Uint64(*args.find('N')->second) : 8;
83 
// NOTE(review): listing line 85 is missing (presumably the opening of a
// LogCvmfs call). The check only rejects negative values although the message
// asks for at least one preserved revision; 0 passes -- confirm intended.
84  if (revisions < 0) {
86  "at least one revision needs to be preserved");
87  return 1;
88  }
89 
// At least one of the two thresholds must have been given.
// NOTE(review): listing line 92 is missing (presumably the opening of a
// LogCvmfs call).
90  if (timestamp == GcConfig::kNoTimestamp &&
91  revisions == GcConfig::kFullHistory) {
93  "neither a timestamp nor history threshold given");
94  return 1;
95  }
96 
97  const bool follow_redirects = false;
98  if (!this->InitDownloadManager(follow_redirects) ||
99  !this->InitVerifyingSignatureManager(repo_keys)) {
100  LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
101  return 1;
102  }
103 
// NOTE(review): listing lines 107-108 are missing, so the remaining
// constructor arguments are not visible; presumably download_manager() and
// signature_manager(), both listed in the cross-reference section -- confirm.
104  ObjectFetcher object_fetcher(repo_name,
105  repo_url,
106  temp_directory,
109 
// NOTE(review): listing line 110 is missing; presumably it declares
// `manifest`, which is passed by address here and dereferenced below.
111  ObjectFetcher::Failures retval = object_fetcher.FetchManifest(&manifest);
112  if (retval != ObjectFetcher::kFailOk) {
113  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest "
114  "(%d - %s)",
115  retval, Code2Ascii(retval));
116  return 1;
117  }
118 
// Refuse to run on repositories whose manifest does not allow garbage
// collection.
// NOTE(review): listing line 120 is missing (presumably the opening of a
// LogCvmfs call).
119  if (!manifest->garbage_collectable()) {
121  "repository does not allow garbage collection");
122  return 1;
123  }
124 
// NOTE(review): listing line 125 is missing; presumably it declares `reflog`.
// Its validity is only enforced by assert, so a build with NDEBUG would carry
// on with an invalid reflog -- confirm this is acceptable.
126  reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
127  assert(reflog.IsValid());
128 
// NOTE(review): listing line 130 is missing; presumably it opens the
// declaration of `uploader`, which is checked with IsValid() right below.
129  const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
131  upload::AbstractUploader::Construct(spooler_definition));
132 
133  if (!uploader.IsValid()) {
134  LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
135  spooler.c_str());
136  return 1;
137  }
138 
// The deletion log is optional; when requested it is opened in append mode
// ("a+"), so existing content of the file is kept.
139  FILE *deletion_log_file = NULL;
140  if (!deletion_log_path.empty()) {
141  deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
142  if (NULL == deletion_log_file) {
143  LogCvmfs(kLogCvmfs, kLogStderr, "failed to open deletion log file "
144  "(errno: %d)", errno);
145  uploader->TearDown();
146  return 1;
147  }
148  }
149 
150  bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
151 
// The transaction opened here is committed only on the success path further
// down; the failure returns in between leave without a visible commit.
152  reflog->BeginTransaction();
153 
// Collect everything the collectors need into a single configuration object.
// The uploader and the reflog are handed over via weak_ref().
154  GcConfig config;
155  config.uploader = uploader.weak_ref();
156  config.keep_history_depth = revisions;
157  config.keep_history_timestamp = timestamp;
158  config.dry_run = dry_run;
159  config.verbose = list_condemned_objects;
160  config.object_fetcher = &object_fetcher;
161  config.reflog = reflog.weak_ref();
162  config.deleted_objects_logfile = deletion_log_file;
163  config.statistics = statistics();
164  config.extended_stats = extended_stats;
165  config.num_threads = num_threads;
166 
// Write a start marker into the deletion log.
// NOTE(review): on this and the later failure paths deletion_log_file is not
// closed; the only fclose() in this listing is on the success path.
167  if (deletion_log_file != NULL) {
168  const int bytes_written = fprintf(deletion_log_file,
169  "# Garbage Collection started at %s\n",
170  StringifyTime(time(NULL), true).c_str());
171  if (bytes_written < 0) {
172  LogCvmfs(kLogCvmfs, kLogStderr, "failed to write to deletion log '%s' "
173  "(errno: %d)",
174  deletion_log_path.c_str(), errno);
175  uploader->TearDown();
176  return 1;
177  }
178  }
179 
// NOTE(review): listing line 180 is missing; presumably it declares
// `stats_db` (the cross-reference section lists
// StatisticsDatabase::OpenStandardDB) -- confirm. Who releases stats_db is
// not visible in this listing.
181 
182  // File catalogs
183  GC collector(config);
184  collector.UseReflogTimestamps();
185  bool success = collector.Collect();
186 
// On failure the run is recorded as unsuccessful in the statistics database,
// unless this is a dry run.
187  if (!success) {
188  LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
189  if (!dry_run) {
190  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
191  if (upload_statsdb) {
192  stats_db->UploadStatistics(uploader);
193  }
194  }
195  uploader->TearDown();
196  return 1;
197  }
198 
199  // Tag databases, meta infos, certificates
// The certificate, history and meta info objects referenced by the current
// manifest are put into the filter handed to the auxiliary collector.
200  HashFilter preserved_objects;
201  preserved_objects.Fill(manifest->certificate());
202  preserved_objects.Fill(manifest->history());
203  preserved_objects.Fill(manifest->meta_info());
204  GCAux collector_aux(config);
205  success = collector_aux.CollectOlderThan(
206  collector.oldest_trunk_catalog(), preserved_objects);
// NOTE(review): listing line 208 is missing (presumably the opening of a
// LogCvmfs call).
207  if (!success) {
209  "garbage collection of auxiliary files failed");
210  if (!dry_run) {
211  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
212  if (upload_statsdb) {
213  stats_db->UploadStatistics(uploader);
214  }
215  }
216  uploader->TearDown();
217  return 1;
218  }
219 
220  // As of here: garbage collection succeeded, cleanup & commit
221 
// NOTE(review): the result of this write is only checked by assert, unlike
// the start marker above, which has an explicit error path.
222  if (deletion_log_file != NULL) {
223  const int bytes_written = fprintf(deletion_log_file,
224  "# Garbage Collection finished at %s\n\n",
225  StringifyTime(time(NULL), true).c_str());
226  assert(bytes_written >= 0);
227  fclose(deletion_log_file);
228  }
229 
230  reflog->CommitTransaction();
231  // Has to be outside the transaction
232  success = reflog->Vacuum();
233  assert(success);
// The database path is copied before the reflog object is destroyed because
// it is still needed for the upload and the unlink() below.
234  reflog->DropDatabaseFileOwnership();
235  const std::string reflog_db = reflog->database_file();
236  reflog.Destroy();
237 
// Unless this is a dry run: upload the reflog database as ".cvmfsreflog",
// hash it and store the new hash in the local checksum file.
// NOTE(review): the checksum is written before the upload error count is
// inspected below, and the bool returned by WriteChecksum is ignored --
// confirm that a failed upload may leave an updated checksum file behind.
238  if (!dry_run) {
239  uploader->UploadFile(reflog_db, ".cvmfsreflog");
240  manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
241  uploader->WaitForUpload();
242  manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
243  }
244 
// The local reflog database file is removed in every case, dry run included.
245  unlink(reflog_db.c_str());
246 
247  if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
248  LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
249 
250  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
251  if (upload_statsdb) {
252  stats_db->UploadStatistics(uploader);
253  }
254 
255  uploader->TearDown();
256  return 1;
257  }
258 
// Success: record the run as successful (skipped for dry runs) and shut the
// uploader down.
259  if (!dry_run) {
260  stats_db->StoreGCStatistics(this->statistics(), start_time, true);
261  if (upload_statsdb) {
262  stats_db->UploadStatistics(uploader);
263  }
264  }
265  uploader->TearDown();
266  return 0;
267 }
268 
269 } // namespace swissknife
static Parameter Optional(const char key, const std::string &desc)
Definition: swissknife.h:41
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
const char * Code2Ascii(const ObjectFetcherFailures::Failures error)
std::string database_file() const
Definition: reflog.cc:337
const manifest::Manifest * manifest() const
Definition: repository.h:123
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
bool UploadStatistics(upload::Spooler *spooler, std::string local_path="")
T * weak_ref() const
Definition: pointer.h:43
static bool ReadChecksum(const std::string &path, shash::Any *checksum)
Definition: reflog.cc:47
bool StoreGCStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:317
uint64_t oldest_trunk_catalog() const
manifest::Reflog * FetchReflog(ObjectFetcherT *object_fetcher, const std::string &repo_name, const shash::Any &reflog_hash)
CatalogTraversalParallel< ObjectFetcher > ReadonlyCatalogTraversal
GarbageCollectorAux< ReadonlyCatalogTraversal, HashFilter > GCAux
assert((mem||(size==0))&&"Out Of Memory")
GarbageCollector< ReadonlyCatalogTraversal, HashFilter > GC
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:104
bool InitVerifyingSignatureManager(const std::string &pubkey_path, const std::string &trusted_certs="")
Definition: server_tool.cc:43
virtual ParameterList GetParams() const
signature::SignatureManager * signature_manager() const
Definition: server_tool.cc:112
void Fill(const shash::Any &hash)
Definition: hash_filter.h:110
static AbstractUploader * Construct(const SpoolerDefinition &param)
Definition: plugin.h:188
int64_t String2Int64(const string &value)
Definition: string.cc:221
download::DownloadManager * download_manager() const
Definition: server_tool.cc:107
bool InitDownloadManager(const bool follow_redirects, const unsigned max_pool_handles=1, const bool use_system_proxy=true)
Definition: server_tool.cc:21
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
int Main(const ArgumentList &args)
perf::Statistics * statistics()
Definition: server_tool.h:49
void BeginTransaction()
Definition: reflog.cc:295
static void HashDatabase(const std::string &database_path, shash::Any *hash_reflog)
Definition: reflog.cc:322
upload::AbstractUploader * uploader
static const unsigned int kFullHistory
void DropDatabaseFileOwnership()
Definition: reflog.cc:313
static StatisticsDatabase * OpenStandardDB(const std::string repo_name)
bool DirectoryExists(const std::string &path)
Definition: posix.cc:833
static bool GcExtendedStats(const std::string &repo_name)
SmallhashFilter HashFilter
bool CollectOlderThan(uint64_t timestamp, const HashFilterT &preserved_objects)
Definition: gc_aux_impl.h:25
uint64_t String2Uint64(const string &value)
Definition: string.cc:227
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
void CommitTransaction()
Definition: reflog.cc:301
bool Vacuum()
Definition: reflog.h:79
HttpObjectFetcher ObjectFetcher
Failures FetchManifest(manifest::Manifest **manifest)
static bool WriteChecksum(const std::string &path, const shash::Any &value)
Definition: reflog.cc:64
GC::Configuration GcConfig
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
Definition: posix.cc:1144
std::string GetGMTimestamp(std::string format)
Definition: string.cc:583