CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_gc.cc
Go to the documentation of this file.
1 
8 #include "cvmfs_config.h"
9 #include "swissknife_gc.h"
10 
11 #include <string>
12 
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22 
23 namespace swissknife {
24 
31 
32 
// Builds and returns the list of command line parameters of this command:
// four mandatory options (r, u, n, R), seven optional options that take a
// value (h, z, k, t, L, N, @) and three switches (d, l, I).
// NOTE(review): the signature that opens this body is not present in this
// listing (the embedded numbering jumps from 32 to 34).
34  ParameterList r;
35  r.push_back(Parameter::Mandatory('r', "repository url"));
36  r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37  r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38  r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
39  r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40  r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41  r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42  r.push_back(Parameter::Optional('t', "temporary directory"));
43  r.push_back(Parameter::Optional('L', "path to deletion log file"));
44  r.push_back(Parameter::Optional('N', "number of threads to use"));
45  r.push_back(Parameter::Optional('@', "proxy url"));
46  r.push_back(Parameter::Switch('d', "dry run"));
47  r.push_back(Parameter::Switch('l', "list objects to be removed"));
48  r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
49  return r;
50 }
51 
52 
// Body of the garbage collection command. It parses the arguments, fetches
// the repository manifest and the reflog, runs the catalog collector and the
// auxiliary-object collector, and finally (unless in dry-run mode) uploads the
// updated reflog and stores the GC statistics.
// Returns 0 on success and 1 on every failure path visible here.
// NOTE(review): this is a numbered listing with gaps. The signature (embedded
// line 53) and embedded lines 70, 86, 93, 110, 111, 113, 123, 128, 133, 183
// and 211 are not present, so several statements below appear truncated.
54  std::string start_time = GetGMTimestamp();
55 
// Arguments r, u, n and R are dereferenced without a presence check;
// presumably their presence is guaranteed because they are declared
// mandatory -- TODO confirm against the argument parser.
56  const std::string &repo_url = *args.find('r')->second;
57  const std::string &spooler = *args.find('u')->second;
58  const std::string &repo_name = *args.find('n')->second;
59  const std::string &reflog_chksum_path = *args.find('R')->second;
60  shash::Any reflog_hash;
61  if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
62  LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
63  return 1;
64  }
65 
// Optional arguments: each one falls back to a default when the option is
// absent (-h: GcConfig::kFullHistory, -k: empty, -t: "/tmp", -L: empty,
// -N: 8).
66  const int64_t revisions = (args.count('h') > 0) ?
67  String2Int64(*args.find('h')->second) : GcConfig::kFullHistory;
68  const time_t timestamp = (args.count('z') > 0)
69  ? static_cast<time_t>(String2Int64(*args.find('z')->second))
// NOTE(review): embedded line 70 is missing here; it presumably holds the
// fallback branch of the conditional above -- verify against the real file.
71  std::string repo_keys = (args.count('k') > 0) ?
72  *args.find('k')->second : "";
// If -k names a directory, it is replaced by the ':'-joined list of the
// "*.pub" files found in that directory.
73  if (DirectoryExists(repo_keys))
74  repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
75  const bool dry_run = (args.count('d') > 0);
76  const bool list_condemned_objects = (args.count('l') > 0);
77  const std::string temp_directory = (args.count('t') > 0) ?
78  *args.find('t')->second : "/tmp";
79  const std::string deletion_log_path = (args.count('L') > 0) ?
80  *args.find('L')->second : "";
81  const bool upload_statsdb = (args.count('I') > 0);
// NOTE(review): String2Uint64 yields a 64 bit value that is narrowed to
// unsigned int here without a range check.
82  const unsigned int num_threads = (args.count('N') > 0) ?
83  String2Uint64(*args.find('N')->second) : 8;
84 
// NOTE(review): the message asks for at least one preserved revision, but the
// condition only rejects negative values, so "-h 0" passes this check.
85  if (revisions < 0) {
87  "at least one revision needs to be preserved");
88  return 1;
89  }
90 
// Refuse to run when neither -z nor -h was given (both still hold their
// "unset" constants).
91  if (timestamp == GcConfig::kNoTimestamp &&
92  revisions == GcConfig::kFullHistory) {
94  "neither a timestamp nor history threshold given");
95  return 1;
96  }
97 
// Set up the download manager (redirects disabled, optional proxy from -@)
// and the signature manager using the key string computed above.
98  const bool follow_redirects = false;
99  const std::string proxy = ((args.count('@') > 0) ?
100  *args.find('@')->second : "");
101  if (!this->InitDownloadManager(follow_redirects, proxy) ||
102  !this->InitVerifyingSignatureManager(repo_keys)) {
103  LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
104  return 1;
105  }
106 
// NOTE(review): the remaining constructor arguments (embedded lines 110-111)
// and the declaration of `manifest` (embedded line 113) are missing from this
// listing.
107  ObjectFetcher object_fetcher(repo_name,
108  repo_url,
109  temp_directory,
112 
114  ObjectFetcher::Failures retval = object_fetcher.FetchManifest(&manifest);
115  if (retval != ObjectFetcher::kFailOk) {
116  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest "
117  "(%d - %s)",
118  retval, Code2Ascii(retval));
119  return 1;
120  }
121 
// Only repositories whose manifest is flagged garbage collectable are
// processed.
122  if (!manifest->garbage_collectable()) {
124  "repository does not allow garbage collection");
125  return 1;
126  }
127 
// NOTE(review): a failed reflog fetch is only caught by assert(); in a build
// with NDEBUG the check disappears and `reflog` is used unchecked below.
129  reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
130  assert(reflog.IsValid());
131 
132  const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
134  upload::AbstractUploader::Construct(spooler_definition));
135 
136  if (!uploader.IsValid()) {
137  LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
138  spooler.c_str());
139  return 1;
140  }
141 
// The deletion log is optional; when a path is given the file is opened in
// append mode ("a+").
// NOTE(review): the handle is closed only on the success path (embedded line
// 230). The early returns at embedded lines 179, 199 and 220 leave it open.
142  FILE *deletion_log_file = NULL;
143  if (!deletion_log_path.empty()) {
144  deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
145  if (NULL == deletion_log_file) {
146  LogCvmfs(kLogCvmfs, kLogStderr, "failed to open deletion log file "
147  "(errno: %d)", errno);
148  uploader->TearDown();
149  return 1;
150  }
151  }
152 
153  bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
154 
// NOTE(review): the transaction opened here is committed only on the success
// path (embedded line 233); the failure returns below do not call
// CommitTransaction(). Whether it is rolled back elsewhere cannot be told
// from this listing.
155  reflog->BeginTransaction();
156 
// Collector configuration, shared by both collectors below. Uploader and
// reflog are handed over as weak (non-owning) references.
157  GcConfig config;
158  config.uploader = uploader.weak_ref();
159  config.keep_history_depth = revisions;
160  config.keep_history_timestamp = timestamp;
161  config.dry_run = dry_run;
162  config.verbose = list_condemned_objects;
163  config.object_fetcher = &object_fetcher;
164  config.reflog = reflog.weak_ref();
165  config.deleted_objects_logfile = deletion_log_file;
166  config.statistics = statistics();
167  config.extended_stats = extended_stats;
168  config.num_threads = num_threads;
169 
// Write a start marker into the deletion log; a write failure aborts the run.
170  if (deletion_log_file != NULL) {
171  const int bytes_written = fprintf(deletion_log_file,
172  "# Garbage Collection started at %s\n",
173  StringifyTime(time(NULL), true).c_str());
174  if (bytes_written < 0) {
175  LogCvmfs(kLogCvmfs, kLogStderr, "failed to write to deletion log '%s' "
176  "(errno: %d)",
177  deletion_log_path.c_str(), errno);
178  uploader->TearDown();
179  return 1;
180  }
181  }
182 
// NOTE(review): embedded line 183 is missing; `stats_db`, used below, is
// presumably declared there -- verify against the real file.
184 
185  // File catalogs
186  GC collector(config);
187  collector.UseReflogTimestamps();
188  bool success = collector.Collect();
189 
// On failure the statistics are still stored (flagged unsuccessful) and
// optionally uploaded, unless this is a dry run.
190  if (!success) {
191  LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
192  if (!dry_run) {
193  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
194  if (upload_statsdb) {
195  stats_db->UploadStatistics(uploader.weak_ref());
196  }
197  }
198  uploader->TearDown();
199  return 1;
200  }
201 
202  // Tag databases, meta infos, certificates
// The certificate, history and meta info objects referenced by the current
// manifest are put into the filter passed as `preserved_objects`. The cut-off
// passed to the auxiliary collector is the oldest trunk catalog value
// reported by the first collector.
203  HashFilter preserved_objects;
204  preserved_objects.Fill(manifest->certificate());
205  preserved_objects.Fill(manifest->history());
206  preserved_objects.Fill(manifest->meta_info());
207  GCAux collector_aux(config);
208  success = collector_aux.CollectOlderThan(
209  collector.oldest_trunk_catalog(), preserved_objects);
210  if (!success) {
212  "garbage collection of auxiliary files failed");
213  if (!dry_run) {
214  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
215  if (upload_statsdb) {
216  stats_db->UploadStatistics(uploader.weak_ref());
217  }
218  }
219  uploader->TearDown();
220  return 1;
221  }
222 
223  // As of here: garbage collection succeeded, cleanup & commit
224 
// NOTE(review): with NDEBUG the assert below vanishes, so a failed write of
// the end marker goes unnoticed; the fclose() result is not checked either.
225  if (deletion_log_file != NULL) {
226  const int bytes_written = fprintf(deletion_log_file,
227  "# Garbage Collection finished at %s\n\n",
228  StringifyTime(time(NULL), true).c_str());
229  assert(bytes_written >= 0);
230  fclose(deletion_log_file);
231  }
232 
233  reflog->CommitTransaction();
234  // Has to be outside the transaction
// NOTE(review): a Vacuum() failure is only caught by assert().
235  success = reflog->Vacuum();
236  assert(success);
// The database path is copied before the reflog object is destroyed because
// it is still needed for upload, hashing and unlink below.
237  reflog->DropDatabaseFileOwnership();
238  const std::string reflog_db = reflog->database_file();
239  reflog.Destroy();
240 
// NOTE(review): the new checksum is written before the upload error count is
// inspected (embedded line 250), and the bool result of WriteChecksum() is
// ignored. A failed upload therefore still updates the local checksum file.
241  if (!dry_run) {
242  uploader->UploadFile(reflog_db, ".cvmfsreflog");
243  manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
244  uploader->WaitForUpload();
245  manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
246  }
247 
// The local reflog database file is removed in both normal and dry-run mode.
248  unlink(reflog_db.c_str());
249 
250  if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
251  LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
252 
253  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
254  if (upload_statsdb) {
255  stats_db->UploadStatistics(uploader.weak_ref());
256  }
257 
258  uploader->TearDown();
259  return 1;
260  }
261 
// Success: store the statistics flagged successful (skipped for dry runs),
// optionally upload them, and shut down the uploader.
262  if (!dry_run) {
263  stats_db->StoreGCStatistics(this->statistics(), start_time, true);
264  if (upload_statsdb) {
265  stats_db->UploadStatistics(uploader.weak_ref());
266  }
267  }
268  uploader->TearDown();
269  return 0;
270 }
271 
272 } // namespace swissknife
static Parameter Optional(const char key, const std::string &desc)
Definition: swissknife.h:41
#define LogCvmfs(source, mask,...)
Definition: logging.h:25
const char * Code2Ascii(const ObjectFetcherFailures::Failures error)
std::string database_file() const
Definition: reflog.cc:337
const manifest::Manifest * manifest() const
Definition: repository.h:125
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
std::string GetGMTimestamp(const std::string &format)
Definition: string.cc:615
bool UploadStatistics(upload::Spooler *spooler, std::string local_path="")
T * weak_ref() const
Definition: pointer.h:42
static bool ReadChecksum(const std::string &path, shash::Any *checksum)
Definition: reflog.cc:47
bool StoreGCStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:325
uint64_t oldest_trunk_catalog() const
manifest::Reflog * FetchReflog(ObjectFetcherT *object_fetcher, const std::string &repo_name, const shash::Any &reflog_hash)
CatalogTraversalParallel< ObjectFetcher > ReadonlyCatalogTraversal
GarbageCollectorAux< ReadonlyCatalogTraversal, HashFilter > GCAux
assert((mem||(size==0))&&"Out Of Memory")
GarbageCollector< ReadonlyCatalogTraversal, HashFilter > GC
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:105
bool InitVerifyingSignatureManager(const std::string &pubkey_path, const std::string &trusted_certs="")
Definition: server_tool.cc:44
virtual ParameterList GetParams() const
signature::SignatureManager * signature_manager() const
Definition: server_tool.cc:113
void Fill(const shash::Any &hash)
Definition: hash_filter.h:110
static AbstractUploader * Construct(const SpoolerDefinition &param)
Definition: plugin.h:188
int64_t String2Int64(const string &value)
Definition: string.cc:222
download::DownloadManager * download_manager() const
Definition: server_tool.cc:108
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
int Main(const ArgumentList &args)
perf::Statistics * statistics()
Definition: server_tool.h:49
void BeginTransaction()
Definition: reflog.cc:295
static void HashDatabase(const std::string &database_path, shash::Any *hash_reflog)
Definition: reflog.cc:322
upload::AbstractUploader * uploader
static const unsigned int kFullHistory
void DropDatabaseFileOwnership()
Definition: reflog.cc:313
static StatisticsDatabase * OpenStandardDB(const std::string repo_name)
bool DirectoryExists(const std::string &path)
Definition: posix.cc:813
static bool GcExtendedStats(const std::string &repo_name)
SmallhashFilter HashFilter
bool CollectOlderThan(uint64_t timestamp, const HashFilterT &preserved_objects)
Definition: gc_aux_impl.h:25
uint64_t String2Uint64(const string &value)
Definition: string.cc:228
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
void CommitTransaction()
Definition: reflog.cc:301
bool Vacuum()
Definition: reflog.h:79
HttpObjectFetcher ObjectFetcher
Failures FetchManifest(manifest::Manifest **manifest)
static bool WriteChecksum(const std::string &path, const shash::Any &value)
Definition: reflog.cc:64
GC::Configuration GcConfig
bool InitDownloadManager(const bool follow_redirects, const std::string &proxy, const unsigned max_pool_handles=1)
Definition: server_tool.cc:17
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
Definition: posix.cc:1124