CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_gc.cc
Go to the documentation of this file.
1 
9 #include "swissknife_gc.h"
10 
11 #include <string>
12 
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22 
23 namespace swissknife {
24 
31 
32 
// Body of GetParams(). The cross-reference index of this page lists it as
// `virtual ParameterList GetParams() const`; the signature line (33) is not
// present in this listing.
//
// Builds and returns the list of parameters understood by this command. Each
// entry is a key character plus a description:
//   - mandatory: r, u, n, R
//   - optional:  h, z, k, t, L, N, @
//   - switches:  d, l, I
34  ParameterList r;
35  r.push_back(Parameter::Mandatory('r', "repository url"));
36  r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37  r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38  r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
// At least one of 'h' / 'z' has to be supplied: Main() returns 1 when neither
// a revision count nor a timestamp threshold is given.
39  r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40  r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41  r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42  r.push_back(Parameter::Optional('t', "temporary directory"));
43  r.push_back(Parameter::Optional('L', "path to deletion log file"));
44  r.push_back(Parameter::Optional('N', "number of threads to use"));
45  r.push_back(Parameter::Optional('@', "proxy url"));
46  r.push_back(Parameter::Switch('d', "dry run"));
47  r.push_back(Parameter::Switch('l', "list objects to be removed"));
48  r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
49  return r;
50 }
51 
52 
// Body of Main(). The cross-reference index of this page lists it as
// `int Main(const ArgumentList &args)`; the signature line (53) is not present
// in this listing, and neither are a few interior lines (their numbers are
// skipped below and called out where it matters).
//
// Runs a garbage collection for the repository described by `args`:
//   1. parse arguments and verify that a retention threshold was given
//   2. fetch the manifest and the reflog, construct the uploader
//   3. collect file catalogs (GC), then auxiliary objects (GCAux)
//   4. commit and vacuum the reflog, upload it and rewrite its checksum file
//   5. store (and optionally upload) GC statistics unless this is a dry run
// Returns 0 on success and 1 on every failure path.
// The start time is captured first; it is later handed to StoreGCStatistics().
54  const std::string start_time = GetGMTimestamp();
55 
// 'r', 'u', 'n' and 'R' are registered as mandatory in GetParams(), so the
// result of find() is dereferenced here without an end() check.
56  const std::string &repo_url = *args.find('r')->second;
57  const std::string &spooler = *args.find('u')->second;
58  const std::string &repo_name = *args.find('n')->second;
59  const std::string &reflog_chksum_path = *args.find('R')->second;
60  shash::Any reflog_hash;
61  if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
62  LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
63  return 1;
64  }
65 
// Retention thresholds. The fallback branches of both conditionals (listing
// lines 68 and 72) are missing from this listing.
// NOTE(review): presumably they default to GcConfig::kFullHistory and
// GcConfig::kNoTimestamp, which is what the check further down compares
// against -- confirm against the real file.
66  const uint64_t revisions = (args.count('h') > 0)
67  ? String2Int64(*args.find('h')->second)
69  const time_t timestamp = (args.count('z') > 0)
70  ? static_cast<time_t>(
71  String2Int64(*args.find('z')->second))
// 'k' may name a directory: in that case every "*.pub" file found in it is
// used, joined with ':'. Otherwise the argument is passed on unchanged.
73  std::string repo_keys = (args.count('k') > 0) ? *args.find('k')->second : "";
74  if (DirectoryExists(repo_keys))
75  repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
76  const bool dry_run = (args.count('d') > 0);
77  const bool list_condemned_objects = (args.count('l') > 0);
78  const std::string temp_directory = (args.count('t') > 0)
79  ? *args.find('t')->second
80  : "/tmp";
// An empty path means that no deletion log is written.
81  const std::string deletion_log_path = (args.count('L') > 0)
82  ? *args.find('L')->second
83  : "";
84  const bool upload_statsdb = (args.count('I') > 0);
// NOTE(review): String2Uint64() returns uint64_t (see index); the value is
// narrowed to unsigned int here without a range check.
85  const unsigned int num_threads = (args.count('N') > 0)
86  ? String2Uint64(*args.find('N')->second)
87  : 8;
88 
// Refuse to run if neither threshold was given. The opening line of the log
// call (listing line 91) is missing from this listing.
89  if (timestamp == GcConfig::kNoTimestamp
90  && revisions == GcConfig::kFullHistory) {
92  "neither a timestamp nor history threshold given");
93  return 1;
94  }
95 
// Set up the download and signature managers; redirects are never followed
// and the proxy is empty unless '@' was given.
96  const bool follow_redirects = false;
97  const std::string proxy = ((args.count('@') > 0) ? *args.find('@')->second
98  : "");
99  if (!this->InitDownloadManager(follow_redirects, proxy)
100  || !this->InitSignatureManager(repo_keys)) {
101  LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
102  return 1;
103  }
104 
// The remaining constructor arguments (listing lines 108-109) and the
// declaration of `manifest` (listing line 111) are missing from this listing.
105  ObjectFetcher object_fetcher(repo_name,
106  repo_url,
107  temp_directory,
110 
112  const ObjectFetcher::Failures retval =
113  object_fetcher.FetchManifest(&manifest);
114  if (retval != ObjectFetcher::kFailOk) {
116  "failed to load repository manifest "
117  "(%d - %s)",
118  retval, Code2Ascii(retval));
119  return 1;
120  }
121 
// Garbage collection has to be enabled in the manifest.
122  if (!manifest->garbage_collectable()) {
124  "repository does not allow garbage collection");
125  return 1;
126  }
127 
// The declaration of `reflog` (listing line 128) is missing from this listing.
// NOTE(review): validity is only checked with assert(), i.e. not at all when
// built with NDEBUG.
129  reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
130  assert(reflog.IsValid());
131 
// The declaration of `uploader` (listing line 133) is missing from this
// listing; only the Construct() call that initializes it is visible.
132  const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
134  upload::AbstractUploader::Construct(spooler_definition));
135 
136  if (!uploader.IsValid()) {
137  LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
138  spooler.c_str());
139  return 1;
140  }
141 
// Open the deletion log in append mode ("a+") if a path was given. From here
// on, every failure return calls uploader->TearDown() first.
142  FILE *deletion_log_file = NULL;
143  if (!deletion_log_path.empty()) {
144  deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
145  if (NULL == deletion_log_file) {
147  "failed to open deletion log file "
148  "(errno: %d)",
149  errno);
150  uploader->TearDown();
151  return 1;
152  }
153  }
154 
155  const bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
156 
// The transaction opened here is committed at listing line 236, which is only
// reached when both collection phases succeed.
157  reflog->BeginTransaction();
158 
// The configuration only holds non-owning pointers (weak_ref() / address-of)
// to the uploader, the object fetcher and the reflog, plus the raw FILE* of
// the deletion log (NULL if no log was requested).
159  GcConfig config;
160  config.uploader = uploader.weak_ref();
161  config.keep_history_depth = revisions;
162  config.keep_history_timestamp = timestamp;
163  config.dry_run = dry_run;
164  config.verbose = list_condemned_objects;
165  config.object_fetcher = &object_fetcher;
166  config.reflog = reflog.weak_ref();
167  config.deleted_objects_logfile = deletion_log_file;
168  config.statistics = statistics();
169  config.extended_stats = extended_stats;
170  config.num_threads = num_threads;
171 
// Write a start marker to the deletion log.
// NOTE(review): on a failed write the function returns without fclose()ing
// deletion_log_file; the only fclose() in this function is at listing line 233.
172  if (deletion_log_file != NULL) {
173  const int bytes_written = fprintf(deletion_log_file,
174  "# Garbage Collection started at %s\n",
175  StringifyTime(time(NULL), true).c_str());
176  if (bytes_written < 0) {
178  "failed to write to deletion log '%s' "
179  "(errno: %d)",
180  deletion_log_path.c_str(), errno);
181  uploader->TearDown();
182  return 1;
183  }
184  }
185 
// Listing line 186 is missing from this listing.
// NOTE(review): presumably it sets up `stats_db`, which is used below (the
// index lists StatisticsDatabase::OpenStandardDB) -- confirm. In the visible
// code stats_db is only dereferenced when dry_run is false.
187 
188  // File catalogs
189  GC collector(config);
190  collector.UseReflogTimestamps();
191  bool success = collector.Collect();
192 
// On failure, record the unsuccessful run (third argument `false`) unless this
// is a dry run, then tear down the uploader.
// NOTE(review): deletion_log_file is not closed on this return path.
193  if (!success) {
194  LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
195  if (!dry_run) {
196  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
197  if (upload_statsdb) {
198  stats_db->UploadStatistics(uploader.weak_ref());
199  }
200  }
201  uploader->TearDown();
202  return 1;
203  }
204 
// Second phase: auxiliary objects. The certificate, history and meta info
// hashes referenced by the current manifest are put into the preserve filter;
// the cut-off passed to CollectOlderThan() is collector.oldest_trunk_catalog().
205  // Tag databases, meta infos, certificates
206  HashFilter preserved_objects;
207  preserved_objects.Fill(manifest->certificate());
208  preserved_objects.Fill(manifest->history());
209  preserved_objects.Fill(manifest->meta_info());
210  GCAux collector_aux(config);
211  success = collector_aux.CollectOlderThan(collector.oldest_trunk_catalog(),
212  preserved_objects);
// Same failure handling as for the first phase.
// NOTE(review): deletion_log_file is not closed on this return path either.
213  if (!success) {
215  "garbage collection of auxiliary files failed");
216  if (!dry_run) {
217  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
218  if (upload_statsdb) {
219  stats_db->UploadStatistics(uploader.weak_ref());
220  }
221  }
222  uploader->TearDown();
223  return 1;
224  }
225 
226  // As of here: garbage collection succeeded, cleanup & commit
227 
// Write the end marker and close the deletion log.
// NOTE(review): the write result is only checked with assert(); with NDEBUG a
// failed write goes unnoticed and bytes_written becomes an unused variable.
228  if (deletion_log_file != NULL) {
229  const int bytes_written = fprintf(deletion_log_file,
230  "# Garbage Collection finished at %s\n\n",
231  StringifyTime(time(NULL), true).c_str());
232  assert(bytes_written >= 0);
233  fclose(deletion_log_file);
234  }
235 
// The path of the database file is copied before reflog.Destroy(), because it
// is still needed for upload, hashing and unlink below.
// NOTE(review): the Vacuum() result is only checked with assert().
236  reflog->CommitTransaction();
237  // Has to be outside the transaction
238  success = reflog->Vacuum();
239  assert(success);
240  reflog->DropDatabaseFileOwnership();
241  const std::string reflog_db = reflog->database_file();
242  reflog.Destroy();
243 
// Publish the updated reflog as ".cvmfsreflog" and rewrite the local checksum
// file with the hash of the uploaded database (skipped on dry runs).
// NOTE(review): WriteChecksum() returns bool (see index) but the result is
// ignored, and the checksum is written before the upload error count is
// checked at listing line 253.
244  if (!dry_run) {
245  uploader->UploadFile(reflog_db, ".cvmfsreflog");
246  manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
247  uploader->WaitForUpload();
248  manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
249  }
250 
// The local reflog database file is removed in any case, dry run or not.
251  unlink(reflog_db.c_str());
252 
// Upload errors are only treated as fatal outside of dry runs; the run is then
// recorded as unsuccessful.
253  if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
254  LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
255 
256  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
257  if (upload_statsdb) {
258  stats_db->UploadStatistics(uploader.weak_ref());
259  }
260 
261  uploader->TearDown();
262  return 1;
263  }
264 
// Success: record the run as successful (third argument `true`) unless this is
// a dry run, and upload the statistics database if 'I' was given.
265  if (!dry_run) {
266  stats_db->StoreGCStatistics(this->statistics(), start_time, true);
267  if (upload_statsdb) {
268  stats_db->UploadStatistics(uploader.weak_ref());
269  }
270  }
271  uploader->TearDown();
272  return 0;
273 }
274 
275 } // namespace swissknife
static Parameter Optional(const char key, const std::string &desc)
Definition: swissknife.h:41
const char * Code2Ascii(const ObjectFetcherFailures::Failures error)
std::string database_file() const
Definition: reflog.cc:322
const manifest::Manifest * manifest() const
Definition: repository.h:125
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
std::string GetGMTimestamp(const std::string &format)
Definition: string.cc:654
bool UploadStatistics(upload::Spooler *spooler, std::string local_path="")
T * weak_ref() const
Definition: pointer.h:46
static bool ReadChecksum(const std::string &path, shash::Any *checksum)
Definition: reflog.cc:48
bool StoreGCStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:356
uint64_t oldest_trunk_catalog() const
manifest::Reflog * FetchReflog(ObjectFetcherT *object_fetcher, const std::string &repo_name, const shash::Any &reflog_hash)
CatalogTraversalParallel< ObjectFetcher > ReadonlyCatalogTraversal
GarbageCollectorAux< ReadonlyCatalogTraversal, HashFilter > GCAux
assert((mem||(size==0))&&"Out Of Memory")
GarbageCollector< ReadonlyCatalogTraversal, HashFilter > GC
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:104
virtual ParameterList GetParams() const
signature::SignatureManager * signature_manager() const
Definition: server_tool.cc:101
void Fill(const shash::Any &hash)
Definition: hash_filter.h:110
static AbstractUploader * Construct(const SpoolerDefinition &param)
Definition: plugin.h:170
int64_t String2Int64(const string &value)
Definition: string.cc:234
download::DownloadManager * download_manager() const
Definition: server_tool.cc:96
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
int Main(const ArgumentList &args)
perf::Statistics * statistics()
Definition: server_tool.h:47
void BeginTransaction()
Definition: reflog.cc:282
static void HashDatabase(const std::string &database_path, shash::Any *hash_reflog)
Definition: reflog.cc:309
upload::AbstractUploader * uploader
void DropDatabaseFileOwnership()
Definition: reflog.cc:300
static StatisticsDatabase * OpenStandardDB(const std::string repo_name)
bool DirectoryExists(const std::string &path)
Definition: posix.cc:824
static bool GcExtendedStats(const std::string &repo_name)
SmallhashFilter HashFilter
bool InitSignatureManager(const std::string &pubkey_path, const std::string &certificate_path="", const std::string &private_key_path="")
Definition: server_tool.cc:44
bool CollectOlderThan(uint64_t timestamp, const HashFilterT &preserved_objects)
Definition: gc_aux_impl.h:24
uint64_t String2Uint64(const string &value)
Definition: string.cc:240
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
static const uint64_t kFullHistory
void CommitTransaction()
Definition: reflog.cc:288
bool Vacuum()
Definition: reflog.h:79
HttpObjectFetcher ObjectFetcher
Failures FetchManifest(manifest::Manifest **manifest)
static bool WriteChecksum(const std::string &path, const shash::Any &value)
Definition: reflog.cc:65
GC::Configuration GcConfig
bool InitDownloadManager(const bool follow_redirects, const std::string &proxy, const unsigned max_pool_handles=1)
Definition: server_tool.cc:17
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
Definition: posix.cc:1129
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545