CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_gc.cc
Go to the documentation of this file.
1 
9 #include "swissknife_gc.h"
10 
11 #include <string>
12 
16 #include "manifest.h"
17 #include "reflog.h"
18 #include "statistics_database.h"
19 #include "upload_facility.h"
20 #include "util/posix.h"
21 #include "util/string.h"
22 
23 namespace swissknife {
24 
31 
32 
34  ParameterList r;
35  r.push_back(Parameter::Mandatory('r', "repository url"));
36  r.push_back(Parameter::Mandatory('u', "spooler definition string"));
37  r.push_back(Parameter::Mandatory('n', "fully qualified repository name"));
38  r.push_back(Parameter::Mandatory('R', "path to reflog.chksum file"));
39  r.push_back(Parameter::Optional('h', "conserve <h> revisions"));
40  r.push_back(Parameter::Optional('z', "conserve revisions younger than <z>"));
41  r.push_back(Parameter::Optional('k', "repository master key(s) / dir"));
42  r.push_back(Parameter::Optional('t', "temporary directory"));
43  r.push_back(Parameter::Optional('L', "path to deletion log file"));
44  r.push_back(Parameter::Optional('N', "number of threads to use"));
45  r.push_back(Parameter::Optional('@', "proxy url"));
46  r.push_back(Parameter::Switch('d', "dry run"));
47  r.push_back(Parameter::Switch('l', "list objects to be removed"));
48  r.push_back(Parameter::Switch('I', "upload updated statistics DB file"));
49  return r;
50 }
51 
52 
54  std::string start_time = GetGMTimestamp();
55 
56  const std::string &repo_url = *args.find('r')->second;
57  const std::string &spooler = *args.find('u')->second;
58  const std::string &repo_name = *args.find('n')->second;
59  const std::string &reflog_chksum_path = *args.find('R')->second;
60  shash::Any reflog_hash;
61  if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
62  LogCvmfs(kLogCvmfs, kLogStderr, "Could not read reflog checksum");
63  return 1;
64  }
65 
66  const uint64_t revisions = (args.count('h') > 0) ?
67  String2Int64(*args.find('h')->second) : GcConfig::kFullHistory;
68  const time_t timestamp = (args.count('z') > 0)
69  ? static_cast<time_t>(String2Int64(*args.find('z')->second))
70  : GcConfig::kNoTimestamp;
71  std::string repo_keys = (args.count('k') > 0) ?
72  *args.find('k')->second : "";
73  if (DirectoryExists(repo_keys))
74  repo_keys = JoinStrings(FindFilesBySuffix(repo_keys, ".pub"), ":");
75  const bool dry_run = (args.count('d') > 0);
76  const bool list_condemned_objects = (args.count('l') > 0);
77  const std::string temp_directory = (args.count('t') > 0) ?
78  *args.find('t')->second : "/tmp";
79  const std::string deletion_log_path = (args.count('L') > 0) ?
80  *args.find('L')->second : "";
81  const bool upload_statsdb = (args.count('I') > 0);
82  const unsigned int num_threads = (args.count('N') > 0) ?
83  String2Uint64(*args.find('N')->second) : 8;
84 
85  if (timestamp == GcConfig::kNoTimestamp &&
86  revisions == GcConfig::kFullHistory) {
87  LogCvmfs(kLogCvmfs, kLogStderr,
88  "neither a timestamp nor history threshold given");
89  return 1;
90  }
91 
92  const bool follow_redirects = false;
93  const std::string proxy = ((args.count('@') > 0) ?
94  *args.find('@')->second : "");
95  if (!this->InitDownloadManager(follow_redirects, proxy) ||
96  !this->InitSignatureManager(repo_keys)) {
97  LogCvmfs(kLogCatalog, kLogStderr, "failed to init repo connection");
98  return 1;
99  }
100 
101  ObjectFetcher object_fetcher(repo_name,
102  repo_url,
103  temp_directory,
104  download_manager(),
105  signature_manager());
106 
108  ObjectFetcher::Failures retval = object_fetcher.FetchManifest(&manifest);
109  if (retval != ObjectFetcher::kFailOk) {
110  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest "
111  "(%d - %s)",
112  retval, Code2Ascii(retval));
113  return 1;
114  }
115 
116  if (!manifest->garbage_collectable()) {
117  LogCvmfs(kLogCvmfs, kLogStderr,
118  "repository does not allow garbage collection");
119  return 1;
120  }
121 
123  reflog = FetchReflog(&object_fetcher, repo_name, reflog_hash);
124  assert(reflog.IsValid());
125 
126  const upload::SpoolerDefinition spooler_definition(spooler, shash::kAny);
128  upload::AbstractUploader::Construct(spooler_definition));
129 
130  if (!uploader.IsValid()) {
131  LogCvmfs(kLogCvmfs, kLogStderr, "failed to initialize spooler for '%s'",
132  spooler.c_str());
133  return 1;
134  }
135 
136  FILE *deletion_log_file = NULL;
137  if (!deletion_log_path.empty()) {
138  deletion_log_file = fopen(deletion_log_path.c_str(), "a+");
139  if (NULL == deletion_log_file) {
140  LogCvmfs(kLogCvmfs, kLogStderr, "failed to open deletion log file "
141  "(errno: %d)", errno);
142  uploader->TearDown();
143  return 1;
144  }
145  }
146 
147  bool extended_stats = StatisticsDatabase::GcExtendedStats(repo_name);
148 
149  reflog->BeginTransaction();
150 
151  GcConfig config;
152  config.uploader = uploader.weak_ref();
153  config.keep_history_depth = revisions;
154  config.keep_history_timestamp = timestamp;
155  config.dry_run = dry_run;
156  config.verbose = list_condemned_objects;
157  config.object_fetcher = &object_fetcher;
158  config.reflog = reflog.weak_ref();
159  config.deleted_objects_logfile = deletion_log_file;
160  config.statistics = statistics();
161  config.extended_stats = extended_stats;
162  config.num_threads = num_threads;
163 
164  if (deletion_log_file != NULL) {
165  const int bytes_written = fprintf(deletion_log_file,
166  "# Garbage Collection started at %s\n",
167  StringifyTime(time(NULL), true).c_str());
168  if (bytes_written < 0) {
169  LogCvmfs(kLogCvmfs, kLogStderr, "failed to write to deletion log '%s' "
170  "(errno: %d)",
171  deletion_log_path.c_str(), errno);
172  uploader->TearDown();
173  return 1;
174  }
175  }
176 
177  StatisticsDatabase *stats_db = StatisticsDatabase::OpenStandardDB(repo_name);
178 
179  // File catalogs
180  GC collector(config);
181  collector.UseReflogTimestamps();
182  bool success = collector.Collect();
183 
184  if (!success) {
185  LogCvmfs(kLogCvmfs, kLogStderr, "garbage collection failed");
186  if (!dry_run) {
187  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
188  if (upload_statsdb) {
189  stats_db->UploadStatistics(uploader.weak_ref());
190  }
191  }
192  uploader->TearDown();
193  return 1;
194  }
195 
196  // Tag databases, meta infos, certificates
197  HashFilter preserved_objects;
198  preserved_objects.Fill(manifest->certificate());
199  preserved_objects.Fill(manifest->history());
200  preserved_objects.Fill(manifest->meta_info());
201  GCAux collector_aux(config);
202  success = collector_aux.CollectOlderThan(
203  collector.oldest_trunk_catalog(), preserved_objects);
204  if (!success) {
205  LogCvmfs(kLogCvmfs, kLogStderr,
206  "garbage collection of auxiliary files failed");
207  if (!dry_run) {
208  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
209  if (upload_statsdb) {
210  stats_db->UploadStatistics(uploader.weak_ref());
211  }
212  }
213  uploader->TearDown();
214  return 1;
215  }
216 
217  // As of here: garbage collection succeeded, cleanup & commit
218 
219  if (deletion_log_file != NULL) {
220  const int bytes_written = fprintf(deletion_log_file,
221  "# Garbage Collection finished at %s\n\n",
222  StringifyTime(time(NULL), true).c_str());
223  assert(bytes_written >= 0);
224  fclose(deletion_log_file);
225  }
226 
227  reflog->CommitTransaction();
228  // Has to be outside the transaction
229  success = reflog->Vacuum();
230  assert(success);
231  reflog->DropDatabaseFileOwnership();
232  const std::string reflog_db = reflog->database_file();
233  reflog.Destroy();
234 
235  if (!dry_run) {
236  uploader->UploadFile(reflog_db, ".cvmfsreflog");
237  manifest::Reflog::HashDatabase(reflog_db, &reflog_hash);
238  uploader->WaitForUpload();
239  manifest::Reflog::WriteChecksum(reflog_chksum_path, reflog_hash);
240  }
241 
242  unlink(reflog_db.c_str());
243 
244  if (uploader->GetNumberOfErrors() > 0 && !dry_run) {
245  LogCvmfs(kLogCvmfs, kLogStderr, "failed to upload updated Reflog");
246 
247  stats_db->StoreGCStatistics(this->statistics(), start_time, false);
248  if (upload_statsdb) {
249  stats_db->UploadStatistics(uploader.weak_ref());
250  }
251 
252  uploader->TearDown();
253  return 1;
254  }
255 
256  if (!dry_run) {
257  stats_db->StoreGCStatistics(this->statistics(), start_time, true);
258  if (upload_statsdb) {
259  stats_db->UploadStatistics(uploader.weak_ref());
260  }
261  }
262  uploader->TearDown();
263  return 0;
264 }
265 
266 } // namespace swissknife
static Parameter Optional(const char key, const std::string &desc)
Definition: swissknife.h:41
const char * Code2Ascii(const ObjectFetcherFailures::Failures error)
std::string database_file() const
Definition: reflog.cc:337
const manifest::Manifest * manifest() const
Definition: repository.h:125
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
std::string GetGMTimestamp(const std::string &format)
Definition: string.cc:633
bool UploadStatistics(upload::Spooler *spooler, std::string local_path="")
T * weak_ref() const
Definition: pointer.h:42
static bool ReadChecksum(const std::string &path, shash::Any *checksum)
Definition: reflog.cc:47
bool StoreGCStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:343
uint64_t oldest_trunk_catalog() const
manifest::Reflog * FetchReflog(ObjectFetcherT *object_fetcher, const std::string &repo_name, const shash::Any &reflog_hash)
CatalogTraversalParallel< ObjectFetcher > ReadonlyCatalogTraversal
GarbageCollectorAux< ReadonlyCatalogTraversal, HashFilter > GCAux
assert((mem||(size==0))&&"Out Of Memory")
GarbageCollector< ReadonlyCatalogTraversal, HashFilter > GC
string StringifyTime(const time_t seconds, const bool utc)
Definition: string.cc:105
virtual ParameterList GetParams() const
signature::SignatureManager * signature_manager() const
Definition: server_tool.cc:103
void Fill(const shash::Any &hash)
Definition: hash_filter.h:110
static AbstractUploader * Construct(const SpoolerDefinition &param)
Definition: plugin.h:188
int64_t String2Int64(const string &value)
Definition: string.cc:240
download::DownloadManager * download_manager() const
Definition: server_tool.cc:98
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
int Main(const ArgumentList &args)
perf::Statistics * statistics()
Definition: server_tool.h:47
void BeginTransaction()
Definition: reflog.cc:295
static void HashDatabase(const std::string &database_path, shash::Any *hash_reflog)
Definition: reflog.cc:322
upload::AbstractUploader * uploader
void DropDatabaseFileOwnership()
Definition: reflog.cc:313
static StatisticsDatabase * OpenStandardDB(const std::string repo_name)
bool DirectoryExists(const std::string &path)
Definition: posix.cc:824
static bool GcExtendedStats(const std::string &repo_name)
SmallhashFilter HashFilter
bool InitSignatureManager(const std::string &pubkey_path, const std::string &certificate_path="", const std::string &private_key_path="")
Definition: server_tool.cc:44
bool CollectOlderThan(uint64_t timestamp, const HashFilterT &preserved_objects)
Definition: gc_aux_impl.h:25
uint64_t String2Uint64(const string &value)
Definition: string.cc:246
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
static const uint64_t kFullHistory
void CommitTransaction()
Definition: reflog.cc:301
bool Vacuum()
Definition: reflog.h:79
HttpObjectFetcher ObjectFetcher
Failures FetchManifest(manifest::Manifest **manifest)
static bool WriteChecksum(const std::string &path, const shash::Any &value)
Definition: reflog.cc:64
GC::Configuration GcConfig
bool InitDownloadManager(const bool follow_redirects, const std::string &proxy, const unsigned max_pool_handles=1)
Definition: server_tool.cc:17
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
Definition: posix.cc:1135
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:528