CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_migrate.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_SWISSKNIFE_MIGRATE_H_
6 #define CVMFS_SWISSKNIFE_MIGRATE_H_
7 
8 #include "swissknife.h"
9 
10 #include <map>
11 #include <string>
12 #include <vector>
13 
14 #include "catalog.h"
15 #include "catalog_traversal.h"
16 #include "crypto/hash.h"
17 #include "history_sqlite.h"
18 #include "manifest.h"
19 #include "uid_map.h"
20 #include "upload.h"
21 #include "util/algorithm.h"
22 #include "util/atomic.h"
23 #include "util/concurrency.h"
24 #include "util/logging.h"
25 #include "util/pointer.h"
26 
27 namespace catalog {
28 class WritableCatalog;
29 }
30 
31 namespace swissknife {
32 
33 class CommandMigrate : public Command {
34  protected:
37  : max_row_id(0)
38  , entry_count(0)
41  , migration_time(0.0) { }
42  unsigned int max_row_id;
43  unsigned int entry_count;
44 
45  unsigned int hardlink_group_count;
46  unsigned int aggregated_linkcounts;
47 
49 
50  std::string root_path;
51  };
52 
// List of CatalogStatistics records. The std::vector base is inherited
// protectedly, so code outside this class can only add entries via Insert();
// CommandMigrate is declared a friend and can therefore use the vector
// interface directly. The class additionally derives from Lockable.
53  class CatalogStatisticsList : protected std::vector<CatalogStatistics>,
54  public Lockable {
55  friend class CommandMigrate;
56 
57  public:
// Appends a copy of `statistics` to the end of the list.
// NOTE(review): listing line 59 is not shown in this excerpt; presumably it
// takes the Lockable guard before the push_back -- confirm in the header.
58  inline void Insert(const CatalogStatistics &statistics) {
60  this->push_back(statistics);
61  }
62  };
63 
64  public:
66  typedef std::vector<PendingCatalog *> PendingCatalogList;
// Bookkeeping record for one catalog that is being processed. It refers to
// the existing catalog (`old_catalog`) and, once one exists, to its
// replacement (`new_catalog`), and carries a `success` flag.
// NOTE(review): the constructor header and most data members (listing lines
// 68, 70, 91-93, 95-97, 99, 104-106) are not shown in this excerpt.
67  struct PendingCatalog {
// Initial state: not successful, no new catalog, new catalog size 0.
69  : success(false)
71  , new_catalog(NULL)
72  , new_catalog_size(0) { }
73  virtual ~PendingCatalog();
74 
// Mount point of the old catalog, converted to a std::string.
75  inline const std::string root_path() const {
76  return old_catalog->mountpoint().ToString();
77  }
// Forwards to old_catalog->IsRoot().
78  inline bool IsRoot() const { return old_catalog->IsRoot(); }
// True as soon as `new_catalog` is non-NULL.
79  inline bool HasNew() const { return new_catalog != NULL; }
80 
// True if a new catalog exists, or if a second condition holds.
// NOTE(review): the right-hand operand of the `||` (listing line 83) is not
// shown in this excerpt -- confirm in the header.
81  inline bool HasChanges() const {
82  return (new_catalog != NULL ||
84  }
85 
// Returns old_catalog->hash(), i.e. the hash of the catalog before migration.
86  inline shash::Any GetOldContentHash() const {
87  return old_catalog->hash();
88  }
89 
90  bool success;
91 
94 
98 
100 
101  // Note: As soon as the `was_updated` future is set to 'true', both
102  // `new_catalog_hash` and `new_catalog_size` are assumed to be set
103  // accordingly. If it is set to 'false' they will be ignored.
107  };
108 
// std::map from a string key to a pointer-to-const PendingCatalog that
// additionally derives from Lockable. It adds no members of its own.
// NOTE(review): the key is presumably the catalog's root path -- confirm
// against the code that fills pending_catalogs_.
109  class PendingCatalogMap : public std::map<std::string, const PendingCatalog*>,
110  public Lockable {};
111 
// Common base class of the migration workers declared further down in this
// header. It derives from ConcurrentWorker<DerivedT>; each concrete worker
// passes its own type as DerivedT (e.g. MigrationWorker_20x).
// NOTE(review): several declarations (listing lines 115-116, 119, 124, 136,
// 139, 141, 146, 148) are not shown in this excerpt.
112  template<class DerivedT>
113  class AbstractMigrationWorker : public ConcurrentWorker<DerivedT> {
114  public:
117 
// Construction parameters for a worker: a temporary directory path and a
// flag telling whether catalog statistics should be collected.
118  struct worker_context {
120  const bool collect_catalog_statistics) :
121  temporary_directory(temporary_directory),
122  collect_catalog_statistics(collect_catalog_statistics) {}
123  const std::string temporary_directory;
125  };
126 
127  public:
128  explicit AbstractMigrationWorker(const worker_context *context);
129  virtual ~AbstractMigrationWorker();
130 
// Call operator taking one work item.
// NOTE(review): presumably invoked by the ConcurrentWorker machinery for each
// scheduled PendingCatalog -- confirm in util/concurrency.h.
131  void operator()(const expected_data &data);
132 
133  protected:
// Default implementation: does nothing and reports failure (returns false).
// The concrete workers in this header declare their own RunMigration().
// NOTE(review): the method is not virtual here, so the call is presumably
// dispatched through DerivedT -- confirm in the implementation file.
134  bool RunMigration(PendingCatalog *data) const { return false; }
135 
137  bool UpdateCatalogMetadata(PendingCatalog *data) const;
138  bool CleanupNestedCatalogs(PendingCatalog *data) const;
140 
142  GetWritable(const catalog::Catalog *catalog) const;
143 
144  protected:
145  const std::string temporary_directory_;
147 
149  };
150 
152  public AbstractMigrationWorker<MigrationWorker_20x>
153  {
155  protected:
156  static const float kSchema;
157  static const unsigned kSchemaRevision;
158 
159  public:
160  struct worker_context :
161  AbstractMigrationWorker<MigrationWorker_20x>::worker_context
162  {
164  const bool collect_catalog_statistics,
166  const bool analyze_file_linkcounts,
167  const uid_t uid,
168  const gid_t gid)
170  temporary_directory, collect_catalog_statistics)
171  , fix_nested_catalog_transitions(fix_nested_catalog_transitions)
172  , analyze_file_linkcounts(analyze_file_linkcounts)
173  , uid(uid)
174  , gid(gid) { }
177  const uid_t uid;
178  const gid_t gid;
179  };
180 
181  public:
182  explicit MigrationWorker_20x(const worker_context *context);
183 
184  protected:
185  bool RunMigration(PendingCatalog *data) const;
186 
187  bool CreateNewEmptyCatalog(PendingCatalog *data) const;
189  bool AttachOldCatalogDatabase(PendingCatalog *data) const;
190  bool StartDatabaseTransaction(PendingCatalog *data) const;
191  bool MigrateFileMetadata(PendingCatalog *data) const;
192  bool AnalyzeFileLinkcounts(PendingCatalog *data) const;
196  bool GenerateCatalogStatistics(PendingCatalog *data) const;
197  bool FindRootEntryInformation(PendingCatalog *data) const;
198  bool CommitDatabaseTransaction(PendingCatalog *data) const;
199  bool DetachOldCatalogDatabase(PendingCatalog *data) const;
200 
201  private:
204  const uid_t uid_;
205  const gid_t gid_;
206  };
207 
209  public AbstractMigrationWorker<MigrationWorker_217>
210  {
212 
213  public:
214  explicit MigrationWorker_217(const worker_context *context);
215 
216  protected:
217  bool RunMigration(PendingCatalog *data) const;
218 
220  bool StartDatabaseTransaction(PendingCatalog *data) const;
222  bool UpdateCatalogSchema(PendingCatalog *data) const;
223  bool CommitDatabaseTransaction(PendingCatalog *data) const;
224  };
225 
227  public AbstractMigrationWorker<ChownMigrationWorker>
228  {
230  public:
231  struct worker_context :
232  AbstractMigrationWorker<ChownMigrationWorker>::worker_context
233  {
235  const bool collect_catalog_statistics,
236  const UidMap &uid_map,
237  const GidMap &gid_map)
239  temporary_directory, collect_catalog_statistics)
240  , uid_map(uid_map)
241  , gid_map(gid_map) { }
242  const UidMap &uid_map;
243  const GidMap &gid_map;
244  };
245 
246  public:
247  explicit ChownMigrationWorker(const worker_context *context);
248 
249  protected:
250  bool RunMigration(PendingCatalog *data) const;
251  bool ApplyPersonaMappings(PendingCatalog *data) const;
252 
253  private:
254  template <class MapT>
255  std::string GenerateMappingStatement(const MapT &map,
256  const std::string &column) const;
257 
258  private:
259  const std::string uid_map_statement_;
260  const std::string gid_map_statement_;
261  };
262 
264  public AbstractMigrationWorker<HardlinkRemovalMigrationWorker>
265  {
267 
268  public:
269  explicit HardlinkRemovalMigrationWorker(const worker_context *context) :
271 
272  protected:
273  bool RunMigration(PendingCatalog *data) const;
274 
276  bool BreakUpHardlinks(PendingCatalog *data) const;
277  };
278 
280  public AbstractMigrationWorker<BulkhashRemovalMigrationWorker>
281  {
283 
284  public:
285  explicit BulkhashRemovalMigrationWorker(const worker_context *context) :
287 
288  protected:
289  bool RunMigration(PendingCatalog *data) const;
290 
292  bool RemoveRedundantBulkHashes(PendingCatalog *data) const;
293  };
294 
295  // Regenerate / repair statistics counters
297  public AbstractMigrationWorker<StatsMigrationWorker>
298  {
300 
301  public:
302  explicit StatsMigrationWorker(const worker_context *context);
303 
304  protected:
305  bool RunMigration(PendingCatalog *data) const;
306 
308  bool StartDatabaseTransaction(PendingCatalog *data) const;
309  bool RepairStatisticsCounters(PendingCatalog *data) const;
310  bool CommitDatabaseTransaction(PendingCatalog *data) const;
311  };
312 
313  public:
314  CommandMigrate();
// Name of this command: "migrate".
316  virtual std::string GetName() const { return "migrate"; }
// Two-line, human-readable description of the command. The two adjacent
// string literals are concatenated by the compiler into a single string.
317  virtual std::string GetDescription() const {
318  return "CernVM-FS catalog repository migration \n"
319  "This command migrates the whole catalog structure of a given repository";
320  }
321  virtual ParameterList GetParams() const;
322 
323  int Main(const ArgumentList &args);
324 
326  const catalog::DirectoryEntry &nested_root,
327  catalog::DirectoryEntry *mountpoint);
329 
330  protected:
// Fetches the repository manifest (and, if the manifest references one, the
// history) through `object_fetcher`, then runs a catalog traversal.
//
// @param manual_root_hash  if not null, the traversal is started with this
//                          hash; otherwise Traverse() is called without
//                          arguments
// @param object_fetcher    used for FetchManifest()/FetchHistory() and handed
//                          to the CatalogTraversal via its parameters
// @return false if fetching the manifest or the history fails (a message is
//         logged to stdout); otherwise the result of the traversal
//
// NOTE(review): listing lines 335, 344, 351 and 356 are not shown in this
// excerpt. They presumably declare `retval` and `params`, pass the remaining
// FetchHistory() arguments and register the traversal listener -- confirm in
// the header.
331  template <class ObjectFetcherT>
332  bool LoadCatalogs(const shash::Any &manual_root_hash,
333  ObjectFetcherT *object_fetcher)
334  {
// Step 1: the manifest is mandatory; give up if it cannot be fetched.
336  retval = object_fetcher->FetchManifest(&manifest_upstream_);
337  if (retval != ObjectFetcherFailures::kFailOk) {
338  LogCvmfs(kLogCvmfs, kLogStdout, "could not get manifest (%d)", retval);
339  return false;
340  }
341 
// Step 2: the history is only fetched when the manifest carries a non-null
// history hash.
342  if (!manifest_upstream_->history().IsNull()) {
343  retval = object_fetcher->FetchHistory(
345  if (retval != ObjectFetcherFailures::kFailOk) {
346  LogCvmfs(kLogCvmfs, kLogStdout, "could not get history (%d)", retval);
347  return false;
348  }
349  }
350 
// Step 3: configure the traversal. `no_close` is set to true via the named
// constant below, and the traversal uses the same object fetcher.
352  const bool generate_full_catalog_tree = true;
353  params.no_close = generate_full_catalog_tree;
354  params.object_fetcher = object_fetcher;
355  CatalogTraversal<ObjectFetcherT> traversal(params);
357 
// Step 4: traverse from the manually given root hash if there is one.
358  if (manual_root_hash.IsNull())
359  return traversal.Traverse();
360  return traversal.Traverse(manual_root_hash);
361  }
362 
363  void CatalogCallback(
365  void MigrationCallback(PendingCatalog *const &data);
366  void UploadCallback(const upload::SpoolerResult &result);
367 
368  void PrintStatusMessage(const PendingCatalog *catalog,
369  const shash::Any &content_hash,
370  const std::string &message);
371 
372  template <class MigratorT>
373  bool DoMigrationAndCommit(const std::string &manifest_path,
374  typename MigratorT::worker_context *context);
375 
376  template <class MigratorT>
377  void ConvertCatalogsRecursively(PendingCatalog *catalog, MigratorT *migrator);
378  bool RaiseFileDescriptorLimit() const;
379  bool ConfigureSQLite() const;
380  void AnalyzeCatalogStatistics() const;
381  bool ReadPersona(const std::string &uid, const std::string &gid);
382  bool ReadPersonaMaps(const std::string &uid_map_path,
383  const std::string &gid_map_path,
384  UidMap *uid_map,
385  GidMap *gid_map) const;
386 
388  void CreateNestedCatalogMarkerDirent(const shash::Any &content_hash);
389 
390  void UploadHistoryClosure(const upload::SpoolerResult &result,
391  Future<shash::Any> *hash);
392  bool UpdateUndoTags(PendingCatalog *root_catalog,
393  unsigned revision,
394  time_t timestamp,
395  shash::Any *history_hash);
396 
397  private:
400  unsigned int catalog_count_;
403 
404  uid_t uid_;
405  gid_t gid_;
406 
407  std::string temporary_directory_;
410 
416 
419 };
420 
421 } // namespace swissknife
422 
423 #endif // CVMFS_SWISSKNIFE_MIGRATE_H_
catalog::Catalog const * root_catalog_
void ConvertCatalogsRecursively(PendingCatalog *catalog, MigratorT *migrator)
#define LogCvmfs(source, mask,...)
Definition: logging.h:25
bool UpdateCatalogMetadata(PendingCatalog *data) const
PendingCatalog(const catalog::Catalog *old_catalog=NULL)
bool AnalyzeFileLinkcounts(PendingCatalog *data) const
UniquePtr< upload::Spooler > spooler_
CallbackPtr RegisterListener(typename BoundClosure< CatalogTraversalData< ObjectFetcherT::CatalogTN >, DelegateT, ClosureDataT >::CallbackMethod method, DelegateT *delegate, ClosureDataT data)
PendingCatalogMap pending_catalogs_
bool IsNull() const
Definition: hash.h:383
bool ReadPersonaMaps(const std::string &uid_map_path, const std::string &gid_map_path, UidMap *uid_map, GidMap *gid_map) const
bool IsRoot() const
Definition: catalog.h:193
bool MigrateFileMetadata(PendingCatalog *data) const
bool UpdateCatalogSchema(PendingCatalog *data) const
void Insert(const CatalogStatistics &statistics)
bool FindRootEntryInformation(PendingCatalog *data) const
bool StartDatabaseTransaction(PendingCatalog *data) const
bool LoadCatalogs(const shash::Any &manual_root_hash, ObjectFetcherT *object_fetcher)
bool FixNestedCatalogTransitionPoints(PendingCatalog *data) const
virtual std::string GetName() const
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
void CreateNestedCatalogMarkerDirent(const shash::Any &content_hash)
void UploadCallback(const upload::SpoolerResult &result)
static void FixNestedCatalogTransitionPoint(const catalog::DirectoryEntry &nested_root, catalog::DirectoryEntry *mountpoint)
virtual ParameterList GetParams() const
void PrintStatusMessage(const PendingCatalog *catalog, const shash::Any &content_hash, const std::string &message)
worker_context(const std::string &temporary_directory, const bool collect_catalog_statistics, const bool fix_nested_catalog_transitions, const bool analyze_file_linkcounts, const uid_t uid, const gid_t gid)
bool RemoveDanglingNestedMountpoints(PendingCatalog *data) const
static catalog::DirectoryEntry nested_catalog_marker_
bool AttachOldCatalogDatabase(PendingCatalog *data) const
bool ReadPersona(const std::string &uid, const std::string &gid)
bool MigrateNestedCatalogMountPoints(PendingCatalog *data) const
void CatalogCallback(const CatalogTraversalData< catalog::WritableCatalog > &data)
UniquePtr< history::SqliteHistory > history_upstream_
bool CommitDatabaseTransaction(PendingCatalog *data) const
int32_t atomic_int32
Definition: atomic.h:17
worker_context(const std::string &temporary_directory, const bool collect_catalog_statistics, const UidMap &uid_map, const GidMap &gid_map)
bool GenerateCatalogStatistics(PendingCatalog *data) const
void UploadHistoryClosure(const upload::SpoolerResult &result, Future< shash::Any > *hash)
Future< catalog::DirectoryEntry > root_entry
unsigned GetModifiedRowCount() const
Definition: sql_impl.h:358
std::string GenerateMappingStatement(const MapT &map, const std::string &column) const
bool RepairStatisticsCounters(PendingCatalog *data) const
bool CheckDatabaseSchemaCompatibility(PendingCatalog *data) const
std::vector< PendingCatalog * > PendingCatalogList
bool StartDatabaseTransaction(PendingCatalog *data) const
bool DetachOldCatalogDatabase(PendingCatalog *data) const
bool ApplyPersonaMappings(PendingCatalog *data) const
perf::Statistics * statistics()
Definition: server_tool.h:49
bool CommitDatabaseTransaction(PendingCatalog *data) const
static const catalog::DirectoryEntry & GetNestedCatalogMarkerDirent()
bool RunMigration(PendingCatalog *data) const
PathString mountpoint() const
Definition: catalog.h:179
virtual std::string GetDescription() const
Future< catalog::DeltaCounters > nested_statistics
bool CollectAndAggregateStatistics(PendingCatalog *data) const
bool Traverse(const TraversalType type=Base::kBreadthFirst)
worker_context(const std::string &temporary_directory, const bool collect_catalog_statistics)
bool GenerateNewStatisticsCounters(PendingCatalog *data) const
bool CommitDatabaseTransaction(PendingCatalog *data) const
bool RunMigration(PendingCatalog *data) const
std::string ToString() const
Definition: shortstring.h:141
int Main(const ArgumentList &args)
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
MigrationWorker_20x(const worker_context *context)
UniquePtr< manifest::Manifest > manifest_upstream_
bool UpdateUndoTags(PendingCatalog *root_catalog, unsigned revision, time_t timestamp, shash::Any *history_hash)
bool CleanupNestedCatalogs(PendingCatalog *data) const
bool UpdateNestedCatalogReferences(PendingCatalog *data) const
bool CreateNewEmptyCatalog(PendingCatalog *data) const
CatalogStatisticsList catalog_statistics_list_
const CatalogDatabase & database() const
Definition: catalog.h:249
bool CheckDatabaseSchemaCompatibility(PendingCatalog *data) const
bool StartDatabaseTransaction(PendingCatalog *data) const
shash::Any hash() const
Definition: catalog.h:186
bool CheckDatabaseSchemaCompatibility(PendingCatalog *data) const
void MigrationCallback(PendingCatalog *const &data)
catalog::WritableCatalog * GetWritable(const catalog::Catalog *catalog) const
bool DoMigrationAndCommit(const std::string &manifest_path, typename MigratorT::worker_context *context)