| Line |
Branch |
Exec |
Source |
| 1 |
|
|
/** |
| 2 |
|
|
* This file is part of the CernVM File System |
| 3 |
|
|
* |
| 4 |
|
|
* This file defines a class which derives `SyncUnion` to |
| 5 |
|
|
* provide support for tarballs |
| 6 |
|
|
* |
| 7 |
|
|
*/ |
| 8 |
|
|
|
| 9 |
|
|
#ifndef CVMFS_SYNC_UNION_TARBALL_H_ |
| 10 |
|
|
#define CVMFS_SYNC_UNION_TARBALL_H_ |
| 11 |
|
|
|
| 12 |
|
|
#include <pthread.h> |
| 13 |
|
|
|
| 14 |
|
|
#include <list> |
| 15 |
|
|
#include <map> |
| 16 |
|
|
#include <set> |
| 17 |
|
|
#include <string> |
| 18 |
|
|
|
| 19 |
|
|
#include "duplex_libarchive.h" |
| 20 |
|
|
#include "sync_union.h" |
| 21 |
|
|
#include "util/concurrency.h" |
| 22 |
|
|
|
| 23 |
|
|
namespace publish { |
| 24 |
|
|
|
| 25 |
|
|
class AbstractSyncMediator; |
| 26 |
|
|
|
| 27 |
|
|
class SyncUnionTarball : public SyncUnion { |
| 28 |
|
|
public: |
| 29 |
|
|
SyncUnionTarball(AbstractSyncMediator *mediator, |
| 30 |
|
|
const std::string &rdonly_path, |
| 31 |
|
|
const std::string &tarball_path, |
| 32 |
|
|
const std::string &base_directory, |
| 33 |
|
|
const uid_t uid, |
| 34 |
|
|
const gid_t gid, |
| 35 |
|
|
const std::string &to_delete, |
| 36 |
|
|
const bool create_catalog_on_root, |
| 37 |
|
|
const bool fast_delete = false, |
| 38 |
|
|
const std::string &path_delimiter = ":", |
| 39 |
|
|
const bool tolerate_missing_hardlinks = false); |
| 40 |
|
|
|
| 41 |
|
|
~SyncUnionTarball(); |
| 42 |
|
|
|
| 43 |
|
|
/* |
| 44 |
|
|
* Check that the tarball is actually valid and that it can be opened. |
| 45 |
|
|
*/ |
| 46 |
|
|
bool Initialize(); |
| 47 |
|
|
|
| 48 |
|
|
/* |
| 49 |
|
|
* We start by deleting the entity that we are requested to delete. |
| 50 |
|
|
* Then we move on to extracting the tarball. |
| 51 |
|
|
* For each directory we find, we remember it together with its SyncItem in |
| 52 |
|
|
* the `dirs_` map. |
| 53 |
|
|
* Similarly we remember where nested catalogs should be placed in |
| 54 |
|
|
* `to_create_catalog_dirs_`. |
| 55 |
|
|
* After we finish uncompressing the tarball, we come back to iterate over |
| 56 |
|
|
* `to_create_catalog_dirs_` and we create the nested catalogs. |
| 57 |
|
|
*/ |
| 58 |
|
|
void Traverse(); |
| 59 |
|
|
|
| 60 |
|
|
void PostUpload(); |
| 61 |
|
|
|
| 62 |
|
|
std::string UnwindWhiteoutFilename(SharedPtr<SyncItem> entry) const; |
| 63 |
|
|
bool IsOpaqueDirectory(SharedPtr<SyncItem> directory) const; |
| 64 |
|
|
bool IsWhiteoutEntry(SharedPtr<SyncItem> entry) const; |
| 65 |
|
|
|
| 66 |
|
|
private: |
| 67 |
|
|
struct archive *src; |
| 68 |
|
|
const std::string tarball_path_; |
| 69 |
|
|
const std::string base_directory_; |
| 70 |
|
|
const uid_t uid_; |
| 71 |
|
|
const gid_t gid_; |
| 72 |
|
|
const std::string to_delete_; ///< entity to delete before extracting the tar |
| 73 |
|
|
const bool create_catalog_on_root_; |
| 74 |
|
|
const bool fast_delete_; |
| 75 |
|
|
const std::string path_delimiter_; ///< delimiter used to split paths |
| 76 |
|
|
/// when true, a hardlink whose target is not present in the archive is |
| 77 |
|
|
/// materialized as an empty file (with a warning) instead of aborting the |
| 78 |
|
|
/// ingestion (used by ducc for OCI image layers that may contain cross-layer |
| 79 |
|
|
/// hardlinks) |
| 80 |
|
|
const bool tolerate_missing_hardlinks_; |
| 81 |
|
|
std::set<std::string> |
| 82 |
|
|
know_directories_; ///< directories that we know already exist |
| 83 |
|
|
|
| 84 |
|
|
/** |
| 85 |
|
|
* directories where we found a catalog marker; after the main traversal we |
| 86 |
|
|
* iterate through them and we add the catalog |
| 87 |
|
|
*/ |
| 88 |
|
|
std::set<std::string> to_create_catalog_dirs_; |
| 89 |
|
|
|
| 90 |
|
|
/** |
| 91 |
|
|
* map of all directories found, we need them since we don't know, a priori, |
| 92 |
|
|
* where the catalog files appear |
| 93 |
|
|
*/ |
| 94 |
|
|
std::map<std::string, SharedPtr<SyncItem> > dirs_; |
| 95 |
|
|
|
| 96 |
|
|
/** |
| 97 |
|
|
* A hardlink destination: the path of the link plus the ownership and |
| 98 |
|
|
* permission metadata captured from its tar header. The metadata is only |
| 99 |
|
|
* needed to materialize an empty file when the hardlink target is missing |
| 100 |
|
|
* from the archive (see tolerate_missing_hardlinks_). |
| 101 |
|
|
*/ |
| 102 |
|
|
struct HardlinkDestination { |
| 103 |
|
✗ |
HardlinkDestination(const std::string &p, unsigned int m, uid_t u, gid_t g, |
| 104 |
|
|
time_t t) |
| 105 |
|
✗ |
: path(p), mode(m), uid(u), gid(g), mtime(t) { } |
| 106 |
|
|
std::string path; |
| 107 |
|
|
unsigned int mode; |
| 108 |
|
|
uid_t uid; |
| 109 |
|
|
gid_t gid; |
| 110 |
|
|
time_t mtime; |
| 111 |
|
|
}; |
| 112 |
|
|
|
| 113 |
|
|
/** |
| 114 |
|
|
* map the path of a hardlink target to the list of links pointing to it |
| 115 |
|
|
*/ |
| 116 |
|
|
std::map<const std::string, std::list<HardlinkDestination> > hardlinks_; |
| 117 |
|
|
|
| 118 |
|
|
/** |
| 119 |
|
|
* Condition variable to track when it is possible to read the tar file |
| 120 |
|
|
*/ |
| 121 |
|
|
Signal *read_archive_signal_; |
| 122 |
|
|
|
| 123 |
|
|
static const size_t kBlockSize = 4096 * 4; |
| 124 |
|
|
|
| 125 |
|
|
/** |
| 126 |
|
|
* Create a missing directory and all of its ancestors. |
| 127 |
|
|
* It is possible to find a leaf of the filesystem tree before its root |
| 128 |
|
|
* while |
| 129 |
|
|
* traversing a tar file, however we need to have all the directories in place |
| 130 |
|
|
* before adding entities. This method is called whenever we find a new |
| 131 |
|
|
* directory. |
| 132 |
|
|
* The method creates a new dummy directory and, if necessary, all of its |
| 133 |
|
|
* parents. |
| 134 |
|
|
* @param target the directory to create |
| 135 |
|
|
*/ |
| 136 |
|
|
void CreateDirectories(const std::string &target); |
| 137 |
|
|
void ProcessArchiveEntry(struct archive_entry *entry); |
| 138 |
|
|
std::string SanitizePath(const std::string &path); |
| 139 |
|
|
}; // class SyncUnionTarball |
| 140 |
|
|
|
| 141 |
|
|
} // namespace publish |
| 142 |
|
|
|
| 143 |
|
|
#endif // CVMFS_SYNC_UNION_TARBALL_H_ |
| 144 |
|
|
|