CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
catalog_merge_tool_impl.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_RECEIVER_CATALOG_MERGE_TOOL_IMPL_H_
6 #define CVMFS_RECEIVER_CATALOG_MERGE_TOOL_IMPL_H_
7 
8 #include <string>
9 
10 #include "catalog.h"
11 #include "crypto/hash.h"
12 #include "manifest.h"
13 #include "options.h"
14 #include "upload.h"
15 #include "util/exception.h"
16 #include "util/logging.h"
17 #include "util/posix.h"
18 #include "util/raii_temp_dir.h"
19 #include "shortstring.h"
20 #include "catalog_merge_tool.h"
21 
22 inline PathString MakeRelative(const PathString &path) {
23  std::string rel_path;
24  std::string abs_path = path.ToString();
25  if (abs_path[0] == '/') {
26  rel_path = abs_path.substr(1);
27  } else {
28  rel_path = abs_path;
29  }
30  return PathString(rel_path);
31 }
32 
// Body of a helper that operates on a pointer named |entry|: when the entry's
// link count is greater than 1, the link count is reset to 1 and the message
// below (formatted with the entry name) is passed to a call.
//
// NOTE(review): listing lines 33 and 35 are absent from this extract. Line 33
// is presumably the signature `void SplitHardlink(catalog::DirectoryEntry
// *entry)` and line 35 presumably the opening of the logging call that takes
// the message -- confirm against the original header.
34  if (entry->linkcount() > 1) {
36  "CatalogMergeTool - Hardlink found: %s. Hardlinks are not "
37  "supported when publishing through repository gateway and "
38  "will be split.",
39  entry->name().c_str());
// From here on the entry is treated as a standalone file (link count 1)
40  entry->set_linkcount(1);
41  }
42 }
43 
// Checks the link count of |entry|; for a link count greater than 1 the
// message below (formatted with the entry name) is passed to a call.
//
// NOTE(review): listing line 46 (the opening of that call) is absent from this
// extract. Given the function name and the "Aborting" text it is presumably a
// PANIC(...) invocation -- confirm against the original header.
44 inline void AbortIfHardlinked(const catalog::DirectoryEntry &entry) {
45  if (entry.linkcount() > 1) {
47  "CatalogMergeTool - Removal of file %s with linkcount > 1 is "
48  "not supported. Aborting",
49  entry.name().c_str());
50  }
51 }
52 
53 namespace receiver {
54 
// Entry point taking |params| and three output pointers. As far as this
// listing shows, it
//   1. creates the "publish" statistics template and the FsCounters object
//      stored in counters_,
//   2. creates a temporary directory below temp_dir_prefix_,
//   3. if needs_setup_ is set, constructs a spooler and a new RwCatalogMgr
//      (stored in output_catalog_mgr_) and calls Init() on it,
//   4. calls CreateNewManifest(new_manifest_path) and folds its result into
//      |ret|,
//   5. copies the manifest's catalog hash and revision into
//      |new_manifest_hash| and |final_rev|,
//   6. destroys output_catalog_mgr_ and returns |ret|.
//
// NOTE(review): listing lines 56, 59, 67, 75 and 81 are absent from this
// extract. Presumably they hold the qualified function name, the declaration
// of |spooler|, further constructor arguments, and the statement that
// declares and initialises |ret| -- confirm against the original header.
55 template<typename RwCatalogMgr, typename RoCatalogMgr>
57  const Params &params, std::string *new_manifest_path,
58  shash::Any *new_manifest_hash, uint64_t *final_rev) {
60  perf::StatisticsTemplate stats_tmpl("publish", statistics_);
61  counters_ = new perf::FsCounters(stats_tmpl);
62 
63  UniquePtr<RaiiTempDir> raii_temp_dir(RaiiTempDir::Create(temp_dir_prefix_));
// When needs_setup_ is false, output_catalog_mgr_ is used below without being
// assigned in this function
64  if (needs_setup_) {
65  upload::SpoolerDefinition definition(
66  params.spooler_configuration, params.hash_alg, params.compression_alg,
68  params.min_chunk_size, params.avg_chunk_size, params.max_chunk_size,
69  "dummy_token", "dummy_key");
70  spooler = upload::Spooler::Construct(definition, &stats_tmpl);
71  const std::string temp_dir = raii_temp_dir->dir();
72  output_catalog_mgr_ = new RwCatalogMgr(
73  manifest_->catalog_hash(), repo_path_, temp_dir, spooler.weak_ref(),
74  download_manager_, params.enforce_limits, params.nested_kcatalog_limit,
76  params.use_autocatalogs, params.max_weight, params.min_weight,
77  cache_dir_);
78  output_catalog_mgr_->Init();
79  }
80 
82 
83  ret &= CreateNewManifest(new_manifest_path);
// The two outputs below are written unconditionally, i.e. also when |ret| is
// false
84  *new_manifest_hash = manifest_->catalog_hash();
85  *final_rev = manifest_->revision();
86 
87  output_catalog_mgr_.Destroy();
88 
89  return ret;
90 }
91 
// Returns true if |path| (made relative first) is neither a sub path of
// lease_path_ nor a path of which lease_path_ is a sub path; returns false
// otherwise.
//
// NOTE(review): listing line 93 is absent from this extract; presumably it
// holds the return type and qualified name of IsIgnoredPath -- confirm.
92 template<typename RwCatalogMgr, typename RoCatalogMgr>
94  const PathString &path) {
95  const PathString rel_path = MakeRelative(path);
96 
97  // Ignore any paths that are not either within the lease path or
98  // above the lease path
99  return !(IsSubPath(lease_path_, rel_path)
100  || IsSubPath(rel_path, lease_path_));
101 }
102 
// Returns the result of IsSubPath(lease_path_, rel_path), where rel_path is
// |path| with a leading '/' removed: only paths below the lease path are
// reportable.
//
// NOTE(review): listing line 104 is absent from this extract; presumably it
// holds the return type and qualified name of IsReportablePath -- confirm.
103 template<typename RwCatalogMgr, typename RoCatalogMgr>
105  const PathString &path) {
106  const PathString rel_path = MakeRelative(path);
107 
108  // Do not report any changes occurring outside the lease path (which
109  // will be due to other concurrent writers)
110  return IsSubPath(lease_path_, rel_path);
111 }
112 
// Applies the addition of |entry| at |path| to output_catalog_mgr_.
//
//  - Directory that is a nested catalog mountpoint: the nested catalog's hash
//    and size are looked up and grafted into the output catalog manager; the
//    function then returns false without touching any counter.
//  - Other directory: added below the parent path; n_directories_added is
//    incremented.
//  - Regular file or symlink: a copy of the entry is passed through
//    SplitHardlink() and added (as chunked file when entry.IsChunkedFile());
//    the symlink/file counter and sz_added_bytes are updated.
//  - Any other entry type: nothing is added.
// Every path except the nested catalog one returns true.
//
// NOTE(review): listing lines 114, 127, 134 and 148 are absent from this
// extract. Presumably they hold the qualified function name, the
// initialisation of |new_catalog_mgr|, the opening of the call that receives
// the "not found" message, and the declared type of |base_entry| -- confirm
// against the original header.
// NOTE(review): std::strchr and assert are used although only <string> is
// visibly included here; presumably <cstring>/<cassert> arrive through other
// headers -- confirm.
113 template<typename RwCatalogMgr, typename RoCatalogMgr>
115  const PathString &path, const catalog::DirectoryEntry &entry,
116  const XattrList &xattrs, const FileChunkList &chunks) {
117  const PathString rel_path = MakeRelative(path);
118 
// The parent path is the empty string when rel_path contains no '/'
119  const std::string parent_path = std::strchr(rel_path.c_str(), '/')
120  ? GetParentPath(rel_path).c_str()
121  : "";
122 
123  if (entry.IsDirectory()) {
124  if (entry.IsNestedCatalogMountpoint()) {
125  // Install the provided nested catalog in the output catalog manager
126  RoCatalogMgr
128  PathString mountpoint;
129  shash::Any nested_hash;
130  uint64_t nested_size;
// Note: the lookup uses the original |path|, not rel_path
131  const bool found = new_catalog_mgr->LookupNested(
132  path, &mountpoint, &nested_hash, &nested_size);
// A nested catalog reported with size 0 is handled like a failed lookup
133  if (!found || !nested_size) {
135  "CatalogMergeTool - nested catalog %s not found. Aborting",
136  rel_path.c_str());
137  }
138  output_catalog_mgr_->GraftNestedCatalog(rel_path.ToString(), nested_hash,
139  nested_size);
140  return false;
141  } else {
142  output_catalog_mgr_->AddDirectory(entry, xattrs, parent_path);
143  }
144  perf::Inc(counters_->n_directories_added);
145  } else if (entry.IsRegular() || entry.IsLink()) {
// Work on a copy so that the caller's entry keeps its link count
146  catalog::DirectoryEntry modified_entry = entry;
147  SplitHardlink(&modified_entry);
149  *base_entry = static_cast<const catalog::DirectoryEntryBase *>(
150  &modified_entry);
151  if (entry.IsChunkedFile()) {
152  assert(!chunks.IsEmpty());
153  output_catalog_mgr_->AddChunkedFile(*base_entry, xattrs, parent_path,
154  chunks);
155  } else {
156  output_catalog_mgr_->AddFile(*base_entry, xattrs, parent_path);
157  }
158  if (entry.IsLink()) {
159  perf::Inc(counters_->n_symlinks_added);
160  } else {
161  perf::Inc(counters_->n_files_added);
162  }
163  perf::Xadd(counters_->sz_added_bytes, static_cast<int64_t>(entry.size()));
164  }
165  return true;
166 }
167 
// Applies the removal of |entry| at |path| to output_catalog_mgr_.
//
//  - Directory: if it is a nested catalog mountpoint, RemoveNestedCatalog() is
//    called with second argument false before the directory itself is removed;
//    n_directories_removed is incremented.
//  - Regular file or symlink: AbortIfHardlinked() is applied first, then the
//    file is removed and the symlink/file counter and sz_removed_bytes are
//    updated.
//  - Any other entry type: nothing happens.
//
// NOTE(review): listing line 169 is absent from this extract; presumably it
// holds the return type and qualified name of ReportRemoval -- confirm.
168 template<typename RwCatalogMgr, typename RoCatalogMgr>
170  const PathString &path, const catalog::DirectoryEntry &entry) {
171  const PathString rel_path = MakeRelative(path);
172 
173  if (entry.IsDirectory()) {
174  if (entry.IsNestedCatalogMountpoint()) {
175  output_catalog_mgr_->RemoveNestedCatalog(std::string(rel_path.c_str()),
176  false);
177  }
178 
179  output_catalog_mgr_->RemoveDirectory(rel_path.c_str());
180  perf::Inc(counters_->n_directories_removed);
181  } else if (entry.IsRegular() || entry.IsLink()) {
182  AbortIfHardlinked(entry);
183  output_catalog_mgr_->RemoveFile(rel_path.c_str());
184 
185  if (entry.IsLink()) {
186  perf::Inc(counters_->n_symlinks_removed);
187  } else {
188  perf::Inc(counters_->n_files_removed);
189  }
190 
191  perf::Xadd(counters_->sz_removed_bytes, static_cast<int64_t>(entry.size()));
192  }
193 }
194 
// Applies the change from |entry1| to |entry2| at |path| to
// output_catalog_mgr_. The branches are tested in this order:
//
//  1. nested catalog -> nested catalog: the new hash/size are looked up and
//     the nested catalog is swapped; returns false.
//  2. directory -> directory: the directory is touched with entry2's data; a
//     nested catalog is created or removed if the mountpoint status differs.
//  3. file/symlink -> directory: the file is removed (after
//     AbortIfHardlinked) and the directory added, plus a nested catalog if
//     entry2 is a mountpoint.
//  4. directory -> file/symlink: a nested catalog at the path is removed with
//     merge = true, then the directory is removed and the file added.
//  5. file/symlink -> file/symlink: the old file is removed (after
//     AbortIfHardlinked) and the new one added.
// Branches 2-5, and entry type combinations matching none of them, return
// true. New file entries are always added from a copy that went through
// SplitHardlink(). The perf counters are updated per branch as shown inline.
//
// NOTE(review): listing lines 196, 210, 217, 226, 265 and 293 are absent from
// this extract. Presumably they hold the qualified function name, the
// initialisation of |new_catalog_mgr|, the opening of the call that receives
// the "not found" message, and the declared type of each |base_entry| --
// confirm against the original header.
195 template<typename RwCatalogMgr, typename RoCatalogMgr>
197  const PathString &path, const catalog::DirectoryEntry &entry1,
198  const catalog::DirectoryEntry &entry2, const XattrList &xattrs,
199  const FileChunkList &chunks) {
200  const PathString rel_path = MakeRelative(path);
201 
// The parent path is the empty string when rel_path contains no '/'
202  const std::string parent_path = std::strchr(rel_path.c_str(), '/')
203  ? GetParentPath(rel_path).c_str()
204  : "";
205 
206  if (entry1.IsNestedCatalogMountpoint()
207  && entry2.IsNestedCatalogMountpoint()) {
208  // From nested catalog to nested catalog
209  RoCatalogMgr
211  PathString mountpoint;
212  shash::Any new_hash;
213  uint64_t new_size;
// Note: the lookup uses the original |path|, not rel_path
214  const bool found = new_catalog_mgr->LookupNested(path, &mountpoint,
215  &new_hash, &new_size);
// A nested catalog reported with size 0 is handled like a failed lookup
216  if (!found || !new_size) {
218  "CatalogMergeTool - nested catalog %s not found. Aborting",
219  rel_path.c_str());
220  }
221  output_catalog_mgr_->SwapNestedCatalog(rel_path.ToString(), new_hash,
222  new_size);
223  return false; // skip recursion into nested catalog mountpoints
224  } else if (entry1.IsDirectory() && entry2.IsDirectory()) {
225  // From directory to directory
227  *base_entry = static_cast<const catalog::DirectoryEntryBase *>(&entry2);
228  output_catalog_mgr_->TouchDirectory(*base_entry, xattrs, rel_path.c_str());
// Both-mountpoint was handled by the first branch, so at most one of the two
// entries is a nested catalog mountpoint here
229  if (!entry1.IsNestedCatalogMountpoint()
230  && entry2.IsNestedCatalogMountpoint()) {
231  output_catalog_mgr_->CreateNestedCatalog(std::string(rel_path.c_str()));
232  } else if (entry1.IsNestedCatalogMountpoint()
233  && !entry2.IsNestedCatalogMountpoint()) {
234  output_catalog_mgr_->RemoveNestedCatalog(std::string(rel_path.c_str()));
235  }
236  perf::Inc(counters_->n_directories_changed);
237  } else if ((entry1.IsRegular() || entry1.IsLink()) && entry2.IsDirectory()) {
238  // From file to directory
239  AbortIfHardlinked(entry1);
240  output_catalog_mgr_->RemoveFile(rel_path.c_str());
241  output_catalog_mgr_->AddDirectory(entry2, xattrs, parent_path);
242  if (entry2.IsNestedCatalogMountpoint()) {
243  output_catalog_mgr_->CreateNestedCatalog(std::string(rel_path.c_str()));
244  }
// Counted as one removal (file or symlink) plus one added directory
245  if (entry1.IsLink()) {
246  perf::Inc(counters_->n_symlinks_removed);
247  } else {
248  perf::Inc(counters_->n_files_removed);
249  }
250  perf::Xadd(counters_->sz_removed_bytes,
251  static_cast<int64_t>(entry1.size()));
252  perf::Inc(counters_->n_directories_added);
253 
254  } else if (entry1.IsDirectory() && (entry2.IsRegular() || entry2.IsLink())) {
255  // From directory to file
256  if (entry1.IsNestedCatalogMountpoint()) {
257  // we merge the nested catalog with its parent, it will be the recursive
258  // procedure that will take care of deleting all the files.
259  output_catalog_mgr_->RemoveNestedCatalog(std::string(rel_path.c_str()),
260  /* merge = */ true);
261  }
262 
// Work on a copy so that entry2 itself keeps its link count
263  catalog::DirectoryEntry modified_entry = entry2;
264  SplitHardlink(&modified_entry);
266  *base_entry = static_cast<const catalog::DirectoryEntryBase *>(
267  &modified_entry);
268 
269  output_catalog_mgr_->RemoveDirectory(rel_path.c_str());
270 
271  if (entry2.IsChunkedFile()) {
272  assert(!chunks.IsEmpty());
273  output_catalog_mgr_->AddChunkedFile(*base_entry, xattrs, parent_path,
274  chunks);
275  } else {
276  output_catalog_mgr_->AddFile(*base_entry, xattrs, parent_path);
277  }
278 
// Counted as one removed directory plus one addition (file or symlink)
279  perf::Inc(counters_->n_directories_removed);
280  if (entry2.IsLink()) {
281  perf::Inc(counters_->n_symlinks_added);
282  } else {
283  perf::Inc(counters_->n_files_added);
284  }
285  perf::Xadd(counters_->sz_added_bytes, static_cast<int64_t>(entry2.size()));
286 
287  } else if ((entry1.IsRegular() || entry1.IsLink())
288  && (entry2.IsRegular() || entry2.IsLink())) {
289  // From file to file
290  AbortIfHardlinked(entry1);
// Work on a copy so that entry2 itself keeps its link count
291  catalog::DirectoryEntry modified_entry = entry2;
292  SplitHardlink(&modified_entry);
294  *base_entry = static_cast<const catalog::DirectoryEntryBase *>(
295  &modified_entry);
296  output_catalog_mgr_->RemoveFile(rel_path.c_str());
297  if (entry2.IsChunkedFile()) {
298  assert(!chunks.IsEmpty());
299  output_catalog_mgr_->AddChunkedFile(*base_entry, xattrs, parent_path,
300  chunks);
301  } else {
302  output_catalog_mgr_->AddFile(*base_entry, xattrs, parent_path);
303  }
304 
// Same kind on both sides counts as "changed"; a kind switch counts as one
// removal plus one addition
305  if (entry1.IsRegular() && entry2.IsRegular()) {
306  perf::Inc(counters_->n_files_changed);
307  } else if (entry1.IsRegular() && entry2.IsLink()) {
308  perf::Inc(counters_->n_files_removed);
309  perf::Inc(counters_->n_symlinks_added);
310  } else if (entry1.IsLink() && entry2.IsRegular()) {
311  perf::Inc(counters_->n_symlinks_removed);
312  perf::Inc(counters_->n_files_added);
313  } else {
314  perf::Inc(counters_->n_symlinks_changed);
315  }
// The byte counters record the full old size as removed and the full new size
// as added
316  perf::Xadd(counters_->sz_removed_bytes,
317  static_cast<int64_t>(entry1.size()));
318  perf::Xadd(counters_->sz_added_bytes, static_cast<int64_t>(entry2.size()));
319  }
320  return true;
321 }
322 
// Commits the output catalog manager into manifest_, exports the manifest to
// a newly created temporary path (prefix temp_dir_prefix_, mode 0600) and
// stores that path in |new_manifest_path|.
//
// Returns false if the commit fails; in that case |new_manifest_path| is not
// written.
//
// NOTE(review): listing lines 324, 327 and 335 are absent from this extract.
// Presumably they hold the qualified function name and the openings of the two
// calls that receive the messages below -- confirm against the original
// header.
323 template<typename RwCatalogMgr, typename RoCatalogMgr>
325  std::string *new_manifest_path) {
326  if (!output_catalog_mgr_->Commit(false, 0, manifest_)) {
328  "CatalogMergeTool - Could not commit output catalog");
329  return false;
330  }
331 
332  const std::string new_path = CreateTempPath(temp_dir_prefix_, 0600);
333 
// NOTE(review): no `return false` is visible on this failure path. Unless the
// call on the omitted line 335 aborts, a failed export still falls through,
// sets |new_manifest_path| and returns true -- confirm this is intended.
334  if (!manifest_->Export(new_path)) {
336  "CatalogMergeTool - Could not export new manifest");
337  }
338 
339  *new_manifest_path = new_path;
340 
341  return true;
342 }
343 
344 } // namespace receiver
345 
346 #endif // CVMFS_RECEIVER_CATALOG_MERGE_TOOL_IMPL_H_
uint32_t linkcount() const
bool IsSubPath(const PathString &parent, const PathString &path)
Definition: shortstring.cc:47
virtual bool ReportAddition(const PathString &path, const catalog::DirectoryEntry &entry, const XattrList &xattrs, const FileChunkList &chunks)
bool Export(const std::string &path) const
Definition: manifest.cc:217
int64_t Xadd(class Counter *counter, const int64_t delta)
Definition: statistics.h:51
bool IsDirectory() const
size_t min_weight
Definition: params.h:37
size_t avg_chunk_size
Definition: params.h:28
std::string spooler_configuration
Definition: params.h:20
T * weak_ref() const
Definition: pointer.h:46
bool IsChunkedFile() const
#define PANIC(...)
Definition: exception.h:29
uint64_t size() const
void set_linkcount(const uint32_t linkcount)
zlib::Algorithms compression_alg
Definition: params.h:24
size_t nested_kcatalog_limit
Definition: params.h:31
bool use_autocatalogs
Definition: params.h:35
perf::Statistics * statistics_
Definition: repository.h:138
std::string CreateTempPath(const std::string &path_prefix, const int mode)
Definition: posix.cc:1042
virtual bool ReportModification(const PathString &path, const catalog::DirectoryEntry &old_entry, const catalog::DirectoryEntry &new_entry, const XattrList &xattrs, const FileChunkList &chunks)
assert((mem||(size==0))&&"Out Of Memory")
void AbortIfHardlinked(const catalog::DirectoryEntry &entry)
bool IsNestedCatalogMountpoint() const
uint64_t revision() const
Definition: manifest.h:122
PathString MakeRelative(const PathString &path)
bool use_file_chunking
Definition: params.h:26
bool Run(const Params &params, std::string *new_manifest_path, shash::Any *new_manifest_hash, uint64_t *final_rev)
NameString name() const
bool IsLink() const
size_t max_weight
Definition: params.h:36
size_t max_chunk_size
Definition: params.h:29
bool IsRegular() const
virtual bool IsReportablePath(const PathString &path)
RoCatalogMgr * GetNewCatalogMgr()
virtual bool IsIgnoredPath(const PathString &path)
static RaiiTempDir * Create(const std::string &prefix)
Definition: raii_temp_dir.cc:9
shash::Any catalog_hash() const
Definition: manifest.h:125
void Inc(class Counter *counter)
Definition: statistics.h:50
bool IsEmpty() const
Definition: bigvector.h:70
manifest::Manifest * manifest_
Definition: repository.h:147
std::string ToString() const
Definition: shortstring.h:139
size_t min_chunk_size
Definition: params.h:27
void SplitHardlink(catalog::DirectoryEntry *entry)
size_t file_mbyte_limit
Definition: params.h:33
ShortString< kDefaultMaxPath, 0 > PathString
Definition: shortstring.h:213
PathString GetParentPath(const PathString &path)
Definition: shortstring.cc:14
bool CreateNewManifest(std::string *new_manifest_path)
size_t root_kcatalog_limit
Definition: params.h:32
bool Run(const PathString &path)
bool generate_legacy_bulk_chunks
Definition: params.h:25
const char * c_str() const
Definition: shortstring.h:143
bool enforce_limits
Definition: params.h:30
virtual void ReportRemoval(const PathString &path, const catalog::DirectoryEntry &entry)
shash::Algorithms hash_alg
Definition: params.h:22
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545