CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sync_item.cc
Go to the documentation of this file.
1 
5 #include "sync_item.h"
6 
7 
8 #if !defined(__APPLE__)
9 #include <sys/sysmacros.h>
10 #endif // __APPLE__
11 
12 #include <cerrno>
13 #include <vector>
14 
15 #include "duplex_libarchive.h"
17 #include "sync_mediator.h"
18 #include "sync_union.h"
19 #include "util/exception.h"
20 
21 using namespace std; // NOLINT
22 
23 namespace publish {
24 
25 SyncItem::SyncItem() :
26  rdonly_type_(static_cast<SyncItemType>(0)),
27  graft_size_(-1),
28  scratch_type_(static_cast<SyncItemType>(0)),
29  union_engine_(NULL),
30  whiteout_(false),
31  opaque_(false),
32  masked_hardlink_(false),
33  has_catalog_marker_(false),
34  valid_graft_(false),
35  graft_marker_present_(false),
36  external_data_(false),
37  direct_io_(false),
38  graft_chunklist_(NULL),
39  compression_algorithm_(zlib::kZlibDefault),
40  has_compression_algorithm_(false) {}
41 
42 SyncItem::SyncItem(const std::string &relative_parent_path,
43  const std::string &filename,
44  const SyncUnion *union_engine,
45  const SyncItemType entry_type) :
46  rdonly_type_(kItemUnknown),
47  graft_size_(-1),
48  scratch_type_(entry_type),
49  filename_(filename),
50  union_engine_(union_engine),
51  whiteout_(false),
52  opaque_(false),
53  masked_hardlink_(false),
54  has_catalog_marker_(false),
55  valid_graft_(false),
56  graft_marker_present_(false),
57  external_data_(false),
58  direct_io_(false),
59  relative_parent_path_(relative_parent_path),
60  graft_chunklist_(NULL),
61  compression_algorithm_(zlib::kZlibDefault),
62  has_compression_algorithm_(false) {
64 }
65 
67  delete graft_chunklist_;
68 }
69 
70 
72 {
73  const SyncItemType type = stat.GetSyncItemType();
74  if (type == kItemUnknown) {
76  "[WARNING] '%s' has an unsupported file type (st_mode: %d errno: %d)",
77  GetRelativePath().c_str(), stat.stat.st_mode, stat.error_code);
78  }
79  return type;
80 }
81 
82 
84  StatRdOnly();
85  // file could not exist in read-only branch, or a regular file could have
86  // been replaced by a directory in the read/write branch, like:
87  // rdonly:
88  // /foo/bar/regular_file <-- ENOTDIR when asking for (.../is_dir_now)
89  // r/w:
90  // /foo/bar/regular_file/
91  // /foo/bar/regular_file/is_dir_now
92  if (rdonly_stat_.error_code == ENOENT ||
93  rdonly_stat_.error_code == ENOTDIR) return kItemNew;
95 }
96 
97 
99  StatScratch(/* refresh= */ false);
100  if (scratch_stat_.error_code != 0) {
101  PANIC(kLogStderr, "[WARNING] Failed to stat() '%s' in scratch. (errno: %s)",
103  }
104 
106 }
107 
109  StatUnion();
110  if (union_stat_.error_code == ENOENT || union_stat_.error_code == ENOTDIR)
111  return kItemUnknown;
113 }
114 
115 bool SyncItemNative::IsType(const SyncItemType expected_type) const {
116  if (filename().substr(0, 12) == ".cvmfsgraft-") {
118  } else if (scratch_type_ == kItemUnknown) {
120  }
121  return scratch_type_ == expected_type;
122 }
123 
124 void SyncItem::MarkAsWhiteout(const std::string &actual_filename) {
125  StatScratch(/* refresh= */ true);
126  // Mark the file as whiteout entry and strip the whiteout prefix
127  whiteout_ = true;
128  filename_ = actual_filename;
129 
130  // Find the entry in the repository
131  StatRdOnly(true); // <== refreshing the stat (filename might have changed)
132 
133  const SyncItemType deleted_type = (rdonly_stat_.error_code == 0)
135  : kItemUnknown;
136 
137  rdonly_type_ = deleted_type;
138  scratch_type_ = deleted_type;
139 
140  if (deleted_type == kItemUnknown) {
141  // Marking a SyncItem as 'whiteout' but no file to be removed found: This
142  // should not happen (actually AUFS prevents users from creating whiteouts)
143  // but can be provoked through an AUFS 'bug' (see test 593 or CVM-880).
144  // --> Warn the user, continue with kItemUnknown and cross your fingers!
145  PrintWarning("'" + GetRelativePath() +
146  "' should be deleted, but was not found in repository.");
147  }
148 }
149 
150 
152  assert(IsDirectory());
153  opaque_ = true;
154 }
155 
156 
157 unsigned int SyncItem::GetRdOnlyLinkcount() const {
158  StatRdOnly();
159  return rdonly_stat_.stat.st_nlink;
160 }
161 
162 
163 uint64_t SyncItem::GetRdOnlyInode() const {
164  StatRdOnly();
165  return rdonly_stat_.stat.st_ino;
166 }
167 
168 
169 unsigned int SyncItem::GetUnionLinkcount() const {
170  StatUnion();
171  return union_stat_.stat.st_nlink;
172 }
173 
174 
175 uint64_t SyncItem::GetUnionInode() const {
176  StatUnion();
177  return union_stat_.stat.st_ino;
178 }
179 
180 uint64_t SyncItem::GetScratchSize() const {
181  StatScratch(/* refresh= */ false);
182  return scratch_stat_.stat.st_size;
183 }
184 
185 uint64_t SyncItem::GetRdOnlySize() const {
186  StatRdOnly();
187  return rdonly_stat_.stat.st_size;
188 }
189 
191  return new FileIngestionSource(GetUnionPath());
192 }
193 
194 void SyncItem::StatGeneric(const string &path,
195  EntryStat *info,
196  const bool refresh) {
197  if (info->obtained && !refresh) return;
198  int retval = platform_lstat(path.c_str(), &info->stat);
199  info->error_code = (retval != 0) ? errno : 0;
200  info->obtained = true;
201 }
202 
203 
205  bool enable_mtime_ns) const
206 {
208 
209  // inode and parent inode is determined at runtime of client
211 
212  // this might mask the actual link count in case hardlinks are not supported
213  // (i.e. on setups using OverlayFS)
214  dirent.linkcount_ = HasHardlinks() ? this->GetUnionStat().st_nlink : 1;
215 
216  dirent.mode_ = this->GetUnionStat().st_mode;
217  dirent.uid_ = this->GetUnionStat().st_uid;
218  dirent.gid_ = this->GetUnionStat().st_gid;
219  dirent.size_ = graft_size_ > -1 ? graft_size_ :
220  this->GetUnionStat().st_size;
221  dirent.mtime_ = this->GetUnionStat().st_mtime;
222  dirent.checksum_ = this->GetContentHash();
223  dirent.is_external_file_ = this->IsExternalData();
224  dirent.is_direct_io_ = this->IsDirectIo();
226 
227  dirent.name_.Assign(filename().data(), filename().length());
228 
229  if (this->IsSymlink()) {
230  char slnk[PATH_MAX+1];
231  const ssize_t length =
232  readlink((this->GetUnionPath()).c_str(), slnk, PATH_MAX);
233  assert(length >= 0);
234  dirent.symlink_.Assign(slnk, length);
235  }
236 
237  if (this->IsCharacterDevice() || this->IsBlockDevice()) {
238  dirent.size_ = makedev(GetRdevMajor(), GetRdevMinor());
239  }
240 
241  if (enable_mtime_ns) {
242 #ifdef __APPLE__
243  dirent.mtime_ns_ = static_cast<int32_t>(
244  this->GetUnionStat().st_mtimespec.tv_nsec);
245 #else
246  dirent.mtime_ns_ = static_cast<int32_t>(
247  this->GetUnionStat().st_mtim.tv_nsec);
248 #endif
249  }
250 
251  return dirent;
252 }
253 
254 
255 std::string SyncItem::GetRdOnlyPath() const {
256  const string relative_path = GetRelativePath().empty() ?
257  "" : "/" + GetRelativePath();
258  return union_engine_->rdonly_path() + relative_path;
259 }
260 
261 std::string SyncItem::GetUnionPath() const {
262  const string relative_path = GetRelativePath().empty() ?
263  "" : "/" + GetRelativePath();
264  return union_engine_->union_path() + relative_path;
265 }
266 
267 std::string SyncItem::GetScratchPath() const {
268  const string relative_path = GetRelativePath().empty() ?
269  "" : "/" + GetRelativePath();
270  return union_engine_->scratch_path() + relative_path;
271  // return union_engine_->scratch_path() + filename();
272 }
273 
275  if (IsRegularFile()) {
276  CheckGraft();
277  } else if (IsDirectory()) {
279  }
280 }
281 
283  std::string path(GetUnionPath() + "/.cvmfscatalog");
284  EntryStat stat;
285  StatGeneric(path, &stat, false);
286  if (stat.error_code) {
287  has_catalog_marker_ = false;
288  return;
289  }
290  if (stat.GetSyncItemType() == kItemFile) {
291  has_catalog_marker_ = true;
292  return;
293  }
294  PANIC(kLogStderr, "Error: '%s' is not a regular file.", path.c_str());
295 }
296 
297 
298 std::string SyncItem::GetGraftMarkerPath() const {
299  return union_engine_->scratch_path() + "/" +
300  ((relative_parent_path_.empty()) ?
301  ".cvmfsgraft-" + filename_ :
302  relative_parent_path_ + (filename_.empty() ? "" :
303  ("/.cvmfsgraft-" + filename_)));
304 }
305 
307  valid_graft_ = false;
308  bool found_checksum = false;
309  std::string checksum_type;
310  std::string checksum_value;
311  std::string graftfile = GetGraftMarkerPath();
312  LogCvmfs(kLogFsTraversal, kLogDebug, "Checking potential graft path %s.",
313  graftfile.c_str());
314  FILE *fp = fopen(graftfile.c_str(), "r");
315  if (fp == NULL) {
316  // This sync item can be a file from a removed directory tree on overlayfs.
317  // In this case, the entire tree is missing on the scratch directory and
318  // the errno is ENOTDIR.
319  if ((errno != ENOENT) && (errno != ENOTDIR)) {
320  LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to open graft file "
321  "(%s): %s (errno=%d)",
322  graftfile.c_str(), strerror(errno), errno);
323  }
324  return;
325  }
326  graft_marker_present_ = true;
327  valid_graft_ = true;
328  std::string line;
329  std::vector<std::string> contents;
330 
331  std::vector<off_t> chunk_offsets;
332  std::vector<shash::Any> chunk_checksums;
333 
334  while (GetLineFile(fp, &line)) {
335  std::string trimmed_line = Trim(line);
336 
337  if (!trimmed_line.size()) {continue;}
338  if (trimmed_line[0] == '#') {continue;}
339 
340  std::vector<std::string> info = SplitStringBounded(2, trimmed_line, '=');
341 
342  if (info.size() != 2) {
343  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid line in graft file: %s",
344  trimmed_line.c_str());
345  }
346  info[0] = Trim(info[0]);
347  info[1] = Trim(info[1]);
348  if (info[0] == "size") {
349  uint64_t tmp_size;
350  if (!String2Uint64Parse(info[1], &tmp_size)) {
351  LogCvmfs(kLogFsTraversal, kLogWarning, "Failed to parse value of %s "
352  "to integer: %s (errno=%d)", trimmed_line.c_str(),
353  strerror(errno), errno);
354  continue;
355  }
356  graft_size_ = tmp_size;
357  } else if (info[0] == "checksum") {
358  std::string hash_str = info[1];
359  shash::HexPtr hashP(hash_str);
360  if (hashP.IsValid()) {
362  found_checksum = true;
363  } else {
364  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid checksum value: %s.",
365  info[1].c_str());
366  }
367  continue;
368  } else if (info[0] == "chunk_offsets") {
369  std::vector<std::string> offsets = SplitString(info[1], ',');
370  for (std::vector<std::string>::const_iterator it = offsets.begin();
371  it != offsets.end(); it++)
372  {
373  uint64_t val;
374  if (!String2Uint64Parse(*it, &val)) {
375  valid_graft_ = false;
376  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk offset: %s.",
377  it->c_str());
378  break;
379  }
380  chunk_offsets.push_back(val);
381  }
382  } else if (info[0] == "chunk_checksums") {
383  std::vector<std::string> csums = SplitString(info[1], ',');
384  for (std::vector<std::string>::const_iterator it = csums.begin();
385  it != csums.end(); it++)
386  {
387  shash::HexPtr hashP(*it);
388  if (hashP.IsValid()) {
389  chunk_checksums.push_back(shash::MkFromHexPtr(hashP));
390  } else {
391  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk checksum "
392  "value: %s.", it->c_str());
393  valid_graft_ = false;
394  break;
395  }
396  }
397  } else if (info[0] == "compression") {
399  }
400  }
401  if (!feof(fp)) {
402  LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to read from catalog "
403  "marker (%s): %s (errno=%d)",
404  graftfile.c_str(), strerror(errno), errno);
405  }
406  fclose(fp);
407  valid_graft_ = valid_graft_ && (graft_size_ > -1) && found_checksum
408  && (chunk_checksums.size() == chunk_offsets.size());
409 
410  if (!valid_graft_ || chunk_offsets.empty())
411  return;
412 
413  // Parse chunks
414  graft_chunklist_ = new FileChunkList(chunk_offsets.size());
415  off_t last_offset = chunk_offsets[0];
416  if (last_offset != 0) {
417  LogCvmfs(kLogFsTraversal, kLogWarning, "First chunk offset must be 0"
418  " (in graft marker %s).", graftfile.c_str());
419  valid_graft_ = false;
420  }
421  for (unsigned idx = 1; idx < chunk_offsets.size(); idx++) {
422  off_t cur_offset = chunk_offsets[idx];
423  if (last_offset >= cur_offset) {
424  LogCvmfs(kLogFsTraversal, kLogWarning, "Chunk offsets must be sorted "
425  "in strictly increasing order (in graft marker %s).",
426  graftfile.c_str());
427  valid_graft_ = false;
428  break;
429  }
430  size_t cur_size = cur_offset - last_offset;
431  graft_chunklist_->PushBack(FileChunk(chunk_checksums[idx - 1],
432  last_offset,
433  cur_size));
434  last_offset = cur_offset;
435  }
436  if (graft_size_ <= last_offset) {
437  LogCvmfs(kLogFsTraversal, kLogWarning, "Last offset must be strictly "
438  "less than total file size (in graft marker %s).",
439  graftfile.c_str());
440  valid_graft_ = false;
441  }
442  graft_chunklist_->PushBack(FileChunk(chunk_checksums.back(),
443  last_offset,
444  graft_size_ - last_offset));
445 }
446 
447 } // namespace publish
EntryStat union_stat_
Definition: sync_item.h:295
SyncItemType GetGenericFiletype(const EntryStat &stat) const
Definition: sync_item.cc:71
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
Definition: compression.cc:148
unsigned int GetRdevMinor() const
Definition: sync_item.h:109
shash::Any GetContentHash() const
Definition: sync_item.h:121
SyncItemType rdonly_type_
Definition: sync_item.h:274
inode_t inode_
ssize_t graft_size_
Definition: sync_item.h:277
#define PANIC(...)
Definition: exception.h:29
string Trim(const string &raw, bool trim_newline)
Definition: string.cc:446
unsigned int GetRdevMajor() const
Definition: sync_item.h:104
void Assign(const char *chars, const unsigned length)
Definition: shortstring.h:61
gid_t gid_
virtual catalog::DirectoryEntryBase CreateBasicCatalogDirent(bool enable_mtime_ns) const
Definition: sync_item.cc:204
const int kLogWarning
const SyncUnion * union_engine_
Definition: sync_item.h:292
std::string relative_parent_path_
Definition: sync_item.h:306
std::string scratch_path() const
Definition: sync_union.h:98
bool IsBlockDevice() const
Definition: sync_item.h:77
bool IsDirectory() const
Definition: sync_item.h:64
bool IsCharacterDevice() const
Definition: sync_item.h:76
assert((mem||(size==0))&&"Out Of Memory")
int32_t mtime_ns_
Algorithms algorithm
Definition: hash.h:125
SyncItemType GetUnionFiletype() const
Definition: sync_item.cc:108
uint64_t size_
uint64_t GetUnionInode() const
Definition: sync_item.cc:175
void CheckCatalogMarker()
Definition: sync_item.cc:282
SyncItemType scratch_type_
Definition: sync_item.h:282
void CheckMarkerFiles()
Definition: sync_item.cc:274
bool String2Uint64Parse(const std::string &value, uint64_t *result)
Definition: string.cc:263
uint64_t GetRdOnlyInode() const
Definition: sync_item.cc:163
virtual bool IsType(const SyncItemType expected_type) const
Definition: sync_item.cc:115
NameString name_
platform_stat64 stat
Definition: sync_item.h:265
zlib::Algorithms compression_algorithm_
bool GetLineFile(FILE *f, std::string *line)
Definition: string.cc:404
BigVector< FileChunk > FileChunkList
Definition: file_chunk.h:51
std::string union_path() const
Definition: sync_union.h:97
EntryStat rdonly_stat_
Definition: sync_item.h:294
vector< string > SplitString(const string &str, char delim)
Definition: string.cc:308
uint64_t GetScratchSize() const
Definition: sync_item.cc:180
bool IsRegularFile() const
Definition: sync_item.h:66
platform_stat64 GetUnionStat() const
Definition: sync_item.h:210
std::string GetUnionPath() const
Definition: sync_item.cc:261
uint32_t linkcount_
std::string GetRdOnlyPath() const
Definition: sync_item.cc:255
void StatRdOnly(const bool refresh=false) const
Definition: sync_item.h:319
bool is_external_file_
zlib::Algorithms GetCompressionAlgorithm() const
Definition: sync_item.h:127
int platform_lstat(const char *path, platform_stat64 *buf)
std::string GetScratchPath() const
Definition: sync_item.cc:267
vector< string > SplitStringBounded(unsigned max_chunks, const string &str, char delim)
Definition: string.cc:312
time_t mtime_
static const inode_t kInvalidInode
void SetCompressionAlgorithm(const zlib::Algorithms &alg)
Definition: sync_item.h:130
FileChunkList * graft_chunklist_
Definition: sync_item.h:311
void PrintWarning(const string &message)
Definition: logging.cc:547
EntryStat scratch_stat_
Definition: sync_item.h:275
virtual ~SyncItem()
Definition: sync_item.cc:66
unsigned int GetRdOnlyLinkcount() const
Definition: sync_item.cc:157
Definition: sync_item.h:246
shash::Any content_hash_
Definition: sync_item.h:280
std::string filename_
Definition: sync_item.h:287
virtual SyncItemType GetScratchFiletype() const
Definition: sync_item.cc:98
void PushBack(const Item &item)
Definition: bigvector.h:60
bool IsDirectIo() const
Definition: sync_item.h:82
void MarkAsOpaqueDirectory()
Definition: sync_item.cc:151
unsigned int mode_
SyncItemType
Definition: sync_item.h:29
virtual void StatScratch(const bool refresh) const =0
static void StatGeneric(const std::string &path, EntryStat *info, const bool refresh)
Definition: sync_item.cc:194
Any MkFromHexPtr(const HexPtr hex, const char suffix)
Definition: hash.cc:83
std::string GetGraftMarkerPath() const
Definition: sync_item.cc:298
virtual void StatScratch(const bool refresh) const
Definition: sync_item.h:338
bool has_catalog_marker_
Definition: sync_item.h:300
virtual IngestionSource * CreateIngestionSource() const
Definition: sync_item.cc:190
void MarkAsWhiteout(const std::string &actual_filename)
Definition: sync_item.cc:124
unsigned int GetUnionLinkcount() const
Definition: sync_item.cc:169
bool graft_marker_present_
Definition: sync_item.h:302
shash::Any checksum_
bool IsSymlink() const
Definition: sync_item.h:68
std::string filename() const
Definition: sync_item.h:180
bool IsValid() const
Definition: hash.cc:37
bool is_direct_io_
bool IsExternalData() const
Definition: sync_item.h:81
bool obtained
Definition: sync_item.h:263
LinkString symlink_
SyncItemType GetSyncItemType() const
Definition: sync_item.h:251
std::string rdonly_path() const
Definition: sync_union.h:96
void StatUnion(const bool refresh=false) const
Definition: sync_item.h:322
std::string GetRelativePath() const
Definition: sync_item.h:149
int error_code
Definition: sync_item.h:264
uint64_t GetRdOnlySize() const
Definition: sync_item.cc:185
bool HasHardlinks() const
Definition: sync_item.h:169
SyncItemType GetRdOnlyFiletype() const
Definition: sync_item.cc:83
uid_t uid_
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:528