CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sync_item.cc
Go to the documentation of this file.
1 
5 #include "sync_item.h"
6 
7 
8 #if !defined(__APPLE__)
9 #include <sys/sysmacros.h>
10 #endif // __APPLE__
11 
12 #include <cerrno>
13 #include <vector>
14 
15 #include "duplex_libarchive.h"
17 #include "sync_mediator.h"
18 #include "sync_union.h"
19 #include "util/exception.h"
20 
21 using namespace std; // NOLINT
22 
23 namespace publish {
24 
// Default constructor: builds an item that is not bound to any union engine
// (union_engine_ is NULL) and has no file name. Both item types are set to the
// enumerator with value 0, every flag is cleared and no graft information is
// present.
25 SyncItem::SyncItem() :
26  rdonly_type_(static_cast<SyncItemType>(0)),
// -1 marks "no graft size given"; see the `graft_size_ > -1` checks elsewhere
// in this file.
27  graft_size_(-1),
28  scratch_type_(static_cast<SyncItemType>(0)),
29  union_engine_(NULL),
30  whiteout_(false),
31  opaque_(false),
32  masked_hardlink_(false),
33  has_catalog_marker_(false),
34  valid_graft_(false),
35  graft_marker_present_(false),
36  external_data_(false),
37  direct_io_(false),
// Allocated only by CheckGraft() for chunked grafts; released in the destructor.
38  graft_chunklist_(NULL),
39  compression_algorithm_(zlib::kZlibDefault),
40  has_compression_algorithm_(false) {}
41 
// Constructs an item for `filename` located in `relative_parent_path`.
//
// relative_parent_path  stored in relative_parent_path_
// filename              stored in filename_
// union_engine          stored as is (not copied); used by the Get*Path()
//                       helpers to resolve absolute paths
// entry_type            initial value of scratch_type_; the read-only type
//                       starts out as kItemUnknown
//
// All flags start cleared and no graft information is present.
// NOTE(review): listing line 63 (inside the constructor body) is missing from
// this listing.
42 SyncItem::SyncItem(const std::string &relative_parent_path,
43  const std::string &filename,
44  const SyncUnion *union_engine,
45  const SyncItemType entry_type) :
46  rdonly_type_(kItemUnknown),
// -1 marks "no graft size given".
47  graft_size_(-1),
48  scratch_type_(entry_type),
49  filename_(filename),
50  union_engine_(union_engine),
51  whiteout_(false),
52  opaque_(false),
53  masked_hardlink_(false),
54  has_catalog_marker_(false),
55  valid_graft_(false),
56  graft_marker_present_(false),
57  external_data_(false),
58  direct_io_(false),
59  relative_parent_path_(relative_parent_path),
60  graft_chunklist_(NULL),
61  compression_algorithm_(zlib::kZlibDefault),
62  has_compression_algorithm_(false) {
64 }
65 
// Destructor body (virtual ~SyncItem()): releases the chunk list that
// CheckGraft() may have allocated. graft_chunklist_ is NULL otherwise, and
// deleting a null pointer is a no-op.
// NOTE(review): the signature line is missing from this listing.
67  delete graft_chunklist_;
68 }
69 
70 
// GetGenericFiletype(const EntryStat &stat): returns the SyncItemType derived
// from `stat` and reports entries whose type is kItemUnknown, printing the
// item's relative path, st_mode and the stat error code.
// NOTE(review): the signature line and the line that opens the reporting call
// (listing line 75) are missing from this listing, so whether the report is
// fatal cannot be told from here.
72 {
73  const SyncItemType type = stat.GetSyncItemType();
74  if (type == kItemUnknown) {
76  "[WARNING] '%s' has an unsupported file type (st_mode: %d errno: %d)",
77  GetRelativePath().c_str(), stat.stat.st_mode, stat.error_code);
78  }
79  return type;
80 }
81 
82 
// GetRdOnlyFiletype(): type of this entry in the read-only branch. An entry
// that does not exist there (ENOENT or ENOTDIR) is reported as kItemNew.
// NOTE(review): the signature line and the final return statement (listing
// line 94) are missing from this listing.
84  StatRdOnly();
85  // file could not exist in read-only branch, or a regular file could have
86  // been replaced by a directory in the read/write branch, like:
87  // rdonly:
88  // /foo/bar/regular_file <-- ENOTDIR when asking for (.../is_dir_now)
89  // r/w:
90  // /foo/bar/regular_file/
91  // /foo/bar/regular_file/is_dir_now
92  if (rdonly_stat_.error_code == ENOENT ||
93  rdonly_stat_.error_code == ENOTDIR) return kItemNew;
95 }
96 
97 
// GetScratchFiletype(): type of this entry in the scratch area. Any stat error
// on the scratch path ends in PANIC.
// NOTE(review): the message is tagged "[WARNING]" although PANIC is raised,
// and the errno placeholder is "%s"; the argument line (listing line 102) is
// missing from this listing, so confirm that a string is passed for it.
// NOTE(review): the signature line and the final return statement (listing
// line 105) are missing from this listing as well.
99  StatScratch(/* refresh= */ false);
100  if (scratch_stat_.error_code != 0) {
101  PANIC(kLogStderr, "[WARNING] Failed to stat() '%s' in scratch. (errno: %s)",
103  }
104 
106 }
107 
// GetUnionFiletype(): type of this entry in the union view. An entry that does
// not exist there (ENOENT or ENOTDIR) is reported as kItemUnknown.
// NOTE(review): the signature line and the final return statement (listing
// line 112) are missing from this listing.
109  StatUnion();
110  if (union_stat_.error_code == ENOENT || union_stat_.error_code == ENOTDIR)
111  return kItemUnknown;
113 }
114 
// Tells whether this item is of type `expected_type`, judged by scratch_type_.
// Two cases are handled before the comparison: file names starting with
// ".cvmfsgraft-" (the graft marker prefix, 12 characters), and items whose
// scratch type is still kItemUnknown.
// NOTE(review): the statements inside both branches (listing lines 117 and
// 119) are missing from this listing; presumably each assigns scratch_type_.
115 bool SyncItemNative::IsType(const SyncItemType expected_type) const {
116  if (filename().substr(0, 12) == ".cvmfsgraft-") {
118  } else if (scratch_type_ == kItemUnknown) {
120  }
121  return scratch_type_ == expected_type;
122 }
123 
// Turns this item into a whiteout (deletion marker) for `actual_filename`.
//
// actual_filename  name of the entry to be deleted, i.e. the whiteout's name
//                  with the whiteout prefix already stripped by the caller
//
// The item takes over that name, the read-only stat is refreshed for it, and
// both rdonly_type_ and scratch_type_ are set to the type of the entry that is
// being deleted. If the read-only stat fails, both become kItemUnknown and a
// warning is printed.
// NOTE(review): listing line 134 (the value chosen when the read-only stat
// succeeded) is missing from this listing.
124 void SyncItem::MarkAsWhiteout(const std::string &actual_filename) {
// Refresh the scratch stat while filename_ still names the whiteout entry.
125  StatScratch(/* refresh= */ true);
126  // Mark the file as whiteout entry and strip the whiteout prefix
127  whiteout_ = true;
128  filename_ = actual_filename;
129 
130  // Find the entry in the repository
131  StatRdOnly(true); // <== refreshing the stat (filename might have changed)
132 
133  const SyncItemType deleted_type = (rdonly_stat_.error_code == 0)
135  : kItemUnknown;
136 
137  rdonly_type_ = deleted_type;
138  scratch_type_ = deleted_type;
139 
140  if (deleted_type == kItemUnknown) {
141  // Marking a SyncItem as 'whiteout' but no file to be removed found: This
142  // should not happen (actually AUFS prevents users from creating whiteouts)
143  // but can be provoked through an AUFS 'bug' (see test 593 or CVM-880).
144  // --> Warn the user, continue with kItemUnknown and cross your fingers!
145  PrintWarning("'" + GetRelativePath() +
146  "' should be deleted, but was not found in repository.");
147  }
148 }
149 
150 
// MarkAsOpaqueDirectory(): sets the opaque_ flag. Only meaningful for
// directories, which is enforced by the assertion (active in builds where
// assert() is enabled).
// NOTE(review): the signature line is missing from this listing.
152  assert(IsDirectory());
153  opaque_ = true;
154 }
155 
156 
157 unsigned int SyncItem::GetRdOnlyLinkcount() const {
158  StatRdOnly();
159  return rdonly_stat_.stat.st_nlink;
160 }
161 
162 
163 uint64_t SyncItem::GetRdOnlyInode() const {
164  StatRdOnly();
165  return rdonly_stat_.stat.st_ino;
166 }
167 
168 
169 unsigned int SyncItem::GetUnionLinkcount() const {
170  StatUnion();
171  return union_stat_.stat.st_nlink;
172 }
173 
174 
175 uint64_t SyncItem::GetUnionInode() const {
176  StatUnion();
177  return union_stat_.stat.st_ino;
178 }
179 
180 uint64_t SyncItem::GetScratchSize() const {
181  StatScratch(/* refresh= */ false);
182  return scratch_stat_.stat.st_size;
183 }
184 
185 uint64_t SyncItem::GetRdOnlySize() const {
186  StatRdOnly();
187  return rdonly_stat_.stat.st_size;
188 }
189 
// CreateIngestionSource(): returns a FileIngestionSource for this item's union
// path. The object is allocated with `new`; who deletes it is not visible in
// this file.
// NOTE(review): the signature line is missing from this listing.
191  return new FileIngestionSource(GetUnionPath());
192 }
193 
194 void SyncItem::StatGeneric(const string &path,
195  EntryStat *info,
196  const bool refresh) {
197  if (info->obtained && !refresh) return;
198  int retval = platform_lstat(path.c_str(), &info->stat);
199  info->error_code = (retval != 0) ? errno : 0;
200  info->obtained = true;
201 }
202 
203 
206 
// CreateBasicCatalogDirent(): fills a catalog::DirectoryEntryBase from the
// union-view stat of this item (mode, owner, size, mtime, link count), its
// content hash, its external-data / direct-I/O flags and its name, and
// returns it.
// NOTE(review): the signature line, the declaration of `dirent` and the
// assignments on listing lines 208 and 223 are missing from this listing.
207  // inode and parent inode is determined at runtime of client
209 
210  // this might mask the actual link count in case hardlinks are not supported
211  // (i.e. on setups using OverlayFS)
212  dirent.linkcount_ = HasHardlinks() ? this->GetUnionStat().st_nlink : 1;
213 
214  dirent.mode_ = this->GetUnionStat().st_mode;
215  dirent.uid_ = this->GetUnionStat().st_uid;
216  dirent.gid_ = this->GetUnionStat().st_gid;
// A size taken from a graft marker (graft_size_ > -1) wins over the stat size.
217  dirent.size_ = graft_size_ > -1 ? graft_size_ :
218  this->GetUnionStat().st_size;
219  dirent.mtime_ = this->GetUnionStat().st_mtime;
220  dirent.checksum_ = this->GetContentHash();
221  dirent.is_external_file_ = this->IsExternalData();
222  dirent.is_direct_io_ = this->IsDirectIo();
224 
225  dirent.name_.Assign(filename().data(), filename().length());
226 
// For symlinks the link target is read from the union path. readlink() does
// not NUL-terminate, hence the explicit length passed to Assign().
// NOTE(review): a readlink() failure is only caught by assert(), i.e. not in
// builds where assertions are disabled.
227  if (this->IsSymlink()) {
228  char slnk[PATH_MAX+1];
229  const ssize_t length =
230  readlink((this->GetUnionPath()).c_str(), slnk, PATH_MAX);
231  assert(length >= 0);
232  dirent.symlink_.Assign(slnk, length);
233  }
234 
// Device nodes carry no payload: the size field is reused to store the device
// number built from the major/minor pair.
235  if (this->IsCharacterDevice() || this->IsBlockDevice()) {
236  dirent.size_ = makedev(GetRdevMajor(), GetRdevMinor());
237  }
238 
239  return dirent;
240 }
241 
242 
243 std::string SyncItem::GetRdOnlyPath() const {
244  const string relative_path = GetRelativePath().empty() ?
245  "" : "/" + GetRelativePath();
246  return union_engine_->rdonly_path() + relative_path;
247 }
248 
249 std::string SyncItem::GetUnionPath() const {
250  const string relative_path = GetRelativePath().empty() ?
251  "" : "/" + GetRelativePath();
252  return union_engine_->union_path() + relative_path;
253 }
254 
255 std::string SyncItem::GetScratchPath() const {
256  const string relative_path = GetRelativePath().empty() ?
257  "" : "/" + GetRelativePath();
258  return union_engine_->scratch_path() + relative_path;
259  // return union_engine_->scratch_path() + filename();
260 }
261 
// CheckMarkerFiles(): inspects the marker that applies to this kind of item.
// Regular files are checked for a graft marker via CheckGraft().
// NOTE(review): the signature line and the statement inside the IsDirectory()
// branch (listing line 266) are missing from this listing; presumably that
// branch calls CheckCatalogMarker().
263  if (IsRegularFile()) {
264  CheckGraft();
265  } else if (IsDirectory()) {
267  }
268 }
269 
// CheckCatalogMarker(): sets has_catalog_marker_ according to the state of
// "<union path>/.cvmfscatalog":
//   - the stat fails, whatever the errno  -> has_catalog_marker_ = false
//   - the entry has type kItemFile        -> has_catalog_marker_ = true
//   - the entry exists with another type  -> PANIC
// NOTE(review): the signature line is missing from this listing.
271  std::string path(GetUnionPath() + "/.cvmfscatalog");
// A local EntryStat is used so the item's cached stats are left untouched.
// presumably a fresh EntryStat has obtained == false, so the stat really runs
// despite refresh == false -- confirm against sync_item.h
272  EntryStat stat;
273  StatGeneric(path, &stat, false);
274  if (stat.error_code) {
275  has_catalog_marker_ = false;
276  return;
277  }
278  if (stat.GetSyncItemType() == kItemFile) {
279  has_catalog_marker_ = true;
280  return;
281  }
282  PANIC(kLogStderr, "Error: '%s' is not a regular file.", path.c_str());
283 }
284 
285 
286 std::string SyncItem::GetGraftMarkerPath() const {
287  return union_engine_->scratch_path() + "/" +
288  ((relative_parent_path_.empty()) ?
289  ".cvmfsgraft-" + filename_ :
290  relative_parent_path_ + (filename_.empty() ? "" :
291  ("/.cvmfsgraft-" + filename_)));
292 }
293 
295  valid_graft_ = false;
296  bool found_checksum = false;
297  std::string checksum_type;
298  std::string checksum_value;
299  std::string graftfile = GetGraftMarkerPath();
300  LogCvmfs(kLogFsTraversal, kLogDebug, "Checking potential graft path %s.",
301  graftfile.c_str());
302  FILE *fp = fopen(graftfile.c_str(), "r");
303  if (fp == NULL) {
304  // This sync item can be a file from a removed directory tree on overlayfs.
305  // In this case, the entire tree is missing on the scratch directory and
306  // the errno is ENOTDIR.
307  if ((errno != ENOENT) && (errno != ENOTDIR)) {
308  LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to open graft file "
309  "(%s): %s (errno=%d)",
310  graftfile.c_str(), strerror(errno), errno);
311  }
312  return;
313  }
314  graft_marker_present_ = true;
315  valid_graft_ = true;
316  std::string line;
317  std::vector<std::string> contents;
318 
319  std::vector<off_t> chunk_offsets;
320  std::vector<shash::Any> chunk_checksums;
321 
322  while (GetLineFile(fp, &line)) {
323  std::string trimmed_line = Trim(line);
324 
325  if (!trimmed_line.size()) {continue;}
326  if (trimmed_line[0] == '#') {continue;}
327 
328  std::vector<std::string> info = SplitString(trimmed_line, '=', 2);
329 
330  if (info.size() != 2) {
331  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid line in graft file: %s",
332  trimmed_line.c_str());
333  }
334  info[0] = Trim(info[0]);
335  info[1] = Trim(info[1]);
336  if (info[0] == "size") {
337  uint64_t tmp_size;
338  if (!String2Uint64Parse(info[1], &tmp_size)) {
339  LogCvmfs(kLogFsTraversal, kLogWarning, "Failed to parse value of %s "
340  "to integer: %s (errno=%d)", trimmed_line.c_str(),
341  strerror(errno), errno);
342  continue;
343  }
344  graft_size_ = tmp_size;
345  } else if (info[0] == "checksum") {
346  std::string hash_str = info[1];
347  shash::HexPtr hashP(hash_str);
348  if (hashP.IsValid()) {
350  found_checksum = true;
351  } else {
352  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid checksum value: %s.",
353  info[1].c_str());
354  }
355  continue;
356  } else if (info[0] == "chunk_offsets") {
357  std::vector<std::string> offsets = SplitString(info[1], ',');
358  for (std::vector<std::string>::const_iterator it = offsets.begin();
359  it != offsets.end(); it++)
360  {
361  uint64_t val;
362  if (!String2Uint64Parse(*it, &val)) {
363  valid_graft_ = false;
364  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk offset: %s.",
365  it->c_str());
366  break;
367  }
368  chunk_offsets.push_back(val);
369  }
370  } else if (info[0] == "chunk_checksums") {
371  std::vector<std::string> csums = SplitString(info[1], ',');
372  for (std::vector<std::string>::const_iterator it = csums.begin();
373  it != csums.end(); it++)
374  {
375  shash::HexPtr hashP(*it);
376  if (hashP.IsValid()) {
377  chunk_checksums.push_back(shash::MkFromHexPtr(hashP));
378  } else {
379  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk checksum "
380  "value: %s.", it->c_str());
381  valid_graft_ = false;
382  break;
383  }
384  }
385  } else if (info[0] == "compression") {
387  }
388  }
389  if (!feof(fp)) {
390  LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to read from catalog "
391  "marker (%s): %s (errno=%d)",
392  graftfile.c_str(), strerror(errno), errno);
393  }
394  fclose(fp);
395  valid_graft_ = valid_graft_ && (graft_size_ > -1) && found_checksum
396  && (chunk_checksums.size() == chunk_offsets.size());
397 
398  if (!valid_graft_ || chunk_offsets.empty())
399  return;
400 
401  // Parse chunks
402  graft_chunklist_ = new FileChunkList(chunk_offsets.size());
403  off_t last_offset = chunk_offsets[0];
404  if (last_offset != 0) {
405  LogCvmfs(kLogFsTraversal, kLogWarning, "First chunk offset must be 0"
406  " (in graft marker %s).", graftfile.c_str());
407  valid_graft_ = false;
408  }
409  for (unsigned idx = 1; idx < chunk_offsets.size(); idx++) {
410  off_t cur_offset = chunk_offsets[idx];
411  if (last_offset >= cur_offset) {
412  LogCvmfs(kLogFsTraversal, kLogWarning, "Chunk offsets must be sorted "
413  "in strictly increasing order (in graft marker %s).",
414  graftfile.c_str());
415  valid_graft_ = false;
416  break;
417  }
418  size_t cur_size = cur_offset - last_offset;
419  graft_chunklist_->PushBack(FileChunk(chunk_checksums[idx - 1],
420  last_offset,
421  cur_size));
422  last_offset = cur_offset;
423  }
424  if (graft_size_ <= last_offset) {
425  LogCvmfs(kLogFsTraversal, kLogWarning, "Last offset must be strictly "
426  "less than total file size (in graft marker %s).",
427  graftfile.c_str());
428  valid_graft_ = false;
429  }
430  graft_chunklist_->PushBack(FileChunk(chunk_checksums.back(),
431  last_offset,
432  graft_size_ - last_offset));
433 }
434 
435 } // namespace publish
EntryStat union_stat_
Definition: sync_item.h:289
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
SyncItemType GetGenericFiletype(const EntryStat &stat) const
Definition: sync_item.cc:71
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
Definition: compression.cc:148
unsigned int GetRdevMinor() const
Definition: sync_item.h:109
shash::Any GetContentHash() const
Definition: sync_item.h:121
SyncItemType rdonly_type_
Definition: sync_item.h:268
inode_t inode_
vector< string > SplitString(const string &str, const char delim, const unsigned max_chunks)
Definition: string.cc:288
ssize_t graft_size_
Definition: sync_item.h:271
#define PANIC(...)
Definition: exception.h:26
string Trim(const string &raw, bool trim_newline)
Definition: string.cc:421
unsigned int GetRdevMajor() const
Definition: sync_item.h:104
void Assign(const char *chars, const unsigned length)
Definition: shortstring.h:53
gid_t gid_
const int kLogWarning
virtual catalog::DirectoryEntryBase CreateBasicCatalogDirent() const
Definition: sync_item.cc:204
const SyncUnion * union_engine_
Definition: sync_item.h:286
std::string relative_parent_path_
Definition: sync_item.h:300
std::string scratch_path() const
Definition: sync_union.h:98
bool IsBlockDevice() const
Definition: sync_item.h:77
bool IsDirectory() const
Definition: sync_item.h:64
bool IsCharacterDevice() const
Definition: sync_item.h:76
assert((mem||(size==0))&&"Out Of Memory")
Algorithms algorithm
Definition: hash.h:124
SyncItemType GetUnionFiletype() const
Definition: sync_item.cc:108
uint64_t size_
uint64_t GetUnionInode() const
Definition: sync_item.cc:175
void CheckCatalogMarker()
Definition: sync_item.cc:270
SyncItemType scratch_type_
Definition: sync_item.h:276
void CheckMarkerFiles()
Definition: sync_item.cc:262
bool String2Uint64Parse(const std::string &value, uint64_t *result)
Definition: string.cc:243
uint64_t GetRdOnlyInode() const
Definition: sync_item.cc:163
virtual bool IsType(const SyncItemType expected_type) const
Definition: sync_item.cc:115
NameString name_
platform_stat64 stat
Definition: sync_item.h:259
zlib::Algorithms compression_algorithm_
bool GetLineFile(FILE *f, std::string *line)
Definition: string.cc:379
BigVector< FileChunk > FileChunkList
Definition: file_chunk.h:51
std::string union_path() const
Definition: sync_union.h:97
EntryStat rdonly_stat_
Definition: sync_item.h:288
uint64_t GetScratchSize() const
Definition: sync_item.cc:180
bool IsRegularFile() const
Definition: sync_item.h:66
platform_stat64 GetUnionStat() const
Definition: sync_item.h:204
std::string GetUnionPath() const
Definition: sync_item.cc:249
uint32_t linkcount_
std::string GetRdOnlyPath() const
Definition: sync_item.cc:243
void StatRdOnly(const bool refresh=false) const
Definition: sync_item.h:313
bool is_external_file_
zlib::Algorithms GetCompressionAlgorithm() const
Definition: sync_item.h:127
int platform_lstat(const char *path, platform_stat64 *buf)
Any MkFromHexPtr(const HexPtr hex, const char suffix)
Definition: hash.cc:83
std::string GetScratchPath() const
Definition: sync_item.cc:255
time_t mtime_
static const inode_t kInvalidInode
void SetCompressionAlgorithm(const zlib::Algorithms &alg)
Definition: sync_item.h:130
FileChunkList * graft_chunklist_
Definition: sync_item.h:305
void PrintWarning(const string &message)
Definition: logging.cc:536
EntryStat scratch_stat_
Definition: sync_item.h:269
virtual ~SyncItem()
Definition: sync_item.cc:66
unsigned int GetRdOnlyLinkcount() const
Definition: sync_item.cc:157
Definition: sync_item.h:240
shash::Any content_hash_
Definition: sync_item.h:274
std::string filename_
Definition: sync_item.h:281
virtual SyncItemType GetScratchFiletype() const
Definition: sync_item.cc:98
void PushBack(const Item &item)
Definition: bigvector.h:60
bool IsDirectIo() const
Definition: sync_item.h:82
void MarkAsOpaqueDirectory()
Definition: sync_item.cc:151
unsigned int mode_
SyncItemType
Definition: sync_item.h:29
virtual void StatScratch(const bool refresh) const =0
static void StatGeneric(const std::string &path, EntryStat *info, const bool refresh)
Definition: sync_item.cc:194
std::string GetGraftMarkerPath() const
Definition: sync_item.cc:286
virtual void StatScratch(const bool refresh) const
Definition: sync_item.h:331
bool has_catalog_marker_
Definition: sync_item.h:294
virtual IngestionSource * CreateIngestionSource() const
Definition: sync_item.cc:190
void MarkAsWhiteout(const std::string &actual_filename)
Definition: sync_item.cc:124
unsigned int GetUnionLinkcount() const
Definition: sync_item.cc:169
bool graft_marker_present_
Definition: sync_item.h:296
shash::Any checksum_
bool IsSymlink() const
Definition: sync_item.h:68
std::string filename() const
Definition: sync_item.h:174
bool IsValid() const
Definition: hash.cc:37
bool is_direct_io_
bool IsExternalData() const
Definition: sync_item.h:81
bool obtained
Definition: sync_item.h:257
LinkString symlink_
SyncItemType GetSyncItemType() const
Definition: sync_item.h:245
std::string rdonly_path() const
Definition: sync_union.h:96
void StatUnion(const bool refresh=false) const
Definition: sync_item.h:316
std::string GetRelativePath() const
Definition: sync_item.h:143
int error_code
Definition: sync_item.h:258
uint64_t GetRdOnlySize() const
Definition: sync_item.cc:185
bool HasHardlinks() const
Definition: sync_item.h:163
SyncItemType GetRdOnlyFiletype() const
Definition: sync_item.cc:83
uid_t uid_