CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sync_item.cc
Go to the documentation of this file.
1 
5 #include "sync_item.h"
6 
7 
8 #if !defined(__APPLE__)
9 #include <sys/sysmacros.h>
10 #endif // __APPLE__
11 
12 #include <cerrno>
13 #include <vector>
14 
15 #include "duplex_libarchive.h"
17 #include "sync_mediator.h"
18 #include "sync_union.h"
19 #include "util/exception.h"
20 
21 using namespace std; // NOLINT
22 
23 namespace publish {
24 
25 SyncItem::SyncItem()
26  : rdonly_type_(static_cast<SyncItemType>(0))
27  , graft_size_(-1)
28  , scratch_type_(static_cast<SyncItemType>(0))
29  , union_engine_(NULL)
30  , whiteout_(false)
31  , opaque_(false)
32  , masked_hardlink_(false)
33  , has_catalog_marker_(false)
34  , valid_graft_(false)
35  , graft_marker_present_(false)
36  , external_data_(false)
37  , direct_io_(false)
38  , graft_chunklist_(NULL)
39  , compression_algorithm_(zlib::kZlibDefault)
40  , has_compression_algorithm_(false) { }
41 
42 SyncItem::SyncItem(const std::string &relative_parent_path,
43  const std::string &filename,
44  const SyncUnion *union_engine,
45  const SyncItemType entry_type)
46  : rdonly_type_(kItemUnknown)
47  , graft_size_(-1)
48  , scratch_type_(entry_type)
49  , filename_(filename)
50  , union_engine_(union_engine)
51  , whiteout_(false)
52  , opaque_(false)
53  , masked_hardlink_(false)
54  , has_catalog_marker_(false)
55  , valid_graft_(false)
56  , graft_marker_present_(false)
57  , external_data_(false)
58  , direct_io_(false)
59  , relative_parent_path_(relative_parent_path)
60  , graft_chunklist_(NULL)
61  , compression_algorithm_(zlib::kZlibDefault)
62  , has_compression_algorithm_(false) {
64 }
65 
67 
68 
70  const SyncItem::EntryStat &stat) const {
71  const SyncItemType type = stat.GetSyncItemType();
72  if (type == kItemUnknown) {
74  "[WARNING] '%s' has an unsupported file type (st_mode: %d errno: %d)",
75  GetRelativePath().c_str(), stat.stat.st_mode, stat.error_code);
76  }
77  return type;
78 }
79 
80 
82  StatRdOnly();
83  // file could not exist in read-only branch, or a regular file could have
84  // been replaced by a directory in the read/write branch, like:
85  // rdonly:
86  // /foo/bar/regular_file <-- ENOTDIR when asking for (.../is_dir_now)
87  // r/w:
88  // /foo/bar/regular_file/
89  // /foo/bar/regular_file/is_dir_now
90  if (rdonly_stat_.error_code == ENOENT || rdonly_stat_.error_code == ENOTDIR)
91  return kItemNew;
93 }
94 
95 
97  StatScratch(/* refresh= */ false);
98  if (scratch_stat_.error_code != 0) {
99  PANIC(kLogStderr, "[WARNING] Failed to stat() '%s' in scratch. (errno: %s)",
101  }
102 
104 }
105 
107  StatUnion();
108  if (union_stat_.error_code == ENOENT || union_stat_.error_code == ENOTDIR)
109  return kItemUnknown;
111 }
112 
113 bool SyncItemNative::IsType(const SyncItemType expected_type) const {
114  if (filename().substr(0, 12) == ".cvmfsgraft-") {
116  } else if (scratch_type_ == kItemUnknown) {
118  }
119  return scratch_type_ == expected_type;
120 }
121 
122 void SyncItem::MarkAsWhiteout(const std::string &actual_filename) {
123  StatScratch(/* refresh= */ true);
124  // Mark the file as whiteout entry and strip the whiteout prefix
125  whiteout_ = true;
126  filename_ = actual_filename;
127 
128  // Find the entry in the repository
129  StatRdOnly(true); // <== refreshing the stat (filename might have changed)
130 
131  const SyncItemType deleted_type = (rdonly_stat_.error_code == 0)
133  : kItemUnknown;
134 
135  rdonly_type_ = deleted_type;
136  scratch_type_ = deleted_type;
137 
138  if (deleted_type == kItemUnknown) {
139  // Marking a SyncItem as 'whiteout' but no file to be removed found: This
140  // should not happen (actually AUFS prevents users from creating whiteouts)
141  // but can be provoked through an AUFS 'bug' (see test 593 or CVM-880).
142  // --> Warn the user, continue with kItemUnknown and cross your fingers!
144  + "' should be deleted, but was not found in repository.");
145  }
146 }
147 
148 
150  assert(IsDirectory());
151  opaque_ = true;
152 }
153 
154 
155 unsigned int SyncItem::GetRdOnlyLinkcount() const {
156  StatRdOnly();
157  return rdonly_stat_.stat.st_nlink;
158 }
159 
160 
161 uint64_t SyncItem::GetRdOnlyInode() const {
162  StatRdOnly();
163  return rdonly_stat_.stat.st_ino;
164 }
165 
166 
167 unsigned int SyncItem::GetUnionLinkcount() const {
168  StatUnion();
169  return union_stat_.stat.st_nlink;
170 }
171 
172 
173 uint64_t SyncItem::GetUnionInode() const {
174  StatUnion();
175  return union_stat_.stat.st_ino;
176 }
177 
178 uint64_t SyncItem::GetScratchSize() const {
179  StatScratch(/* refresh= */ false);
180  return scratch_stat_.stat.st_size;
181 }
182 
183 uint64_t SyncItem::GetRdOnlySize() const {
184  StatRdOnly();
185  return rdonly_stat_.stat.st_size;
186 }
187 
189  return new FileIngestionSource(GetUnionPath());
190 }
191 
192 void SyncItem::StatGeneric(const string &path,
193  EntryStat *info,
194  const bool refresh) {
195  if (info->obtained && !refresh)
196  return;
197  const int retval = platform_lstat(path.c_str(), &info->stat);
198  info->error_code = (retval != 0) ? errno : 0;
199  info->obtained = true;
200 }
201 
202 
204  bool enable_mtime_ns) const {
206 
207  // inode and parent inode is determined at runtime of client
209 
210  // this might mask the actual link count in case hardlinks are not supported
211  // (i.e. on setups using OverlayFS)
212  dirent.linkcount_ = HasHardlinks() ? this->GetUnionStat().st_nlink : 1;
213 
214  dirent.mode_ = this->GetUnionStat().st_mode;
215  dirent.uid_ = this->GetUnionStat().st_uid;
216  dirent.gid_ = this->GetUnionStat().st_gid;
217  dirent.size_ = graft_size_ > -1 ? graft_size_ : this->GetUnionStat().st_size;
218  dirent.mtime_ = this->GetUnionStat().st_mtime;
219  dirent.checksum_ = this->GetContentHash();
220  dirent.is_external_file_ = this->IsExternalData();
221  dirent.is_direct_io_ = this->IsDirectIo();
223 
224  dirent.name_.Assign(filename().data(), filename().length());
225 
226  if (this->IsSymlink()) {
227  char slnk[PATH_MAX + 1];
228  const ssize_t length = readlink((this->GetUnionPath()).c_str(), slnk,
229  PATH_MAX);
230  assert(length >= 0);
231  dirent.symlink_.Assign(slnk, length);
232  }
233 
234  if (this->IsCharacterDevice() || this->IsBlockDevice()) {
235  dirent.size_ = makedev(GetRdevMajor(), GetRdevMinor());
236  }
237 
238  if (enable_mtime_ns) {
239 #ifdef __APPLE__
240  dirent.mtime_ns_ = static_cast<int32_t>(
241  this->GetUnionStat().st_mtimespec.tv_nsec);
242 #else
243  dirent.mtime_ns_ = static_cast<int32_t>(
244  this->GetUnionStat().st_mtim.tv_nsec);
245 #endif
246  }
247 
248  return dirent;
249 }
250 
251 
252 std::string SyncItem::GetRdOnlyPath() const {
253  const string relative_path = GetRelativePath().empty()
254  ? ""
255  : "/" + GetRelativePath();
256  return union_engine_->rdonly_path() + relative_path;
257 }
258 
259 std::string SyncItem::GetUnionPath() const {
260  const string relative_path = GetRelativePath().empty()
261  ? ""
262  : "/" + GetRelativePath();
263  return union_engine_->union_path() + relative_path;
264 }
265 
266 std::string SyncItem::GetScratchPath() const {
267  const string relative_path = GetRelativePath().empty()
268  ? ""
269  : "/" + GetRelativePath();
270  return union_engine_->scratch_path() + relative_path;
271  // return union_engine_->scratch_path() + filename();
272 }
273 
275  if (IsRegularFile()) {
276  CheckGraft();
277  } else if (IsDirectory()) {
279  }
280 }
281 
283  const std::string path(GetUnionPath() + "/.cvmfscatalog");
284  EntryStat stat;
285  StatGeneric(path, &stat, false);
286  if (stat.error_code) {
287  has_catalog_marker_ = false;
288  return;
289  }
290  if (stat.GetSyncItemType() == kItemFile) {
291  has_catalog_marker_ = true;
292  return;
293  }
294  PANIC(kLogStderr, "Error: '%s' is not a regular file.", path.c_str());
295 }
296 
297 
298 std::string SyncItem::GetGraftMarkerPath() const {
299  return union_engine_->scratch_path() + "/"
300  + ((relative_parent_path_.empty())
301  ? ".cvmfsgraft-" + filename_
303  + (filename_.empty() ? ""
304  : ("/.cvmfsgraft-" + filename_)));
305 }
306 
308  valid_graft_ = false;
309  bool found_checksum = false;
310  const std::string checksum_type;
311  const std::string checksum_value;
312  const std::string graftfile = GetGraftMarkerPath();
313  LogCvmfs(kLogFsTraversal, kLogDebug, "Checking potential graft path %s.",
314  graftfile.c_str());
315  FILE *fp = fopen(graftfile.c_str(), "r");
316  if (fp == NULL) {
317  // This sync item can be a file from a removed directory tree on overlayfs.
318  // In this case, the entire tree is missing on the scratch directory and
319  // the errno is ENOTDIR.
320  if ((errno != ENOENT) && (errno != ENOTDIR)) {
322  "Unable to open graft file "
323  "(%s): %s (errno=%d)",
324  graftfile.c_str(), strerror(errno), errno);
325  }
326  return;
327  }
328  graft_marker_present_ = true;
329  valid_graft_ = true;
330  std::string line;
331  const std::vector<std::string> contents;
332 
333  std::vector<off_t> chunk_offsets;
334  std::vector<shash::Any> chunk_checksums;
335 
336  while (GetLineFile(fp, &line)) {
337  std::string trimmed_line = Trim(line);
338 
339  if (!trimmed_line.size()) {
340  continue;
341  }
342  if (trimmed_line[0] == '#') {
343  continue;
344  }
345 
346  std::vector<std::string> info = SplitStringBounded(2, trimmed_line, '=');
347 
348  if (info.size() != 2) {
349  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid line in graft file: %s",
350  trimmed_line.c_str());
351  }
352  info[0] = Trim(info[0]);
353  info[1] = Trim(info[1]);
354  if (info[0] == "size") {
355  uint64_t tmp_size;
356  if (!String2Uint64Parse(info[1], &tmp_size)) {
358  "Failed to parse value of %s "
359  "to integer: %s (errno=%d)",
360  trimmed_line.c_str(), strerror(errno), errno);
361  continue;
362  }
363  graft_size_ = tmp_size;
364  } else if (info[0] == "checksum") {
365  const std::string hash_str = info[1];
366  const shash::HexPtr hashP(hash_str);
367  if (hashP.IsValid()) {
369  found_checksum = true;
370  } else {
371  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid checksum value: %s.",
372  info[1].c_str());
373  }
374  continue;
375  } else if (info[0] == "chunk_offsets") {
376  std::vector<std::string> offsets = SplitString(info[1], ',');
377  for (std::vector<std::string>::const_iterator it = offsets.begin();
378  it != offsets.end();
379  it++) {
380  uint64_t val;
381  if (!String2Uint64Parse(*it, &val)) {
382  valid_graft_ = false;
383  LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk offset: %s.",
384  it->c_str());
385  break;
386  }
387  chunk_offsets.push_back(val);
388  }
389  } else if (info[0] == "chunk_checksums") {
390  std::vector<std::string> csums = SplitString(info[1], ',');
391  for (std::vector<std::string>::const_iterator it = csums.begin();
392  it != csums.end();
393  it++) {
394  const shash::HexPtr hashP(*it);
395  if (hashP.IsValid()) {
396  chunk_checksums.push_back(shash::MkFromHexPtr(hashP));
397  } else {
399  "Invalid chunk checksum "
400  "value: %s.",
401  it->c_str());
402  valid_graft_ = false;
403  break;
404  }
405  }
406  } else if (info[0] == "compression") {
408  }
409  }
410  if (!feof(fp)) {
412  "Unable to read from catalog "
413  "marker (%s): %s (errno=%d)",
414  graftfile.c_str(), strerror(errno), errno);
415  }
416  fclose(fp);
417  valid_graft_ = valid_graft_ && (graft_size_ > -1) && found_checksum
418  && (chunk_checksums.size() == chunk_offsets.size());
419 
420  if (!valid_graft_ || chunk_offsets.empty())
421  return;
422 
423  // Parse chunks
424  graft_chunklist_ = new FileChunkList(chunk_offsets.size());
425  off_t last_offset = chunk_offsets[0];
426  if (last_offset != 0) {
428  "First chunk offset must be 0"
429  " (in graft marker %s).",
430  graftfile.c_str());
431  valid_graft_ = false;
432  }
433  for (unsigned idx = 1; idx < chunk_offsets.size(); idx++) {
434  const off_t cur_offset = chunk_offsets[idx];
435  if (last_offset >= cur_offset) {
437  "Chunk offsets must be sorted "
438  "in strictly increasing order (in graft marker %s).",
439  graftfile.c_str());
440  valid_graft_ = false;
441  break;
442  }
443  const size_t cur_size = cur_offset - last_offset;
445  FileChunk(chunk_checksums[idx - 1], last_offset, cur_size));
446  last_offset = cur_offset;
447  }
448  if (graft_size_ <= last_offset) {
450  "Last offset must be strictly "
451  "less than total file size (in graft marker %s).",
452  graftfile.c_str());
453  valid_graft_ = false;
454  }
456  chunk_checksums.back(), last_offset, graft_size_ - last_offset));
457 }
458 
459 } // namespace publish
EntryStat union_stat_
Definition: sync_item.h:301
SyncItemType GetGenericFiletype(const EntryStat &stat) const
Definition: sync_item.cc:69
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
Definition: compression.cc:153
unsigned int GetRdevMinor() const
Definition: sync_item.h:104
shash::Any GetContentHash() const
Definition: sync_item.h:119
SyncItemType rdonly_type_
Definition: sync_item.h:280
inode_t inode_
ssize_t graft_size_
Definition: sync_item.h:283
#define PANIC(...)
Definition: exception.h:29
string Trim(const string &raw, bool trim_newline)
Definition: string.cc:466
unsigned int GetRdevMajor() const
Definition: sync_item.h:98
void Assign(const char *chars, const unsigned length)
Definition: shortstring.h:61
gid_t gid_
virtual catalog::DirectoryEntryBase CreateBasicCatalogDirent(bool enable_mtime_ns) const
Definition: sync_item.cc:203
const int kLogWarning
const SyncUnion * union_engine_
Definition: sync_item.h:298
std::string relative_parent_path_
Definition: sync_item.h:312
std::string scratch_path() const
Definition: sync_union.h:98
bool IsBlockDevice() const
Definition: sync_item.h:77
bool IsDirectory() const
Definition: sync_item.h:64
bool IsCharacterDevice() const
Definition: sync_item.h:76
assert((mem||(size==0))&&"Out Of Memory")
int32_t mtime_ns_
Algorithms algorithm
Definition: hash.h:122
SyncItemType GetUnionFiletype() const
Definition: sync_item.cc:106
uint64_t size_
uint64_t GetUnionInode() const
Definition: sync_item.cc:173
void CheckCatalogMarker()
Definition: sync_item.cc:282
SyncItemType scratch_type_
Definition: sync_item.h:288
void CheckMarkerFiles()
Definition: sync_item.cc:274
bool String2Uint64Parse(const std::string &value, uint64_t *result)
Definition: string.cc:257
uint64_t GetRdOnlyInode() const
Definition: sync_item.cc:161
virtual bool IsType(const SyncItemType expected_type) const
Definition: sync_item.cc:113
NameString name_
platform_stat64 stat
Definition: sync_item.h:271
zlib::Algorithms compression_algorithm_
bool GetLineFile(FILE *f, std::string *line)
Definition: string.cc:422
BigVector< FileChunk > FileChunkList
Definition: file_chunk.h:47
std::string union_path() const
Definition: sync_union.h:97
EntryStat rdonly_stat_
Definition: sync_item.h:300
vector< string > SplitString(const string &str, char delim)
Definition: string.cc:306
uint64_t GetScratchSize() const
Definition: sync_item.cc:178
bool IsRegularFile() const
Definition: sync_item.h:66
platform_stat64 GetUnionStat() const
Definition: sync_item.h:209
std::string GetUnionPath() const
Definition: sync_item.cc:259
uint32_t linkcount_
std::string GetRdOnlyPath() const
Definition: sync_item.cc:252
void StatRdOnly(const bool refresh=false) const
Definition: sync_item.h:325
bool is_external_file_
zlib::Algorithms GetCompressionAlgorithm() const
Definition: sync_item.h:125
int platform_lstat(const char *path, platform_stat64 *buf)
std::string GetScratchPath() const
Definition: sync_item.cc:266
vector< string > SplitStringBounded(unsigned max_chunks, const string &str, char delim)
Definition: string.cc:310
time_t mtime_
static const inode_t kInvalidInode
void SetCompressionAlgorithm(const zlib::Algorithms &alg)
Definition: sync_item.h:128
FileChunkList * graft_chunklist_
Definition: sync_item.h:317
void PrintWarning(const string &message)
Definition: logging.cc:560
EntryStat scratch_stat_
Definition: sync_item.h:281
virtual ~SyncItem()
Definition: sync_item.cc:66
unsigned int GetRdOnlyLinkcount() const
Definition: sync_item.cc:155
Definition: sync_item.h:245
shash::Any content_hash_
Definition: sync_item.h:286
std::string filename_
Definition: sync_item.h:293
virtual SyncItemType GetScratchFiletype() const
Definition: sync_item.cc:96
void PushBack(const Item &item)
Definition: bigvector.h:58
bool IsDirectIo() const
Definition: sync_item.h:82
void MarkAsOpaqueDirectory()
Definition: sync_item.cc:149
unsigned int mode_
SyncItemType
Definition: sync_item.h:29
virtual void StatScratch(const bool refresh) const =0
static void StatGeneric(const std::string &path, EntryStat *info, const bool refresh)
Definition: sync_item.cc:192
Any MkFromHexPtr(const HexPtr hex, const char suffix)
Definition: hash.cc:82
std::string GetGraftMarkerPath() const
Definition: sync_item.cc:298
virtual void StatScratch(const bool refresh) const
Definition: sync_item.h:344
bool has_catalog_marker_
Definition: sync_item.h:306
virtual IngestionSource * CreateIngestionSource() const
Definition: sync_item.cc:188
void MarkAsWhiteout(const std::string &actual_filename)
Definition: sync_item.cc:122
unsigned int GetUnionLinkcount() const
Definition: sync_item.cc:167
bool graft_marker_present_
Definition: sync_item.h:308
shash::Any checksum_
bool IsSymlink() const
Definition: sync_item.h:68
std::string filename() const
Definition: sync_item.h:179
bool IsValid() const
Definition: hash.cc:36
bool is_direct_io_
bool IsExternalData() const
Definition: sync_item.h:81
bool obtained
Definition: sync_item.h:269
LinkString symlink_
SyncItemType GetSyncItemType() const
Definition: sync_item.h:250
std::string rdonly_path() const
Definition: sync_union.h:96
void StatUnion(const bool refresh=false) const
Definition: sync_item.h:328
std::string GetRelativePath() const
Definition: sync_item.h:147
int error_code
Definition: sync_item.h:270
uint64_t GetRdOnlySize() const
Definition: sync_item.cc:183
bool HasHardlinks() const
Definition: sync_item.h:168
SyncItemType GetRdOnlyFiletype() const
Definition: sync_item.cc:81
uid_t uid_
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545