GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_item.cc
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 58 265 21.9%
Branches: 29 364 8.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM file system
3 */
4
5 #include "sync_item.h"
6
7
8 #if !defined(__APPLE__)
9 #include <sys/sysmacros.h>
10 #endif // __APPLE__
11
12 #include <cerrno>
13 #include <vector>
14
15 #include "duplex_libarchive.h"
16 #include "ingestion/ingestion_source.h"
17 #include "sync_mediator.h"
18 #include "sync_union.h"
19 #include "util/exception.h"
20
21 using namespace std; // NOLINT
22
23 namespace publish {
24
25 SyncItem::SyncItem()
26 : rdonly_type_(static_cast<SyncItemType>(0))
27 , graft_size_(-1)
28 , scratch_type_(static_cast<SyncItemType>(0))
29 , union_engine_(NULL)
30 , whiteout_(false)
31 , opaque_(false)
32 , masked_hardlink_(false)
33 , has_catalog_marker_(false)
34 , valid_graft_(false)
35 , graft_marker_present_(false)
36 , external_data_(false)
37 , direct_io_(false)
38 , graft_chunklist_(NULL)
39 , compression_algorithm_(zlib::kZlibDefault)
40 , has_compression_algorithm_(false) { }
41
42 266 SyncItem::SyncItem(const std::string &relative_parent_path,
43 const std::string &filename,
44 const SyncUnion *union_engine,
45 266 const SyncItemType entry_type)
46 266 : rdonly_type_(kItemUnknown)
47 266 , graft_size_(-1)
48 266 , scratch_type_(entry_type)
49 266 , filename_(filename)
50 266 , union_engine_(union_engine)
51 266 , whiteout_(false)
52 266 , opaque_(false)
53 266 , masked_hardlink_(false)
54 266 , has_catalog_marker_(false)
55 266 , valid_graft_(false)
56 266 , graft_marker_present_(false)
57 266 , external_data_(false)
58 266 , direct_io_(false)
59
1/2
✓ Branch 1 taken 266 times.
✗ Branch 2 not taken.
266 , relative_parent_path_(relative_parent_path)
60 266 , graft_chunklist_(NULL)
61 266 , compression_algorithm_(zlib::kZlibDefault)
62 532 , has_compression_algorithm_(false) {
63 266 content_hash_.algorithm = shash::kAny;
64 266 }
65
66
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 266 times.
532 SyncItem::~SyncItem() { delete graft_chunklist_; }
67
68
69 SyncItemType SyncItem::GetGenericFiletype(
70 const SyncItem::EntryStat &stat) const {
71 const SyncItemType type = stat.GetSyncItemType();
72 if (type == kItemUnknown) {
73 PANIC(kLogStderr,
74 "[WARNING] '%s' has an unsupported file type (st_mode: %d errno: %d)",
75 GetRelativePath().c_str(), stat.stat.st_mode, stat.error_code);
76 }
77 return type;
78 }
79
80
81 238 SyncItemType SyncItem::GetRdOnlyFiletype() const {
82 238 StatRdOnly();
83 // file could not exist in read-only branch, or a regular file could have
84 // been replaced by a directory in the read/write branch, like:
85 // rdonly:
86 // /foo/bar/regular_file <-- ENOTDIR when asking for (.../is_dir_now)
87 // r/w:
88 // /foo/bar/regular_file/
89 // /foo/bar/regular_file/is_dir_now
90
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 238 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
238 if (rdonly_stat_.error_code == ENOENT || rdonly_stat_.error_code == ENOTDIR)
91 238 return kItemNew;
92 return GetGenericFiletype(rdonly_stat_);
93 }
94
95
96 SyncItemType SyncItemNative::GetScratchFiletype() const {
97 StatScratch(/* refresh= */ false);
98 if (scratch_stat_.error_code != 0) {
99 PANIC(kLogStderr, "[WARNING] Failed to stat() '%s' in scratch. (errno: %s)",
100 GetRelativePath().c_str(), scratch_stat_.error_code);
101 }
102
103 return GetGenericFiletype(scratch_stat_);
104 }
105
106 SyncItemType SyncItem::GetUnionFiletype() const {
107 StatUnion();
108 if (union_stat_.error_code == ENOENT || union_stat_.error_code == ENOTDIR)
109 return kItemUnknown;
110 return GetGenericFiletype(union_stat_);
111 }
112
113 140 bool SyncItemNative::IsType(const SyncItemType expected_type) const {
114
2/6
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 140 times.
140 if (filename().substr(0, 12) == ".cvmfsgraft-") {
115 scratch_type_ = kItemMarker;
116
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 140 times.
140 } else if (scratch_type_ == kItemUnknown) {
117 scratch_type_ = GetScratchFiletype();
118 }
119 140 return scratch_type_ == expected_type;
120 }
121
122 void SyncItem::MarkAsWhiteout(const std::string &actual_filename) {
123 StatScratch(/* refresh= */ true);
124 // Mark the file as whiteout entry and strip the whiteout prefix
125 whiteout_ = true;
126 filename_ = actual_filename;
127
128 // Find the entry in the repository
129 StatRdOnly(true); // <== refreshing the stat (filename might have changed)
130
131 const SyncItemType deleted_type = (rdonly_stat_.error_code == 0)
132 ? GetRdOnlyFiletype()
133 : kItemUnknown;
134
135 rdonly_type_ = deleted_type;
136 scratch_type_ = deleted_type;
137
138 if (deleted_type == kItemUnknown) {
139 // Marking a SyncItem as 'whiteout' but no file to be removed found: This
140 // should not happen (actually AUFS prevents users from creating whiteouts)
141 // but can be provoked through an AUFS 'bug' (see test 593 or CVM-880).
142 // --> Warn the user, continue with kItemUnknown and cross your fingers!
143 PrintWarning("'" + GetRelativePath()
144 + "' should be deleted, but was not found in repository.");
145 }
146 }
147
148
149 void SyncItem::MarkAsOpaqueDirectory() {
150 assert(IsDirectory());
151 opaque_ = true;
152 }
153
154
155 unsigned int SyncItem::GetRdOnlyLinkcount() const {
156 StatRdOnly();
157 return rdonly_stat_.stat.st_nlink;
158 }
159
160
161 uint64_t SyncItem::GetRdOnlyInode() const {
162 StatRdOnly();
163 return rdonly_stat_.stat.st_ino;
164 }
165
166
167 unsigned int SyncItem::GetUnionLinkcount() const {
168 StatUnion();
169 return union_stat_.stat.st_nlink;
170 }
171
172
173 uint64_t SyncItem::GetUnionInode() const {
174 StatUnion();
175 return union_stat_.stat.st_ino;
176 }
177
178 uint64_t SyncItem::GetScratchSize() const {
179 StatScratch(/* refresh= */ false);
180 return scratch_stat_.stat.st_size;
181 }
182
183 uint64_t SyncItem::GetRdOnlySize() const {
184 StatRdOnly();
185 return rdonly_stat_.stat.st_size;
186 }
187
188 IngestionSource *SyncItemNative::CreateIngestionSource() const {
189 return new FileIngestionSource(GetUnionPath());
190 }
191
192 308 void SyncItem::StatGeneric(const string &path,
193 EntryStat *info,
194 const bool refresh) {
195
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 308 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
308 if (info->obtained && !refresh)
196 return;
197 308 const int retval = platform_lstat(path.c_str(), &info->stat);
198
1/2
✓ Branch 0 taken 308 times.
✗ Branch 1 not taken.
308 info->error_code = (retval != 0) ? errno : 0;
199 308 info->obtained = true;
200 }
201
202
203 catalog::DirectoryEntryBase SyncItemNative::CreateBasicCatalogDirent(
204 bool enable_mtime_ns) const {
205 catalog::DirectoryEntryBase dirent;
206
207 // inode and parent inode is determined at runtime of client
208 dirent.inode_ = catalog::DirectoryEntry::kInvalidInode;
209
210 // this might mask the actual link count in case hardlinks are not supported
211 // (i.e. on setups using OverlayFS)
212 dirent.linkcount_ = HasHardlinks() ? this->GetUnionStat().st_nlink : 1;
213
214 dirent.mode_ = this->GetUnionStat().st_mode;
215 dirent.uid_ = this->GetUnionStat().st_uid;
216 dirent.gid_ = this->GetUnionStat().st_gid;
217 dirent.size_ = graft_size_ > -1 ? graft_size_ : this->GetUnionStat().st_size;
218 dirent.mtime_ = this->GetUnionStat().st_mtime;
219 dirent.checksum_ = this->GetContentHash();
220 dirent.is_external_file_ = this->IsExternalData();
221 dirent.is_direct_io_ = this->IsDirectIo();
222 dirent.compression_algorithm_ = this->GetCompressionAlgorithm();
223
224 dirent.name_.Assign(filename().data(), filename().length());
225
226 if (this->IsSymlink()) {
227 char slnk[PATH_MAX + 1];
228 const ssize_t length = readlink((this->GetUnionPath()).c_str(), slnk,
229 PATH_MAX);
230 assert(length >= 0);
231 dirent.symlink_.Assign(slnk, length);
232 }
233
234 if (this->IsCharacterDevice() || this->IsBlockDevice()) {
235 dirent.size_ = makedev(GetRdevMajor(), GetRdevMinor());
236 }
237
238 if (enable_mtime_ns) {
239 #ifdef __APPLE__
240 dirent.mtime_ns_ = static_cast<int32_t>(
241 this->GetUnionStat().st_mtimespec.tv_nsec);
242 #else
243 dirent.mtime_ns_ = static_cast<int32_t>(
244 this->GetUnionStat().st_mtim.tv_nsec);
245 #endif
246 }
247
248 return dirent;
249 }
250
251
252 238 std::string SyncItem::GetRdOnlyPath() const {
253
1/2
✓ Branch 1 taken 238 times.
✗ Branch 2 not taken.
238 const string relative_path = GetRelativePath().empty()
254 ? ""
255
5/18
✗ Branch 0 not taken.
✓ Branch 1 taken 238 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 238 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 238 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 238 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 238 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
476 : "/" + GetRelativePath();
256
2/4
✓ Branch 1 taken 238 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 238 times.
✗ Branch 5 not taken.
714 return union_engine_->rdonly_path() + relative_path;
257 238 }
258
259 196 std::string SyncItem::GetUnionPath() const {
260
1/2
✓ Branch 1 taken 196 times.
✗ Branch 2 not taken.
196 const string relative_path = GetRelativePath().empty()
261 ? ""
262
5/18
✗ Branch 0 not taken.
✓ Branch 1 taken 196 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 196 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✓ Branch 10 taken 196 times.
✗ Branch 11 not taken.
✓ Branch 12 taken 196 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✓ Branch 15 taken 196 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
392 : "/" + GetRelativePath();
263
2/4
✓ Branch 1 taken 196 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 196 times.
✗ Branch 5 not taken.
588 return union_engine_->union_path() + relative_path;
264 196 }
265
266 std::string SyncItem::GetScratchPath() const {
267 const string relative_path = GetRelativePath().empty()
268 ? ""
269 : "/" + GetRelativePath();
270 return union_engine_->scratch_path() + relative_path;
271 // return union_engine_->scratch_path() + filename();
272 }
273
274 70 void SyncItem::CheckMarkerFiles() {
275
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 70 times.
70 if (IsRegularFile()) {
276 CheckGraft();
277
1/2
✓ Branch 1 taken 70 times.
✗ Branch 2 not taken.
70 } else if (IsDirectory()) {
278 70 CheckCatalogMarker();
279 }
280 70 }
281
282 70 void SyncItem::CheckCatalogMarker() {
283
2/4
✓ Branch 1 taken 70 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 70 times.
✗ Branch 5 not taken.
70 const std::string path(GetUnionPath() + "/.cvmfscatalog");
284 70 EntryStat stat;
285 70 StatGeneric(path, &stat, false);
286
1/2
✓ Branch 0 taken 70 times.
✗ Branch 1 not taken.
70 if (stat.error_code) {
287 70 has_catalog_marker_ = false;
288 70 return;
289 }
290 if (stat.GetSyncItemType() == kItemFile) {
291 has_catalog_marker_ = true;
292 return;
293 }
294 PANIC(kLogStderr, "Error: '%s' is not a regular file.", path.c_str());
295 70 }
296
297
298 std::string SyncItem::GetGraftMarkerPath() const {
299 return union_engine_->scratch_path() + "/"
300 + ((relative_parent_path_.empty())
301 ? ".cvmfsgraft-" + filename_
302 : relative_parent_path_
303 + (filename_.empty() ? ""
304 : ("/.cvmfsgraft-" + filename_)));
305 }
306
307 void SyncItem::CheckGraft() {
308 valid_graft_ = false;
309 bool found_checksum = false;
310 const std::string checksum_type;
311 const std::string checksum_value;
312 const std::string graftfile = GetGraftMarkerPath();
313 LogCvmfs(kLogFsTraversal, kLogDebug, "Checking potential graft path %s.",
314 graftfile.c_str());
315 FILE *fp = fopen(graftfile.c_str(), "r");
316 if (fp == NULL) {
317 // This sync item can be a file from a removed directory tree on overlayfs.
318 // In this case, the entire tree is missing on the scratch directory and
319 // the errno is ENOTDIR.
320 if ((errno != ENOENT) && (errno != ENOTDIR)) {
321 LogCvmfs(kLogFsTraversal, kLogWarning,
322 "Unable to open graft file "
323 "(%s): %s (errno=%d)",
324 graftfile.c_str(), strerror(errno), errno);
325 }
326 return;
327 }
328 graft_marker_present_ = true;
329 valid_graft_ = true;
330 std::string line;
331 const std::vector<std::string> contents;
332
333 std::vector<off_t> chunk_offsets;
334 std::vector<shash::Any> chunk_checksums;
335
336 while (GetLineFile(fp, &line)) {
337 std::string trimmed_line = Trim(line);
338
339 if (!trimmed_line.size()) {
340 continue;
341 }
342 if (trimmed_line[0] == '#') {
343 continue;
344 }
345
346 std::vector<std::string> info = SplitStringBounded(2, trimmed_line, '=');
347
348 if (info.size() != 2) {
349 LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid line in graft file: %s",
350 trimmed_line.c_str());
351 }
352 info[0] = Trim(info[0]);
353 info[1] = Trim(info[1]);
354 if (info[0] == "size") {
355 uint64_t tmp_size;
356 if (!String2Uint64Parse(info[1], &tmp_size)) {
357 LogCvmfs(kLogFsTraversal, kLogWarning,
358 "Failed to parse value of %s "
359 "to integer: %s (errno=%d)",
360 trimmed_line.c_str(), strerror(errno), errno);
361 continue;
362 }
363 graft_size_ = tmp_size;
364 } else if (info[0] == "checksum") {
365 const std::string hash_str = info[1];
366 const shash::HexPtr hashP(hash_str);
367 if (hashP.IsValid()) {
368 content_hash_ = shash::MkFromHexPtr(hashP);
369 found_checksum = true;
370 } else {
371 LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid checksum value: %s.",
372 info[1].c_str());
373 }
374 continue;
375 } else if (info[0] == "chunk_offsets") {
376 std::vector<std::string> offsets = SplitString(info[1], ',');
377 for (std::vector<std::string>::const_iterator it = offsets.begin();
378 it != offsets.end();
379 it++) {
380 uint64_t val;
381 if (!String2Uint64Parse(*it, &val)) {
382 valid_graft_ = false;
383 LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk offset: %s.",
384 it->c_str());
385 break;
386 }
387 chunk_offsets.push_back(val);
388 }
389 } else if (info[0] == "chunk_checksums") {
390 std::vector<std::string> csums = SplitString(info[1], ',');
391 for (std::vector<std::string>::const_iterator it = csums.begin();
392 it != csums.end();
393 it++) {
394 const shash::HexPtr hashP(*it);
395 if (hashP.IsValid()) {
396 chunk_checksums.push_back(shash::MkFromHexPtr(hashP));
397 } else {
398 LogCvmfs(kLogFsTraversal, kLogWarning,
399 "Invalid chunk checksum "
400 "value: %s.",
401 it->c_str());
402 valid_graft_ = false;
403 break;
404 }
405 }
406 } else if (info[0] == "compression") {
407 SetCompressionAlgorithm(zlib::ParseCompressionAlgorithm(info[1]));
408 }
409 }
410 if (!feof(fp)) {
411 LogCvmfs(kLogFsTraversal, kLogWarning,
412 "Unable to read from catalog "
413 "marker (%s): %s (errno=%d)",
414 graftfile.c_str(), strerror(errno), errno);
415 }
416 fclose(fp);
417 valid_graft_ = valid_graft_ && (graft_size_ > -1) && found_checksum
418 && (chunk_checksums.size() == chunk_offsets.size());
419
420 if (!valid_graft_ || chunk_offsets.empty())
421 return;
422
423 // Parse chunks
424 graft_chunklist_ = new FileChunkList(chunk_offsets.size());
425 off_t last_offset = chunk_offsets[0];
426 if (last_offset != 0) {
427 LogCvmfs(kLogFsTraversal, kLogWarning,
428 "First chunk offset must be 0"
429 " (in graft marker %s).",
430 graftfile.c_str());
431 valid_graft_ = false;
432 }
433 for (unsigned idx = 1; idx < chunk_offsets.size(); idx++) {
434 const off_t cur_offset = chunk_offsets[idx];
435 if (last_offset >= cur_offset) {
436 LogCvmfs(kLogFsTraversal, kLogWarning,
437 "Chunk offsets must be sorted "
438 "in strictly increasing order (in graft marker %s).",
439 graftfile.c_str());
440 valid_graft_ = false;
441 break;
442 }
443 const size_t cur_size = cur_offset - last_offset;
444 graft_chunklist_->PushBack(
445 FileChunk(chunk_checksums[idx - 1], last_offset, cur_size));
446 last_offset = cur_offset;
447 }
448 if (graft_size_ <= last_offset) {
449 LogCvmfs(kLogFsTraversal, kLogWarning,
450 "Last offset must be strictly "
451 "less than total file size (in graft marker %s).",
452 graftfile.c_str());
453 valid_graft_ = false;
454 }
455 graft_chunklist_->PushBack(FileChunk(
456 chunk_checksums.back(), last_offset, graft_size_ - last_offset));
457 }
458
459 } // namespace publish
460