GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2026-06-28 02:36:10
Exec Total Coverage
Lines: 115 188 61.2%
Branches: 105 329 31.9%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #include "sync_union_tarball.h"
6
7 #include <pthread.h>
8 #include <unistd.h>
9
10 #include <cassert>
11 #include <cstdio>
12 #include <list>
13 #include <set>
14 #include <string>
15 #include <vector>
16
17 #include "duplex_libarchive.h"
18 #include "sync_item.h"
19 #include "sync_item_dummy.h"
20 #include "sync_item_tar.h"
21 #include "sync_mediator.h"
22 #include "sync_union.h"
23 #include "util/concurrency.h"
24 #include "util/exception.h"
25 #include "util/fs_traversal.h"
26 #include "util/posix.h"
27 #include "util/smalloc.h"
28
29 namespace publish {
30
31 210 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
32 const std::string &rdonly_path,
33 const std::string &tarball_path,
34 const std::string &base_directory,
35 const uid_t uid,
36 const gid_t gid,
37 const std::string &to_delete,
38 const bool create_catalog_on_root,
39 const bool fast_delete,
40 const std::string &path_delimiter,
41 210 const bool tolerate_missing_hardlinks)
42 : SyncUnion(mediator, rdonly_path, "", "")
43 210 , src(NULL)
44 210 , tarball_path_(tarball_path)
45
1/2
✓ Branch 1 taken 210 times.
✗ Branch 2 not taken.
210 , base_directory_(base_directory)
46 210 , uid_(uid)
47 210 , gid_(gid)
48
1/2
✓ Branch 1 taken 210 times.
✗ Branch 2 not taken.
210 , to_delete_(to_delete)
49 210 , create_catalog_on_root_(create_catalog_on_root)
50 210 , fast_delete_(fast_delete)
51
1/2
✓ Branch 1 taken 210 times.
✗ Branch 2 not taken.
210 , path_delimiter_(path_delimiter)
52 210 , tolerate_missing_hardlinks_(tolerate_missing_hardlinks)
53
6/12
✓ Branch 2 taken 210 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 210 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 210 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 210 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 210 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 210 times.
✗ Branch 27 not taken.
420 , read_archive_signal_(new Signal) { }
54
55
1/2
✓ Branch 0 taken 210 times.
✗ Branch 1 not taken.
420 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
56
57 210 bool SyncUnionTarball::Initialize() {
58 bool result;
59
60 // We are just deleting entity from the repo
61
2/2
✓ Branch 1 taken 105 times.
✓ Branch 2 taken 105 times.
210 if (tarball_path_ == "") {
62
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 105 times.
105 assert(NULL == src);
63 105 return SyncUnion::Initialize();
64 }
65
66 105 src = archive_read_new();
67
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 105 times.
105 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
68
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 105 times.
105 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
69
70
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 105 times.
105 if (tarball_path_ == "-") {
71 result = archive_read_open_filename(src, NULL, kBlockSize);
72 } else {
73
1/2
✓ Branch 1 taken 105 times.
✗ Branch 2 not taken.
105 const std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
74
1/2
✓ Branch 2 taken 105 times.
✗ Branch 3 not taken.
105 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
75 kBlockSize);
76 105 }
77
78
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 105 times.
105 if (result != ARCHIVE_OK) {
79 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
80 archive_error_string(src));
81 return false;
82 }
83
84 105 return SyncUnion::Initialize();
85 }
86
87 /*
88 * Libarchive is not thread aware, so we need to make sure that, before
89 * reading/"opening" the next header in the archive, the content of the
90 * present header has been consumed completely.
91 *
92 * The header is read/"opened" by a different thread than the one that
93 * consumes it, so we opted for a Signal backed by a condition variable.
94 * We wait for the signal just before reading the header.
95 * Then, when we are done with the header, the Signal is fired.
96 * The Signal can be fired inside the main loop if we don't need to read
97 * data, or when the IngestionSource gets closed, which means that we are
98 * not reading data from there anymore.
99 * This whole process is not necessary for directories since we don't
100 * actually need to read data from them.
101 *
102 * It may be necessary to add a catalog at the root of the archive.
103 * A possible way to do it is by creating a virtual `.cvmfscatalog` file and
104 * pushing it into the usual pipeline.
105 * This operation must be done only once, and it seems like a good idea to do
106 * it at the first iteration of the loop, hence this logic is managed by the
107 * `first_iteration` boolean flag.
108 */
109 210 void SyncUnionTarball::Traverse() {
110
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 210 times.
210 assert(this->IsInitialized());
111
112 /*
113 * As first step we eliminate the requested directories.
114 */
115
2/2
✓ Branch 1 taken 105 times.
✓ Branch 2 taken 105 times.
210 if (to_delete_ != "") {
116 vector<std::string> to_eliminate_vec = SplitStringMultiChar(
117
1/2
✓ Branch 1 taken 105 times.
✗ Branch 2 not taken.
105 to_delete_, path_delimiter_);
118
119 105 for (vector<string>::iterator s = to_eliminate_vec.begin();
120
2/2
✓ Branch 2 taken 140 times.
✓ Branch 3 taken 105 times.
245 s != to_eliminate_vec.end();
121 140 ++s) {
122 140 std::string parent_path;
123 140 std::string filename;
124
1/2
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
140 SplitPath(*s, &parent_path, &filename);
125
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 140 times.
140 if (parent_path == ".")
126 parent_path = "";
127 const SharedPtr<SyncItem> sync_entry = CreateSyncItem(parent_path,
128
1/2
✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
140 filename, kItemDir);
129
1/2
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
140 mediator_->Remove(sync_entry, fast_delete_);
130 140 }
131 105 }
132
133 // we are simply deleting entity from the repo
134
2/2
✓ Branch 0 taken 105 times.
✓ Branch 1 taken 105 times.
210 if (NULL == src)
135 105 return;
136
137 // Prime the signal so the first Wait() in the loop below can proceed.
138 105 read_archive_signal_->Wakeup();
139
140 105 struct archive_entry *entry = archive_entry_new();
141 while (true) {
142 // Get the lock, wait if lock is not available yet
143 595 read_archive_signal_->Wait();
144
145 595 const int result = archive_read_next_header2(src, entry);
146
147
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 105 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 490 times.
✗ Branch 5 not taken.
595 switch (result) {
148 case ARCHIVE_FATAL: {
149 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
150 archive_error_string(src));
151 break; // Only exit point with error
152 }
153
154 case ARCHIVE_RETRY: {
155 LogCvmfs(kLogUnionFs, kLogStdout,
156 "Error in reading the header, retrying.\n%s\n",
157 archive_error_string(src));
158 continue;
159 break;
160 }
161
162 105 case ARCHIVE_EOF: {
163
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 105 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 105 times.
105 if (create_catalog_on_root_ && (base_directory_ != "/")) {
164 CreateDirectories(base_directory_); // necessary for empty archives
165 const SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
166 new SyncItemDummyCatalog(base_directory_, this));
167 ProcessFile(catalog);
168 to_create_catalog_dirs_.insert(base_directory_);
169 }
170 105 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
171
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 105 times.
105 dir != to_create_catalog_dirs_.end();
172 ++dir) {
173 assert(dirs_.find(*dir) != dirs_.end());
174 const SharedPtr<SyncItem> to_mark = dirs_[*dir];
175 assert(to_mark->IsDirectory());
176 to_mark->SetCatalogMarker();
177 to_mark->MakePlaceholderDirectory();
178 ProcessDirectory(to_mark);
179 }
180 105 return; // Only successful exit point
181 break;
182 }
183
184 case ARCHIVE_WARN: {
185 LogCvmfs(kLogUnionFs, kLogStderr,
186 "Warning in uncompression reading, going on.\n %s",
187 archive_error_string(src));
188 // We actually want this to enter the ARCHIVE_OK case
189 }
190
191 490 case ARCHIVE_OK: {
192 490 ProcessArchiveEntry(entry);
193 490 break;
194 }
195
196 default: {
197 // We should never enter this branch, but to be safe we prefer
198 // to abort in case we hit a case we don't know how to manage.
199 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
200 result, archive_error_string(src));
201 }
202 }
203 490 }
204 }
205
206 490 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
207
2/4
✓ Branch 2 taken 490 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 490 times.
✗ Branch 6 not taken.
490 std::string archive_file_path(archive_entry_pathname(entry));
208
1/2
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
490 archive_file_path = SanitizePath(archive_file_path);
209
210 490 const std::string complete_path = base_directory_ != "/"
211 490 ? MakeCanonicalPath(base_directory_
212
1/4
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
490 + "/"
213
3/8
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 490 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 490 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
1470 + archive_file_path)
214
2/6
✓ Branch 0 taken 490 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 490 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
980 : MakeCanonicalPath(archive_file_path);
215
216 490 std::string parent_path;
217 490 std::string filename;
218
1/2
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
490 SplitPath(complete_path, &parent_path, &filename);
219
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 490 times.
490 if (parent_path == ".")
220 parent_path.clear();
221
222
1/2
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
490 CreateDirectories(parent_path);
223
224 const SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(
225 new SyncItemTar(parent_path, filename, src, entry, read_archive_signal_,
226
3/6
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 490 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 490 times.
✗ Branch 8 not taken.
490 this, uid_, gid_));
227
228
2/4
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 490 times.
490 if (NULL != archive_entry_hardlink(entry)) {
229 const std::string hardlink_name(
230 SanitizePath(archive_entry_hardlink(entry)));
231 const std::string hardlink = base_directory_ != "/"
232 ? base_directory_ + "/" + hardlink_name
233 : hardlink_name;
234
235 // Capture the link's own ownership/permissions so that, if the target is
236 // not part of this archive, an empty file can be materialized in its place
237 // (see PostUpload and tolerate_missing_hardlinks_). Read directly from the
238 // tar header, applying the same uid/gid override as SyncItemTar.
239 const struct stat *link_stat = archive_entry_stat(entry);
240 const uid_t link_uid = (uid_ != -1u) ? uid_ : link_stat->st_uid;
241 const gid_t link_gid = (gid_ != -1u) ? gid_ : link_stat->st_gid;
242 const HardlinkDestination destination(complete_path, link_stat->st_mode,
243 link_uid, link_gid,
244 link_stat->st_mtime);
245 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
246 hardlinks_.find(hardlink)->second.push_back(destination);
247 } else {
248 std::list<HardlinkDestination> to_hardlink;
249 to_hardlink.push_back(destination);
250 hardlinks_[hardlink] = to_hardlink;
251 }
252 if (filename == ".cvmfscatalog") {
253 // the file is created in the PostUpload phase
254 to_create_catalog_dirs_.insert(parent_path);
255 }
256 read_archive_signal_->Wakeup();
257 return;
258 }
259
260
3/4
✓ Branch 2 taken 490 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 140 times.
✓ Branch 5 taken 350 times.
490 if (sync_entry->IsDirectory()) {
261
3/5
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 70 times.
✓ Branch 6 taken 70 times.
140 if (know_directories_.find(complete_path) != know_directories_.end()) {
262
1/2
✓ Branch 2 taken 70 times.
✗ Branch 3 not taken.
70 sync_entry->MakePlaceholderDirectory();
263 }
264
1/2
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
140 ProcessUnmaterializedDirectory(sync_entry);
265
2/4
✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 140 times.
✗ Branch 5 not taken.
140 dirs_[complete_path] = sync_entry;
266
1/2
✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
140 know_directories_.insert(complete_path);
267
268
1/2
✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
140 read_archive_signal_->Wakeup(); // We don't need to read data and we
269 // can read the next header
270
271
3/4
✓ Branch 2 taken 350 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 315 times.
✓ Branch 5 taken 35 times.
350 } else if (sync_entry->IsRegularFile()) {
272 // inside the process pipeline we will wake up the signal
273
1/2
✓ Branch 2 taken 315 times.
✗ Branch 3 not taken.
315 ProcessFile(sync_entry);
274
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 315 times.
315 if (filename == ".cvmfscatalog") {
275 to_create_catalog_dirs_.insert(parent_path);
276 }
277
278
1/8
✓ Branch 2 taken 35 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
35 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo()
279 || sync_entry->IsSocket() || sync_entry->IsCharacterDevice()
280
2/8
✗ Branch 0 not taken.
✓ Branch 1 taken 35 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 35 times.
✗ Branch 9 not taken.
35 || sync_entry->IsBlockDevice()) {
281 // we avoid adding an entity named like a catalog marker if it is not a
282 // regular file
283
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 if (filename != ".cvmfscatalog") {
284
1/2
✓ Branch 2 taken 35 times.
✗ Branch 3 not taken.
35 ProcessFile(sync_entry);
285 } else {
286 PANIC(kLogStderr,
287 "Found entity called as a catalog marker '%s' that however is "
288 "not a regular file, abort",
289 complete_path.c_str());
290 }
291
292 // here we don't need to read data from the tar file so we can wake up
293 // immediately the signal
294
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 read_archive_signal_->Wakeup();
295
296 } else {
297 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
298 filename.c_str());
299 // if for any reason this code path changes and we don't abort anymore,
300 // remember to wake up the signal, otherwise we will be stuck in a deadlock
301 //
302 // read_archive_signal_->Wakeup();
303 }
304
5/10
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 490 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 490 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 490 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 490 times.
✗ Branch 14 not taken.
490 }
305
306 490 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
307
1/2
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
490 if (path.length() >= 2) {
308
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 490 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 490 times.
490 if (path[0] == '.' && path[1] == '/') {
309 return path.substr(2);
310 }
311 }
312
1/2
✓ Branch 1 taken 490 times.
✗ Branch 2 not taken.
490 if (path.length() >= 1) {
313
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 490 times.
490 if (path[0] == '/') {
314 return path.substr(1);
315 }
316 }
317 490 return path;
318 }
319
320 void SyncUnionTarball::PostUpload() {
321 // When tolerating missing hardlink targets we ask Clone not to abort on a
322 // missing source; it returns false instead and we materialize an empty file.
323 const bool fail_if_target_missing = !tolerate_missing_hardlinks_;
324 std::map<const std::string, std::list<HardlinkDestination> >::iterator
325 hardlink;
326 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
327 ++hardlink) {
328 const std::string &target = hardlink->first;
329 std::list<HardlinkDestination>::iterator entry;
330 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
331 ++entry) {
332 const bool cloned = mediator_->Clone(entry->path, target,
333 fail_if_target_missing);
334 if (cloned)
335 continue;
336
337 // The hardlink target is not part of this archive (e.g. a cross-layer
338 // hardlink in an OCI image layer). Materialize an empty regular file at
339 // the link's path, preserving its ownership and permissions, and push it
340 // through the normal ingestion pipeline so the empty object is stored.
341 std::string parent_path;
342 std::string filename;
343 SplitPath(entry->path, &parent_path, &filename);
344 if (parent_path == ".")
345 parent_path.clear();
346 const SharedPtr<SyncItem> empty_file = SharedPtr<SyncItem>(
347 new SyncItemDummyFile(parent_path, filename, this, entry->mode,
348 entry->uid, entry->gid, entry->mtime));
349 ProcessFile(empty_file);
350 LogCvmfs(kLogUnionFs, kLogStderr | kLogSyslogWarn,
351 "hardlink target '%s' is not present in the tarball; "
352 "materialized an empty file at '%s'",
353 target.c_str(), entry->path.c_str());
354 }
355 }
356 }
357
358 std::string SyncUnionTarball::UnwindWhiteoutFilename(
359 SharedPtr<SyncItem> entry) const {
360 return entry->filename();
361 }
362
363 140 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
364 140 return false;
365 }
366
367 140 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
368 140 return false;
369 }
370
371 /* Tar files are not necessarily traversed in order from root to leaf.
372 * So it may happen that we are expanding the file `/a/b/c.txt` without
373 * having created the directory `/a/b/` yet.
374 * In order to overcome this limitation the following function creates dummy
375 * directories that can be used as placeholders and that will be overwritten
376 * as soon as the real directory is found in the tarball
377 */
378 665 void SyncUnionTarball::CreateDirectories(const std::string &target) {
379
3/5
✓ Branch 2 taken 665 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 385 times.
✓ Branch 6 taken 280 times.
665 if (know_directories_.find(target) != know_directories_.end())
380 490 return;
381
2/2
✓ Branch 1 taken 105 times.
✓ Branch 2 taken 175 times.
280 if (target == ".")
382 105 return;
383
384
1/2
✓ Branch 2 taken 175 times.
✗ Branch 3 not taken.
175 std::string dirname = "";
385
1/2
✓ Branch 2 taken 175 times.
✗ Branch 3 not taken.
175 std::string filename = "";
386
1/2
✓ Branch 1 taken 175 times.
✗ Branch 2 not taken.
175 SplitPath(target, &dirname, &filename);
387
1/2
✓ Branch 1 taken 175 times.
✗ Branch 2 not taken.
175 CreateDirectories(dirname);
388
389
2/2
✓ Branch 1 taken 105 times.
✓ Branch 2 taken 70 times.
175 if (dirname == ".")
390
1/2
✓ Branch 1 taken 105 times.
✗ Branch 2 not taken.
105 dirname = "";
391 const SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
392
3/6
✓ Branch 1 taken 175 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 175 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 175 times.
✗ Branch 8 not taken.
175 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
393
394
1/2
✓ Branch 2 taken 175 times.
✗ Branch 3 not taken.
175 ProcessUnmaterializedDirectory(dummy);
395
2/4
✓ Branch 1 taken 175 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 175 times.
✗ Branch 5 not taken.
175 dirs_[target] = dummy;
396
1/2
✓ Branch 1 taken 175 times.
✗ Branch 2 not taken.
175 know_directories_.insert(target);
397 175 }
398
399 } // namespace publish
400