GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2026-05-03 02:36:16
Exec Total Coverage
Lines: 114 168 67.9%
Branches: 105 304 34.5%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #include "sync_union_tarball.h"
6
7 #include <pthread.h>
8 #include <unistd.h>
9
10 #include <cassert>
11 #include <cstdio>
12 #include <list>
13 #include <set>
14 #include <string>
15 #include <vector>
16
17 #include "duplex_libarchive.h"
18 #include "sync_item.h"
19 #include "sync_item_dummy.h"
20 #include "sync_item_tar.h"
21 #include "sync_mediator.h"
22 #include "sync_union.h"
23 #include "util/concurrency.h"
24 #include "util/exception.h"
25 #include "util/fs_traversal.h"
26 #include "util/posix.h"
27 #include "util/smalloc.h"
28
29 namespace publish {
30
31 72 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
32 const std::string &rdonly_path,
33 const std::string &tarball_path,
34 const std::string &base_directory,
35 const uid_t uid,
36 const gid_t gid,
37 const std::string &to_delete,
38 const bool create_catalog_on_root,
39 const bool fast_delete,
40 72 const std::string &path_delimiter)
41 : SyncUnion(mediator, rdonly_path, "", "")
42 72 , src(NULL)
43 72 , tarball_path_(tarball_path)
44
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 , base_directory_(base_directory)
45 72 , uid_(uid)
46 72 , gid_(gid)
47
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 , to_delete_(to_delete)
48 72 , create_catalog_on_root_(create_catalog_on_root)
49 72 , fast_delete_(fast_delete)
50
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 , path_delimiter_(path_delimiter)
51
6/12
✓ Branch 2 taken 72 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 72 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 72 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 72 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 72 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 72 times.
✗ Branch 27 not taken.
144 , read_archive_signal_(new Signal) { }
52
53
1/2
✓ Branch 0 taken 72 times.
✗ Branch 1 not taken.
144 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
54
55 72 bool SyncUnionTarball::Initialize() {
56 bool result;
57
58 // We are only deleting entities from the repo
59
2/2
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 36 times.
72 if (tarball_path_ == "") {
60
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 assert(NULL == src);
61 36 return SyncUnion::Initialize();
62 }
63
64 36 src = archive_read_new();
65
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 36 times.
36 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
66
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 36 times.
36 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
67
68
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 36 times.
36 if (tarball_path_ == "-") {
69 result = archive_read_open_filename(src, NULL, kBlockSize);
70 } else {
71
1/2
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
36 const std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
72
1/2
✓ Branch 2 taken 36 times.
✗ Branch 3 not taken.
36 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
73 kBlockSize);
74 36 }
75
76
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (result != ARCHIVE_OK) {
77 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
78 archive_error_string(src));
79 return false;
80 }
81
82 36 return SyncUnion::Initialize();
83 }
84
85 /*
86 * Libarchive is not thread aware, so we need to make sure that before
87 * reading/"opening" the next header in the archive, the content of the
88 *
89 * present header has been consumed completely.
90 * The thread that reads/"opens" the header is different from the one that
91 * consumes it, so we opted for a Signal backed by a condition variable.
92 * We wait for the signal just before reading the header.
93 * Then, when we are done with the header, the Signal is fired.
94 * The Signal can be fired inside the main loop if we don't need to read
95 * data, or when the IngestionSource gets closed, which means that we are
96 * not reading data from there anymore.
97 * This whole process is not necessary for directories since we don't
98 * actually need to read data from them.
99 *
100 * It may be necessary to add a catalog as a root of the archive.
101 * A possible way to do it is by creating a virtual `.cvmfscatalog` file and
102 * pushing it into the usual pipeline.
103 * This operation must be done only once: it is performed when the end of the
104 * archive is reached (ARCHIVE_EOF), and only if `create_catalog_on_root_` is
105 * set and the base directory is not "/".
106 */
107 72 void SyncUnionTarball::Traverse() {
108
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 72 times.
72 assert(this->IsInitialized());
109
110 /*
111 * As a first step we eliminate the requested directories.
112 */
113
2/2
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 36 times.
72 if (to_delete_ != "") {
114 vector<std::string> to_eliminate_vec = SplitStringMultiChar(
115
1/2
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
36 to_delete_, path_delimiter_);
116
117 36 for (vector<string>::iterator s = to_eliminate_vec.begin();
118
2/2
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 36 times.
84 s != to_eliminate_vec.end();
119 48 ++s) {
120 48 std::string parent_path;
121 48 std::string filename;
122
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 SplitPath(*s, &parent_path, &filename);
123
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 if (parent_path == ".")
124 parent_path = "";
125 const SharedPtr<SyncItem> sync_entry = CreateSyncItem(parent_path,
126
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 filename, kItemDir);
127
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 mediator_->Remove(sync_entry, fast_delete_);
128 48 }
129 36 }
130
131 // we are simply deleting entities from the repo
132
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
72 if (NULL == src)
133 36 return;
134
135 // Prime the signal so the first Wait() in the loop below can proceed.
136 36 read_archive_signal_->Wakeup();
137
138 36 struct archive_entry *entry = archive_entry_new();
139 while (true) {
140 // Get the lock, wait if lock is not available yet
141 204 read_archive_signal_->Wait();
142
143 204 const int result = archive_read_next_header2(src, entry);
144
145
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 36 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 168 times.
✗ Branch 5 not taken.
204 switch (result) {
146 case ARCHIVE_FATAL: {
147 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
148 archive_error_string(src));
149 break; // Only exit point with error
150 }
151
152 case ARCHIVE_RETRY: {
153 LogCvmfs(kLogUnionFs, kLogStdout,
154 "Error in reading the header, retrying.\n%s\n",
155 archive_error_string(src));
156 continue;
157 break;
158 }
159
160 36 case ARCHIVE_EOF: {
161
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 36 times.
36 if (create_catalog_on_root_ && (base_directory_ != "/")) {
162 CreateDirectories(base_directory_); // necessary for empty archives
163 const SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
164 new SyncItemDummyCatalog(base_directory_, this));
165 ProcessFile(catalog);
166 to_create_catalog_dirs_.insert(base_directory_);
167 }
168 36 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
169
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 36 times.
36 dir != to_create_catalog_dirs_.end();
170 ++dir) {
171 assert(dirs_.find(*dir) != dirs_.end());
172 const SharedPtr<SyncItem> to_mark = dirs_[*dir];
173 assert(to_mark->IsDirectory());
174 to_mark->SetCatalogMarker();
175 to_mark->MakePlaceholderDirectory();
176 ProcessDirectory(to_mark);
177 }
178 36 return; // Only successful exit point
179 break;
180 }
181
182 case ARCHIVE_WARN: {
183 LogCvmfs(kLogUnionFs, kLogStderr,
184 "Warning in uncompression reading, going on.\n %s",
185 archive_error_string(src));
186 // We actually want this to enter the ARCHIVE_OK case
187 }
188
189 168 case ARCHIVE_OK: {
190 168 ProcessArchiveEntry(entry);
191 168 break;
192 }
193
194 default: {
195 // We should never enter this branch, but to be on the safe side we prefer
196 // to abort in case we hit a case we don't know how to manage.
197 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
198 result, archive_error_string(src));
199 }
200 }
201 168 }
202 }
203
204 168 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
205
2/4
✓ Branch 2 taken 168 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 168 times.
✗ Branch 6 not taken.
168 std::string archive_file_path(archive_entry_pathname(entry));
206
1/2
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
168 archive_file_path = SanitizePath(archive_file_path);
207
208 168 const std::string complete_path = base_directory_ != "/"
209 168 ? MakeCanonicalPath(base_directory_
210
1/4
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
168 + "/"
211
3/8
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 168 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 168 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
504 + archive_file_path)
212
2/6
✓ Branch 0 taken 168 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 168 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
336 : MakeCanonicalPath(archive_file_path);
213
214 168 std::string parent_path;
215 168 std::string filename;
216
1/2
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
168 SplitPath(complete_path, &parent_path, &filename);
217
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
168 if (parent_path == ".")
218 parent_path.clear();
219
220
1/2
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
168 CreateDirectories(parent_path);
221
222 const SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(
223 new SyncItemTar(parent_path, filename, src, entry, read_archive_signal_,
224
3/6
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 168 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 168 times.
✗ Branch 8 not taken.
168 this, uid_, gid_));
225
226
2/4
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 168 times.
168 if (NULL != archive_entry_hardlink(entry)) {
227 const std::string hardlink_name(
228 SanitizePath(archive_entry_hardlink(entry)));
229 const std::string hardlink = base_directory_ != "/"
230 ? base_directory_ + "/" + hardlink_name
231 : hardlink_name;
232
233 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
234 hardlinks_.find(hardlink)->second.push_back(complete_path);
235 } else {
236 std::list<std::string> to_hardlink;
237 to_hardlink.push_back(complete_path);
238 hardlinks_[hardlink] = to_hardlink;
239 }
240 if (filename == ".cvmfscatalog") {
241 // the file is created in the PostUpload phase
242 to_create_catalog_dirs_.insert(parent_path);
243 }
244 read_archive_signal_->Wakeup();
245 return;
246 }
247
248
3/4
✓ Branch 2 taken 168 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
✓ Branch 5 taken 120 times.
168 if (sync_entry->IsDirectory()) {
249
3/5
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 24 times.
✓ Branch 6 taken 24 times.
48 if (know_directories_.find(complete_path) != know_directories_.end()) {
250
1/2
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
24 sync_entry->MakePlaceholderDirectory();
251 }
252
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 ProcessUnmaterializedDirectory(sync_entry);
253
2/4
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
48 dirs_[complete_path] = sync_entry;
254
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 know_directories_.insert(complete_path);
255
256
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 read_archive_signal_->Wakeup(); // We don't need to read data and we
257 // can read the next header
258
259
3/4
✓ Branch 2 taken 120 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 108 times.
✓ Branch 5 taken 12 times.
120 } else if (sync_entry->IsRegularFile()) {
260 // inside the process pipeline we will wake up the signal
261
1/2
✓ Branch 2 taken 108 times.
✗ Branch 3 not taken.
108 ProcessFile(sync_entry);
262
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 108 times.
108 if (filename == ".cvmfscatalog") {
263 to_create_catalog_dirs_.insert(parent_path);
264 }
265
266
1/8
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
12 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo()
267 || sync_entry->IsSocket() || sync_entry->IsCharacterDevice()
268
2/8
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 12 times.
✗ Branch 9 not taken.
12 || sync_entry->IsBlockDevice()) {
269 // we avoid adding an entity named like a catalog marker if it is not a
270 // regular file
271
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 if (filename != ".cvmfscatalog") {
272
1/2
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
12 ProcessFile(sync_entry);
273 } else {
274 PANIC(kLogStderr,
275 "Found entity called as a catalog marker '%s' that however is "
276 "not a regular file, abort",
277 complete_path.c_str());
278 }
279
280 // here we don't need to read data from the tar file so we can wake up
281 // the signal immediately
282
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 read_archive_signal_->Wakeup();
283
284 } else {
285 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
286 filename.c_str());
287 // if for any reason this code path changes and we don't abort anymore,
288 // remember to wake up the signal, otherwise we will be stuck in a deadlock
289 //
290 // read_archive_signal_->Wakeup();
291 }
292
5/10
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 168 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 168 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 168 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 168 times.
✗ Branch 14 not taken.
168 }
293
294 168 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
295
1/2
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
168 if (path.length() >= 2) {
296
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 168 times.
168 if (path[0] == '.' && path[1] == '/') {
297 return path.substr(2);
298 }
299 }
300
1/2
✓ Branch 1 taken 168 times.
✗ Branch 2 not taken.
168 if (path.length() >= 1) {
301
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
168 if (path[0] == '/') {
302 return path.substr(1);
303 }
304 }
305 168 return path;
306 }
307
308 void SyncUnionTarball::PostUpload() {
309 std::map<const std::string, std::list<std::string> >::iterator hardlink;
310 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
311 ++hardlink) {
312 std::list<std::string>::iterator entry;
313 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
314 ++entry) {
315 mediator_->Clone(*entry, hardlink->first);
316 }
317 }
318 }
319
320 std::string SyncUnionTarball::UnwindWhiteoutFilename(
321 SharedPtr<SyncItem> entry) const {
322 return entry->filename();
323 }
324
325 48 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
326 48 return false;
327 }
328
329 48 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
330 48 return false;
331 }
332
333 /* Tar files are not necessarily traversed in order from root to leaf.
334 * So it may happen that we are expanding the file `/a/b/c.txt` without
335 * having yet created the directory `/a/b/`.
336 * In order to overcome this limitation the following function creates dummy
337 * directories that can be used as placeholders and that will be overwritten
338 * as soon as the real directory is found in the tarball.
339 */
340 228 void SyncUnionTarball::CreateDirectories(const std::string &target) {
341
3/5
✓ Branch 2 taken 228 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 132 times.
✓ Branch 6 taken 96 times.
228 if (know_directories_.find(target) != know_directories_.end())
342 168 return;
343
2/2
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 60 times.
96 if (target == ".")
344 36 return;
345
346
1/2
✓ Branch 2 taken 60 times.
✗ Branch 3 not taken.
60 std::string dirname = "";
347
1/2
✓ Branch 2 taken 60 times.
✗ Branch 3 not taken.
60 std::string filename = "";
348
1/2
✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
60 SplitPath(target, &dirname, &filename);
349
1/2
✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
60 CreateDirectories(dirname);
350
351
2/2
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 24 times.
60 if (dirname == ".")
352
1/2
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
36 dirname = "";
353 const SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
354
3/6
✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 60 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 60 times.
✗ Branch 8 not taken.
60 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
355
356
1/2
✓ Branch 2 taken 60 times.
✗ Branch 3 not taken.
60 ProcessUnmaterializedDirectory(dummy);
357
2/4
✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 60 times.
✗ Branch 5 not taken.
60 dirs_[target] = dummy;
358
1/2
✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
60 know_directories_.insert(target);
359 60 }
360
361 } // namespace publish
362