GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2026-03-08 02:37:57
Exec Total Coverage
Lines: 114 168 67.9%
Branches: 105 304 34.5%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "sync_union_tarball.h"
8
9 #include <pthread.h>
10 #include <unistd.h>
11
12 #include <cassert>
13 #include <cstdio>
14 #include <list>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "duplex_libarchive.h"
20 #include "sync_item.h"
21 #include "sync_item_dummy.h"
22 #include "sync_item_tar.h"
23 #include "sync_mediator.h"
24 #include "sync_union.h"
25 #include "util/concurrency.h"
26 #include "util/exception.h"
27 #include "util/fs_traversal.h"
28 #include "util/posix.h"
29 #include "util/smalloc.h"
30
31 namespace publish {
32
33 42 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
34 const std::string &rdonly_path,
35 const std::string &tarball_path,
36 const std::string &base_directory,
37 const uid_t uid,
38 const gid_t gid,
39 const std::string &to_delete,
40 const bool create_catalog_on_root,
41 const bool fast_delete,
42 42 const std::string &path_delimiter)
43 : SyncUnion(mediator, rdonly_path, "", "")
44 42 , src(NULL)
45 42 , tarball_path_(tarball_path)
46
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 , base_directory_(base_directory)
47 42 , uid_(uid)
48 42 , gid_(gid)
49
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 , to_delete_(to_delete)
50 42 , create_catalog_on_root_(create_catalog_on_root)
51 42 , fast_delete_(fast_delete)
52
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 , path_delimiter_(path_delimiter)
53
6/12
✓ Branch 2 taken 42 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 42 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 42 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 42 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 42 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 42 times.
✗ Branch 27 not taken.
84 , read_archive_signal_(new Signal) { }
54
55
1/2
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
84 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
56
57 42 bool SyncUnionTarball::Initialize() {
58 bool result;
59
60 // No tarball was given: we are only deleting entities from the repo
61
2/2
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 21 times.
42 if (tarball_path_ == "") {
62
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 assert(NULL == src);
63 21 return SyncUnion::Initialize();
64 }
65
66 21 src = archive_read_new();
67
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 21 times.
21 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
68
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 21 times.
21 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
69
70
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 21 times.
21 if (tarball_path_ == "-") {
71 result = archive_read_open_filename(src, NULL, kBlockSize);
72 } else {
73
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 const std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
74
1/2
✓ Branch 2 taken 21 times.
✗ Branch 3 not taken.
21 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
75 kBlockSize);
76 21 }
77
78
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (result != ARCHIVE_OK) {
79 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
80 archive_error_string(src));
81 return false;
82 }
83
84 21 return SyncUnion::Initialize();
85 }
86
87 /*
88 * Libarchive is not thread aware, so before reading/"opening" the next
89 * header in the archive we need to make sure that the content of the
90 * present header has been consumed completely.
91 *
92 * The thread that reads/"opens" a header is different from the one that
93 * consumes it, so we opted for a Signal backed by a condition variable.
94 * We wait for the signal just before reading the header.
95 * Then, when we are done with the header, the Signal is fired.
96 * The Signal can be fired inside the main loop if we don't need to read
97 * data, or when the IngestionSource gets closed, which means that we are
98 * not reading data from there anymore.
99 * This whole process is not necessary for directories since we don't
100 * actually need to read data from them.
101 *
102 * It may be necessary to add a catalog at the root of the archive.
103 * A possible way to do it is by creating a virtual `.cvmfscatalog` file and
104 * pushing it into the usual pipeline.
105 * This operation must be done only once; the loop below does it when the
106 * end of the archive is reached (the ARCHIVE_EOF case), which also covers
107 * empty archives.
108 */
109 42 void SyncUnionTarball::Traverse() {
110
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 42 times.
42 assert(this->IsInitialized());
111
112 /*
113 * As a first step, we remove the directories requested for deletion.
114 */
115
2/2
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 21 times.
42 if (to_delete_ != "") {
116 vector<std::string> to_eliminate_vec = SplitStringMultiChar(
117
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 to_delete_, path_delimiter_);
118
119 21 for (vector<string>::iterator s = to_eliminate_vec.begin();
120
2/2
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 21 times.
49 s != to_eliminate_vec.end();
121 28 ++s) {
122 28 std::string parent_path;
123 28 std::string filename;
124
1/2
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
28 SplitPath(*s, &parent_path, &filename);
125
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
28 if (parent_path == ".")
126 parent_path = "";
127 const SharedPtr<SyncItem> sync_entry = CreateSyncItem(parent_path,
128
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 filename, kItemDir);
129
1/2
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
28 mediator_->Remove(sync_entry, fast_delete_);
130 28 }
131 21 }
132
133 // We are only deleting entities from the repo: there is no archive to read
134
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 if (NULL == src)
135 21 return;
136
137 // Prime the signal so the first Wait() in the loop below can proceed.
138 21 read_archive_signal_->Wakeup();
139
140 21 struct archive_entry *entry = archive_entry_new();
141 while (true) {
142 // Block until the signal is fired, i.e. the previous header is done with
143 119 read_archive_signal_->Wait();
144
145 119 const int result = archive_read_next_header2(src, entry);
146
147
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 21 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 98 times.
✗ Branch 5 not taken.
119 switch (result) {
148 case ARCHIVE_FATAL: {
149 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
150 archive_error_string(src));
151 break; // Only exit point with error
152 }
153
154 case ARCHIVE_RETRY: {
155 LogCvmfs(kLogUnionFs, kLogStdout,
156 "Error in reading the header, retrying.\n%s\n",
157 archive_error_string(src));
158 continue;
159 break;
160 }
161
162 21 case ARCHIVE_EOF: {
163
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 21 times.
21 if (create_catalog_on_root_ && (base_directory_ != "/")) {
164 CreateDirectories(base_directory_); // necessary for empty archives
165 const SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
166 new SyncItemDummyCatalog(base_directory_, this));
167 ProcessFile(catalog);
168 to_create_catalog_dirs_.insert(base_directory_);
169 }
170 21 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
171
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 21 times.
21 dir != to_create_catalog_dirs_.end();
172 ++dir) {
173 assert(dirs_.find(*dir) != dirs_.end());
174 const SharedPtr<SyncItem> to_mark = dirs_[*dir];
175 assert(to_mark->IsDirectory());
176 to_mark->SetCatalogMarker();
177 to_mark->MakePlaceholderDirectory();
178 ProcessDirectory(to_mark);
179 }
180 21 return; // Only successful exit point
181 break;
182 }
183
184 case ARCHIVE_WARN: {
185 LogCvmfs(kLogUnionFs, kLogStderr,
186 "Warning in uncompression reading, going on.\n %s",
187 archive_error_string(src));
188 // Intentional fall-through: a warning is handled like ARCHIVE_OK
189 }
190
191 98 case ARCHIVE_OK: {
192 98 ProcessArchiveEntry(entry);
193 98 break;
194 }
195
196 default: {
197 // We should never enter this branch, but to be safe we prefer to abort
198 // in case we hit a state we don't know how to manage.
199 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
200 result, archive_error_string(src));
201 }
202 }
203 98 }
204 }
205
206 98 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
207
2/4
✓ Branch 2 taken 98 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 98 times.
✗ Branch 6 not taken.
98 std::string archive_file_path(archive_entry_pathname(entry));
208
1/2
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
98 archive_file_path = SanitizePath(archive_file_path);
209
210 98 const std::string complete_path = base_directory_ != "/"
211 98 ? MakeCanonicalPath(base_directory_
212
1/4
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
98 + "/"
213
3/8
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 98 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 98 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
294 + archive_file_path)
214
2/6
✓ Branch 0 taken 98 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 98 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
196 : MakeCanonicalPath(archive_file_path);
215
216 98 std::string parent_path;
217 98 std::string filename;
218
1/2
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
98 SplitPath(complete_path, &parent_path, &filename);
219
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 98 times.
98 if (parent_path == ".")
220 parent_path.clear();
221
222
1/2
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
98 CreateDirectories(parent_path);
223
224 const SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(
225 new SyncItemTar(parent_path, filename, src, entry, read_archive_signal_,
226
3/6
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 98 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 98 times.
✗ Branch 8 not taken.
98 this, uid_, gid_));
227
228
2/4
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 98 times.
98 if (NULL != archive_entry_hardlink(entry)) {
229 const std::string hardlink_name(
230 SanitizePath(archive_entry_hardlink(entry)));
231 const std::string hardlink = base_directory_ != "/"
232 ? base_directory_ + "/" + hardlink_name
233 : hardlink_name;
234
235 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
236 hardlinks_.find(hardlink)->second.push_back(complete_path);
237 } else {
238 std::list<std::string> to_hardlink;
239 to_hardlink.push_back(complete_path);
240 hardlinks_[hardlink] = to_hardlink;
241 }
242 if (filename == ".cvmfscatalog") {
243 // the file is created in the PostUpload phase
244 to_create_catalog_dirs_.insert(parent_path);
245 }
246 read_archive_signal_->Wakeup();
247 return;
248 }
249
250
3/4
✓ Branch 2 taken 98 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 28 times.
✓ Branch 5 taken 70 times.
98 if (sync_entry->IsDirectory()) {
251
3/5
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 14 times.
✓ Branch 6 taken 14 times.
28 if (know_directories_.find(complete_path) != know_directories_.end()) {
252
1/2
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
14 sync_entry->MakePlaceholderDirectory();
253 }
254
1/2
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
28 ProcessUnmaterializedDirectory(sync_entry);
255
2/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 28 times.
✗ Branch 5 not taken.
28 dirs_[complete_path] = sync_entry;
256
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 know_directories_.insert(complete_path);
257
258
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 read_archive_signal_->Wakeup(); // We don't need to read data and we
259 // can read the next header
260
261
3/4
✓ Branch 2 taken 70 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 63 times.
✓ Branch 5 taken 7 times.
70 } else if (sync_entry->IsRegularFile()) {
262 // inside the process pipeline we will wake up the signal
263
1/2
✓ Branch 2 taken 63 times.
✗ Branch 3 not taken.
63 ProcessFile(sync_entry);
264
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 63 times.
63 if (filename == ".cvmfscatalog") {
265 to_create_catalog_dirs_.insert(parent_path);
266 }
267
268
1/8
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
7 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo()
269 || sync_entry->IsSocket() || sync_entry->IsCharacterDevice()
270
2/8
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 7 times.
✗ Branch 9 not taken.
7 || sync_entry->IsBlockDevice()) {
271 // we refuse to add an entity named like a catalog marker if it is not a
272 // regular file
273
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 if (filename != ".cvmfscatalog") {
274
1/2
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
7 ProcessFile(sync_entry);
275 } else {
276 PANIC(kLogStderr,
277 "Found entity called as a catalog marker '%s' that however is "
278 "not a regular file, abort",
279 complete_path.c_str());
280 }
281
282 // here we don't need to read data from the tar file, so we can wake up
283 // the signal immediately
284
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 read_archive_signal_->Wakeup();
285
286 } else {
287 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
288 filename.c_str());
289 // if for any reason this code path changes and we don't abort anymore,
290 // remember to wake up the signal, otherwise we will be stuck in a deadlock
291 //
292 // read_archive_signal_->Wakeup();
293 }
294
5/10
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 98 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 98 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 98 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 98 times.
✗ Branch 14 not taken.
98 }
295
296 98 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
297
1/2
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
98 if (path.length() >= 2) {
298
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 98 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 98 times.
98 if (path[0] == '.' && path[1] == '/') {
299 return path.substr(2);
300 }
301 }
302
1/2
✓ Branch 1 taken 98 times.
✗ Branch 2 not taken.
98 if (path.length() >= 1) {
303
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 98 times.
98 if (path[0] == '/') {
304 return path.substr(1);
305 }
306 }
307 98 return path;
308 }
309
310 void SyncUnionTarball::PostUpload() {
311 std::map<const std::string, std::list<std::string> >::iterator hardlink;
312 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
313 ++hardlink) {
314 std::list<std::string>::iterator entry;
315 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
316 ++entry) {
317 mediator_->Clone(*entry, hardlink->first);
318 }
319 }
320 }
321
322 std::string SyncUnionTarball::UnwindWhiteoutFilename(
323 SharedPtr<SyncItem> entry) const {
324 return entry->filename();
325 }
326
327 28 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
328 28 return false;
329 }
330
331 28 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
332 28 return false;
333 }
334
335 /* Tar files are not necessarily traversed in order from root to leaves.
336 * So it may happen that we are expanding the file `/a/b/c.txt` without
337 * having created the directory `/a/b/` yet.
338 * To overcome this limitation, the following function creates dummy
339 * directories that are used as placeholders and that will be overwritten
340 * as soon as the real directory is found in the tarball.
341 */
342 133 void SyncUnionTarball::CreateDirectories(const std::string &target) {
343
3/5
✓ Branch 2 taken 133 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 77 times.
✓ Branch 6 taken 56 times.
133 if (know_directories_.find(target) != know_directories_.end())
344 98 return;
345
2/2
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 35 times.
56 if (target == ".")
346 21 return;
347
348
1/2
✓ Branch 2 taken 35 times.
✗ Branch 3 not taken.
35 std::string dirname = "";
349
1/2
✓ Branch 2 taken 35 times.
✗ Branch 3 not taken.
35 std::string filename = "";
350
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 SplitPath(target, &dirname, &filename);
351
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 CreateDirectories(dirname);
352
353
2/2
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 14 times.
35 if (dirname == ".")
354
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 dirname = "";
355 const SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
356
3/6
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 35 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 35 times.
✗ Branch 8 not taken.
35 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
357
358
1/2
✓ Branch 2 taken 35 times.
✗ Branch 3 not taken.
35 ProcessUnmaterializedDirectory(dummy);
359
2/4
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 35 times.
✗ Branch 5 not taken.
35 dirs_[target] = dummy;
360
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 know_directories_.insert(target);
361 35 }
362
363 } // namespace publish
364