GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2025-06-29 02:35:41
Exec Total Coverage
Lines: 94 165 57.0%
Branches: 94 304 30.9%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "sync_union_tarball.h"
8
9 #include <pthread.h>
10 #include <unistd.h>
11
12 #include <cassert>
13 #include <cstdio>
14 #include <list>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "duplex_libarchive.h"
20 #include "sync_item.h"
21 #include "sync_item_dummy.h"
22 #include "sync_item_tar.h"
23 #include "sync_mediator.h"
24 #include "sync_union.h"
25 #include "util/concurrency.h"
26 #include "util/exception.h"
27 #include "util/fs_traversal.h"
28 #include "util/posix.h"
29 #include "util/smalloc.h"
30
31 namespace publish {
32
33 48 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
34 const std::string &rdonly_path,
35 const std::string &tarball_path,
36 const std::string &base_directory,
37 const uid_t uid,
38 const gid_t gid,
39 const std::string &to_delete,
40 const bool create_catalog_on_root,
41 48 const std::string &path_delimiter)
42 : SyncUnion(mediator, rdonly_path, "", "")
43 48 , src(NULL)
44 48 , tarball_path_(tarball_path)
45
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 , base_directory_(base_directory)
46 48 , uid_(uid)
47 48 , gid_(gid)
48
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 , to_delete_(to_delete)
49 48 , create_catalog_on_root_(create_catalog_on_root)
50
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 , path_delimiter_(path_delimiter)
51
6/12
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 48 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 48 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 48 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 48 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 48 times.
✗ Branch 27 not taken.
96 , read_archive_signal_(new Signal) { }
52
53
1/2
✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
96 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
54
55 48 bool SyncUnionTarball::Initialize() {
56 bool result;
57
58 // We are just deleting entities from the repo
59
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 if (tarball_path_ == "") {
60 assert(NULL == src);
61 return SyncUnion::Initialize();
62 }
63
64 48 src = archive_read_new();
65
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
66
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
67
68
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 if (tarball_path_ == "-") {
69 result = archive_read_open_filename(src, NULL, kBlockSize);
70 } else {
71
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 const std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
72
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
73 kBlockSize);
74 48 }
75
76
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
48 if (result != ARCHIVE_OK) {
77 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
78 archive_error_string(src));
79 return false;
80 }
81
82 48 return SyncUnion::Initialize();
83 }
84
85 /*
86 * Libarchive is not thread aware, so we need to make sure that, before
87 * we read/"open" the next header in the archive, the content of the
88 * present header has been consumed completely.
89 *
90 * The thread that reads/"opens" the header is different from the one that
91 * consumes it, so we opted for a Signal that is backed by a condition variable.
92 * We wait for the signal just before reading the header.
93 * Then, when we are done with the header, the Signal is fired.
94 * The Signal can be fired inside the main loop if we don't need to read
95 * data, or when the IngestionSource gets closed, which means that we are
96 * no longer reading data from there.
97 * This whole process is not necessary for directories since we don't
98 * actually need to read data from them.
99 *
100 * It may be necessary to add a catalog at the root of the archive.
101 * A possible way to do it is by creating a virtual `.cvmfscatalog` file and
102 * pushing it into the usual pipeline.
103 * This operation must be done only once; in Traverse() it happens when the
104 * end of the archive is reached (the ARCHIVE_EOF case), provided that
105 * `create_catalog_on_root_` is set and the base directory is not "/".
106 */
107 48 void SyncUnionTarball::Traverse() {
108 48 read_archive_signal_->Wakeup();
109
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 assert(this->IsInitialized());
110
111 /*
112 * As first step we eliminate the requested directories.
113 */
114
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
48 if (to_delete_ != "") {
115 vector<std::string> to_eliminate_vec = SplitStringMultiChar(
116 to_delete_, path_delimiter_);
117
118 for (vector<string>::iterator s = to_eliminate_vec.begin();
119 s != to_eliminate_vec.end();
120 ++s) {
121 std::string parent_path;
122 std::string filename;
123 SplitPath(*s, &parent_path, &filename);
124 if (parent_path == ".")
125 parent_path = "";
126 const SharedPtr<SyncItem> sync_entry = CreateSyncItem(parent_path,
127 filename, kItemDir);
128 mediator_->Remove(sync_entry);
129 }
130 }
131
132 // we are simply deleting entities from the repo
133
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
48 if (NULL == src)
134 return;
135
136 48 struct archive_entry *entry = archive_entry_new();
137 while (true) {
138 // Get the lock, wait if lock is not available yet
139 272 read_archive_signal_->Wait();
140
141 272 const int result = archive_read_next_header2(src, entry);
142
143
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
✗ Branch 5 not taken.
272 switch (result) {
144 case ARCHIVE_FATAL: {
145 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
146 archive_error_string(src));
147 break; // Only exit point with error
148 }
149
150 case ARCHIVE_RETRY: {
151 LogCvmfs(kLogUnionFs, kLogStdout,
152 "Error in reading the header, retrying.\n%s\n",
153 archive_error_string(src));
154 continue;
155 break;
156 }
157
158 48 case ARCHIVE_EOF: {
159
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 48 times.
48 if (create_catalog_on_root_ && (base_directory_ != "/")) {
160 CreateDirectories(base_directory_); // necessary for empty archives
161 const SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
162 new SyncItemDummyCatalog(base_directory_, this));
163 ProcessFile(catalog);
164 to_create_catalog_dirs_.insert(base_directory_);
165 }
166 48 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
167
1/2
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
48 dir != to_create_catalog_dirs_.end();
168 ++dir) {
169 assert(dirs_.find(*dir) != dirs_.end());
170 const SharedPtr<SyncItem> to_mark = dirs_[*dir];
171 assert(to_mark->IsDirectory());
172 to_mark->SetCatalogMarker();
173 to_mark->MakePlaceholderDirectory();
174 ProcessDirectory(to_mark);
175 }
176 48 return; // Only successful exit point
177 break;
178 }
179
180 case ARCHIVE_WARN: {
181 LogCvmfs(kLogUnionFs, kLogStderr,
182 "Warning in uncompression reading, going on.\n %s",
183 archive_error_string(src));
184 // We actually want this to enter the ARCHIVE_OK case
185 }
186
187 224 case ARCHIVE_OK: {
188 224 ProcessArchiveEntry(entry);
189 224 break;
190 }
191
192 default: {
193 // We should never enter this branch, but to be safe we prefer
194 // to abort in case we hit a case we don't know how to manage.
195 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
196 result, archive_error_string(src));
197 }
198 }
199 224 }
200 }
201
202 224 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
203
2/4
✓ Branch 2 taken 224 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 224 times.
✗ Branch 6 not taken.
224 std::string archive_file_path(archive_entry_pathname(entry));
204
1/2
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
224 archive_file_path = SanitizePath(archive_file_path);
205
206 224 const std::string complete_path = base_directory_ != "/"
207 224 ? MakeCanonicalPath(base_directory_
208
1/4
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
224 + "/"
209
3/8
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 224 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 224 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
672 + archive_file_path)
210
2/6
✓ Branch 0 taken 224 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 224 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
448 : MakeCanonicalPath(archive_file_path);
211
212 224 std::string parent_path;
213 224 std::string filename;
214
1/2
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
224 SplitPath(complete_path, &parent_path, &filename);
215
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 224 times.
224 if (parent_path == ".")
216 parent_path.clear();
217
218
1/2
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
224 CreateDirectories(parent_path);
219
220 const SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(
221 new SyncItemTar(parent_path, filename, src, entry, read_archive_signal_,
222
3/6
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 224 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 224 times.
✗ Branch 8 not taken.
224 this, uid_, gid_));
223
224
2/4
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 224 times.
224 if (NULL != archive_entry_hardlink(entry)) {
225 const std::string hardlink_name(
226 SanitizePath(archive_entry_hardlink(entry)));
227 const std::string hardlink = base_directory_ != "/"
228 ? base_directory_ + "/" + hardlink_name
229 : hardlink_name;
230
231 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
232 hardlinks_.find(hardlink)->second.push_back(complete_path);
233 } else {
234 std::list<std::string> to_hardlink;
235 to_hardlink.push_back(complete_path);
236 hardlinks_[hardlink] = to_hardlink;
237 }
238 if (filename == ".cvmfscatalog") {
239 // the file is created in the PostUpload phase
240 to_create_catalog_dirs_.insert(parent_path);
241 }
242 read_archive_signal_->Wakeup();
243 return;
244 }
245
246
3/4
✓ Branch 2 taken 224 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 64 times.
✓ Branch 5 taken 160 times.
224 if (sync_entry->IsDirectory()) {
247
3/5
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 32 times.
✓ Branch 6 taken 32 times.
64 if (know_directories_.find(complete_path) != know_directories_.end()) {
248
1/2
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
32 sync_entry->MakePlaceholderDirectory();
249 }
250
1/2
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
64 ProcessUnmaterializedDirectory(sync_entry);
251
2/4
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
64 dirs_[complete_path] = sync_entry;
252
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 know_directories_.insert(complete_path);
253
254
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 read_archive_signal_->Wakeup(); // We don't need to read data and we
255 // can read the next header
256
257
3/4
✓ Branch 2 taken 160 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 144 times.
✓ Branch 5 taken 16 times.
160 } else if (sync_entry->IsRegularFile()) {
258 // inside the process pipeline we will wake up the signal
259
1/2
✓ Branch 2 taken 144 times.
✗ Branch 3 not taken.
144 ProcessFile(sync_entry);
260
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 144 times.
144 if (filename == ".cvmfscatalog") {
261 to_create_catalog_dirs_.insert(parent_path);
262 }
263
264
1/8
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo()
265 || sync_entry->IsSocket() || sync_entry->IsCharacterDevice()
266
2/8
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 16 times.
✗ Branch 9 not taken.
16 || sync_entry->IsBlockDevice()) {
267 // we avoid adding an entity named like a catalog marker if it is not a
268 // regular file
269
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 if (filename != ".cvmfscatalog") {
270
1/2
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 ProcessFile(sync_entry);
271 } else {
272 PANIC(kLogStderr,
273 "Found entity called as a catalog marker '%s' that however is "
274 "not a regular file, abort",
275 complete_path.c_str());
276 }
277
278 // here we don't need to read data from the tar file, so we can wake up
279 // the signal immediately
280
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 read_archive_signal_->Wakeup();
281
282 } else {
283 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
284 filename.c_str());
285 // if for any reason this code path changes and we don't abort anymore,
286 // remember to wake up the signal, otherwise we will be stuck in a deadlock
287 //
288 // read_archive_signal_->Wakeup();
289 }
290
5/10
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 224 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 224 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 224 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 224 times.
✗ Branch 14 not taken.
224 }
291
292 224 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
293
1/2
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
224 if (path.length() >= 2) {
294
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 224 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 224 times.
224 if (path[0] == '.' && path[1] == '/') {
295 return path.substr(2);
296 }
297 }
298
1/2
✓ Branch 1 taken 224 times.
✗ Branch 2 not taken.
224 if (path.length() >= 1) {
299
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 224 times.
224 if (path[0] == '/') {
300 return path.substr(1);
301 }
302 }
303 224 return path;
304 }
305
306 void SyncUnionTarball::PostUpload() {
307 std::map<const std::string, std::list<std::string> >::iterator hardlink;
308 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
309 ++hardlink) {
310 std::list<std::string>::iterator entry;
311 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
312 ++entry) {
313 mediator_->Clone(*entry, hardlink->first);
314 }
315 }
316 }
317
318 std::string SyncUnionTarball::UnwindWhiteoutFilename(
319 SharedPtr<SyncItem> entry) const {
320 return entry->filename();
321 }
322
323 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
324 return false;
325 }
326
327 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
328 return false;
329 }
330
331 /* Tar files are not necessarily traversed in order from root to leaves.
332 * So it may happen that we are expanding the file `/a/b/c.txt` without
333 * having yet created the directory `/a/b/`.
334 * To overcome this limitation, the following function creates dummy
335 * directories that are used as placeholders and that will be overwritten
336 * as soon as the real directory is found in the tarball.
337 */
338 304 void SyncUnionTarball::CreateDirectories(const std::string &target) {
339
3/5
✓ Branch 2 taken 304 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 176 times.
✓ Branch 6 taken 128 times.
304 if (know_directories_.find(target) != know_directories_.end())
340 224 return;
341
2/2
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 80 times.
128 if (target == ".")
342 48 return;
343
344
1/2
✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
80 std::string dirname = "";
345
1/2
✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
80 std::string filename = "";
346
1/2
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
80 SplitPath(target, &dirname, &filename);
347
1/2
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
80 CreateDirectories(dirname);
348
349
2/2
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 32 times.
80 if (dirname == ".")
350
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 dirname = "";
351 const SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
352
3/6
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 80 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 80 times.
✗ Branch 8 not taken.
80 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
353
354
1/2
✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
80 ProcessUnmaterializedDirectory(dummy);
355
2/4
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 80 times.
✗ Branch 5 not taken.
80 dirs_[target] = dummy;
356
1/2
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
80 know_directories_.insert(target);
357 80 }
358
359 } // namespace publish
360