GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2025-05-11 02:35:43
Exec Total Coverage
Lines: 89 155 57.4%
Branches: 94 305 30.8%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "sync_union_tarball.h"
8
9 #include <pthread.h>
10 #include <unistd.h>
11
12 #include <cassert>
13 #include <cstdio>
14 #include <list>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "duplex_libarchive.h"
20 #include "sync_item.h"
21 #include "sync_item_dummy.h"
22 #include "sync_item_tar.h"
23 #include "sync_mediator.h"
24 #include "sync_union.h"
25 #include "util/concurrency.h"
26 #include "util/exception.h"
27 #include "util/fs_traversal.h"
28 #include "util/posix.h"
29 #include "util/smalloc.h"
30
31 namespace publish {
32
33 3 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
34 const std::string &rdonly_path,
35 const std::string &tarball_path,
36 const std::string &base_directory,
37 const uid_t uid,
38 const gid_t gid,
39 const std::string &to_delete,
40 const bool create_catalog_on_root,
41 3 const std::string &path_delimiter)
42 : SyncUnion(mediator, rdonly_path, "", ""),
43 3 src(NULL),
44 3 tarball_path_(tarball_path),
45
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 base_directory_(base_directory),
46 3 uid_(uid),
47 3 gid_(gid),
48
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 to_delete_(to_delete),
49 3 create_catalog_on_root_(create_catalog_on_root),
50
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 path_delimiter_(path_delimiter),
51
6/12
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 3 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 3 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 3 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 3 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 3 times.
✗ Branch 27 not taken.
6 read_archive_signal_(new Signal) {}
52
53
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
6 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
54
55 3 bool SyncUnionTarball::Initialize() {
56 bool result;
57
58 // We are just deleting entity from the repo
59
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (tarball_path_ == "") {
60 assert(NULL == src);
61 return SyncUnion::Initialize();
62 }
63
64 3 src = archive_read_new();
65
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
66
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
67
68
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (tarball_path_ == "-") {
69 result = archive_read_open_filename(src, NULL, kBlockSize);
70 } else {
71
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
72
1/2
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
3 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
73 kBlockSize);
74 3 }
75
76
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (result != ARCHIVE_OK) {
77 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
78 archive_error_string(src));
79 return false;
80 }
81
82 3 return SyncUnion::Initialize();
83 }
84
85 /*
86 * Libarchive is not thread aware, so we need to make sure that before
87 * to read/"open" the next header in the archive the content of the
88 *
89 * present header is been consumed completely.
90 * Different thread read/"open" the header from the one that consumes
91 * it so we opted for a Signal that is backed by a conditional variable.
92 * We wait for the signal just before to read the header.
93 * Then when we have done with the header the Signal is fired.
94 * The Signal can be fired inside the main loop if we don't need to read
95 * data, or when the IngestionSource get closed, which means that we are
96 * not reading data anymore from there.
97 * This whole process is not necessary for directories since we don't
98 * actually need to read data from them.
99 *
100 * It may be needed to add a catalog as a root of the archive.
101 * A possible way to do it is by creating an virtual `.cvmfscatalog` file and
102 * push it into the usual pipeline.
103 * This operation must be done only once, and it seems like a good idea to do
104 * it at the first iteration of the loop, hence this logic is managed by the
105 * `first_iteration` boolean flag.
106 */
107 3 void SyncUnionTarball::Traverse() {
108 3 read_archive_signal_->Wakeup();
109
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(this->IsInitialized());
110
111 /*
112 * As first step we eliminate the requested directories.
113 */
114
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (to_delete_ != "") {
115 vector<std::string> to_eliminate_vec = SplitStringMultiChar(to_delete_, path_delimiter_);
116
117 for (vector<string>::iterator s = to_eliminate_vec.begin();
118 s != to_eliminate_vec.end(); ++s) {
119 std::string parent_path;
120 std::string filename;
121 SplitPath(*s, &parent_path, &filename);
122 if (parent_path == ".") parent_path = "";
123 SharedPtr<SyncItem> sync_entry =
124 CreateSyncItem(parent_path, filename, kItemDir);
125 mediator_->Remove(sync_entry);
126 }
127 }
128
129 // we are simply deleting entity from the repo
130
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (NULL == src) return;
131
132 3 struct archive_entry *entry = archive_entry_new();
133 while (true) {
134 // Get the lock, wait if lock is not available yet
135 17 read_archive_signal_->Wait();
136
137 17 int result = archive_read_next_header2(src, entry);
138
139
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
17 switch (result) {
140 case ARCHIVE_FATAL: {
141 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
142 archive_error_string(src));
143 break; // Only exit point with error
144 }
145
146 case ARCHIVE_RETRY: {
147 LogCvmfs(kLogUnionFs, kLogStdout,
148 "Error in reading the header, retrying.\n%s\n",
149 archive_error_string(src));
150 continue;
151 break;
152 }
153
154 3 case ARCHIVE_EOF: {
155
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3 times.
3 if (create_catalog_on_root_ && (base_directory_ != "/")) {
156 CreateDirectories(base_directory_); // necessary for empty archives
157 SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
158 new SyncItemDummyCatalog(base_directory_, this));
159 ProcessFile(catalog);
160 to_create_catalog_dirs_.insert(base_directory_);
161 }
162 3 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
163
1/2
✗ Branch 3 not taken.
✓ Branch 4 taken 3 times.
3 dir != to_create_catalog_dirs_.end(); ++dir) {
164 assert(dirs_.find(*dir) != dirs_.end());
165 SharedPtr<SyncItem> to_mark = dirs_[*dir];
166 assert(to_mark->IsDirectory());
167 to_mark->SetCatalogMarker();
168 to_mark->MakePlaceholderDirectory();
169 ProcessDirectory(to_mark);
170 }
171 3 return; // Only successful exit point
172 break;
173 }
174
175 case ARCHIVE_WARN: {
176 LogCvmfs(kLogUnionFs, kLogStderr,
177 "Warning in uncompression reading, going on.\n %s",
178 archive_error_string(src));
179 // We actually want this to enter the ARCHIVE_OK case
180 }
181
182 14 case ARCHIVE_OK: {
183 14 ProcessArchiveEntry(entry);
184 14 break;
185 }
186
187 default: {
188 // We should never enter in this branch, but just for safeness we prefer
189 // to abort in case we hit a case we don't how to manage.
190 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
191 result, archive_error_string(src));
192 }
193 }
194 14 }
195 }
196
197 14 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
198
2/4
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
14 std::string archive_file_path(archive_entry_pathname(entry));
199
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 archive_file_path = SanitizePath(archive_file_path);
200
201 std::string complete_path =
202 14 base_directory_ != "/"
203
4/13
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 14 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 14 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
42 ? MakeCanonicalPath(base_directory_ + "/" + archive_file_path)
204
2/6
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 14 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
28 : MakeCanonicalPath(archive_file_path);
205
206 14 std::string parent_path;
207 14 std::string filename;
208
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 SplitPath(complete_path, &parent_path, &filename);
209
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
14 if (parent_path == ".") parent_path.clear();
210
211
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 CreateDirectories(parent_path);
212
213 SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(new SyncItemTar(
214 parent_path, filename, src, entry,
215
3/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 14 times.
✗ Branch 8 not taken.
14 read_archive_signal_, this, uid_, gid_));
216
217
2/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
14 if (NULL != archive_entry_hardlink(entry)) {
218 const std::string hardlink_name(
219 SanitizePath(archive_entry_hardlink(entry)));
220 const std::string hardlink = base_directory_ != "/"
221 ? base_directory_ + "/" + hardlink_name
222 : hardlink_name;
223
224 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
225 hardlinks_.find(hardlink)->second.push_back(complete_path);
226 } else {
227 std::list<std::string> to_hardlink;
228 to_hardlink.push_back(complete_path);
229 hardlinks_[hardlink] = to_hardlink;
230 }
231 if (filename == ".cvmfscatalog") {
232 // the file is created in the PostUpload phase
233 to_create_catalog_dirs_.insert(parent_path);
234 }
235 read_archive_signal_->Wakeup();
236 return;
237 }
238
239
3/4
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 10 times.
14 if (sync_entry->IsDirectory()) {
240
3/5
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
4 if (know_directories_.find(complete_path) != know_directories_.end()) {
241
1/2
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
2 sync_entry->MakePlaceholderDirectory();
242 }
243
1/2
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
4 ProcessUnmaterializedDirectory(sync_entry);
244
2/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
4 dirs_[complete_path] = sync_entry;
245
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 know_directories_.insert(complete_path);
246
247
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 read_archive_signal_->Wakeup(); // We don't need to read data and we
248 // can read the next header
249
250
3/4
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 1 times.
10 } else if (sync_entry->IsRegularFile()) {
251 // inside the process pipeline we will wake up the signal
252
1/2
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
9 ProcessFile(sync_entry);
253
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
9 if (filename == ".cvmfscatalog") {
254 to_create_catalog_dirs_.insert(parent_path);
255 }
256
257
1/8
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
1 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo() ||
258
2/14
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
1 sync_entry->IsSocket() || sync_entry->IsCharacterDevice() ||
259 sync_entry->IsBlockDevice()) {
260 // we avoid to add an entity called as a catalog marker if it is not a
261 // regular file
262
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 if (filename != ".cvmfscatalog") {
263
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 ProcessFile(sync_entry);
264 } else {
265 PANIC(kLogStderr,
266 "Found entity called as a catalog marker '%s' that however is "
267 "not a regular file, abort",
268 complete_path.c_str());
269 }
270
271 // here we don't need to read data from the tar file so we can wake up
272 // immediately the signal
273
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 read_archive_signal_->Wakeup();
274
275 } else {
276 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
277 filename.c_str());
278 // if for any reason this code path change and we don't abort anymore,
279 // remember to wakeup the signal, otherwise we will be stuck in a deadlock
280 //
281 // read_archive_signal_->Wakeup();
282 }
283
5/10
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 14 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 14 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 14 times.
✗ Branch 14 not taken.
14 }
284
285 14 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
286
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 if (path.length() >= 2) {
287
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 14 times.
14 if (path[0] == '.' && path[1] == '/') {
288 return path.substr(2);
289 }
290 }
291
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 if (path.length() >= 1) {
292
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
14 if (path[0] == '/') {
293 return path.substr(1);
294 }
295 }
296 14 return path;
297 }
298
299 void SyncUnionTarball::PostUpload() {
300 std::map<const std::string, std::list<std::string> >::iterator hardlink;
301 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
302 ++hardlink) {
303 std::list<std::string>::iterator entry;
304 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
305 ++entry) {
306 mediator_->Clone(*entry, hardlink->first);
307 }
308 }
309 }
310
311 std::string SyncUnionTarball::UnwindWhiteoutFilename(
312 SharedPtr<SyncItem> entry) const {
313 return entry->filename();
314 }
315
316 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
317 return false;
318 }
319
320 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
321 return false;
322 }
323
324 /* Tar files are not necessarily traversed in order from root to leave.
325 * So it may happens that we are expanding the file `/a/b/c.txt` without
326 * having created yet the directory `/a/b/`.
327 * In order to overcome this limitation the following function create dummy
328 * directories that can be used as placeholder and that they will be overwritten
329 * as soon as the real directory is found in the tarball
330 */
331 19 void SyncUnionTarball::CreateDirectories(const std::string &target) {
332
3/5
✓ Branch 2 taken 19 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 11 times.
✓ Branch 6 taken 8 times.
22 if (know_directories_.find(target) != know_directories_.end()) return;
333
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 5 times.
8 if (target == ".") return;
334
335
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 std::string dirname = "";
336
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 std::string filename = "";
337
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 SplitPath(target, &dirname, &filename);
338
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 CreateDirectories(dirname);
339
340
3/4
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
5 if (dirname == ".") dirname = "";
341 SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
342
3/6
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 5 times.
✗ Branch 8 not taken.
5 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
343
344
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 ProcessUnmaterializedDirectory(dummy);
345
2/4
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
5 dirs_[target] = dummy;
346
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 know_directories_.insert(target);
347 5 }
348
349 } // namespace publish
350