GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/sync_union_tarball.cc
Date: 2024-04-28 02:33:07
Exec Total Coverage
Lines: 88 154 57.1%
Branches: 93 303 30.7%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "sync_union_tarball.h"
8
9 #include <pthread.h>
10 #include <unistd.h>
11
12 #include <cassert>
13 #include <cstdio>
14 #include <list>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "duplex_libarchive.h"
20 #include "sync_item.h"
21 #include "sync_item_dummy.h"
22 #include "sync_item_tar.h"
23 #include "sync_mediator.h"
24 #include "sync_union.h"
25 #include "util/concurrency.h"
26 #include "util/exception.h"
27 #include "util/fs_traversal.h"
28 #include "util/posix.h"
29 #include "util/smalloc.h"
30
31 namespace publish {
32
33 3 SyncUnionTarball::SyncUnionTarball(AbstractSyncMediator *mediator,
34 const std::string &rdonly_path,
35 const std::string &tarball_path,
36 const std::string &base_directory,
37 const uid_t uid,
38 const gid_t gid,
39 const std::string &to_delete,
40 3 const bool create_catalog_on_root)
41 : SyncUnion(mediator, rdonly_path, "", ""),
42 3 src(NULL),
43 3 tarball_path_(tarball_path),
44
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 base_directory_(base_directory),
45 3 uid_(uid),
46 3 gid_(gid),
47
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 to_delete_(to_delete),
48 3 create_catalog_on_root_(create_catalog_on_root),
49
6/12
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 3 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 3 times.
✗ Branch 10 not taken.
✓ Branch 16 taken 3 times.
✗ Branch 17 not taken.
✓ Branch 23 taken 3 times.
✗ Branch 24 not taken.
✓ Branch 26 taken 3 times.
✗ Branch 27 not taken.
6 read_archive_signal_(new Signal) {}
50
51
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
6 SyncUnionTarball::~SyncUnionTarball() { delete read_archive_signal_; }
52
53 3 bool SyncUnionTarball::Initialize() {
54 bool result;
55
56 // We are just deleting entities from the repo
57
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (tarball_path_ == "") {
58 assert(NULL == src);
59 return SyncUnion::Initialize();
60 }
61
62 3 src = archive_read_new();
63
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(ARCHIVE_OK == archive_read_support_format_tar(src));
64
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(ARCHIVE_OK == archive_read_support_format_empty(src));
65
66
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (tarball_path_ == "-") {
67 result = archive_read_open_filename(src, NULL, kBlockSize);
68 } else {
69
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 std::string tarball_absolute_path = GetAbsolutePath(tarball_path_);
70
1/2
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
3 result = archive_read_open_filename(src, tarball_absolute_path.c_str(),
71 kBlockSize);
72 3 }
73
74
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (result != ARCHIVE_OK) {
75 LogCvmfs(kLogUnionFs, kLogStderr, "Impossible to open the archive: %s",
76 archive_error_string(src));
77 return false;
78 }
79
80 3 return SyncUnion::Initialize();
81 }
82
83 /*
84 * Libarchive is not thread-aware, so we need to make sure that, before
85 * reading/"opening" the next header in the archive, the content of the
86 *
87 * present header has been consumed completely.
88 * A different thread reads/"opens" the header from the one that consumes
89 * it, so we opted for a Signal that is backed by a condition variable.
90 * We wait for the signal just before reading the header.
91 * Then, when we are done with the header, the Signal is fired.
92 * The Signal can be fired inside the main loop if we don't need to read
93 * data, or when the IngestionSource gets closed, which means that we are
94 * no longer reading data from there.
95 * This whole process is not necessary for directories since we don't
96 * actually need to read data from them.
97 *
98 * It may be needed to add a catalog as a root of the archive.
99 * A possible way to do it is by creating a virtual `.cvmfscatalog` file and
100 * push it into the usual pipeline.
101 * This operation must be done only once, so it is performed when the end of
102 * the archive is reached (the `ARCHIVE_EOF` case of the loop) and only if
103 * `create_catalog_on_root_` is set and the base directory is not "/".
104 */
105 3 void SyncUnionTarball::Traverse() {
106 3 read_archive_signal_->Wakeup();
107
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 assert(this->IsInitialized());
108
109 /*
110 * As first step we eliminate the requested directories.
111 */
112
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (to_delete_ != "") {
113 vector<std::string> to_eliminate_vec = SplitString(to_delete_, ':');
114
115 for (vector<string>::iterator s = to_eliminate_vec.begin();
116 s != to_eliminate_vec.end(); ++s) {
117 std::string parent_path;
118 std::string filename;
119 SplitPath(*s, &parent_path, &filename);
120 if (parent_path == ".") parent_path = "";
121 SharedPtr<SyncItem> sync_entry =
122 CreateSyncItem(parent_path, filename, kItemDir);
123 mediator_->Remove(sync_entry);
124 }
125 }
126
127 // we are simply deleting entities from the repo
128
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (NULL == src) return;
129
130 3 struct archive_entry *entry = archive_entry_new();
131 while (true) {
132 // Get the lock, wait if lock is not available yet
133 17 read_archive_signal_->Wait();
134
135 17 int result = archive_read_next_header2(src, entry);
136
137
2/6
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
17 switch (result) {
138 case ARCHIVE_FATAL: {
139 PANIC(kLogStderr, "Fatal error in reading the archive.\n%s\n",
140 archive_error_string(src));
141 break; // Only exit point with error
142 }
143
144 case ARCHIVE_RETRY: {
145 LogCvmfs(kLogUnionFs, kLogStdout,
146 "Error in reading the header, retrying.\n%s\n",
147 archive_error_string(src));
148 continue;
149 break;
150 }
151
152 3 case ARCHIVE_EOF: {
153
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3 times.
3 if (create_catalog_on_root_ && (base_directory_ != "/")) {
154 CreateDirectories(base_directory_); // necessary for empty archives
155 SharedPtr<SyncItem> catalog = SharedPtr<SyncItem>(
156 new SyncItemDummyCatalog(base_directory_, this));
157 ProcessFile(catalog);
158 to_create_catalog_dirs_.insert(base_directory_);
159 }
160 3 for (set<string>::iterator dir = to_create_catalog_dirs_.begin();
161
1/2
✗ Branch 3 not taken.
✓ Branch 4 taken 3 times.
3 dir != to_create_catalog_dirs_.end(); ++dir) {
162 assert(dirs_.find(*dir) != dirs_.end());
163 SharedPtr<SyncItem> to_mark = dirs_[*dir];
164 assert(to_mark->IsDirectory());
165 to_mark->SetCatalogMarker();
166 to_mark->MakePlaceholderDirectory();
167 ProcessDirectory(to_mark);
168 }
169 3 return; // Only successful exit point
170 break;
171 }
172
173 case ARCHIVE_WARN: {
174 LogCvmfs(kLogUnionFs, kLogStderr,
175 "Warning in uncompression reading, going on.\n %s",
176 archive_error_string(src));
177 // We actually want this to enter the ARCHIVE_OK case
178 }
179
180 14 case ARCHIVE_OK: {
181 14 ProcessArchiveEntry(entry);
182 14 break;
183 }
184
185 default: {
186 // We should never enter this branch, but to be safe we prefer
187 // to abort in case we hit a state we don't know how to manage.
188 PANIC(kLogStderr, "Enter in unknown state. Aborting.\nError: %s\n",
189 result, archive_error_string(src));
190 }
191 }
192 14 }
193 }
194
195 14 void SyncUnionTarball::ProcessArchiveEntry(struct archive_entry *entry) {
196
2/4
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
14 std::string archive_file_path(archive_entry_pathname(entry));
197
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 archive_file_path = SanitizePath(archive_file_path);
198
199 std::string complete_path =
200 14 base_directory_ != "/"
201
4/13
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 14 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 14 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
42 ? MakeCanonicalPath(base_directory_ + "/" + archive_file_path)
202
2/6
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 14 times.
✗ Branch 4 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
28 : MakeCanonicalPath(archive_file_path);
203
204 14 std::string parent_path;
205 14 std::string filename;
206
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 SplitPath(complete_path, &parent_path, &filename);
207
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
14 if (parent_path == ".") parent_path.clear();
208
209
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 CreateDirectories(parent_path);
210
211 SharedPtr<SyncItem> sync_entry = SharedPtr<SyncItem>(new SyncItemTar(
212 parent_path, filename, src, entry,
213
3/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 14 times.
✗ Branch 8 not taken.
14 read_archive_signal_, this, uid_, gid_));
214
215
2/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
14 if (NULL != archive_entry_hardlink(entry)) {
216 const std::string hardlink_name(
217 SanitizePath(archive_entry_hardlink(entry)));
218 const std::string hardlink = base_directory_ != "/"
219 ? base_directory_ + "/" + hardlink_name
220 : hardlink_name;
221
222 if (hardlinks_.find(hardlink) != hardlinks_.end()) {
223 hardlinks_.find(hardlink)->second.push_back(complete_path);
224 } else {
225 std::list<std::string> to_hardlink;
226 to_hardlink.push_back(complete_path);
227 hardlinks_[hardlink] = to_hardlink;
228 }
229 if (filename == ".cvmfscatalog") {
230 // the file is created in the PostUpload phase
231 to_create_catalog_dirs_.insert(parent_path);
232 }
233 read_archive_signal_->Wakeup();
234 return;
235 }
236
237
3/4
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 10 times.
14 if (sync_entry->IsDirectory()) {
238
3/5
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
4 if (know_directories_.find(complete_path) != know_directories_.end()) {
239
1/2
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
2 sync_entry->MakePlaceholderDirectory();
240 }
241
1/2
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
4 ProcessUnmaterializedDirectory(sync_entry);
242
2/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
4 dirs_[complete_path] = sync_entry;
243
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 know_directories_.insert(complete_path);
244
245
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 read_archive_signal_->Wakeup(); // We don't need to read data and we
246 // can read the next header
247
248
3/4
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 1 times.
10 } else if (sync_entry->IsRegularFile()) {
249 // inside the process pipeline we will wake up the signal
250
1/2
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
9 ProcessFile(sync_entry);
251
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
9 if (filename == ".cvmfscatalog") {
252 to_create_catalog_dirs_.insert(parent_path);
253 }
254
255
1/8
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
1 } else if (sync_entry->IsSymlink() || sync_entry->IsFifo() ||
256
2/14
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
1 sync_entry->IsSocket() || sync_entry->IsCharacterDevice() ||
257 sync_entry->IsBlockDevice()) {
258 // we avoid adding an entity named like a catalog marker if it is not a
259 // regular file
260
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 if (filename != ".cvmfscatalog") {
261
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 ProcessFile(sync_entry);
262 } else {
263 PANIC(kLogStderr,
264 "Found entity called as a catalog marker '%s' that however is "
265 "not a regular file, abort",
266 complete_path.c_str());
267 }
268
269 // here we don't need to read data from the tar file so we can wake up
270 // the signal immediately
271
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 read_archive_signal_->Wakeup();
272
273 } else {
274 PANIC(kLogStderr, "Fatal error found unexpected file: \n%s\n",
275 filename.c_str());
276 // if for any reason this code path changes and we don't abort anymore,
277 // remember to wake up the signal, otherwise we will be stuck in a deadlock
278 //
279 // read_archive_signal_->Wakeup();
280 }
281
5/10
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 14 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 14 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 14 times.
✗ Branch 14 not taken.
14 }
282
283 14 std::string SyncUnionTarball::SanitizePath(const std::string &path) {
284
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 if (path.length() >= 2) {
285
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 14 times.
14 if (path[0] == '.' && path[1] == '/') {
286 return path.substr(2);
287 }
288 }
289
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 if (path.length() >= 1) {
290
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
14 if (path[0] == '/') {
291 return path.substr(1);
292 }
293 }
294 14 return path;
295 }
296
297 void SyncUnionTarball::PostUpload() {
298 std::map<const std::string, std::list<std::string> >::iterator hardlink;
299 for (hardlink = hardlinks_.begin(); hardlink != hardlinks_.end();
300 ++hardlink) {
301 std::list<std::string>::iterator entry;
302 for (entry = hardlink->second.begin(); entry != hardlink->second.end();
303 ++entry) {
304 mediator_->Clone(*entry, hardlink->first);
305 }
306 }
307 }
308
309 std::string SyncUnionTarball::UnwindWhiteoutFilename(
310 SharedPtr<SyncItem> entry) const {
311 return entry->filename();
312 }
313
314 bool SyncUnionTarball::IsOpaqueDirectory(SharedPtr<SyncItem> directory) const {
315 return false;
316 }
317
318 bool SyncUnionTarball::IsWhiteoutEntry(SharedPtr<SyncItem> entry) const {
319 return false;
320 }
321
322 /* Tar files are not necessarily traversed in order from root to leaf.
323 * So it may happen that we are expanding the file `/a/b/c.txt` without
324 * having yet created the directory `/a/b/`.
325 * In order to overcome this limitation the following function creates dummy
326 * directories that can be used as placeholders and that will be overwritten
327 * as soon as the real directory is found in the tarball.
328 */
329 19 void SyncUnionTarball::CreateDirectories(const std::string &target) {
330
3/5
✓ Branch 2 taken 19 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 11 times.
✓ Branch 6 taken 8 times.
22 if (know_directories_.find(target) != know_directories_.end()) return;
331
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 5 times.
8 if (target == ".") return;
332
333
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 std::string dirname = "";
334
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 std::string filename = "";
335
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 SplitPath(target, &dirname, &filename);
336
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 CreateDirectories(dirname);
337
338
3/4
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
5 if (dirname == ".") dirname = "";
339 SharedPtr<SyncItem> dummy = SharedPtr<SyncItem>(
340
3/6
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 5 times.
✗ Branch 8 not taken.
5 new SyncItemDummyDir(dirname, filename, this, kItemDir, uid_, gid_));
341
342
1/2
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
5 ProcessUnmaterializedDirectory(dummy);
343
2/4
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
5 dirs_[target] = dummy;
344
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 know_directories_.insert(target);
345 5 }
346
347 } // namespace publish
348