GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/catalog_traversal.h
Date: 2026-05-24 02:35:55
Exec Total Coverage
Lines: 246 264 93.2%
Branches: 145 263 55.1%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_CATALOG_TRAVERSAL_H_
6 #define CVMFS_CATALOG_TRAVERSAL_H_
7
8 #include <cassert>
9 #include <limits>
10 #include <set>
11 #include <stack>
12 #include <string>
13 #include <vector>
14
15 #include "catalog.h"
16 #include "compression/compression.h"
17 #include "crypto/signature.h"
18 #include "history_sqlite.h"
19 #include "manifest.h"
20 #include "object_fetcher.h"
21 #include "util/concurrency.h"
22 #include "util/logging.h"
23
24 namespace catalog {
25 class Catalog;
26 class WritableCatalog;
27 } // namespace catalog
28
29 namespace swissknife {
30
31 /**
32 * Callback data which has to be implemented by the registered callback
33 * functions/methods (see Observable<> for further details)
34 * @param catalog the catalog object which needs to be processed
35 * @param catalog_hash the content hash of the catalog
36 * @param tree_level the depth in the nested catalog tree
37 * (starting at zero)
38 * @param file_size the size of the downloaded catalog database file
39 * @param history_depth the distance from the current HEAD revision
40 * (current HEAD has history_depth 0)
41 */
42 template<class CatalogT>
43 struct CatalogTraversalData {
44 3738372 CatalogTraversalData(const CatalogT *catalog,
45 const shash::Any &catalog_hash,
46 const unsigned tree_level,
47 const size_t file_size,
48 const uint64_t history_depth)
49 3738372 : catalog(catalog)
50 3738372 , catalog_hash(catalog_hash)
51 3738372 , tree_level(tree_level)
52 3738372 , file_size(file_size)
53 3738372 , history_depth(history_depth) { }
54
55 const CatalogT *catalog;
56 const shash::Any catalog_hash;
57 const unsigned int tree_level;
58 const size_t file_size;
59 const uint64_t history_depth;
60 };
61
62
63 /**
64 * A layer to extract information from a catalog. Users of the catalog
65 * traversal can provide derived classes to overwrite behavior. Currently used
66 * to get the last-modified timestamp in configurable manner: for the garbage
67 * collection, the timestamp of the catalog hash in the reflog counts, which
68 * is the same or newer than the one stored in the catalog.
69 */
70 template<class CatalogT>
71 class CatalogTraversalInfoShim {
72 public:
73 6354 virtual ~CatalogTraversalInfoShim() { }
74
75 // Default implementation: use the catalog's timestamp
76 6908 virtual uint64_t GetLastModified(const CatalogT *catalog) {
77 6908 return catalog->GetLastModified();
78 }
79 };
80
81
82 /**
83 * A base class for CatalogTraversal and CatalogTraversalParallel implementing
84 * common functionality. Actual traversal classes inherit from this class.
85 *
86 */
87 template<class ObjectFetcherT>
88 class CatalogTraversalBase
89 : public Observable<
90 CatalogTraversalData<typename ObjectFetcherT::CatalogTN> > {
91 public:
92 typedef ObjectFetcherT ObjectFetcherTN;
93 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
94 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
95 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
96
97 /**
98 * @param repo_url the path to the repository to be traversed:
99 * -> either absolute path to the local catalogs
100 * -> or an URL to a remote repository
101 * @param repo_name fully qualified repository name (used for
102 * remote repository signature check) (optional)
103 * @param repo_keys a comma separated list of public key file
104 * locations to verify the repository manifest
105 * file
106 * @param history depth of the desired catalog history traversal
107 * (default: 0 - only HEAD catalogs are traversed)
108 * @param timestamp timestamp of history traversal threshold
109 * (default: 0 - no threshold, traverse
110 * everything)
111 * @param no_repeat_history keep track of visited catalogs and don't
112 * re-visit them in previous revisions
113 * @param no_close do not close catalogs after they were attached
114 * (catalogs retain their parent/child pointers)
115 * @param ignore_load_failure suppressed an error message if a catalog file
116 * could not be loaded (i.e. was sweeped before by
117 * a garbage collection run)
118 * @param quiet silence messages that would go to stderr
119 * @param tmp_dir path to the temporary directory to be used
120 * (default: /tmp)
121 *
122 * ONLY FOR CatalogTraversalParallel
123 * @param num_threads number of threads concurrently traversing
124 * the catalog trees; hint: set up based on
125 * CPU utilization and optimal number of
126 * download threads
127 * (default: 8)
128 *
129 * @param serialize_callbacks do not call multiple catalog callbacks
130 * concurrently; ensures safe access to shared
131 * variables inside the callback; turn off if
132 * the registered callback is thread-safe
133 * (default: true)
134 */
135 struct Parameters {
136 2275 Parameters()
137 2275 : object_fetcher(NULL)
138 2275 , history(kNoHistory)
139 2275 , timestamp(kNoTimestampThreshold)
140 2275 , no_repeat_history(false)
141 2275 , no_close(false)
142 2275 , ignore_load_failure(false)
143 2275 , quiet(false)
144 2275 , num_threads(8)
145 2275 , serialize_callbacks(true) { }
146
147 static const uint64_t kFullHistory;
148 static const uint64_t kNoHistory;
149 static const time_t kNoTimestampThreshold;
150
151 ObjectFetcherT *object_fetcher;
152
153 uint64_t history;
154 time_t timestamp;
155 bool no_repeat_history;
156 bool no_close;
157 bool ignore_load_failure;
158 bool quiet;
159 // These parameters only apply for CatalogTraversalParallel
160 unsigned int num_threads;
161 bool serialize_callbacks;
162 };
163
164 enum TraversalType {
165 kBreadthFirst,
166 kDepthFirst
167 };
168
169 2275 explicit CatalogTraversalBase(const Parameters &params)
170 2275 : object_fetcher_(params.object_fetcher)
171 2275 , catalog_info_shim_(&catalog_info_default_shim_)
172 2275 , default_history_depth_(params.history)
173 2275 , default_timestamp_threshold_(params.timestamp)
174 2275 , no_close_(params.no_close)
175 2275 , ignore_load_failure_(params.ignore_load_failure)
176 2275 , no_repeat_history_(params.no_repeat_history)
177
2/2
✓ Branch 2 taken 984 times.
✓ Branch 3 taken 1291 times.
2275 , error_sink_((params.quiet) ? kLogDebug : kLogStderr) {
178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2275 times.
2275 assert(object_fetcher_ != NULL);
179 2275 }
180
181 /**
182 * Starts the traversal process.
183 * After calling this methods CatalogTraversal will go through all catalogs
184 * and call the registered callback methods for each found catalog.
185 * If something goes wrong in the process, the traversal will be cancelled.
186 *
187 * @param type breadths or depth first traversal
188 * @return true, when all catalogs were successfully processed. On
189 * failure the traversal is cancelled and false is returned.
190 */
191 virtual bool Traverse(const TraversalType type = kBreadthFirst) = 0;
192
193 /**
194 * Starts the traversal process at the catalog pointed to by the given hash
195 *
196 * @param root_catalog_hash the entry point into the catalog traversal
197 * @param type breadths or depth first traversal
198 * @return true when catalogs were successfully traversed
199 */
200 virtual bool Traverse(const shash::Any &root_catalog_hash,
201 const TraversalType type = kBreadthFirst) = 0;
202
203 /**
204 * Traverse a list of revisions represented by root catalogs from first
205 * to last. DO NOT traverse previous revisions based on history and
206 * timestamp threshold settings.
207 *
208 * @param catalog_list list of root catalog hashes
209 * @param type breadth- or depth- first traversal
210 * @return true on success
211 */
212 virtual bool TraverseList(const std::vector<shash::Any> &catalog_list,
213 const TraversalType type = kBreadthFirst) = 0;
214
215 /**
216 * Starts the traversal process at the catalog pointed to by the given hash
217 * but doesn't traverse into predecessor catalog revisions. This overrides the
218 * TraversalParameter settings provided at construction.
219 *
220 * @param root_catalog_hash the entry point into the catalog traversal
221 * @param type breadths or depth first traversal
222 * @return true when catalogs were successfully traversed
223 */
224 virtual bool TraverseRevision(const shash::Any &root_catalog_hash,
225 const TraversalType type = kBreadthFirst) = 0;
226
227 /**
228 * Figures out all named tags in a repository and uses all of them as entry
229 * points into the traversal process.
230 *
231 * @param type breadths or depth first traversal
232 * @return true when catalog traversal successfully finished
233 */
234 1189 virtual bool TraverseNamedSnapshots(
235 const TraversalType type = kBreadthFirst) {
236 typedef std::vector<shash::Any> HashList;
237
238
1/2
✓ Branch 1 taken 1189 times.
✗ Branch 2 not taken.
1189 UniquePtr<HistoryTN> tag_db;
239 const typename ObjectFetcherT::Failures
240
2/4
✓ Branch 1 taken 1189 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1189 times.
✗ Branch 5 not taken.
1189 retval = object_fetcher_->FetchHistory(&tag_db);
241
2/3
✓ Branch 0 taken 1148 times.
✓ Branch 1 taken 41 times.
✗ Branch 2 not taken.
1189 switch (retval) {
242 1148 case ObjectFetcherT::kFailOk:
243 1148 break;
244
245 41 case ObjectFetcherT::kFailNotFound:
246
1/2
✓ Branch 1 taken 41 times.
✗ Branch 2 not taken.
41 LogCvmfs(kLogCatalogTraversal, kLogDebug,
247 "didn't find a history database to traverse");
248 41 return true;
249
250 default:
251 LogCvmfs(kLogCatalogTraversal, kLogStderr,
252 "failed to download history database (%d - %s)", retval,
253 Code2Ascii(retval));
254 return false;
255 }
256
257 1148 HashList root_hashes;
258
1/2
✓ Branch 2 taken 1148 times.
✗ Branch 3 not taken.
1148 bool success = tag_db->GetHashes(&root_hashes);
259
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1148 times.
1148 assert(success);
260
1/2
✓ Branch 1 taken 1148 times.
✗ Branch 2 not taken.
1148 return TraverseList(root_hashes, type);
261 1189 }
262
263 41 void SetCatalogInfoShim(CatalogTraversalInfoShim<CatalogTN> *shim) {
264 41 catalog_info_shim_ = shim;
265 41 }
266
267 protected:
268 typedef std::set<shash::Any> HashSet;
269
270 /**
271 * This struct keeps information about a catalog that still needs to be
272 * traversed by a currently running catalog traversal process.
273 */
274 struct CatalogJob {
275 3743213 CatalogJob(const std::string &path,
276 const shash::Any &hash,
277 const unsigned tree_level,
278 const uint64_t history_depth,
279 CatalogTN *parent = NULL)
280 3743213 : path(path)
281 3743213 , hash(hash)
282 3743213 , tree_level(tree_level)
283 3743213 , history_depth(history_depth)
284 3743213 , parent(parent)
285 3743213 , catalog_file_size(0)
286 3743213 , ignore(false)
287 3743213 , catalog(NULL)
288 3743213 , referenced_catalogs(0)
289 3743213 , postponed(false) { }
290
291 7436572 bool IsRootCatalog() const { return tree_level == 0; }
292
293 3738372 CallbackDataTN GetCallbackData() const {
294 3738372 return CallbackDataTN(catalog, hash, tree_level, catalog_file_size,
295 3738372 history_depth);
296 }
297
298 // initial state description
299 const std::string path;
300 const shash::Any hash;
301 const unsigned tree_level;
302 const uint64_t history_depth;
303 CatalogTN *parent;
304
305 // dynamic processing state (used internally)
306 std::string catalog_file_path;
307 size_t catalog_file_size;
308 bool ignore;
309 CatalogTN *catalog;
310 uint64_t referenced_catalogs;
311 bool postponed;
312 };
313
314 3738569 bool PrepareCatalog(CatalogJob *job) {
315 const typename ObjectFetcherT::Failures
316 11204837 retval = object_fetcher_->FetchCatalog(job->hash,
317 3738539 job->path,
318 &job->catalog,
319 3738569 !job->IsRootCatalog(),
320 job->parent);
321
2/3
✓ Branch 0 taken 3727442 times.
✓ Branch 1 taken 297 times.
✗ Branch 2 not taken.
3727729 switch (retval) {
322 3727442 case ObjectFetcherT::kFailOk:
323 3727442 break;
324
325 297 case ObjectFetcherT::kFailNotFound:
326
2/2
✓ Branch 0 taken 256 times.
✓ Branch 1 taken 41 times.
297 if (ignore_load_failure_) {
327
1/2
✓ Branch 3 taken 256 times.
✗ Branch 4 not taken.
256 LogCvmfs(kLogCatalogTraversal, kLogDebug,
328 "ignoring missing catalog "
329 "%s (swept before?)",
330 job->hash.ToString().c_str());
331 256 job->ignore = true;
332 256 return true;
333 }
334
335 default:
336
1/12
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 4 taken 41 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
31 LogCvmfs(kLogCatalogTraversal, error_sink_,
337 "failed to load catalog %s "
338 "(%d - %s)",
339 job->hash.ToStringWithSuffix().c_str(), retval,
340 Code2Ascii(retval));
341 41 return false;
342 }
343
344 // catalogs returned by ObjectFetcher<> are managing their database files by
345 // default... we need to manage this file manually here
346 3727442 job->catalog->DropDatabaseFileOwnership();
347
348 3726492 job->catalog_file_path = job->catalog->database_path();
349
1/2
✓ Branch 2 taken 3738142 times.
✗ Branch 3 not taken.
3729552 job->catalog_file_size = GetFileSize(job->catalog->database_path());
350
351 3737672 return true;
352 }
353
354
355 3693651 bool ReopenCatalog(CatalogJob *job) {
356
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3693651 times.
3693651 assert(!job->ignore);
357
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3693651 times.
3693651 assert(job->catalog == NULL);
358
359 7379192 job->catalog = CatalogTN::AttachFreely(job->path,
360 3693571 job->catalog_file_path,
361 3693571 job->hash,
362 job->parent,
363 3693651 !job->IsRootCatalog());
364
365
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3685621 times.
3685621 if (job->catalog == NULL) {
366 LogCvmfs(kLogCatalogTraversal, error_sink_,
367 "failed to re-open catalog %s", job->hash.ToString().c_str());
368 return false;
369 }
370
371 3685621 return true;
372 }
373
374
375 3763863 bool CloseCatalog(const bool unlink_db, CatalogJob *job) {
376
1/2
✓ Branch 0 taken 3763863 times.
✗ Branch 1 not taken.
3763863 delete job->catalog;
377 3763863 job->catalog = NULL;
378
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 3763863 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3763863 times.
3763863 if (!job->catalog_file_path.empty() && unlink_db) {
379 const int retval = unlink(job->catalog_file_path.c_str());
380 if (retval != 0) {
381 LogCvmfs(kLogCatalogTraversal, error_sink_, "Failed to unlink %s - %d",
382 job->catalog_file_path.c_str(), errno);
383 return false;
384 }
385 }
386
387 3763863 return true;
388 }
389
390 1640 shash::Any GetRepositoryRootCatalogHash() {
391 // get the manifest of the repository to learn about the entry point or the
392 // root catalog of the repository to be traversed
393
1/2
✓ Branch 1 taken 1640 times.
✗ Branch 2 not taken.
1640 UniquePtr<manifest::Manifest> manifest;
394 const typename ObjectFetcherT::Failures
395
1/2
✓ Branch 1 taken 1640 times.
✗ Branch 2 not taken.
1640 retval = object_fetcher_->FetchManifest(&manifest);
396
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1640 times.
1640 if (retval != ObjectFetcherT::kFailOk) {
397 LogCvmfs(kLogCatalogTraversal, kLogStderr,
398 "failed to load manifest "
399 "(%d - %s)",
400 retval, Code2Ascii(retval));
401 return shash::Any();
402 }
403
404
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1640 times.
1640 assert(manifest.IsValid());
405 1640 return manifest->catalog_hash();
406 1640 }
407
408 /**
409 * Checks if a root catalog is below one of the pruning thresholds.
410 * Pruning thresholds can be either the catalog's history depth or a timestamp
411 * threshold applied to the last modified timestamp of the catalog.
412 *
413 * @param ctx traversal context for traversal-specific state
414 * @param job the job defining the current catalog
415 * @return true if either history or timestamp threshold are satisfied
416 */
417 7072 bool IsBelowPruningThresholds(const CatalogJob &job,
418 const uint64_t history_depth,
419 const time_t timestamp_threshold) {
420
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 7072 times.
7072 assert(job.IsRootCatalog());
421
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7072 times.
7072 assert(job.catalog != NULL);
422
423 7072 const bool h = job.history_depth >= history_depth;
424
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7072 times.
7072 assert(timestamp_threshold >= 0);
425 7072 const bool t = catalog_info_shim_->GetLastModified(job.catalog)
426 7072 < unsigned(timestamp_threshold);
427
428
4/4
✓ Branch 0 taken 6621 times.
✓ Branch 1 taken 451 times.
✓ Branch 2 taken 3464 times.
✓ Branch 3 taken 3157 times.
7072 return t || h;
429 }
430
431 ObjectFetcherT *object_fetcher_;
432 CatalogTraversalInfoShim<CatalogTN> catalog_info_default_shim_;
433 CatalogTraversalInfoShim<CatalogTN> *catalog_info_shim_;
434 const uint64_t default_history_depth_;
435 const time_t default_timestamp_threshold_;
436 const bool no_close_;
437 const bool ignore_load_failure_;
438 const bool no_repeat_history_;
439 LogFacilities error_sink_;
440 };
441
442 /**
443 * This class traverses the catalog hierarchy of a CVMFS repository recursively.
444 * Also historic catalog trees can be traversed. The user needs to specify a
445 * callback which is called for each catalog on the way.
446 *
447 * CatalogTraversal<> can be configured and used in various ways:
448 * -> Historic catalog traversal
449 * -> Prune catalogs below a certain history level
450 * -> Prune catalogs older than a certain threshold timestamp
451 * -> Never traverse a certain catalog twice
452 * -> Breadth First Traversal or Depth First Traversal
453 * -> Optional catalog memory management (no_close)
454 * -> Use all Named Snapshots of a repository as traversal entry point
455 * -> Traverse starting from a provided catalog
456 * -> Traverse catalogs that were previously skipped
457 * -> Produce various flavours of catalogs (writable, mocked, ...)
458 *
459 * Breadth First Traversal Strategy
460 * Catalogs are handed out to the user identical as they are traversed.
461 * Say: From top to bottom. When you would simply print each received catalog
462 * the result would be a nice representation of the catalog tree.
463 * This method is more efficient, because catalogs are opened, processed and
464 * thrown away directly afterwards.
465 *
466 * Depth First Traversal Strategy
467 * The user gets the catalog tree starting from the leaf nodes.
468 * Say: From bottom to top. A user can assume that he got all children or
469 * historical ancestors of a catalog before.
470 * This method climbs down the full catalog tree and hands it out 'in reverse
471 * order'. Thus, catalogs on the way are opened, checked for their descendants
472 * and closed. Once all children and historical ancestors are processed, it is
473 * re-opened and handed out to the user.
474 * Note: This method needs more disk space to temporarily store downloaded but
475 * not yet processed catalogs.
476 *
477 * Note: Since all CVMFS catalog files together can grow to several gigabytes in
478 * file size, each catalog is loaded, processed and removed immediately
479 * afterwards. Except if no_close is specified, which allows the user to
480 * choose when a catalog should be closed. Keep in mind, that a user is
481 * responsible for both deletion of the delivered catalog objects as well
482 * as unlinking of the catalog database file.
483 *
484 *
485 * @param ObjectFetcherT Strategy Pattern implementation that defines how to
486 * retrieve catalogs from various backend storage types.
487 * Furthermore the ObjectFetcherT::CatalogTN is the type
488 * of catalog to be instantiated by CatalogTraversal<>.
489 *
490 * CAUTION: the CatalogTN* pointer passed into the callback becomes invalid
491 * directly after the callback method returns, unless you create the
492 * CatalogTraversal object with no_close = true.
493 */
494 template<class ObjectFetcherT>
495 class CatalogTraversal : public CatalogTraversalBase<ObjectFetcherT> {
496 public:
497 typedef CatalogTraversalBase<ObjectFetcherT> Base;
498 typedef ObjectFetcherT ObjectFetcherTN;
499 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
500 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
501 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
502 typedef typename Base::Parameters Parameters;
503 typedef typename Base::TraversalType TraversalType;
504 typedef typename Base::CatalogJob CatalogJob;
505
506 protected:
507 typedef std::set<shash::Any> HashSet;
508 typedef std::stack<CatalogJob> CatalogJobStack;
509
510 /**
511 * This struct represents a catalog traversal context. It needs to be re-
512 * created for each catalog traversal process and contains information to this
513 * specific catalog traversal run.
514 *
515 * @param history_depth the history traversal threshold
516 * @param traversal_type either breadth or depth first traversal strategy
517 * @param catalog_stack the call stack for catalogs to be traversed
518 * @param callback_stack used in depth first traversal for deferred yielding
519 */
520 struct TraversalContext {
521 2643 TraversalContext(const uint64_t history_depth,
522 const time_t timestamp_threshold,
523 const TraversalType traversal_type)
524 2643 : history_depth(history_depth)
525 2643 , timestamp_threshold(timestamp_threshold)
526
1/2
✓ Branch 2 taken 2643 times.
✗ Branch 3 not taken.
2643 , traversal_type(traversal_type) { }
527
528 const uint64_t history_depth;
529 const time_t timestamp_threshold;
530 const TraversalType traversal_type;
531 CatalogJobStack catalog_stack;
532 CatalogJobStack callback_stack;
533 };
534
535 public:
536 /**
537 * Constructs a new catalog traversal engine based on the construction
538 * parameters described in struct ConstructionParams.
539 */
540 1243 explicit CatalogTraversal(const Parameters &params)
541 1243 : CatalogTraversalBase<ObjectFetcherT>(params) { }
542
543 799 bool Traverse(const TraversalType type = Base::kBreadthFirst) {
544
1/2
✓ Branch 1 taken 799 times.
✗ Branch 2 not taken.
799 const shash::Any root_catalog_hash = this->GetRepositoryRootCatalogHash();
545
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 799 times.
799 if (root_catalog_hash.IsNull()) {
546 return false;
547 }
548
1/2
✓ Branch 1 taken 799 times.
✗ Branch 2 not taken.
799 return Traverse(root_catalog_hash, type);
549 }
550
551 1202 bool Traverse(const shash::Any &root_catalog_hash,
552 const TraversalType type = Base::kBreadthFirst) {
553 // add the root catalog of the repository as the first element on the job
554 // stack
555 1202 TraversalContext ctx(this->default_history_depth_,
556
1/2
✓ Branch 1 taken 1202 times.
✗ Branch 2 not taken.
1202 this->default_timestamp_threshold_, type);
557
1/2
✓ Branch 1 taken 1202 times.
✗ Branch 2 not taken.
1202 Push(root_catalog_hash, &ctx);
558
1/2
✓ Branch 1 taken 1202 times.
✗ Branch 2 not taken.
2404 return DoTraverse(&ctx);
559 1202 }
560
561 637 bool TraverseList(const std::vector<shash::Any> &catalog_list,
562 const TraversalType type = Base::kBreadthFirst) {
563 637 std::vector<shash::Any>::const_iterator i = catalog_list.begin();
564 637 const std::vector<shash::Any>::const_iterator iend = catalog_list.end();
565 637 bool success = true;
566
2/2
✓ Branch 1 taken 1441 times.
✓ Branch 2 taken 637 times.
2078 for (; i != iend; ++i) {
567
3/6
✓ Branch 0 taken 1441 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 1441 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1441 times.
✗ Branch 7 not taken.
1441 success = success && TraverseRevision(*i, type);
568 }
569 637 return success;
570 }
571
572 1441 bool TraverseRevision(const shash::Any &root_catalog_hash,
573 const TraversalType type = Base::kBreadthFirst) {
574 // add the given root catalog as the first element on the job stack
575
1/2
✓ Branch 1 taken 1441 times.
✗ Branch 2 not taken.
1441 TraversalContext ctx(Parameters::kNoHistory,
576 Parameters::kNoTimestampThreshold, type);
577
1/2
✓ Branch 1 taken 1441 times.
✗ Branch 2 not taken.
1441 Push(root_catalog_hash, &ctx);
578
1/2
✓ Branch 1 taken 1441 times.
✗ Branch 2 not taken.
2882 return DoTraverse(&ctx);
579 1441 }
580
581 protected:
582 /**
583 * This controls the actual traversal. Using a stack to traverse down the
584 * catalog hierarchy. This method implements the traversal itself, but not
585 * in which way catalogs are handed out to the user code.
586 *
587 * Each catalog is processed in these steps:
588 * 1.) Pop the next catalog from the stack
589 * Catalogs are always traversed from latest to oldest revision and
590 * from root to leaf nested catalogs
591 * 2.) Prepare the catalog for traversing
592 * 2.1.) Check if it was visited before
593 * 2.2.) Fetch the catalog database from the backend storage
594 * This might fail and produce an error. For root catalogs this
595 * error can be ignored (might be garbage collected before)
596 * 2.3.) Open the catalog database
597 * 2.4.) Check if the catalog is older than the timestamp threshold
598 * After these steps the catalog is opened either opened and ready for
599 * the traversal to continue, or it was marked for ignore (job.ignore)
600 * 3.) Check if the catalog is marked to be ignored
601 * Catalog might not be loadable (sweeped root catalog) or is too old
602 * Note: ignored catalogs can still trigger postponed yields
603 * 4.) Mark the catalog as visited to be able to skip it later on
604 * 5.) Find and push referencing catalogs
605 * This pushes all descendents of the current catalog onto the stack.
606 * Note that this is dependent on the strategy (depth or breadth first)
607 * and on the history threshold (see history_depth).
608 * 6.) Hand the catalog out to the user code
609 * Depending on the traversal strategy (depth of breadths first) this
610 * might immediately yield zero to N catalogs to the user code.
611 *
612 * Note: If anything unexpected goes wrong during the traversal process, it
613 * is aborted immediately.
614 *
615 * @param ctx the traversal context that steers the whole traversal process
616 * @return true on successful traversal and false on abort
617 */
618 2643 bool DoTraverse(TraversalContext *ctx) {
619
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2643 times.
2643 assert(ctx->callback_stack.empty());
620
621
5/5
✓ Branch 1 taken 48653 times.
✓ Branch 2 taken 31 times.
✓ Branch 3 taken 2505 times.
✓ Branch 5 taken 51189 times.
✓ Branch 6 taken 2612 times.
104990 while (!ctx->catalog_stack.empty()) {
622 // Get the top most catalog for the next processing step
623
1/2
✓ Branch 1 taken 51189 times.
✗ Branch 2 not taken.
51189 CatalogJob job = Pop(ctx);
624
625
3/4
✓ Branch 1 taken 51189 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2372 times.
✓ Branch 4 taken 48817 times.
51189 if (ShouldBeSkipped(job)) {
626 2372 job.ignore = true;
627 // download and open the catalog for processing
628
3/4
✓ Branch 1 taken 48817 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 31 times.
✓ Branch 4 taken 48786 times.
48817 } else if (!this->PrepareCatalog(&job)) {
629 31 return false;
630 }
631
632 // ignored catalogs don't need to be processed anymore but they might
633 // release postponed yields
634
2/2
✓ Branch 0 taken 2505 times.
✓ Branch 1 taken 48653 times.
51158 if (job.ignore) {
635
2/4
✓ Branch 1 taken 2505 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 2505 times.
2505 if (!HandlePostponedYields(job, ctx)) {
636 return false;
637 }
638 2505 continue;
639 }
640
641 // push catalogs referenced by the current catalog (onto stack)
642
1/2
✓ Branch 1 taken 48653 times.
✗ Branch 2 not taken.
48653 this->MarkAsVisited(job);
643
1/2
✓ Branch 1 taken 48653 times.
✗ Branch 2 not taken.
48653 PushReferencedCatalogs(&job, ctx);
644
645 // notify listeners
646
2/4
✓ Branch 1 taken 48653 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 48653 times.
48653 if (!YieldToListeners(&job, ctx)) {
647 return false;
648 }
649 }
650
651 // invariant: after the traversal finished, there should be no more catalogs
652 // to traverse or to yield!
653
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2612 times.
2612 assert(ctx->catalog_stack.empty());
654
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2612 times.
2612 assert(ctx->callback_stack.empty());
655 2612 return true;
656 }
657
658 48653 void PushReferencedCatalogs(CatalogJob *job, TraversalContext *ctx) {
659
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(!job->ignore);
660
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(job->catalog != NULL);
661
3/4
✓ Branch 0 taken 11510 times.
✓ Branch 1 taken 37143 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11510 times.
48653 assert(ctx->traversal_type == Base::kBreadthFirst
662 || ctx->traversal_type == Base::kDepthFirst);
663
664 // this differs, depending on the traversal strategy.
665 //
666 // Breadths First Traversal
667 // Catalogs are traversed from top (root catalog) to bottom (leaf
668 // catalogs) and from more recent (HEAD revision) to older (historic
669 // revisions)
670 //
671 // Depth First Traversal
672 // Catalogs are traversed from oldest revision (depends on the configured
673 // maximal history depth) to the HEAD revision and from bottom (leafs) to
674 // top (root catalogs)
675 97306 job->referenced_catalogs = (ctx->traversal_type == Base::kBreadthFirst)
676
2/2
✓ Branch 0 taken 37143 times.
✓ Branch 1 taken 11510 times.
48653 ? PushPreviousRevision(*job, ctx)
677 37143 + PushNestedCatalogs(*job, ctx)
678 11510 : PushNestedCatalogs(*job, ctx)
679 11510 + PushPreviousRevision(*job, ctx);
680 48653 }
681
682 /**
683 * Pushes the previous revision of a (root) catalog.
684 * @return the number of catalogs pushed on the processing stack
685 */
686 48653 unsigned int PushPreviousRevision(const CatalogJob &job,
687 TraversalContext *ctx) {
688 // only root catalogs are used for entering a previous revision (graph)
689
2/2
✓ Branch 1 taken 44622 times.
✓ Branch 2 taken 4031 times.
48653 if (!job.catalog->IsRoot()) {
690 44622 return 0;
691 }
692
693
1/2
✓ Branch 1 taken 4031 times.
✗ Branch 2 not taken.
4031 const shash::Any previous_revision = job.catalog->GetPreviousRevision();
694
2/2
✓ Branch 1 taken 337 times.
✓ Branch 2 taken 3694 times.
4031 if (previous_revision.IsNull()) {
695 337 return 0;
696 }
697
698 // check if the next deeper history level is actually requested
699 // Note: if the current catalog is below the timestamp threshold it will be
700 // traversed and only its ancestor revision will not be pushed anymore
701
2/2
✓ Branch 0 taken 1853 times.
✓ Branch 1 taken 1841 times.
3694 if (this->IsBelowPruningThresholds(job, ctx->history_depth,
702
1/2
✓ Branch 1 taken 3694 times.
✗ Branch 2 not taken.
3694 ctx->timestamp_threshold)) {
703 1853 return 0;
704 }
705
706
3/6
✓ Branch 2 taken 1841 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1841 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 1841 times.
✗ Branch 9 not taken.
1841 Push(CatalogJob("", previous_revision, 0, job.history_depth + 1, NULL),
707 ctx);
708 1841 return 1;
709 }
710
711 /**
712 * Pushes all the referenced nested catalogs.
713 * @return the number of catalogs pushed on the processing stack
714 */
715 48653 unsigned int PushNestedCatalogs(const CatalogJob &job,
716 TraversalContext *ctx) {
717 typedef typename CatalogTN::NestedCatalogList NestedCatalogList;
718
1/2
✓ Branch 1 taken 48653 times.
✗ Branch 2 not taken.
48653 const NestedCatalogList nested = job.catalog->ListOwnNestedCatalogs();
719 48653 typename NestedCatalogList::const_iterator i = nested.begin();
720 48653 typename NestedCatalogList::const_iterator iend = nested.end();
721
2/2
✓ Branch 3 taken 46767 times.
✓ Branch 4 taken 48653 times.
95420 for (; i != iend; ++i) {
722
2/2
✓ Branch 0 taken 2077 times.
✓ Branch 1 taken 44690 times.
46767 CatalogTN *parent = (this->no_close_) ? job.catalog : NULL;
723
2/4
✓ Branch 1 taken 46767 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 46767 times.
✗ Branch 5 not taken.
46767 const CatalogJob new_job(i->mountpoint.ToString(),
724 46767 i->hash,
725 46767 job.tree_level + 1,
726 46767 job.history_depth,
727 parent);
728
1/2
✓ Branch 1 taken 46767 times.
✗ Branch 2 not taken.
46767 Push(new_job, ctx);
729 }
730
731 97306 return nested.size();
732 48653 }
733
734 2643 void Push(const shash::Any &root_catalog_hash, TraversalContext *ctx) {
735
3/6
✓ Branch 2 taken 2643 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2643 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 2643 times.
✗ Branch 9 not taken.
2643 Push(CatalogJob("", root_catalog_hash, 0, 0), ctx);
736 2643 }
737
738 48653 bool YieldToListeners(CatalogJob *job, TraversalContext *ctx) {
739
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(!job->ignore);
740
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(job->catalog != NULL);
741
3/4
✓ Branch 0 taken 11510 times.
✓ Branch 1 taken 37143 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11510 times.
48653 assert(ctx->traversal_type == Base::kBreadthFirst
742 || ctx->traversal_type == Base::kDepthFirst);
743
744 // in breadth first search mode, every catalog is simply handed out once
745 // it is visited. No extra magic required...
746
2/2
✓ Branch 0 taken 37143 times.
✓ Branch 1 taken 11510 times.
48653 if (ctx->traversal_type == Base::kBreadthFirst) {
747 37143 return Yield(job);
748 }
749
750 // in depth first search mode, catalogs might need to wait until all of
751 // their referenced catalogs are yielded (ctx.callback_stack)...
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11510 times.
11510 assert(ctx->traversal_type == Base::kDepthFirst);
753
2/2
✓ Branch 0 taken 4362 times.
✓ Branch 1 taken 7148 times.
11510 if (job->referenced_catalogs > 0) {
754 4362 PostponeYield(job, ctx);
755 4362 return true;
756 }
757
758 // this catalog can be yielded
759
2/4
✓ Branch 1 taken 7148 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 7148 times.
✗ Branch 5 not taken.
7148 return Yield(job) && HandlePostponedYields(*job, ctx);
760 }
761
762 /**
763 * Checks the traversal history if the given catalog was traversed or at least
764 * seen before. If 'no_repeat_history' is not set this is always 'false'.
765 *
766 * @param job the job to be checked against the traversal history
767 * @return true if the specified catalog was hit before
768 */
769 51189 bool ShouldBeSkipped(const CatalogJob &job) {
770
4/4
✓ Branch 0 taken 17740 times.
✓ Branch 1 taken 33449 times.
✓ Branch 3 taken 2372 times.
✓ Branch 4 taken 15368 times.
51189 return this->no_repeat_history_ && (visited_catalogs_.count(job.hash) > 0);
771 }
772
773 48653 void MarkAsVisited(const CatalogJob &job) {
774
2/2
✓ Branch 0 taken 15235 times.
✓ Branch 1 taken 33418 times.
48653 if (this->no_repeat_history_) {
775 15235 visited_catalogs_.insert(job.hash);
776 }
777 48653 }
778
779 private:
780 /**
781 * This actually hands out a catalog to the user code
782 * It is not called by DoTraversa() directly but by wrapper functions in order
783 * to provide higher level yielding behaviour.
784 */
785 48653 bool Yield(CatalogJob *job) {
786
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(!job->ignore);
787
3/4
✓ Branch 0 taken 4362 times.
✓ Branch 1 taken 44291 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4362 times.
48653 assert(job->catalog != NULL || job->postponed);
788
789 // catalog was pushed on ctx.callback_stack before, it might need to be re-
790 // opened. If CatalogTraversal<> is configured with no_close, it was not
791 // closed before, hence does not need a re-open.
792
5/8
✓ Branch 0 taken 4362 times.
✓ Branch 1 taken 44291 times.
✓ Branch 2 taken 4362 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 4362 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 48653 times.
48653 if (job->postponed && !this->no_close_ && !this->ReopenCatalog(job)) {
793 return false;
794 }
795
796 // hand the catalog out to the user code (see Observable<>)
797
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48653 times.
48653 assert(job->catalog != NULL);
798
1/2
✓ Branch 2 taken 48653 times.
✗ Branch 3 not taken.
48653 this->NotifyListeners(job->GetCallbackData());
799
800 // skip the catalog closing procedure if asked for
801 // Note: In this case it is the user's responsibility to both delete the
802 // yielded catalog object and the underlying database temp file
803
2/2
✓ Branch 0 taken 2170 times.
✓ Branch 1 taken 46483 times.
48653 if (this->no_close_) {
804 2170 return true;
805 }
806
807 // we can close the catalog here and delete the temporary file
808 46483 const bool unlink_db = true;
809 46483 return this->CloseCatalog(unlink_db, job);
810 }
811
812
813 /**
814 * Pushes a catalog to the callback_stack for later yielding
815 * Note: this is only used for the Depth First Traversal strategy!
816 */
817 4362 void PostponeYield(CatalogJob *job, TraversalContext *ctx) {
818
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4362 times.
4362 assert(job->referenced_catalogs > 0);
819
820 4362 job->postponed = true;
821
1/2
✓ Branch 0 taken 4362 times.
✗ Branch 1 not taken.
4362 if (!this->no_close_) {
822 4362 const bool unlink_db = false; // will reopened just before yielding
823 4362 this->CloseCatalog(unlink_db, job);
824 }
825 4362 ctx->callback_stack.push(*job);
826 4362 }
827
828 /**
829 * Determines if there are postponed yields that can be set free based on
830 * the catalog currently being yielded
831 *
832 * Note: the CatalogJob being handed into this method does not necessarily
833 * have an open Catalog attached to it.
834 *
835 * @param ctx the TraversalContext
836 * @param job the catalog job that was just yielded
837 * @return true on successful execution
838 */
839 9653 bool HandlePostponedYields(const CatalogJob &job, TraversalContext *ctx) {
840
2/2
✓ Branch 0 taken 1848 times.
✓ Branch 1 taken 7805 times.
9653 if (ctx->traversal_type == Base::kBreadthFirst) {
841 1848 return true;
842 }
843
844
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7805 times.
7805 assert(ctx->traversal_type == Base::kDepthFirst);
845
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7805 times.
7805 assert(job.referenced_catalogs == 0);
846
847 // walk through the callback_stack and yield all catalogs that have no un-
848 // yielded referenced_catalogs anymore. Every time a CatalogJob in the
849 // callback_stack gets yielded it decrements the referenced_catalogs of the
850 // next top of the stack (it's parent CatalogJob waiting for yielding)
851 7805 CatalogJobStack &clbs = ctx->callback_stack;
852
2/2
✓ Branch 1 taken 11556 times.
✓ Branch 2 taken 611 times.
12167 while (!clbs.empty()) {
853 11556 CatalogJob &postponed_job = clbs.top();
854
2/2
✓ Branch 0 taken 7194 times.
✓ Branch 1 taken 4362 times.
11556 if (--postponed_job.referenced_catalogs > 0) {
855 7194 break;
856 }
857
858
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 4362 times.
4362 if (!Yield(&postponed_job)) {
859 return false;
860 }
861 4362 clbs.pop();
862 }
863
864 7805 return true;
865 }
866
867 51251 void Push(const CatalogJob &job, TraversalContext *ctx) {
868 51251 ctx->catalog_stack.push(job);
869 51251 }
870
871 51189 CatalogJob Pop(TraversalContext *ctx) {
872 51189 CatalogJob job = ctx->catalog_stack.top();
873 51189 ctx->catalog_stack.pop();
874 51189 return job;
875 }
876
877 protected:
878 HashSet visited_catalogs_;
879 };
880
881 template<class ObjectFetcherT>
882 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::
883 kFullHistory = std::numeric_limits<uint64_t>::max();
884
885 template<class ObjectFetcherT>
886 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoHistory = 0;
887
888 template<class ObjectFetcherT>
889 const time_t
890 CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoTimestampThreshold = 0;
891
892 } // namespace swissknife
893
894 #endif // CVMFS_CATALOG_TRAVERSAL_H_
895