GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/catalog_traversal.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 246 264 93.2%
Branches: 146 263 55.5%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_CATALOG_TRAVERSAL_H_
6 #define CVMFS_CATALOG_TRAVERSAL_H_
7
8 #include <cassert>
9 #include <limits>
10 #include <set>
11 #include <stack>
12 #include <string>
13 #include <vector>
14
15 #include "catalog.h"
16 #include "compression/compression.h"
17 #include "crypto/signature.h"
18 #include "history_sqlite.h"
19 #include "manifest.h"
20 #include "object_fetcher.h"
21 #include "util/concurrency.h"
22 #include "util/logging.h"
23
24 namespace catalog {
25 class Catalog;
26 class WritableCatalog;
27 } // namespace catalog
28
29 namespace swissknife {
30
31 /**
32 * Callback data which has to be implemented by the registered callback
33 * functions/methods (see Observable<> for further details)
34 * @param catalog the catalog object which needs to be processed
35 * @param catalog_hash the content hash of the catalog
36 * @param tree_level the depth in the nested catalog tree
37 * (starting at zero)
38 * @param file_size the size of the downloaded catalog database file
39 * @param history_depth the distance from the current HEAD revision
40 * (current HEAD has history_depth 0)
41 */
42 template<class CatalogT>
43 struct CatalogTraversalData {
44 18076522 CatalogTraversalData(const CatalogT *catalog,
45 const shash::Any &catalog_hash,
46 const unsigned tree_level,
47 const size_t file_size,
48 const uint64_t history_depth)
49 18076522 : catalog(catalog)
50 18076522 , catalog_hash(catalog_hash)
51 18076522 , tree_level(tree_level)
52 18076522 , file_size(file_size)
53 18076522 , history_depth(history_depth) { }
54
55 const CatalogT *catalog;
56 const shash::Any catalog_hash;
57 const unsigned int tree_level;
58 const size_t file_size;
59 const uint64_t history_depth;
60 };
61
62
63 /**
64 * A layer to extract information from a catalog. Users of the catalog
65 * traversal can provide derived classes to overwrite behavior. Currently used
66 * to get the last-modified timestamp in configurable manner: for the garbage
67 * collection, the timestamp of the catalog hash in the reflog counts, which
68 * is the same or newer than the one stored in the catalog.
69 */
70 template<class CatalogT>
71 class CatalogTraversalInfoShim {
72 public:
73 8534 virtual ~CatalogTraversalInfoShim() { }
74
75 // Default implementation: use the catalog's timestamp
76 9766 virtual uint64_t GetLastModified(const CatalogT *catalog) {
77 9766 return catalog->GetLastModified();
78 }
79 };
80
81
82 /**
83 * A base class for CatalogTraversal and CatalogTraversalParallel implementing
84 * common functionality. Actual traversal classes inherit from this class.
85 *
86 */
87 template<class ObjectFetcherT>
88 class CatalogTraversalBase
89 : public Observable<
90 CatalogTraversalData<typename ObjectFetcherT::CatalogTN> > {
91 public:
92 typedef ObjectFetcherT ObjectFetcherTN;
93 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
94 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
95 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
96
97 /**
98 * @param repo_url the path to the repository to be traversed:
99 * -> either absolute path to the local catalogs
100 * -> or an URL to a remote repository
101 * @param repo_name fully qualified repository name (used for
102 * remote repository signature check) (optional)
103 * @param repo_keys a comma separated list of public key file
104 * locations to verify the repository manifest
105 * file
106 * @param history depth of the desired catalog history traversal
107 * (default: 0 - only HEAD catalogs are traversed)
108 * @param timestamp timestamp of history traversal threshold
109 * (default: 0 - no threshold, traverse
110 * everything)
111 * @param no_repeat_history keep track of visited catalogs and don't
112 * re-visit them in previous revisions
113 * @param no_close do not close catalogs after they were attached
114 * (catalogs retain their parent/child pointers)
115 * @param ignore_load_failure suppressed an error message if a catalog file
116 * could not be loaded (i.e. was sweeped before by
117 * a garbage collection run)
118 * @param quiet silence messages that would go to stderr
119 * @param tmp_dir path to the temporary directory to be used
120 * (default: /tmp)
121 *
122 * ONLY FOR CatalogTraversalParallel
123 * @param num_threads number of threads concurrently traversing
124 * the catalog trees; hint: set up based on
125 * CPU utilization and optimal number of
126 * download threads
127 * (default: 8)
128 *
129 * @param serialize_callbacks do not call multiple catalog callbacks
130 * concurrently; ensures safe access to shared
131 * variables inside the callback; turn off if
132 * the registered callback is thread-safe
133 * (default: true)
134 */
135 struct Parameters {
136 3321 Parameters()
137 3321 : object_fetcher(NULL)
138 3321 , history(kNoHistory)
139 3321 , timestamp(kNoTimestampThreshold)
140 3321 , no_repeat_history(false)
141 3321 , no_close(false)
142 3321 , ignore_load_failure(false)
143 3321 , quiet(false)
144 3321 , num_threads(8)
145 3321 , serialize_callbacks(true) { }
146
147 static const uint64_t kFullHistory;
148 static const uint64_t kNoHistory;
149 static const time_t kNoTimestampThreshold;
150
151 ObjectFetcherT *object_fetcher;
152
153 uint64_t history;
154 time_t timestamp;
155 bool no_repeat_history;
156 bool no_close;
157 bool ignore_load_failure;
158 bool quiet;
159 // These parameters only apply for CatalogTraversalParallel
160 unsigned int num_threads;
161 bool serialize_callbacks;
162 };
163
164 enum TraversalType {
165 kBreadthFirst,
166 kDepthFirst
167 };
168
169 3321 explicit CatalogTraversalBase(const Parameters &params)
170 3321 : object_fetcher_(params.object_fetcher)
171 3321 , catalog_info_shim_(&catalog_info_default_shim_)
172 3321 , default_history_depth_(params.history)
173 3321 , default_timestamp_threshold_(params.timestamp)
174 3321 , no_close_(params.no_close)
175 3321 , ignore_load_failure_(params.ignore_load_failure)
176 3321 , no_repeat_history_(params.no_repeat_history)
177
2/2
✓ Branch 2 taken 1084 times.
✓ Branch 3 taken 2237 times.
3321 , error_sink_((params.quiet) ? kLogDebug : kLogStderr) {
178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3321 times.
3321 assert(object_fetcher_ != NULL);
179 3321 }
180
181 /**
182 * Starts the traversal process.
183 * After calling this methods CatalogTraversal will go through all catalogs
184 * and call the registered callback methods for each found catalog.
185 * If something goes wrong in the process, the traversal will be cancelled.
186 *
187 * @param type breadths or depth first traversal
188 * @return true, when all catalogs were successfully processed. On
189 * failure the traversal is cancelled and false is returned.
190 */
191 virtual bool Traverse(const TraversalType type = kBreadthFirst) = 0;
192
193 /**
194 * Starts the traversal process at the catalog pointed to by the given hash
195 *
196 * @param root_catalog_hash the entry point into the catalog traversal
197 * @param type breadths or depth first traversal
198 * @return true when catalogs were successfully traversed
199 */
200 virtual bool Traverse(const shash::Any &root_catalog_hash,
201 const TraversalType type = kBreadthFirst) = 0;
202
203 /**
204 * Traverse a list of revisions represented by root catalogs from first
205 * to last. DO NOT traverse previous revisions based on history and
206 * timestamp threshold settings.
207 *
208 * @param catalog_list list of root catalog hashes
209 * @param type breadth- or depth- first traversal
210 * @return true on success
211 */
212 virtual bool TraverseList(const std::vector<shash::Any> &catalog_list,
213 const TraversalType type = kBreadthFirst) = 0;
214
215 /**
216 * Starts the traversal process at the catalog pointed to by the given hash
217 * but doesn't traverse into predecessor catalog revisions. This overrides the
218 * TraversalParameter settings provided at construction.
219 *
220 * @param root_catalog_hash the entry point into the catalog traversal
221 * @param type breadths or depth first traversal
222 * @return true when catalogs were successfully traversed
223 */
224 virtual bool TraverseRevision(const shash::Any &root_catalog_hash,
225 const TraversalType type = kBreadthFirst) = 0;
226
227 /**
228 * Figures out all named tags in a repository and uses all of them as entry
229 * points into the traversal process.
230 *
231 * @param type breadths or depth first traversal
232 * @return true when catalog traversal successfully finished
233 */
234 1455 virtual bool TraverseNamedSnapshots(
235 const TraversalType type = kBreadthFirst) {
236 typedef std::vector<shash::Any> HashList;
237
238
1/2
✓ Branch 1 taken 1455 times.
✗ Branch 2 not taken.
1455 UniquePtr<HistoryTN> tag_db;
239 const typename ObjectFetcherT::Failures
240
2/4
✓ Branch 1 taken 1455 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1455 times.
✗ Branch 5 not taken.
1455 retval = object_fetcher_->FetchHistory(&tag_db);
241
2/3
✓ Branch 0 taken 1386 times.
✓ Branch 1 taken 69 times.
✗ Branch 2 not taken.
1455 switch (retval) {
242 1386 case ObjectFetcherT::kFailOk:
243 1386 break;
244
245 69 case ObjectFetcherT::kFailNotFound:
246
1/2
✓ Branch 1 taken 69 times.
✗ Branch 2 not taken.
69 LogCvmfs(kLogCatalogTraversal, kLogDebug,
247 "didn't find a history database to traverse");
248 69 return true;
249
250 default:
251 LogCvmfs(kLogCatalogTraversal, kLogStderr,
252 "failed to download history database (%d - %s)", retval,
253 Code2Ascii(retval));
254 return false;
255 }
256
257 1386 HashList root_hashes;
258
1/2
✓ Branch 2 taken 1386 times.
✗ Branch 3 not taken.
1386 bool success = tag_db->GetHashes(&root_hashes);
259
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1386 times.
1386 assert(success);
260
1/2
✓ Branch 1 taken 1386 times.
✗ Branch 2 not taken.
1386 return TraverseList(root_hashes, type);
261 1455 }
262
263 43 void SetCatalogInfoShim(CatalogTraversalInfoShim<CatalogTN> *shim) {
264 43 catalog_info_shim_ = shim;
265 43 }
266
267 protected:
268 typedef std::set<shash::Any> HashSet;
269
270 /**
271 * This struct keeps information about a catalog that still needs to be
272 * traversed by a currently running catalog traversal process.
273 */
274 struct CatalogJob {
275 18082937 CatalogJob(const std::string &path,
276 const shash::Any &hash,
277 const unsigned tree_level,
278 const uint64_t history_depth,
279 CatalogTN *parent = NULL)
280 18082937 : path(path)
281 18082937 , hash(hash)
282 18082937 , tree_level(tree_level)
283 18082937 , history_depth(history_depth)
284 18082937 , parent(parent)
285 18082937 , catalog_file_size(0)
286 18082937 , ignore(false)
287 18082937 , catalog(NULL)
288 18082937 , referenced_catalogs(0)
289 18082937 , postponed(false) { }
290
291 36102426 bool IsRootCatalog() const { return tree_level == 0; }
292
293 18076522 CallbackDataTN GetCallbackData() const {
294 18076522 return CallbackDataTN(catalog, hash, tree_level, catalog_file_size,
295 18076522 history_depth);
296 }
297
298 // initial state description
299 const std::string path;
300 const shash::Any hash;
301 const unsigned tree_level;
302 const uint64_t history_depth;
303 CatalogTN *parent;
304
305 // dynamic processing state (used internally)
306 std::string catalog_file_path;
307 size_t catalog_file_size;
308 bool ignore;
309 CatalogTN *catalog;
310 uint64_t referenced_catalogs;
311 bool postponed;
312 };
313
314 18075400 bool PrepareCatalog(CatalogJob *job) {
315 const typename ObjectFetcherT::Failures
316 54174064 retval = object_fetcher_->FetchCatalog(job->hash,
317 18075008 job->path,
318 &job->catalog,
319 18075400 !job->IsRootCatalog(),
320 job->parent);
321
3/3
✓ Branch 0 taken 18020809 times.
✓ Branch 1 taken 446 times.
✓ Branch 2 taken 2401 times.
18023656 switch (retval) {
322 18020809 case ObjectFetcherT::kFailOk:
323 18020809 break;
324
325 446 case ObjectFetcherT::kFailNotFound:
326
2/2
✓ Branch 0 taken 377 times.
✓ Branch 1 taken 69 times.
446 if (ignore_load_failure_) {
327
1/2
✓ Branch 3 taken 377 times.
✗ Branch 4 not taken.
377 LogCvmfs(kLogCatalogTraversal, kLogDebug,
328 "ignoring missing catalog "
329 "%s (swept before?)",
330 job->hash.ToString().c_str());
331 377 job->ignore = true;
332 377 return true;
333 }
334
335 default:
336
1/12
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 4 taken 69 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
2470 LogCvmfs(kLogCatalogTraversal, error_sink_,
337 "failed to load catalog %s "
338 "(%d - %s)",
339 job->hash.ToStringWithSuffix().c_str(), retval,
340 Code2Ascii(retval));
341 69 return false;
342 }
343
344 // catalogs returned by ObjectFetcher<> are managing their database files by
345 // default... we need to manage this file manually here
346 18020809 job->catalog->DropDatabaseFileOwnership();
347
348 18016889 job->catalog_file_path = job->catalog->database_path();
349
1/2
✓ Branch 2 taken 18074513 times.
✗ Branch 3 not taken.
18015713 job->catalog_file_size = GetFileSize(job->catalog->database_path());
350
351 18074464 return true;
352 }
353
354
355 18034238 bool ReopenCatalog(CatalogJob *job) {
356
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18034238 times.
18034238 assert(!job->ignore);
357
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18034238 times.
18034238 assert(job->catalog == NULL);
358
359 36019378 job->catalog = CatalogTN::AttachFreely(job->path,
360 18034140 job->catalog_file_path,
361 18034140 job->hash,
362 job->parent,
363 18034238 !job->IsRootCatalog());
364
365
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17985238 times.
17985238 if (job->catalog == NULL) {
366 LogCvmfs(kLogCatalogTraversal, error_sink_,
367 "failed to re-open catalog %s", job->hash.ToString().c_str());
368 return false;
369 }
370
371 17985238 return true;
372 }
373
374
375 18148214 bool CloseCatalog(const bool unlink_db, CatalogJob *job) {
376
1/2
✓ Branch 0 taken 18148214 times.
✗ Branch 1 not taken.
18148214 delete job->catalog;
377 18148214 job->catalog = NULL;
378
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 18148214 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 18148214 times.
18148214 if (!job->catalog_file_path.empty() && unlink_db) {
379 const int retval = unlink(job->catalog_file_path.c_str());
380 if (retval != 0) {
381 LogCvmfs(kLogCatalogTraversal, error_sink_, "Failed to unlink %s - %d",
382 job->catalog_file_path.c_str(), errno);
383 return false;
384 }
385 }
386
387 18148214 return true;
388 }
389
390 2214 shash::Any GetRepositoryRootCatalogHash() {
391 // get the manifest of the repository to learn about the entry point or the
392 // root catalog of the repository to be traversed
393
1/2
✓ Branch 1 taken 2214 times.
✗ Branch 2 not taken.
2214 UniquePtr<manifest::Manifest> manifest;
394 const typename ObjectFetcherT::Failures
395
1/2
✓ Branch 1 taken 2214 times.
✗ Branch 2 not taken.
2214 retval = object_fetcher_->FetchManifest(&manifest);
396
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2214 times.
2214 if (retval != ObjectFetcherT::kFailOk) {
397 LogCvmfs(kLogCatalogTraversal, kLogStderr,
398 "failed to load manifest "
399 "(%d - %s)",
400 retval, Code2Ascii(retval));
401 return shash::Any();
402 }
403
404
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2214 times.
2214 assert(manifest.IsValid());
405 2214 return manifest->catalog_hash();
406 2214 }
407
408 /**
409 * Checks if a root catalog is below one of the pruning thresholds.
410 * Pruning thresholds can be either the catalog's history depth or a timestamp
411 * threshold applied to the last modified timestamp of the catalog.
412 *
413 * @param ctx traversal context for traversal-specific state
414 * @param job the job defining the current catalog
415 * @return true if either history or timestamp threshold are satisfied
416 */
417 9938 bool IsBelowPruningThresholds(const CatalogJob &job,
418 const uint64_t history_depth,
419 const time_t timestamp_threshold) {
420
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 9938 times.
9938 assert(job.IsRootCatalog());
421
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9938 times.
9938 assert(job.catalog != NULL);
422
423 9938 const bool h = job.history_depth >= history_depth;
424
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9938 times.
9938 assert(timestamp_threshold >= 0);
425 9938 const bool t = catalog_info_shim_->GetLastModified(job.catalog)
426 9938 < unsigned(timestamp_threshold);
427
428
4/4
✓ Branch 0 taken 9387 times.
✓ Branch 1 taken 551 times.
✓ Branch 2 taken 4750 times.
✓ Branch 3 taken 4637 times.
9938 return t || h;
429 }
430
431 ObjectFetcherT *object_fetcher_;
432 CatalogTraversalInfoShim<CatalogTN> catalog_info_default_shim_;
433 CatalogTraversalInfoShim<CatalogTN> *catalog_info_shim_;
434 const uint64_t default_history_depth_;
435 const time_t default_timestamp_threshold_;
436 const bool no_close_;
437 const bool ignore_load_failure_;
438 const bool no_repeat_history_;
439 LogFacilities error_sink_;
440 };
441
442 /**
443 * This class traverses the catalog hierarchy of a CVMFS repository recursively.
444 * Also historic catalog trees can be traversed. The user needs to specify a
445 * callback which is called for each catalog on the way.
446 *
447 * CatalogTraversal<> can be configured and used in various ways:
448 * -> Historic catalog traversal
449 * -> Prune catalogs below a certain history level
450 * -> Prune catalogs older than a certain threshold timestamp
451 * -> Never traverse a certain catalog twice
452 * -> Breadth First Traversal or Depth First Traversal
453 * -> Optional catalog memory management (no_close)
454 * -> Use all Named Snapshots of a repository as traversal entry point
455 * -> Traverse starting from a provided catalog
456 * -> Traverse catalogs that were previously skipped
457 * -> Produce various flavours of catalogs (writable, mocked, ...)
458 *
459 * Breadth First Traversal Strategy
460 * Catalogs are handed out to the user identical as they are traversed.
461 * Say: From top to bottom. When you would simply print each received catalog
462 * the result would be a nice representation of the catalog tree.
463 * This method is more efficient, because catalogs are opened, processed and
464 * thrown away directly afterwards.
465 *
466 * Depth First Traversal Strategy
467 * The user gets the catalog tree starting from the leaf nodes.
468 * Say: From bottom to top. A user can assume that he got all children or
469 * historical ancestors of a catalog before.
470 * This method climbs down the full catalog tree and hands it out 'in reverse
471 * order'. Thus, catalogs on the way are opened, checked for their descendants
472 * and closed. Once all children and historical ancestors are processed, it is
473 * re-opened and handed out to the user.
474 * Note: This method needs more disk space to temporarily store downloaded but
475 * not yet processed catalogs.
476 *
477 * Note: Since all CVMFS catalog files together can grow to several gigabytes in
478 * file size, each catalog is loaded, processed and removed immediately
479 * afterwards. Except if no_close is specified, which allows the user to
480 * choose when a catalog should be closed. Keep in mind, that a user is
481 * responsible for both deletion of the delivered catalog objects as well
482 * as unlinking of the catalog database file.
483 *
484 *
485 * @param ObjectFetcherT Strategy Pattern implementation that defines how to
486 * retrieve catalogs from various backend storage types.
487 * Furthermore the ObjectFetcherT::CatalogTN is the type
488 * of catalog to be instantiated by CatalogTraversal<>.
489 *
490 * CAUTION: the CatalogTN* pointer passed into the callback becomes invalid
491 * directly after the callback method returns, unless you create the
492 * CatalogTraversal object with no_close = true.
493 */
494 template<class ObjectFetcherT>
495 class CatalogTraversal : public CatalogTraversalBase<ObjectFetcherT> {
496 public:
497 typedef CatalogTraversalBase<ObjectFetcherT> Base;
498 typedef ObjectFetcherT ObjectFetcherTN;
499 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
500 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
501 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
502 typedef typename Base::Parameters Parameters;
503 typedef typename Base::TraversalType TraversalType;
504 typedef typename Base::CatalogJob CatalogJob;
505
506 protected:
507 typedef std::set<shash::Any> HashSet;
508 typedef std::stack<CatalogJob> CatalogJobStack;
509
510 /**
511 * This struct represents a catalog traversal context. It needs to be re-
512 * created for each catalog traversal process and contains information to this
513 * specific catalog traversal run.
514 *
515 * @param history_depth the history traversal threshold
516 * @param traversal_type either breadth or depth first traversal strategy
517 * @param catalog_stack the call stack for catalogs to be traversed
518 * @param callback_stack used in depth first traversal for deferred yielding
519 */
520 struct TraversalContext {
521 5160 TraversalContext(const uint64_t history_depth,
522 const time_t timestamp_threshold,
523 const TraversalType traversal_type)
524 5160 : history_depth(history_depth)
525 5160 , timestamp_threshold(timestamp_threshold)
526
1/2
✓ Branch 2 taken 5160 times.
✗ Branch 3 not taken.
5160 , traversal_type(traversal_type) { }
527
528 const uint64_t history_depth;
529 const time_t timestamp_threshold;
530 const TraversalType traversal_type;
531 CatalogJobStack catalog_stack;
532 CatalogJobStack callback_stack;
533 };
534
535 public:
536 /**
537 * Constructs a new catalog traversal engine based on the construction
538 * parameters described in struct ConstructionParams.
539 */
540 1562 explicit CatalogTraversal(const Parameters &params)
541 1562 : CatalogTraversalBase<ObjectFetcherT>(params) { }
542
543 1241 bool Traverse(const TraversalType type = Base::kBreadthFirst) {
544
1/2
✓ Branch 1 taken 1241 times.
✗ Branch 2 not taken.
1241 const shash::Any root_catalog_hash = this->GetRepositoryRootCatalogHash();
545
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1241 times.
1241 if (root_catalog_hash.IsNull()) {
546 return false;
547 }
548
1/2
✓ Branch 1 taken 1241 times.
✗ Branch 2 not taken.
1241 return Traverse(root_catalog_hash, type);
549 }
550
551 1501 bool Traverse(const shash::Any &root_catalog_hash,
552 const TraversalType type = Base::kBreadthFirst) {
553 // add the root catalog of the repository as the first element on the job
554 // stack
555 1501 TraversalContext ctx(
556
1/2
✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
1501 this->default_history_depth_, this->default_timestamp_threshold_, type);
557
1/2
✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
1501 Push(root_catalog_hash, &ctx);
558
1/2
✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
3002 return DoTraverse(&ctx);
559 1501 }
560
561 1862 bool TraverseList(const std::vector<shash::Any> &catalog_list,
562 const TraversalType type = Base::kBreadthFirst) {
563 1862 std::vector<shash::Any>::const_iterator i = catalog_list.begin();
564 1862 const std::vector<shash::Any>::const_iterator iend = catalog_list.end();
565 1862 bool success = true;
566
2/2
✓ Branch 1 taken 3659 times.
✓ Branch 2 taken 1862 times.
5521 for (; i != iend; ++i) {
567
3/6
✓ Branch 0 taken 3659 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 3659 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 3659 times.
✗ Branch 7 not taken.
3659 success = success && TraverseRevision(*i, type);
568 }
569 1862 return success;
570 }
571
572 3659 bool TraverseRevision(const shash::Any &root_catalog_hash,
573 const TraversalType type = Base::kBreadthFirst) {
574 // add the given root catalog as the first element on the job stack
575
1/2
✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
3659 TraversalContext ctx(
576 Parameters::kNoHistory, Parameters::kNoTimestampThreshold, type);
577
1/2
✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
3659 Push(root_catalog_hash, &ctx);
578
1/2
✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
7318 return DoTraverse(&ctx);
579 3659 }
580
581 protected:
582 /**
583 * This controls the actual traversal. Using a stack to traverse down the
584 * catalog hierarchy. This method implements the traversal itself, but not
585 * in which way catalogs are handed out to the user code.
586 *
587 * Each catalog is processed in these steps:
588 * 1.) Pop the next catalog from the stack
589 * Catalogs are always traversed from latest to oldest revision and
590 * from root to leaf nested catalogs
591 * 2.) Prepare the catalog for traversing
592 * 2.1.) Check if it was visited before
593 * 2.2.) Fetch the catalog database from the backend storage
594 * This might fail and produce an error. For root catalogs this
595 * error can be ignored (might be garbage collected before)
596 * 2.3.) Open the catalog database
597 * 2.4.) Check if the catalog is older than the timestamp threshold
598 * After these steps the catalog is opened either opened and ready for
599 * the traversal to continue, or it was marked for ignore (job.ignore)
600 * 3.) Check if the catalog is marked to be ignored
601 * Catalog might not be loadable (sweeped root catalog) or is too old
602 * Note: ignored catalogs can still trigger postponed yields
603 * 4.) Mark the catalog as visited to be able to skip it later on
604 * 5.) Find and push referencing catalogs
605 * This pushes all descendents of the current catalog onto the stack.
606 * Note that this is dependent on the strategy (depth or breadth first)
607 * and on the history threshold (see history_depth).
608 * 6.) Hand the catalog out to the user code
609 * Depending on the traversal strategy (depth of breadths first) this
610 * might immediately yield zero to N catalogs to the user code.
611 *
612 * Note: If anything unexpected goes wrong during the traversal process, it
613 * is aborted immediately.
614 *
615 * @param ctx the traversal context that steers the whole traversal process
616 * @return true on successful traversal and false on abort
617 */
618 5160 bool DoTraverse(TraversalContext *ctx) {
619
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 5160 times.
5160 assert(ctx->callback_stack.empty());
620
621
5/5
✓ Branch 1 taken 41719 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 3309 times.
✓ Branch 5 taken 45048 times.
✓ Branch 6 taken 5140 times.
95236 while (!ctx->catalog_stack.empty()) {
622 // Get the top most catalog for the next processing step
623
1/2
✓ Branch 1 taken 45048 times.
✗ Branch 2 not taken.
45048 CatalogJob job = Pop(ctx);
624
625
3/4
✓ Branch 1 taken 45048 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3085 times.
✓ Branch 4 taken 41963 times.
45048 if (ShouldBeSkipped(job)) {
626 3085 job.ignore = true;
627 // download and open the catalog for processing
628
3/4
✓ Branch 1 taken 41963 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 41943 times.
41963 } else if (!this->PrepareCatalog(&job)) {
629 20 return false;
630 }
631
632 // ignored catalogs don't need to be processed anymore but they might
633 // release postponed yields
634
2/2
✓ Branch 0 taken 3309 times.
✓ Branch 1 taken 41719 times.
45028 if (job.ignore) {
635
2/4
✓ Branch 1 taken 3309 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3309 times.
3309 if (!HandlePostponedYields(job, ctx)) {
636 return false;
637 }
638 3309 continue;
639 }
640
641 // push catalogs referenced by the current catalog (onto stack)
642
1/2
✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
41719 this->MarkAsVisited(job);
643
1/2
✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
41719 PushReferencedCatalogs(&job, ctx);
644
645 // notify listeners
646
2/4
✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 41719 times.
41719 if (!YieldToListeners(&job, ctx)) {
647 return false;
648 }
649 }
650
651 // invariant: after the traversal finished, there should be no more catalogs
652 // to traverse or to yield!
653
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 5140 times.
5140 assert(ctx->catalog_stack.empty());
654
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 5140 times.
5140 assert(ctx->callback_stack.empty());
655 5140 return true;
656 }
657
658 41719 void PushReferencedCatalogs(CatalogJob *job, TraversalContext *ctx) {
659
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(!job->ignore);
660
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(job->catalog != NULL);
661
3/4
✓ Branch 0 taken 9706 times.
✓ Branch 1 taken 32013 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9706 times.
41719 assert(ctx->traversal_type == Base::kBreadthFirst
662 || ctx->traversal_type == Base::kDepthFirst);
663
664 // this differs, depending on the traversal strategy.
665 //
666 // Breadths First Traversal
667 // Catalogs are traversed from top (root catalog) to bottom (leaf
668 // catalogs) and from more recent (HEAD revision) to older (historic
669 // revisions)
670 //
671 // Depth First Traversal
672 // Catalogs are traversed from oldest revision (depends on the configured
673 // maximal history depth) to the HEAD revision and from bottom (leafs) to
674 // top (root catalogs)
675 83438 job->referenced_catalogs = (ctx->traversal_type == Base::kBreadthFirst)
676
2/2
✓ Branch 0 taken 32013 times.
✓ Branch 1 taken 9706 times.
41719 ? PushPreviousRevision(*job, ctx)
677 32013 + PushNestedCatalogs(*job, ctx)
678 9706 : PushNestedCatalogs(*job, ctx)
679 9706 + PushPreviousRevision(*job, ctx);
680 41719 }
681
682 /**
683 * Pushes the previous revision of a (root) catalog.
684 * @return the number of catalogs pushed on the processing stack
685 */
686 41719 unsigned int PushPreviousRevision(const CatalogJob &job,
687 TraversalContext *ctx) {
688 // only root catalogs are used for entering a previous revision (graph)
689
2/2
✓ Branch 1 taken 36009 times.
✓ Branch 2 taken 5710 times.
41719 if (!job.catalog->IsRoot()) {
690 36009 return 0;
691 }
692
693
1/2
✓ Branch 1 taken 5710 times.
✗ Branch 2 not taken.
5710 const shash::Any previous_revision = job.catalog->GetPreviousRevision();
694
2/2
✓ Branch 1 taken 632 times.
✓ Branch 2 taken 5078 times.
5710 if (previous_revision.IsNull()) {
695 632 return 0;
696 }
697
698 // check if the next deeper history level is actually requested
699 // Note: if the current catalog is below the timestamp threshold it will be
700 // traversed and only its ancestor revision will not be pushed anymore
701
2/2
✓ Branch 0 taken 2992 times.
✓ Branch 1 taken 2086 times.
5078 if (this->IsBelowPruningThresholds(job, ctx->history_depth,
702
1/2
✓ Branch 1 taken 5078 times.
✗ Branch 2 not taken.
5078 ctx->timestamp_threshold)) {
703 2992 return 0;
704 }
705
706
3/6
✓ Branch 2 taken 2086 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2086 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 2086 times.
✗ Branch 9 not taken.
2086 Push(CatalogJob("", previous_revision, 0, job.history_depth + 1, NULL),
707 ctx);
708 2086 return 1;
709 }
710
711 /**
712 * Pushes all the referenced nested catalogs.
713 * @return the number of catalogs pushed on the processing stack
714 */
715 41719 unsigned int PushNestedCatalogs(const CatalogJob &job,
716 TraversalContext *ctx) {
717 typedef typename CatalogTN::NestedCatalogList NestedCatalogList;
718
1/2
✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
41719 const NestedCatalogList nested = job.catalog->ListOwnNestedCatalogs();
719 41719 typename NestedCatalogList::const_iterator i = nested.begin();
720 41719 typename NestedCatalogList::const_iterator iend = nested.end();
721
2/2
✓ Branch 3 taken 37842 times.
✓ Branch 4 taken 41719 times.
79561 for (; i != iend; ++i) {
722
2/2
✓ Branch 0 taken 1340 times.
✓ Branch 1 taken 36502 times.
37842 CatalogTN *parent = (this->no_close_) ? job.catalog : NULL;
723
2/4
✓ Branch 1 taken 37842 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 37842 times.
✗ Branch 5 not taken.
37842 const CatalogJob new_job(i->mountpoint.ToString(),
724 37842 i->hash,
725 37842 job.tree_level + 1,
726 37842 job.history_depth,
727 parent);
728
1/2
✓ Branch 1 taken 37842 times.
✗ Branch 2 not taken.
37842 Push(new_job, ctx);
729 }
730
731 83438 return nested.size();
732 41719 }
733
734 5160 void Push(const shash::Any &root_catalog_hash, TraversalContext *ctx) {
735
3/6
✓ Branch 2 taken 5160 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 5160 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 5160 times.
✗ Branch 9 not taken.
5160 Push(CatalogJob("", root_catalog_hash, 0, 0), ctx);
736 5160 }
737
738 41719 bool YieldToListeners(CatalogJob *job, TraversalContext *ctx) {
739
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(!job->ignore);
740
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(job->catalog != NULL);
741
3/4
✓ Branch 0 taken 9706 times.
✓ Branch 1 taken 32013 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9706 times.
41719 assert(ctx->traversal_type == Base::kBreadthFirst
742 || ctx->traversal_type == Base::kDepthFirst);
743
744 // in breadth first search mode, every catalog is simply handed out once
745 // it is visited. No extra magic required...
746
2/2
✓ Branch 0 taken 32013 times.
✓ Branch 1 taken 9706 times.
41719 if (ctx->traversal_type == Base::kBreadthFirst) {
747 32013 return Yield(job);
748 }
749
750 // in depth first search mode, catalogs might need to wait until all of
751 // their referenced catalogs are yielded (ctx.callback_stack)...
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9706 times.
9706 assert(ctx->traversal_type == Base::kDepthFirst);
753
2/2
✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 5959 times.
9706 if (job->referenced_catalogs > 0) {
754 3747 PostponeYield(job, ctx);
755 3747 return true;
756 }
757
758 // this catalog can be yielded
759
2/4
✓ Branch 1 taken 5959 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5959 times.
✗ Branch 5 not taken.
5959 return Yield(job) && HandlePostponedYields(*job, ctx);
760 }
761
762 /**
763 * Checks the traversal history if the given catalog was traversed or at least
764 * seen before. If 'no_repeat_history' is not set this is always 'false'.
765 *
766 * @param job the job to be checked against the traversal history
767 * @return true if the specified catalog was hit before
768 */
769 45048 bool ShouldBeSkipped(const CatalogJob &job) {
770
4/4
✓ Branch 0 taken 23468 times.
✓ Branch 1 taken 21580 times.
✓ Branch 3 taken 3085 times.
✓ Branch 4 taken 20383 times.
45048 return this->no_repeat_history_ && (visited_catalogs_.count(job.hash) > 0);
771 }
772
773 41719 void MarkAsVisited(const CatalogJob &job) {
774
2/2
✓ Branch 0 taken 20159 times.
✓ Branch 1 taken 21560 times.
41719 if (this->no_repeat_history_) {
775 20159 visited_catalogs_.insert(job.hash);
776 }
777 41719 }
778
779 private:
780 /**
781 * This actually hands out a catalog to the user code
782 * It is not called by DoTraversa() directly but by wrapper functions in order
783 * to provide higher level yielding behaviour.
784 */
785 41719 bool Yield(CatalogJob *job) {
786
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(!job->ignore);
787
3/4
✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 37972 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3747 times.
41719 assert(job->catalog != NULL || job->postponed);
788
789 // catalog was pushed on ctx.callback_stack before, it might need to be re-
790 // opened. If CatalogTraversal<> is configured with no_close, it was not
791 // closed before, hence does not need a re-open.
792
5/8
✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 37972 times.
✓ Branch 2 taken 3747 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3747 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 41719 times.
41719 if (job->postponed && !this->no_close_ && !this->ReopenCatalog(job)) {
793 return false;
794 }
795
796 // hand the catalog out to the user code (see Observable<>)
797
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
41719 assert(job->catalog != NULL);
798
1/2
✓ Branch 2 taken 41719 times.
✗ Branch 3 not taken.
41719 this->NotifyListeners(job->GetCallbackData());
799
800 // skip the catalog closing procedure if asked for
801 // Note: In this case it is the user's responsibility to both delete the
802 // yielded catalog object and the underlying database temp file
803
2/2
✓ Branch 0 taken 1400 times.
✓ Branch 1 taken 40319 times.
41719 if (this->no_close_) {
804 1400 return true;
805 }
806
807 // we can close the catalog here and delete the temporary file
808 40319 const bool unlink_db = true;
809 40319 return this->CloseCatalog(unlink_db, job);
810 }
811
812
813 /**
814 * Pushes a catalog to the callback_stack for later yielding
815 * Note: this is only used for the Depth First Traversal strategy!
816 */
817 3747 void PostponeYield(CatalogJob *job, TraversalContext *ctx) {
818
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3747 times.
3747 assert(job->referenced_catalogs > 0);
819
820 3747 job->postponed = true;
821
1/2
✓ Branch 0 taken 3747 times.
✗ Branch 1 not taken.
3747 if (!this->no_close_) {
822 3747 const bool unlink_db = false; // will reopened just before yielding
823 3747 this->CloseCatalog(unlink_db, job);
824 }
825 3747 ctx->callback_stack.push(*job);
826 3747 }
827
828 /**
829 * Determines if there are postponed yields that can be set free based on
830 * the catalog currently being yielded
831 *
832 * Note: the CatalogJob being handed into this method does not necessarily
833 * have an open Catalog attached to it.
834 *
835 * @param ctx the TraversalContext
836 * @param job the catalog job that was just yielded
837 * @return true on successful execution
838 */
839 9268 bool HandlePostponedYields(const CatalogJob &job, TraversalContext *ctx) {
840
2/2
✓ Branch 0 taken 2436 times.
✓ Branch 1 taken 6832 times.
9268 if (ctx->traversal_type == Base::kBreadthFirst) {
841 2436 return true;
842 }
843
844
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6832 times.
6832 assert(ctx->traversal_type == Base::kDepthFirst);
845
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6832 times.
6832 assert(job.referenced_catalogs == 0);
846
847 // walk through the callback_stack and yield all catalogs that have no un-
848 // yielded referenced_catalogs anymore. Every time a CatalogJob in the
849 // callback_stack gets yielded it decrements the referenced_catalogs of the
850 // next top of the stack (it's parent CatalogJob waiting for yielding)
851 6832 CatalogJobStack &clbs = ctx->callback_stack;
852
2/2
✓ Branch 1 taken 9252 times.
✓ Branch 2 taken 1327 times.
10579 while (!clbs.empty()) {
853 9252 CatalogJob &postponed_job = clbs.top();
854
2/2
✓ Branch 0 taken 5505 times.
✓ Branch 1 taken 3747 times.
9252 if (--postponed_job.referenced_catalogs > 0) {
855 5505 break;
856 }
857
858
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3747 times.
3747 if (!Yield(&postponed_job)) {
859 return false;
860 }
861 3747 clbs.pop();
862 }
863
864 6832 return true;
865 }
866
867 45088 void Push(const CatalogJob &job, TraversalContext *ctx) {
868 45088 ctx->catalog_stack.push(job);
869 45088 }
870
871 45048 CatalogJob Pop(TraversalContext *ctx) {
872 45048 CatalogJob job = ctx->catalog_stack.top();
873 45048 ctx->catalog_stack.pop();
874 45048 return job;
875 }
876
877 protected:
878 HashSet visited_catalogs_;
879 };
880
881 template<class ObjectFetcherT>
882 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::
883 kFullHistory = std::numeric_limits<uint64_t>::max();
884
885 template<class ObjectFetcherT>
886 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoHistory = 0;
887
888 template<class ObjectFetcherT>
889 const time_t
890 CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoTimestampThreshold = 0;
891
892 } // namespace swissknife
893
894 #endif // CVMFS_CATALOG_TRAVERSAL_H_
895