GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/catalog_traversal.h
Date: 2024-04-28 02:33:07
Exec Total Coverage
Lines: 246 265 92.8%
Branches: 145 263 55.1%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_CATALOG_TRAVERSAL_H_
6 #define CVMFS_CATALOG_TRAVERSAL_H_
7
8 #include <cassert>
9 #include <limits>
10 #include <set>
11 #include <stack>
12 #include <string>
13 #include <vector>
14
15 #include "catalog.h"
16 #include "compression.h"
17 #include "crypto/signature.h"
18 #include "history_sqlite.h"
19 #include "manifest.h"
20 #include "object_fetcher.h"
21 #include "util/concurrency.h"
22 #include "util/logging.h"
23
24 namespace catalog {
25 class Catalog;
26 class WritableCatalog;
27 }
28
29 namespace swissknife {
30
31 /**
32 * Callback data which has to be implemented by the registered callback
33 * functions/methods (see Observable<> for further details)
34 * @param catalog the catalog object which needs to be processed
35 * @param catalog_hash the content hash of the catalog
36 * @param tree_level the depth in the nested catalog tree
37 * (starting at zero)
38 * @param file_size the size of the downloaded catalog database file
39 * @param history_depth the distance from the current HEAD revision
40 * (current HEAD has history_depth 0)
41 */
42 template <class CatalogT>
43 struct CatalogTraversalData {
44 370116 CatalogTraversalData(const CatalogT *catalog,
45 const shash::Any &catalog_hash,
46 const unsigned tree_level,
47 const size_t file_size,
48 const uint64_t history_depth)
49 370116 : catalog(catalog)
50 370116 , catalog_hash(catalog_hash)
51 370116 , tree_level(tree_level)
52 370116 , file_size(file_size)
53 370116 , history_depth(history_depth) {}
54
55 const CatalogT *catalog;
56 const shash::Any catalog_hash;
57 const unsigned int tree_level;
58 const size_t file_size;
59 const uint64_t history_depth;
60 };
61
62
63 /**
64 * A layer to extract information from a catalog. Users of the catalog
65 * traversal can provide derived classes to overwrite behavior. Currently used
66 * to get the last-modified timestamp in configurable manner: for the garbage
67 * collection, the timestamp of the catalog hash in the reflog counts, which
68 * is the same or newer than the one stored in the catalog.
69 */
70 template <class CatalogT>
71 class CatalogTraversalInfoShim {
72 public:
73 312 virtual ~CatalogTraversalInfoShim() { }
74
75 // Default implementation: use the catalog's timestamp
76 338 virtual uint64_t GetLastModified(const CatalogT *catalog) {
77 338 return catalog->GetLastModified();
78 }
79 };
80
81
82 /**
83 * A base class for CatalogTraversal and CatalogTraversalParallel implementing
84 * common functionality. Actual traversal classes inherit from this class.
85 *
86 */
87 template <class ObjectFetcherT>
88 class CatalogTraversalBase
89 : public Observable<CatalogTraversalData<typename ObjectFetcherT::CatalogTN> >
90 {
91 public:
92 typedef ObjectFetcherT ObjectFetcherTN;
93 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
94 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
95 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
96
97 /**
98 * @param repo_url the path to the repository to be traversed:
99 * -> either absolute path to the local catalogs
100 * -> or an URL to a remote repository
101 * @param repo_name fully qualified repository name (used for remote
102 * repository signature check) (optional)
103 * @param repo_keys a comma separated list of public key file
104 * locations to verify the repository manifest file
105 * @param history depth of the desired catalog history traversal
106 * (default: 0 - only HEAD catalogs are traversed)
107 * @param timestamp timestamp of history traversal threshold
108 * (default: 0 - no threshold, traverse everything)
109 * @param no_repeat_history keep track of visited catalogs and don't re-visit
110 * them in previous revisions
111 * @param no_close do not close catalogs after they were attached
112 * (catalogs retain their parent/child pointers)
113 * @param ignore_load_failure suppressed an error message if a catalog file
114 * could not be loaded (i.e. was sweeped before by
115 * a garbage collection run)
116 * @param quiet silence messages that would go to stderr
117 * @param tmp_dir path to the temporary directory to be used
118 * (default: /tmp)
119 *
120 * ONLY FOR CatalogTraversalParallel
121 * @param num_threads number of threads concurrently traversing
122 * the catalog trees; hint: set up based on
123 * CPU utilization and optimal number of
124 * download threads
125 * (default: 8)
126 *
127 * @param serialize_callbacks do not call multiple catalog callbacks
128 * concurrently; ensures safe access to shared
129 * variables inside the callback; turn off if
130 * the registered callback is thread-safe
131 * (default: true)
132 */
133 struct Parameters {
134 112 Parameters()
135 112 : object_fetcher(NULL)
136 112 , history(kNoHistory)
137 112 , timestamp(kNoTimestampThreshold)
138 112 , no_repeat_history(false)
139 112 , no_close(false)
140 112 , ignore_load_failure(false)
141 112 , quiet(false)
142 112 , num_threads(8)
143 112 , serialize_callbacks(true) {}
144
145 static const uint64_t kFullHistory;
146 static const uint64_t kNoHistory;
147 static const time_t kNoTimestampThreshold;
148
149 ObjectFetcherT *object_fetcher;
150
151 uint64_t history;
152 time_t timestamp;
153 bool no_repeat_history;
154 bool no_close;
155 bool ignore_load_failure;
156 bool quiet;
157 // These parameters only apply for CatalogTraversalParallel
158 unsigned int num_threads;
159 bool serialize_callbacks;
160 };
161
162 enum TraversalType {
163 kBreadthFirst,
164 kDepthFirst
165 };
166
167 112 explicit CatalogTraversalBase(const Parameters &params)
168 112 : object_fetcher_(params.object_fetcher)
169 112 , catalog_info_shim_(&catalog_info_default_shim_)
170 112 , default_history_depth_(params.history)
171 112 , default_timestamp_threshold_(params.timestamp)
172 112 , no_close_(params.no_close)
173 112 , ignore_load_failure_(params.ignore_load_failure)
174 112 , no_repeat_history_(params.no_repeat_history)
175
2/2
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 64 times.
112 , error_sink_((params.quiet) ? kLogDebug : kLogStderr)
176 {
177
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 112 times.
112 assert(object_fetcher_ != NULL);
178 112 }
179
180 /**
181 * Starts the traversal process.
182 * After calling this methods CatalogTraversal will go through all catalogs
183 * and call the registered callback methods for each found catalog.
184 * If something goes wrong in the process, the traversal will be cancelled.
185 *
186 * @param type breadths or depth first traversal
187 * @return true, when all catalogs were successfully processed. On
188 * failure the traversal is cancelled and false is returned.
189 */
190 virtual bool Traverse(const TraversalType type = kBreadthFirst) = 0;
191
192 /**
193 * Starts the traversal process at the catalog pointed to by the given hash
194 *
195 * @param root_catalog_hash the entry point into the catalog traversal
196 * @param type breadths or depth first traversal
197 * @return true when catalogs were successfully traversed
198 */
199 virtual bool Traverse(const shash::Any &root_catalog_hash,
200 const TraversalType type = kBreadthFirst) = 0;
201
202 /**
203 * Traverse a list of revisions represented by root catalogs from first
204 * to last. DO NOT traverse previous revisions based on history and
205 * timestamp threshold settings.
206 *
207 * @param catalog_list list of root catalog hashes
208 * @param type breadth- or depth- first traversal
209 * @return true on success
210 */
211 virtual bool TraverseList(const std::vector<shash::Any> &catalog_list,
212 const TraversalType type = kBreadthFirst) = 0;
213
214 /**
215 * Starts the traversal process at the catalog pointed to by the given hash
216 * but doesn't traverse into predecessor catalog revisions. This overrides the
217 * TraversalParameter settings provided at construction.
218 *
219 * @param root_catalog_hash the entry point into the catalog traversal
220 * @param type breadths or depth first traversal
221 * @return true when catalogs were successfully traversed
222 */
223 virtual bool TraverseRevision(const shash::Any &root_catalog_hash,
224 const TraversalType type = kBreadthFirst) = 0;
225
226 /**
227 * Figures out all named tags in a repository and uses all of them as entry
228 * points into the traversal process.
229 *
230 * @param type breadths or depth first traversal
231 * @return true when catalog traversal successfully finished
232 */
233 58 virtual bool TraverseNamedSnapshots(
234 const TraversalType type = kBreadthFirst)
235 {
236 typedef std::vector<shash::Any> HashList;
237
238
1/2
✓ Branch 1 taken 58 times.
✗ Branch 2 not taken.
58 UniquePtr<HistoryTN> tag_db;
239 const typename ObjectFetcherT::Failures retval =
240
2/4
✓ Branch 1 taken 58 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 58 times.
✗ Branch 5 not taken.
58 object_fetcher_->FetchHistory(&tag_db);
241
2/3
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
58 switch (retval) {
242 56 case ObjectFetcherT::kFailOk:
243 56 break;
244
245 2 case ObjectFetcherT::kFailNotFound:
246
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 LogCvmfs(kLogCatalogTraversal, kLogDebug,
247 "didn't find a history database to traverse");
248 2 return true;
249
250 default:
251 LogCvmfs(kLogCatalogTraversal, kLogStderr,
252 "failed to download history database (%d - %s)",
253 retval, Code2Ascii(retval));
254 return false;
255 }
256
257 56 HashList root_hashes;
258
1/2
✓ Branch 2 taken 56 times.
✗ Branch 3 not taken.
56 bool success = tag_db->GetHashes(&root_hashes);
259
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 56 times.
56 assert(success);
260
1/2
✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
56 return TraverseList(root_hashes, type);
261 58 }
262
263 2 void SetCatalogInfoShim(CatalogTraversalInfoShim<CatalogTN> *shim) {
264 2 catalog_info_shim_ = shim;
265 2 }
266
267 protected:
268 typedef std::set<shash::Any> HashSet;
269
270 /**
271 * This struct keeps information about a catalog that still needs to be
272 * traversed by a currently running catalog traversal process.
273 */
274 struct CatalogJob {
275 370346 CatalogJob(const std::string &path,
276 const shash::Any &hash,
277 const unsigned tree_level,
278 const uint64_t history_depth,
279 CatalogTN *parent = NULL) :
280 370346 path(path),
281 370346 hash(hash),
282 370346 tree_level(tree_level),
283 370346 history_depth(history_depth),
284 370346 parent(parent),
285 370346 catalog_file_size(0),
286 370346 ignore(false),
287 370346 catalog(NULL),
288 370346 referenced_catalogs(0),
289 370346 postponed(false) {}
290
291 738572 bool IsRootCatalog() const { return tree_level == 0; }
292
293 370116 CallbackDataTN GetCallbackData() const {
294 370116 return CallbackDataTN(catalog, hash, tree_level,
295 370116 catalog_file_size, history_depth);
296 }
297
298 // initial state description
299 const std::string path;
300 const shash::Any hash;
301 const unsigned tree_level;
302 const uint64_t history_depth;
303 CatalogTN *parent;
304
305 // dynamic processing state (used internally)
306 std::string catalog_file_path;
307 size_t catalog_file_size;
308 bool ignore;
309 CatalogTN *catalog;
310 uint64_t referenced_catalogs;
311 bool postponed;
312 };
313
314 370114 bool PrepareCatalog(CatalogJob *job) {
315 const typename ObjectFetcherT::Failures retval =
316 1109610 object_fetcher_->FetchCatalog(job->hash,
317 370110 job->path,
318 &job->catalog,
319 370114 !job->IsRootCatalog(),
320 job->parent);
321
2/3
✓ Branch 0 taken 369398 times.
✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
369386 switch (retval) {
322 369398 case ObjectFetcherT::kFailOk:
323 369398 break;
324
325 15 case ObjectFetcherT::kFailNotFound:
326
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 2 times.
15 if (ignore_load_failure_) {
327
1/2
✓ Branch 3 taken 13 times.
✗ Branch 4 not taken.
13 LogCvmfs(kLogCatalogTraversal, kLogDebug, "ignoring missing catalog "
328 "%s (swept before?)",
329 job->hash.ToString().c_str());
330 13 job->ignore = true;
331 13 return true;
332 }
333
334 default:
335 LogCvmfs(kLogCatalogTraversal, error_sink_, "failed to load catalog %s "
336 "(%d - %s)",
337 job->hash.ToStringWithSuffix().c_str(),
338 retval, Code2Ascii(retval));
339 2 return false;
340 }
341
342 // catalogs returned by ObjectFetcher<> are managing their database files by
343 // default... we need to manage this file manually here
344 369398 job->catalog->DropDatabaseFileOwnership();
345
346 369243 job->catalog_file_path = job->catalog->database_path();
347
1/2
✓ Branch 2 taken 370046 times.
✗ Branch 3 not taken.
369216 job->catalog_file_size = GetFileSize(job->catalog->database_path());
348
349 369969 return true;
350 }
351
352
353 368425 bool ReopenCatalog(CatalogJob *job) {
354
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 368425 times.
368425 assert(!job->ignore);
355
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 368425 times.
368425 assert(job->catalog == NULL);
356
357 735782 job->catalog = CatalogTN::AttachFreely(job->path,
358 368410 job->catalog_file_path,
359 368410 job->hash,
360 job->parent,
361 368425 !job->IsRootCatalog());
362
363
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 367372 times.
367372 if (job->catalog == NULL) {
364 LogCvmfs(kLogCatalogTraversal, error_sink_,
365 "failed to re-open catalog %s", job->hash.ToString().c_str());
366 return false;
367 }
368
369 367372 return true;
370 }
371
372
373 371907 bool CloseCatalog(const bool unlink_db, CatalogJob *job) {
374
1/2
✓ Branch 0 taken 371907 times.
✗ Branch 1 not taken.
371907 delete job->catalog;
375 371907 job->catalog = NULL;
376
2/6
✗ Branch 1 not taken.
✓ Branch 2 taken 371907 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 371907 times.
371907 if (!job->catalog_file_path.empty() && unlink_db) {
377 const int retval = unlink(job->catalog_file_path.c_str());
378 if (retval != 0) {
379 LogCvmfs(kLogCatalogTraversal, error_sink_, "Failed to unlink %s - %d",
380 job->catalog_file_path.c_str(), errno);
381 return false;
382 }
383 }
384
385 371907 return true;
386 }
387
388 80 shash::Any GetRepositoryRootCatalogHash() {
389 // get the manifest of the repository to learn about the entry point or the
390 // root catalog of the repository to be traversed
391
1/2
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
80 UniquePtr<manifest::Manifest> manifest;
392 const typename ObjectFetcherT::Failures retval =
393
1/2
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
80 object_fetcher_->FetchManifest(&manifest);
394
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 80 times.
80 if (retval != ObjectFetcherT::kFailOk) {
395 LogCvmfs(kLogCatalogTraversal, kLogStderr, "failed to load manifest "
396 "(%d - %s)",
397 retval, Code2Ascii(retval));
398 return shash::Any();
399 }
400
401
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 80 times.
80 assert(manifest.IsValid());
402 80 return manifest->catalog_hash();
403 80 }
404
405 /**
406 * Checks if a root catalog is below one of the pruning thresholds.
407 * Pruning thresholds can be either the catalog's history depth or a timestamp
408 * threshold applied to the last modified timestamp of the catalog.
409 *
410 * @param ctx traversal context for traversal-specific state
411 * @param job the job defining the current catalog
412 * @return true if either history or timestamp threshold are satisfied
413 */
414 346 bool IsBelowPruningThresholds(
415 const CatalogJob &job,
416 const uint64_t history_depth,
417 const time_t timestamp_threshold
418 ) {
419
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 346 times.
346 assert(job.IsRootCatalog());
420
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 346 times.
346 assert(job.catalog != NULL);
421
422 346 const bool h = job.history_depth >= history_depth;
423
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 346 times.
346 assert(timestamp_threshold >= 0);
424 346 const bool t =
425 346 catalog_info_shim_->GetLastModified(job.catalog) <
426 346 unsigned(timestamp_threshold);
427
428
4/4
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 22 times.
✓ Branch 2 taken 170 times.
✓ Branch 3 taken 154 times.
346 return t || h;
429 }
430
431 ObjectFetcherT *object_fetcher_;
432 CatalogTraversalInfoShim<CatalogTN> catalog_info_default_shim_;
433 CatalogTraversalInfoShim<CatalogTN> *catalog_info_shim_;
434 const uint64_t default_history_depth_;
435 const time_t default_timestamp_threshold_;
436 const bool no_close_;
437 const bool ignore_load_failure_;
438 const bool no_repeat_history_;
439 LogFacilities error_sink_;
440 };
441
442 /**
443 * This class traverses the catalog hierarchy of a CVMFS repository recursively.
444 * Also historic catalog trees can be traversed. The user needs to specify a
445 * callback which is called for each catalog on the way.
446 *
447 * CatalogTraversal<> can be configured and used in various ways:
448 * -> Historic catalog traversal
449 * -> Prune catalogs below a certain history level
450 * -> Prune catalogs older than a certain threshold timestamp
451 * -> Never traverse a certain catalog twice
452 * -> Breadth First Traversal or Depth First Traversal
453 * -> Optional catalog memory management (no_close)
454 * -> Use all Named Snapshots of a repository as traversal entry point
455 * -> Traverse starting from a provided catalog
456 * -> Traverse catalogs that were previously skipped
457 * -> Produce various flavours of catalogs (writable, mocked, ...)
458 *
459 * Breadth First Traversal Strategy
460 * Catalogs are handed out to the user identical as they are traversed.
461 * Say: From top to bottom. When you would simply print each received catalog
462 * the result would be a nice representation of the catalog tree.
463 * This method is more efficient, because catalogs are opened, processed and
464 * thrown away directly afterwards.
465 *
466 * Depth First Traversal Strategy
467 * The user gets the catalog tree starting from the leaf nodes.
468 * Say: From bottom to top. A user can assume that he got all children or
469 * historical ancestors of a catalog before.
470 * This method climbs down the full catalog tree and hands it out 'in reverse
471 * order'. Thus, catalogs on the way are opened, checked for their descendants
472 * and closed. Once all children and historical ancestors are processed, it is
473 * re-opened and handed out to the user.
474 * Note: This method needs more disk space to temporarily store downloaded but
475 * not yet processed catalogs.
476 *
477 * Note: Since all CVMFS catalog files together can grow to several gigabytes in
478 * file size, each catalog is loaded, processed and removed immediately
479 * afterwards. Except if no_close is specified, which allows the user to
480 * choose when a catalog should be closed. Keep in mind, that a user is
481 * responsible for both deletion of the delivered catalog objects as well
482 * as unlinking of the catalog database file.
483 *
484 *
485 * @param ObjectFetcherT Strategy Pattern implementation that defines how to
486 * retrieve catalogs from various backend storage types.
487 * Furthermore the ObjectFetcherT::CatalogTN is the type
488 * of catalog to be instantiated by CatalogTraversal<>.
489 *
490 * CAUTION: the CatalogTN* pointer passed into the callback becomes invalid
491 * directly after the callback method returns, unless you create the
492 * CatalogTraversal object with no_close = true.
493 */
494 template<class ObjectFetcherT>
495 class CatalogTraversal
496 : public CatalogTraversalBase<ObjectFetcherT>
497 {
498 public:
499 typedef CatalogTraversalBase<ObjectFetcherT> Base;
500 typedef ObjectFetcherT ObjectFetcherTN;
501 typedef typename ObjectFetcherT::CatalogTN CatalogTN;
502 typedef typename ObjectFetcherT::HistoryTN HistoryTN;
503 typedef CatalogTraversalData<CatalogTN> CallbackDataTN;
504 typedef typename Base::Parameters Parameters;
505 typedef typename Base::TraversalType TraversalType;
506 typedef typename Base::CatalogJob CatalogJob;
507
508 protected:
509 typedef std::set<shash::Any> HashSet;
510 typedef std::stack<CatalogJob> CatalogJobStack;
511
512 /**
513 * This struct represents a catalog traversal context. It needs to be re-
514 * created for each catalog traversal process and contains information to this
515 * specific catalog traversal run.
516 *
517 * @param history_depth the history traversal threshold
518 * @param traversal_type either breadth or depth first traversal strategy
519 * @param catalog_stack the call stack for catalogs to be traversed
520 * @param callback_stack used in depth first traversal for deferred yielding
521 */
522 struct TraversalContext {
523 153 TraversalContext(const uint64_t history_depth,
524 const time_t timestamp_threshold,
525 const TraversalType traversal_type) :
526 153 history_depth(history_depth),
527 153 timestamp_threshold(timestamp_threshold),
528
1/2
✓ Branch 2 taken 153 times.
✗ Branch 3 not taken.
153 traversal_type(traversal_type) {}
529
530 const uint64_t history_depth;
531 const time_t timestamp_threshold;
532 const TraversalType traversal_type;
533 CatalogJobStack catalog_stack;
534 CatalogJobStack callback_stack;
535 };
536
537 public:
538 /**
539 * Constructs a new catalog traversal engine based on the construction
540 * parameters described in struct ConstructionParams.
541 */
542 55 explicit CatalogTraversal(const Parameters &params)
543 55 : CatalogTraversalBase<ObjectFetcherT>(params)
544 55 { }
545
546 40 bool Traverse(const TraversalType type = Base::kBreadthFirst) {
547
1/2
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
40 const shash::Any root_catalog_hash = this->GetRepositoryRootCatalogHash();
548
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
40 if (root_catalog_hash.IsNull()) {
549 return false;
550 }
551
1/2
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
40 return Traverse(root_catalog_hash, type);
552 }
553
554 53 bool Traverse(const shash::Any &root_catalog_hash,
555 const TraversalType type = Base::kBreadthFirst) {
556 // add the root catalog of the repository as the first element on the job
557 // stack
558 53 TraversalContext ctx(this->default_history_depth_,
559
1/2
✓ Branch 1 taken 53 times.
✗ Branch 2 not taken.
53 this->default_timestamp_threshold_,
560 type);
561
1/2
✓ Branch 1 taken 53 times.
✗ Branch 2 not taken.
53 Push(root_catalog_hash, &ctx);
562
1/2
✓ Branch 1 taken 53 times.
✗ Branch 2 not taken.
106 return DoTraverse(&ctx);
563 53 }
564
565 49 bool TraverseList(const std::vector<shash::Any> &catalog_list,
566 const TraversalType type = Base::kBreadthFirst) {
567 49 std::vector<shash::Any>::const_iterator i = catalog_list.begin();
568 49 const std::vector<shash::Any>::const_iterator iend = catalog_list.end();
569 49 bool success = true;
570
2/2
✓ Branch 1 taken 100 times.
✓ Branch 2 taken 49 times.
149 for (; i != iend; ++i) {
571
3/6
✓ Branch 0 taken 100 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 100 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 100 times.
✗ Branch 7 not taken.
100 success = success && TraverseRevision(*i, type);
572 }
573 49 return success;
574 }
575
576 100 bool TraverseRevision(
577 const shash::Any &root_catalog_hash,
578 const TraversalType type = Base::kBreadthFirst)
579 {
580 // add the given root catalog as the first element on the job stack
581
1/2
✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
100 TraversalContext ctx(Parameters::kNoHistory,
582 Parameters::kNoTimestampThreshold,
583 type);
584
1/2
✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
100 Push(root_catalog_hash, &ctx);
585
1/2
✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
200 return DoTraverse(&ctx);
586 100 }
587
588 protected:
589 /**
590 * This controls the actual traversal. Using a stack to traverse down the
591 * catalog hierarchy. This method implements the traversal itself, but not
592 * in which way catalogs are handed out to the user code.
593 *
594 * Each catalog is processed in these steps:
595 * 1.) Pop the next catalog from the stack
596 * Catalogs are always traversed from latest to oldest revision and
597 * from root to leaf nested catalogs
598 * 2.) Prepare the catalog for traversing
599 * 2.1.) Check if it was visited before
600 * 2.2.) Fetch the catalog database from the backend storage
601 * This might fail and produce an error. For root catalogs this
602 * error can be ignored (might be garbage collected before)
603 * 2.3.) Open the catalog database
604 * 2.4.) Check if the catalog is older than the timestamp threshold
605 * After these steps the catalog is opened either opened and ready for
606 * the traversal to continue, or it was marked for ignore (job.ignore)
607 * 3.) Check if the catalog is marked to be ignored
608 * Catalog might not be loadable (sweeped root catalog) or is too old
609 * Note: ignored catalogs can still trigger postponed yields
610 * 4.) Mark the catalog as visited to be able to skip it later on
611 * 5.) Find and push referencing catalogs
612 * This pushes all descendents of the current catalog onto the stack.
613 * Note that this is dependent on the strategy (depth or breadth first)
614 * and on the history threshold (see history_depth).
615 * 6.) Hand the catalog out to the user code
616 * Depending on the traversal strategy (depth of breadths first) this
617 * might immediately yield zero to N catalogs to the user code.
618 *
619 * Note: If anything unexpected goes wrong during the traversal process, it
620 * is aborted immediately.
621 *
622 * @param ctx the traversal context that steers the whole traversal process
623 * @return true on successful traversal and false on abort
624 */
625 153 bool DoTraverse(TraversalContext *ctx) {
626
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 153 times.
153 assert(ctx->callback_stack.empty());
627
628
5/5
✓ Branch 1 taken 1772 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 114 times.
✓ Branch 5 taken 1887 times.
✓ Branch 6 taken 152 times.
3926 while (!ctx->catalog_stack.empty()) {
629 // Get the top most catalog for the next processing step
630
1/2
✓ Branch 1 taken 1887 times.
✗ Branch 2 not taken.
1887 CatalogJob job = Pop(ctx);
631
632
3/4
✓ Branch 1 taken 1887 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 107 times.
✓ Branch 4 taken 1780 times.
1887 if (ShouldBeSkipped(job)) {
633 107 job.ignore = true;
634 // download and open the catalog for processing
635
3/4
✓ Branch 1 taken 1780 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1779 times.
1780 } else if (!this->PrepareCatalog(&job)) {
636 1 return false;
637 }
638
639 // ignored catalogs don't need to be processed anymore but they might
640 // release postponed yields
641
2/2
✓ Branch 0 taken 114 times.
✓ Branch 1 taken 1772 times.
1886 if (job.ignore) {
642
2/4
✓ Branch 1 taken 114 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 114 times.
114 if (!HandlePostponedYields(job, ctx)) {
643 return false;
644 }
645 114 continue;
646 }
647
648 // push catalogs referenced by the current catalog (onto stack)
649
1/2
✓ Branch 1 taken 1772 times.
✗ Branch 2 not taken.
1772 this->MarkAsVisited(job);
650
1/2
✓ Branch 1 taken 1772 times.
✗ Branch 2 not taken.
1772 PushReferencedCatalogs(&job, ctx);
651
652 // notify listeners
653
2/4
✓ Branch 1 taken 1772 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1772 times.
1772 if (!YieldToListeners(&job, ctx)) {
654 return false;
655 }
656 }
657
658 // invariant: after the traversal finished, there should be no more catalogs
659 // to traverse or to yield!
660
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 assert(ctx->catalog_stack.empty());
661
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 152 times.
152 assert(ctx->callback_stack.empty());
662 152 return true;
663 }
664
665 1772 void PushReferencedCatalogs(CatalogJob *job, TraversalContext *ctx) {
666
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(!job->ignore);
667
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(job->catalog != NULL);
668
3/4
✓ Branch 0 taken 416 times.
✓ Branch 1 taken 1356 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 416 times.
1772 assert(ctx->traversal_type == Base::kBreadthFirst ||
669 ctx->traversal_type == Base::kDepthFirst);
670
671 // this differs, depending on the traversal strategy.
672 //
673 // Breadths First Traversal
674 // Catalogs are traversed from top (root catalog) to bottom (leaf
675 // catalogs) and from more recent (HEAD revision) to older (historic
676 // revisions)
677 //
678 // Depth First Traversal
679 // Catalogs are traversed from oldest revision (depends on the configured
680 // maximal history depth) to the HEAD revision and from bottom (leafs) to
681 // top (root catalogs)
682 1772 job->referenced_catalogs =
683 1772 (ctx->traversal_type == Base::kBreadthFirst)
684
2/2
✓ Branch 0 taken 1356 times.
✓ Branch 1 taken 416 times.
1772 ? PushPreviousRevision(*job, ctx) + PushNestedCatalogs(*job, ctx)
685 416 : PushNestedCatalogs(*job, ctx) + PushPreviousRevision(*job, ctx);
686 1772 }
687
688 /**
689 * Pushes the previous revision of a (root) catalog.
690 * @return the number of catalogs pushed on the processing stack
691 */
692 1772 unsigned int PushPreviousRevision(
693 const CatalogJob &job,
694 TraversalContext *ctx
695 ) {
696 // only root catalogs are used for entering a previous revision (graph)
697
2/2
✓ Branch 1 taken 1581 times.
✓ Branch 2 taken 191 times.
1772 if (!job.catalog->IsRoot()) {
698 1581 return 0;
699 }
700
701
1/2
✓ Branch 1 taken 191 times.
✗ Branch 2 not taken.
191 const shash::Any previous_revision = job.catalog->GetPreviousRevision();
702
2/2
✓ Branch 1 taken 19 times.
✓ Branch 2 taken 172 times.
191 if (previous_revision.IsNull()) {
703 19 return 0;
704 }
705
706 // check if the next deeper history level is actually requested
707 // Note: if the current catalog is below the timestamp threshold it will be
708 // traversed and only its ancestor revision will not be pushed anymore
709
2/2
✓ Branch 0 taken 95 times.
✓ Branch 1 taken 77 times.
172 if (this->IsBelowPruningThresholds(job, ctx->history_depth,
710
1/2
✓ Branch 1 taken 172 times.
✗ Branch 2 not taken.
172 ctx->timestamp_threshold)) {
711 95 return 0;
712 }
713
714
3/6
✓ Branch 2 taken 77 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 77 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 77 times.
✗ Branch 9 not taken.
77 Push(CatalogJob("", previous_revision, 0, job.history_depth + 1, NULL),
715 ctx);
716 77 return 1;
717 }
718
719 /**
720 * Pushes all the referenced nested catalogs.
721 * @return the number of catalogs pushed on the processing stack
722 */
723 1772 unsigned int PushNestedCatalogs(
724 const CatalogJob &job,
725 TraversalContext *ctx
726 ) {
727 typedef typename CatalogTN::NestedCatalogList NestedCatalogList;
728
1/2
✓ Branch 1 taken 1772 times.
✗ Branch 2 not taken.
1772 const NestedCatalogList nested = job.catalog->ListOwnNestedCatalogs();
729 1772 typename NestedCatalogList::const_iterator i = nested.begin();
730 1772 typename NestedCatalogList::const_iterator iend = nested.end();
731
2/2
✓ Branch 3 taken 1659 times.
✓ Branch 4 taken 1772 times.
3431 for (; i != iend; ++i) {
732
2/2
✓ Branch 0 taken 67 times.
✓ Branch 1 taken 1592 times.
1659 CatalogTN* parent = (this->no_close_) ? job.catalog : NULL;
733
2/4
✓ Branch 1 taken 1659 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1659 times.
✗ Branch 5 not taken.
1659 const CatalogJob new_job(i->mountpoint.ToString(),
734 1659 i->hash,
735 1659 job.tree_level + 1,
736 1659 job.history_depth,
737 parent);
738
1/2
✓ Branch 1 taken 1659 times.
✗ Branch 2 not taken.
1659 Push(new_job, ctx);
739 }
740
741 3544 return nested.size();
742 1772 }
743
744 153 void Push(const shash::Any &root_catalog_hash, TraversalContext *ctx) {
745
3/6
✓ Branch 2 taken 153 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 153 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 153 times.
✗ Branch 9 not taken.
153 Push(CatalogJob("", root_catalog_hash, 0, 0), ctx);
746 153 }
747
748 1772 bool YieldToListeners(CatalogJob *job, TraversalContext *ctx) {
749
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(!job->ignore);
750
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(job->catalog != NULL);
751
3/4
✓ Branch 0 taken 416 times.
✓ Branch 1 taken 1356 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 416 times.
1772 assert(ctx->traversal_type == Base::kBreadthFirst ||
752 ctx->traversal_type == Base::kDepthFirst);
753
754 // in breadth first search mode, every catalog is simply handed out once
755 // it is visited. No extra magic required...
756
2/2
✓ Branch 0 taken 1356 times.
✓ Branch 1 taken 416 times.
1772 if (ctx->traversal_type == Base::kBreadthFirst) {
757 1356 return Yield(job);
758 }
759
760 // in depth first search mode, catalogs might need to wait until all of
761 // their referenced catalogs are yielded (ctx.callback_stack)...
762
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 416 times.
416 assert(ctx->traversal_type == Base::kDepthFirst);
763
2/2
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 257 times.
416 if (job->referenced_catalogs > 0) {
764 159 PostponeYield(job, ctx);
765 159 return true;
766 }
767
768 // this catalog can be yielded
769
2/4
✓ Branch 1 taken 257 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 257 times.
✗ Branch 5 not taken.
257 return Yield(job) && HandlePostponedYields(*job, ctx);
770 }
771
772 /**
773 * Checks the traversal history if the given catalog was traversed or at least
774 * seen before. If 'no_repeat_history' is not set this is always 'false'.
775 *
776 * @param job the job to be checked against the traversal history
777 * @return true if the specified catalog was hit before
778 */
779 1887 bool ShouldBeSkipped(const CatalogJob &job) {
780
4/4
✓ Branch 0 taken 808 times.
✓ Branch 1 taken 1079 times.
✓ Branch 3 taken 107 times.
✓ Branch 4 taken 701 times.
1887 return this->no_repeat_history_ && (visited_catalogs_.count(job.hash) > 0);
781 }
782
783 1772 void MarkAsVisited(const CatalogJob &job) {
784
2/2
✓ Branch 0 taken 694 times.
✓ Branch 1 taken 1078 times.
1772 if (this->no_repeat_history_) {
785 694 visited_catalogs_.insert(job.hash);
786 }
787 1772 }
788
789 private:
790 /**
791 * This actually hands out a catalog to the user code
792 * It is not called by DoTraversa() directly but by wrapper functions in order
793 * to provide higher level yielding behaviour.
794 */
795 1772 bool Yield(CatalogJob *job) {
796
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(!job->ignore);
797
3/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 1613 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 159 times.
1772 assert(job->catalog != NULL || job->postponed);
798
799 // catalog was pushed on ctx.callback_stack before, it might need to be re-
800 // opened. If CatalogTraversal<> is configured with no_close, it was not
801 // closed before, hence does not need a re-open.
802
5/8
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 1613 times.
✓ Branch 2 taken 159 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 159 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1772 times.
1772 if (job->postponed && !this->no_close_ && !this->ReopenCatalog(job)) {
803 return false;
804 }
805
806 // hand the catalog out to the user code (see Observable<>)
807
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1772 times.
1772 assert(job->catalog != NULL);
808
1/2
✓ Branch 2 taken 1772 times.
✗ Branch 3 not taken.
1772 this->NotifyListeners(job->GetCallbackData());
809
810 // skip the catalog closing procedure if asked for
811 // Note: In this case it is the user's responsibility to both delete the
812 // yielded catalog object and the underlying database temp file
813
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 1702 times.
1772 if (this->no_close_) {
814 70 return true;
815 }
816
817 // we can close the catalog here and delete the temporary file
818 1702 const bool unlink_db = true;
819 1702 return this->CloseCatalog(unlink_db, job);
820 }
821
822
823 /**
824 * Pushes a catalog to the callback_stack for later yielding
825 * Note: this is only used for the Depth First Traversal strategy!
826 */
827 159 void PostponeYield(CatalogJob *job, TraversalContext *ctx) {
828
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 159 times.
159 assert(job->referenced_catalogs > 0);
829
830 159 job->postponed = true;
831
1/2
✓ Branch 0 taken 159 times.
✗ Branch 1 not taken.
159 if (!this->no_close_) {
832 159 const bool unlink_db = false; // will reopened just before yielding
833 159 this->CloseCatalog(unlink_db, job);
834 }
835 159 ctx->callback_stack.push(*job);
836 159 }
837
838 /**
839 * Determines if there are postponed yields that can be set free based on
840 * the catalog currently being yielded
841 *
842 * Note: the CatalogJob being handed into this method does not necessarily
843 * have an open Catalog attached to it.
844 *
845 * @param ctx the TraversalContext
846 * @param job the catalog job that was just yielded
847 * @return true on successful execution
848 */
849 371 bool HandlePostponedYields(const CatalogJob &job, TraversalContext *ctx) {
850
2/2
✓ Branch 0 taken 84 times.
✓ Branch 1 taken 287 times.
371 if (ctx->traversal_type == Base::kBreadthFirst) {
851 84 return true;
852 }
853
854
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 287 times.
287 assert(ctx->traversal_type == Base::kDepthFirst);
855
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 287 times.
287 assert(job.referenced_catalogs == 0);
856
857 // walk through the callback_stack and yield all catalogs that have no un-
858 // yielded referenced_catalogs anymore. Every time a CatalogJob in the
859 // callback_stack gets yielded it decrements the referenced_catalogs of the
860 // next top of the stack (it's parent CatalogJob waiting for yielding)
861 287 CatalogJobStack &clbs = ctx->callback_stack;
862
2/2
✓ Branch 1 taken 408 times.
✓ Branch 2 taken 38 times.
446 while (!clbs.empty()) {
863 408 CatalogJob &postponed_job = clbs.top();
864
2/2
✓ Branch 0 taken 249 times.
✓ Branch 1 taken 159 times.
408 if (--postponed_job.referenced_catalogs > 0) {
865 249 break;
866 }
867
868
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 159 times.
159 if (!Yield(&postponed_job)) {
869 return false;
870 }
871 159 clbs.pop();
872 }
873
874 287 return true;
875 }
876
877 1889 void Push(const CatalogJob &job, TraversalContext *ctx) {
878 1889 ctx->catalog_stack.push(job);
879 1889 }
880
881 1887 CatalogJob Pop(TraversalContext *ctx) {
882 1887 CatalogJob job = ctx->catalog_stack.top();
883 1887 ctx->catalog_stack.pop();
884 1887 return job;
885 }
886
887 protected:
888 HashSet visited_catalogs_;
889 };
890
891 template <class ObjectFetcherT>
892 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kFullHistory =
893 std::numeric_limits<uint64_t>::max();
894
895 template <class ObjectFetcherT>
896 const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoHistory = 0;
897
898 template <class ObjectFetcherT>
899 const time_t
900 CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoTimestampThreshold = 0;
901
902 } // namespace swissknife
903
904 #endif // CVMFS_CATALOG_TRAVERSAL_H_
905