| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/catalog_traversal.h |
| Date: | 2025-11-02 02:35:35 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 245 | 264 | 92.8% |
| Branches: | 145 | 263 | 55.1% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef CVMFS_CATALOG_TRAVERSAL_H_ | ||
| 6 | #define CVMFS_CATALOG_TRAVERSAL_H_ | ||
| 7 | |||
| 8 | #include <cassert> | ||
| 9 | #include <limits> | ||
| 10 | #include <set> | ||
| 11 | #include <stack> | ||
| 12 | #include <string> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "catalog.h" | ||
| 16 | #include "compression/compression.h" | ||
| 17 | #include "crypto/signature.h" | ||
| 18 | #include "history_sqlite.h" | ||
| 19 | #include "manifest.h" | ||
| 20 | #include "object_fetcher.h" | ||
| 21 | #include "util/concurrency.h" | ||
| 22 | #include "util/logging.h" | ||
| 23 | |||
| 24 | namespace catalog { | ||
| 25 | class Catalog; | ||
| 26 | class WritableCatalog; | ||
| 27 | } // namespace catalog | ||
| 28 | |||
| 29 | namespace swissknife { | ||
| 30 | |||
| 31 | /** | ||
| 32 | * Callback data which has to be implemented by the registered callback | ||
| 33 | * functions/methods (see Observable<> for further details) | ||
| 34 | * @param catalog the catalog object which needs to be processed | ||
| 35 | * @param catalog_hash the content hash of the catalog | ||
| 36 | * @param tree_level the depth in the nested catalog tree | ||
| 37 | * (starting at zero) | ||
| 38 | * @param file_size the size of the downloaded catalog database file | ||
| 39 | * @param history_depth the distance from the current HEAD revision | ||
| 40 | * (current HEAD has history_depth 0) | ||
| 41 | */ | ||
| 42 | template<class CatalogT> | ||
| 43 | struct CatalogTraversalData { | ||
| 44 | 18097531 | CatalogTraversalData(const CatalogT *catalog, | |
| 45 | const shash::Any &catalog_hash, | ||
| 46 | const unsigned tree_level, | ||
| 47 | const size_t file_size, | ||
| 48 | const uint64_t history_depth) | ||
| 49 | 18097531 | : catalog(catalog) | |
| 50 | 18097531 | , catalog_hash(catalog_hash) | |
| 51 | 18097531 | , tree_level(tree_level) | |
| 52 | 18097531 | , file_size(file_size) | |
| 53 | 18097531 | , history_depth(history_depth) { } | |
| 54 | |||
| 55 | const CatalogT *catalog; | ||
| 56 | const shash::Any catalog_hash; | ||
| 57 | const unsigned int tree_level; | ||
| 58 | const size_t file_size; | ||
| 59 | const uint64_t history_depth; | ||
| 60 | }; | ||
| 61 | |||
| 62 | |||
| 63 | /** | ||
| 64 | * A layer to extract information from a catalog. Users of the catalog | ||
| 65 | * traversal can provide derived classes to overwrite behavior. Currently used | ||
| 66 | * to get the last-modified timestamp in configurable manner: for the garbage | ||
| 67 | * collection, the timestamp of the catalog hash in the reflog counts, which | ||
| 68 | * is the same or newer than the one stored in the catalog. | ||
| 69 | */ | ||
| 70 | template<class CatalogT> | ||
| 71 | class CatalogTraversalInfoShim { | ||
| 72 | public: | ||
| 73 | 11042 | virtual ~CatalogTraversalInfoShim() { } | |
| 74 | |||
| 75 | // Default implementation: use the catalog's timestamp | ||
| 76 | 12260 | virtual uint64_t GetLastModified(const CatalogT *catalog) { | |
| 77 | 12260 | return catalog->GetLastModified(); | |
| 78 | } | ||
| 79 | }; | ||
| 80 | |||
| 81 | |||
| 82 | /** | ||
| 83 | * A base class for CatalogTraversal and CatalogTraversalParallel implementing | ||
| 84 | * common functionality. Actual traversal classes inherit from this class. | ||
| 85 | * | ||
| 86 | */ | ||
| 87 | template<class ObjectFetcherT> | ||
| 88 | class CatalogTraversalBase | ||
| 89 | : public Observable< | ||
| 90 | CatalogTraversalData<typename ObjectFetcherT::CatalogTN> > { | ||
| 91 | public: | ||
| 92 | typedef ObjectFetcherT ObjectFetcherTN; | ||
| 93 | typedef typename ObjectFetcherT::CatalogTN CatalogTN; | ||
| 94 | typedef typename ObjectFetcherT::HistoryTN HistoryTN; | ||
| 95 | typedef CatalogTraversalData<CatalogTN> CallbackDataTN; | ||
| 96 | |||
| 97 | /** | ||
| 98 | * @param repo_url the path to the repository to be traversed: | ||
| 99 | * -> either absolute path to the local catalogs | ||
| 100 | * -> or an URL to a remote repository | ||
| 101 | * @param repo_name fully qualified repository name (used for | ||
| 102 | * remote repository signature check) (optional) | ||
| 103 | * @param repo_keys a comma separated list of public key file | ||
| 104 | * locations to verify the repository manifest | ||
| 105 | * file | ||
| 106 | * @param history depth of the desired catalog history traversal | ||
| 107 | * (default: 0 - only HEAD catalogs are traversed) | ||
| 108 | * @param timestamp timestamp of history traversal threshold | ||
| 109 | * (default: 0 - no threshold, traverse | ||
| 110 | * everything) | ||
| 111 | * @param no_repeat_history keep track of visited catalogs and don't | ||
| 112 | * re-visit them in previous revisions | ||
| 113 | * @param no_close do not close catalogs after they were attached | ||
| 114 | * (catalogs retain their parent/child pointers) | ||
| 115 | * @param ignore_load_failure suppressed an error message if a catalog file | ||
| 116 | * could not be loaded (i.e. was sweeped before by | ||
| 117 | * a garbage collection run) | ||
| 118 | * @param quiet silence messages that would go to stderr | ||
| 119 | * @param tmp_dir path to the temporary directory to be used | ||
| 120 | * (default: /tmp) | ||
| 121 | * | ||
| 122 | * ONLY FOR CatalogTraversalParallel | ||
| 123 | * @param num_threads number of threads concurrently traversing | ||
| 124 | * the catalog trees; hint: set up based on | ||
| 125 | * CPU utilization and optimal number of | ||
| 126 | * download threads | ||
| 127 | * (default: 8) | ||
| 128 | * | ||
| 129 | * @param serialize_callbacks do not call multiple catalog callbacks | ||
| 130 | * concurrently; ensures safe access to shared | ||
| 131 | * variables inside the callback; turn off if | ||
| 132 | * the registered callback is thread-safe | ||
| 133 | * (default: true) | ||
| 134 | */ | ||
| 135 | struct Parameters { | ||
| 136 | 4113 | Parameters() | |
| 137 | 4113 | : object_fetcher(NULL) | |
| 138 | 4113 | , history(kNoHistory) | |
| 139 | 4113 | , timestamp(kNoTimestampThreshold) | |
| 140 | 4113 | , no_repeat_history(false) | |
| 141 | 4113 | , no_close(false) | |
| 142 | 4113 | , ignore_load_failure(false) | |
| 143 | 4113 | , quiet(false) | |
| 144 | 4113 | , num_threads(8) | |
| 145 | 4113 | , serialize_callbacks(true) { } | |
| 146 | |||
| 147 | static const uint64_t kFullHistory; | ||
| 148 | static const uint64_t kNoHistory; | ||
| 149 | static const time_t kNoTimestampThreshold; | ||
| 150 | |||
| 151 | ObjectFetcherT *object_fetcher; | ||
| 152 | |||
| 153 | uint64_t history; | ||
| 154 | time_t timestamp; | ||
| 155 | bool no_repeat_history; | ||
| 156 | bool no_close; | ||
| 157 | bool ignore_load_failure; | ||
| 158 | bool quiet; | ||
| 159 | // These parameters only apply for CatalogTraversalParallel | ||
| 160 | unsigned int num_threads; | ||
| 161 | bool serialize_callbacks; | ||
| 162 | }; | ||
| 163 | |||
| 164 | enum TraversalType { | ||
| 165 | kBreadthFirst, | ||
| 166 | kDepthFirst | ||
| 167 | }; | ||
| 168 | |||
| 169 | 4113 | explicit CatalogTraversalBase(const Parameters ¶ms) | |
| 170 | 4113 | : object_fetcher_(params.object_fetcher) | |
| 171 | 4113 | , catalog_info_shim_(&catalog_info_default_shim_) | |
| 172 | 4113 | , default_history_depth_(params.history) | |
| 173 | 4113 | , default_timestamp_threshold_(params.timestamp) | |
| 174 | 4113 | , no_close_(params.no_close) | |
| 175 | 4113 | , ignore_load_failure_(params.ignore_load_failure) | |
| 176 | 4113 | , no_repeat_history_(params.no_repeat_history) | |
| 177 |
2/2✓ Branch 2 taken 1566 times.
✓ Branch 3 taken 2547 times.
|
4113 | , error_sink_((params.quiet) ? kLogDebug : kLogStderr) { |
| 178 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4113 times.
|
4113 | assert(object_fetcher_ != NULL); |
| 179 | 4113 | } | |
| 180 | |||
| 181 | /** | ||
| 182 | * Starts the traversal process. | ||
| 183 | * After calling this methods CatalogTraversal will go through all catalogs | ||
| 184 | * and call the registered callback methods for each found catalog. | ||
| 185 | * If something goes wrong in the process, the traversal will be cancelled. | ||
| 186 | * | ||
| 187 | * @param type breadths or depth first traversal | ||
| 188 | * @return true, when all catalogs were successfully processed. On | ||
| 189 | * failure the traversal is cancelled and false is returned. | ||
| 190 | */ | ||
| 191 | virtual bool Traverse(const TraversalType type = kBreadthFirst) = 0; | ||
| 192 | |||
| 193 | /** | ||
| 194 | * Starts the traversal process at the catalog pointed to by the given hash | ||
| 195 | * | ||
| 196 | * @param root_catalog_hash the entry point into the catalog traversal | ||
| 197 | * @param type breadths or depth first traversal | ||
| 198 | * @return true when catalogs were successfully traversed | ||
| 199 | */ | ||
| 200 | virtual bool Traverse(const shash::Any &root_catalog_hash, | ||
| 201 | const TraversalType type = kBreadthFirst) = 0; | ||
| 202 | |||
| 203 | /** | ||
| 204 | * Traverse a list of revisions represented by root catalogs from first | ||
| 205 | * to last. DO NOT traverse previous revisions based on history and | ||
| 206 | * timestamp threshold settings. | ||
| 207 | * | ||
| 208 | * @param catalog_list list of root catalog hashes | ||
| 209 | * @param type breadth- or depth- first traversal | ||
| 210 | * @return true on success | ||
| 211 | */ | ||
| 212 | virtual bool TraverseList(const std::vector<shash::Any> &catalog_list, | ||
| 213 | const TraversalType type = kBreadthFirst) = 0; | ||
| 214 | |||
| 215 | /** | ||
| 216 | * Starts the traversal process at the catalog pointed to by the given hash | ||
| 217 | * but doesn't traverse into predecessor catalog revisions. This overrides the | ||
| 218 | * TraversalParameter settings provided at construction. | ||
| 219 | * | ||
| 220 | * @param root_catalog_hash the entry point into the catalog traversal | ||
| 221 | * @param type breadths or depth first traversal | ||
| 222 | * @return true when catalogs were successfully traversed | ||
| 223 | */ | ||
| 224 | virtual bool TraverseRevision(const shash::Any &root_catalog_hash, | ||
| 225 | const TraversalType type = kBreadthFirst) = 0; | ||
| 226 | |||
| 227 | /** | ||
| 228 | * Figures out all named tags in a repository and uses all of them as entry | ||
| 229 | * points into the traversal process. | ||
| 230 | * | ||
| 231 | * @param type breadths or depth first traversal | ||
| 232 | * @return true when catalog traversal successfully finished | ||
| 233 | */ | ||
| 234 | 1976 | virtual bool TraverseNamedSnapshots( | |
| 235 | const TraversalType type = kBreadthFirst) { | ||
| 236 | typedef std::vector<shash::Any> HashList; | ||
| 237 | |||
| 238 |
1/2✓ Branch 1 taken 1976 times.
✗ Branch 2 not taken.
|
1976 | UniquePtr<HistoryTN> tag_db; |
| 239 | const typename ObjectFetcherT::Failures | ||
| 240 |
2/4✓ Branch 1 taken 1976 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1976 times.
✗ Branch 5 not taken.
|
1976 | retval = object_fetcher_->FetchHistory(&tag_db); |
| 241 |
2/3✓ Branch 0 taken 1897 times.
✓ Branch 1 taken 79 times.
✗ Branch 2 not taken.
|
1976 | switch (retval) { |
| 242 | 1897 | case ObjectFetcherT::kFailOk: | |
| 243 | 1897 | break; | |
| 244 | |||
| 245 | 79 | case ObjectFetcherT::kFailNotFound: | |
| 246 |
1/2✓ Branch 1 taken 79 times.
✗ Branch 2 not taken.
|
79 | LogCvmfs(kLogCatalogTraversal, kLogDebug, |
| 247 | "didn't find a history database to traverse"); | ||
| 248 | 79 | return true; | |
| 249 | |||
| 250 | ✗ | default: | |
| 251 | ✗ | LogCvmfs(kLogCatalogTraversal, kLogStderr, | |
| 252 | "failed to download history database (%d - %s)", retval, | ||
| 253 | Code2Ascii(retval)); | ||
| 254 | ✗ | return false; | |
| 255 | } | ||
| 256 | |||
| 257 | 1897 | HashList root_hashes; | |
| 258 |
1/2✓ Branch 2 taken 1897 times.
✗ Branch 3 not taken.
|
1897 | bool success = tag_db->GetHashes(&root_hashes); |
| 259 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1897 times.
|
1897 | assert(success); |
| 260 |
1/2✓ Branch 1 taken 1897 times.
✗ Branch 2 not taken.
|
1897 | return TraverseList(root_hashes, type); |
| 261 | 1976 | } | |
| 262 | |||
| 263 | 64 | void SetCatalogInfoShim(CatalogTraversalInfoShim<CatalogTN> *shim) { | |
| 264 | 64 | catalog_info_shim_ = shim; | |
| 265 | 64 | } | |
| 266 | |||
| 267 | protected: | ||
| 268 | typedef std::set<shash::Any> HashSet; | ||
| 269 | |||
| 270 | /** | ||
| 271 | * This struct keeps information about a catalog that still needs to be | ||
| 272 | * traversed by a currently running catalog traversal process. | ||
| 273 | */ | ||
| 274 | struct CatalogJob { | ||
| 275 | 18105795 | CatalogJob(const std::string &path, | |
| 276 | const shash::Any &hash, | ||
| 277 | const unsigned tree_level, | ||
| 278 | const uint64_t history_depth, | ||
| 279 | CatalogTN *parent = NULL) | ||
| 280 | 18105795 | : path(path) | |
| 281 | 18105795 | , hash(hash) | |
| 282 | 18105795 | , tree_level(tree_level) | |
| 283 | 18105795 | , history_depth(history_depth) | |
| 284 | 18105795 | , parent(parent) | |
| 285 | 18105795 | , catalog_file_size(0) | |
| 286 | 18105795 | , ignore(false) | |
| 287 | 18105795 | , catalog(NULL) | |
| 288 | 18105795 | , referenced_catalogs(0) | |
| 289 | 18105795 | , postponed(false) { } | |
| 290 | |||
| 291 | 36140939 | bool IsRootCatalog() const { return tree_level == 0; } | |
| 292 | |||
| 293 | 18097531 | CallbackDataTN GetCallbackData() const { | |
| 294 | 18097531 | return CallbackDataTN(catalog, hash, tree_level, catalog_file_size, | |
| 295 | 18097531 | history_depth); | |
| 296 | } | ||
| 297 | |||
| 298 | // initial state description | ||
| 299 | const std::string path; | ||
| 300 | const shash::Any hash; | ||
| 301 | const unsigned tree_level; | ||
| 302 | const uint64_t history_depth; | ||
| 303 | CatalogTN *parent; | ||
| 304 | |||
| 305 | // dynamic processing state (used internally) | ||
| 306 | std::string catalog_file_path; | ||
| 307 | size_t catalog_file_size; | ||
| 308 | bool ignore; | ||
| 309 | CatalogTN *catalog; | ||
| 310 | uint64_t referenced_catalogs; | ||
| 311 | bool postponed; | ||
| 312 | }; | ||
| 313 | |||
| 314 | 18096939 | bool PrepareCatalog(CatalogJob *job) { | |
| 315 | const typename ObjectFetcherT::Failures | ||
| 316 | 54246521 | retval = object_fetcher_->FetchCatalog(job->hash, | |
| 317 | 18096547 | job->path, | |
| 318 | &job->catalog, | ||
| 319 | 18096939 | !job->IsRootCatalog(), | |
| 320 | job->parent); | ||
| 321 |
2/3✓ Branch 0 taken 18052696 times.
✓ Branch 1 taken 535 times.
✗ Branch 2 not taken.
|
18053035 | switch (retval) { |
| 322 | 18052696 | case ObjectFetcherT::kFailOk: | |
| 323 | 18052696 | break; | |
| 324 | |||
| 325 | 535 | case ObjectFetcherT::kFailNotFound: | |
| 326 |
2/2✓ Branch 0 taken 456 times.
✓ Branch 1 taken 79 times.
|
535 | if (ignore_load_failure_) { |
| 327 |
1/2✓ Branch 3 taken 456 times.
✗ Branch 4 not taken.
|
456 | LogCvmfs(kLogCatalogTraversal, kLogDebug, |
| 328 | "ignoring missing catalog " | ||
| 329 | "%s (swept before?)", | ||
| 330 | job->hash.ToString().c_str()); | ||
| 331 | 456 | job->ignore = true; | |
| 332 | 456 | return true; | |
| 333 | } | ||
| 334 | |||
| 335 | default: | ||
| 336 | ✗ | LogCvmfs(kLogCatalogTraversal, error_sink_, | |
| 337 | "failed to load catalog %s " | ||
| 338 | "(%d - %s)", | ||
| 339 | job->hash.ToStringWithSuffix().c_str(), retval, | ||
| 340 | Code2Ascii(retval)); | ||
| 341 | 79 | return false; | |
| 342 | } | ||
| 343 | |||
| 344 | // catalogs returned by ObjectFetcher<> are managing their database files by | ||
| 345 | // default... we need to manage this file manually here | ||
| 346 | 18052696 | job->catalog->DropDatabaseFileOwnership(); | |
| 347 | |||
| 348 | 18048090 | job->catalog_file_path = job->catalog->database_path(); | |
| 349 |
1/2✓ Branch 2 taken 18095228 times.
✗ Branch 3 not taken.
|
18055881 | job->catalog_file_size = GetFileSize(job->catalog->database_path()); |
| 350 | |||
| 351 | 18092288 | return true; | |
| 352 | } | ||
| 353 | |||
| 354 | |||
| 355 | 18047458 | bool ReopenCatalog(CatalogJob *job) { | |
| 356 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18047458 times.
|
18047458 | assert(!job->ignore); |
| 357 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18047458 times.
|
18047458 | assert(job->catalog == NULL); |
| 358 | |||
| 359 | 36044250 | job->catalog = CatalogTN::AttachFreely(job->path, | |
| 360 | 18046184 | job->catalog_file_path, | |
| 361 | 18046184 | job->hash, | |
| 362 | ✗ | job->parent, | |
| 363 | 18047458 | !job->IsRootCatalog()); | |
| 364 | |||
| 365 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17998066 times.
|
17998066 | if (job->catalog == NULL) { |
| 366 | ✗ | LogCvmfs(kLogCatalogTraversal, error_sink_, | |
| 367 | "failed to re-open catalog %s", job->hash.ToString().c_str()); | ||
| 368 | ✗ | return false; | |
| 369 | } | ||
| 370 | |||
| 371 | 17998066 | return true; | |
| 372 | } | ||
| 373 | |||
| 374 | |||
| 375 | 18179930 | bool CloseCatalog(const bool unlink_db, CatalogJob *job) { | |
| 376 |
1/2✓ Branch 0 taken 18179930 times.
✗ Branch 1 not taken.
|
18179930 | delete job->catalog; |
| 377 | 18179930 | job->catalog = NULL; | |
| 378 |
2/6✗ Branch 1 not taken.
✓ Branch 2 taken 18179930 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 18179930 times.
|
18179930 | if (!job->catalog_file_path.empty() && unlink_db) { |
| 379 | ✗ | const int retval = unlink(job->catalog_file_path.c_str()); | |
| 380 | ✗ | if (retval != 0) { | |
| 381 | ✗ | LogCvmfs(kLogCatalogTraversal, error_sink_, "Failed to unlink %s - %d", | |
| 382 | ✗ | job->catalog_file_path.c_str(), errno); | |
| 383 | ✗ | return false; | |
| 384 | } | ||
| 385 | } | ||
| 386 | |||
| 387 | 18179930 | return true; | |
| 388 | } | ||
| 389 | |||
| 390 | 2845 | shash::Any GetRepositoryRootCatalogHash() { | |
| 391 | // get the manifest of the repository to learn about the entry point or the | ||
| 392 | // root catalog of the repository to be traversed | ||
| 393 |
1/2✓ Branch 1 taken 2845 times.
✗ Branch 2 not taken.
|
2845 | UniquePtr<manifest::Manifest> manifest; |
| 394 | const typename ObjectFetcherT::Failures | ||
| 395 |
1/2✓ Branch 1 taken 2845 times.
✗ Branch 2 not taken.
|
2845 | retval = object_fetcher_->FetchManifest(&manifest); |
| 396 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2845 times.
|
2845 | if (retval != ObjectFetcherT::kFailOk) { |
| 397 | ✗ | LogCvmfs(kLogCatalogTraversal, kLogStderr, | |
| 398 | "failed to load manifest " | ||
| 399 | "(%d - %s)", | ||
| 400 | retval, Code2Ascii(retval)); | ||
| 401 | ✗ | return shash::Any(); | |
| 402 | } | ||
| 403 | |||
| 404 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2845 times.
|
2845 | assert(manifest.IsValid()); |
| 405 | 2845 | return manifest->catalog_hash(); | |
| 406 | 2845 | } | |
| 407 | |||
| 408 | /** | ||
| 409 | * Checks if a root catalog is below one of the pruning thresholds. | ||
| 410 | * Pruning thresholds can be either the catalog's history depth or a timestamp | ||
| 411 | * threshold applied to the last modified timestamp of the catalog. | ||
| 412 | * | ||
| 413 | * @param ctx traversal context for traversal-specific state | ||
| 414 | * @param job the job defining the current catalog | ||
| 415 | * @return true if either history or timestamp threshold are satisfied | ||
| 416 | */ | ||
| 417 | 12516 | bool IsBelowPruningThresholds(const CatalogJob &job, | |
| 418 | const uint64_t history_depth, | ||
| 419 | const time_t timestamp_threshold) { | ||
| 420 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 12516 times.
|
12516 | assert(job.IsRootCatalog()); |
| 421 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12516 times.
|
12516 | assert(job.catalog != NULL); |
| 422 | |||
| 423 | 12516 | const bool h = job.history_depth >= history_depth; | |
| 424 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12516 times.
|
12516 | assert(timestamp_threshold >= 0); |
| 425 | 12516 | const bool t = catalog_info_shim_->GetLastModified(job.catalog) | |
| 426 | 12516 | < unsigned(timestamp_threshold); | |
| 427 | |||
| 428 |
4/4✓ Branch 0 taken 11767 times.
✓ Branch 1 taken 749 times.
✓ Branch 2 taken 6074 times.
✓ Branch 3 taken 5693 times.
|
12516 | return t || h; |
| 429 | } | ||
| 430 | |||
| 431 | ObjectFetcherT *object_fetcher_; | ||
| 432 | CatalogTraversalInfoShim<CatalogTN> catalog_info_default_shim_; | ||
| 433 | CatalogTraversalInfoShim<CatalogTN> *catalog_info_shim_; | ||
| 434 | const uint64_t default_history_depth_; | ||
| 435 | const time_t default_timestamp_threshold_; | ||
| 436 | const bool no_close_; | ||
| 437 | const bool ignore_load_failure_; | ||
| 438 | const bool no_repeat_history_; | ||
| 439 | LogFacilities error_sink_; | ||
| 440 | }; | ||
| 441 | |||
| 442 | /** | ||
| 443 | * This class traverses the catalog hierarchy of a CVMFS repository recursively. | ||
| 444 | * Also historic catalog trees can be traversed. The user needs to specify a | ||
| 445 | * callback which is called for each catalog on the way. | ||
| 446 | * | ||
| 447 | * CatalogTraversal<> can be configured and used in various ways: | ||
| 448 | * -> Historic catalog traversal | ||
| 449 | * -> Prune catalogs below a certain history level | ||
| 450 | * -> Prune catalogs older than a certain threshold timestamp | ||
| 451 | * -> Never traverse a certain catalog twice | ||
| 452 | * -> Breadth First Traversal or Depth First Traversal | ||
| 453 | * -> Optional catalog memory management (no_close) | ||
| 454 | * -> Use all Named Snapshots of a repository as traversal entry point | ||
| 455 | * -> Traverse starting from a provided catalog | ||
| 456 | * -> Traverse catalogs that were previously skipped | ||
| 457 | * -> Produce various flavours of catalogs (writable, mocked, ...) | ||
| 458 | * | ||
| 459 | * Breadth First Traversal Strategy | ||
| 460 | * Catalogs are handed out to the user identical as they are traversed. | ||
| 461 | * Say: From top to bottom. When you would simply print each received catalog | ||
| 462 | * the result would be a nice representation of the catalog tree. | ||
| 463 | * This method is more efficient, because catalogs are opened, processed and | ||
| 464 | * thrown away directly afterwards. | ||
| 465 | * | ||
| 466 | * Depth First Traversal Strategy | ||
| 467 | * The user gets the catalog tree starting from the leaf nodes. | ||
| 468 | * Say: From bottom to top. A user can assume that he got all children or | ||
| 469 | * historical ancestors of a catalog before. | ||
| 470 | * This method climbs down the full catalog tree and hands it out 'in reverse | ||
| 471 | * order'. Thus, catalogs on the way are opened, checked for their descendants | ||
| 472 | * and closed. Once all children and historical ancestors are processed, it is | ||
| 473 | * re-opened and handed out to the user. | ||
| 474 | * Note: This method needs more disk space to temporarily store downloaded but | ||
| 475 | * not yet processed catalogs. | ||
| 476 | * | ||
| 477 | * Note: Since all CVMFS catalog files together can grow to several gigabytes in | ||
| 478 | * file size, each catalog is loaded, processed and removed immediately | ||
| 479 | * afterwards. Except if no_close is specified, which allows the user to | ||
| 480 | * choose when a catalog should be closed. Keep in mind, that a user is | ||
| 481 | * responsible for both deletion of the delivered catalog objects as well | ||
| 482 | * as unlinking of the catalog database file. | ||
| 483 | * | ||
| 484 | * | ||
| 485 | * @param ObjectFetcherT Strategy Pattern implementation that defines how to | ||
| 486 | * retrieve catalogs from various backend storage types. | ||
| 487 | * Furthermore the ObjectFetcherT::CatalogTN is the type | ||
| 488 | * of catalog to be instantiated by CatalogTraversal<>. | ||
| 489 | * | ||
| 490 | * CAUTION: the CatalogTN* pointer passed into the callback becomes invalid | ||
| 491 | * directly after the callback method returns, unless you create the | ||
| 492 | * CatalogTraversal object with no_close = true. | ||
| 493 | */ | ||
| 494 | template<class ObjectFetcherT> | ||
| 495 | class CatalogTraversal : public CatalogTraversalBase<ObjectFetcherT> { | ||
| 496 | public: | ||
| 497 | typedef CatalogTraversalBase<ObjectFetcherT> Base; | ||
| 498 | typedef ObjectFetcherT ObjectFetcherTN; | ||
| 499 | typedef typename ObjectFetcherT::CatalogTN CatalogTN; | ||
| 500 | typedef typename ObjectFetcherT::HistoryTN HistoryTN; | ||
| 501 | typedef CatalogTraversalData<CatalogTN> CallbackDataTN; | ||
| 502 | typedef typename Base::Parameters Parameters; | ||
| 503 | typedef typename Base::TraversalType TraversalType; | ||
| 504 | typedef typename Base::CatalogJob CatalogJob; | ||
| 505 | |||
| 506 | protected: | ||
| 507 | typedef std::set<shash::Any> HashSet; | ||
| 508 | typedef std::stack<CatalogJob> CatalogJobStack; | ||
| 509 | |||
| 510 | /** | ||
| 511 | * This struct represents a catalog traversal context. It needs to be re- | ||
| 512 | * created for each catalog traversal process and contains information to this | ||
| 513 | * specific catalog traversal run. | ||
| 514 | * | ||
| 515 | * @param history_depth the history traversal threshold | ||
| 516 | * @param traversal_type either breadth or depth first traversal strategy | ||
| 517 | * @param catalog_stack the call stack for catalogs to be traversed | ||
| 518 | * @param callback_stack used in depth first traversal for deferred yielding | ||
| 519 | */ | ||
| 520 | struct TraversalContext { | ||
| 521 | 4290 | TraversalContext(const uint64_t history_depth, | |
| 522 | const time_t timestamp_threshold, | ||
| 523 | const TraversalType traversal_type) | ||
| 524 | 4290 | : history_depth(history_depth) | |
| 525 | 4290 | , timestamp_threshold(timestamp_threshold) | |
| 526 |
1/2✓ Branch 2 taken 4290 times.
✗ Branch 3 not taken.
|
4290 | , traversal_type(traversal_type) { } |
| 527 | |||
| 528 | const uint64_t history_depth; | ||
| 529 | const time_t timestamp_threshold; | ||
| 530 | const TraversalType traversal_type; | ||
| 531 | CatalogJobStack catalog_stack; | ||
| 532 | CatalogJobStack callback_stack; | ||
| 533 | }; | ||
| 534 | |||
| 535 | public: | ||
| 536 | /** | ||
| 537 | * Constructs a new catalog traversal engine based on the construction | ||
| 538 | * parameters described in struct ConstructionParams. | ||
| 539 | */ | ||
| 540 | 1584 | explicit CatalogTraversal(const Parameters ¶ms) | |
| 541 | 1584 | : CatalogTraversalBase<ObjectFetcherT>(params) { } | |
| 542 | |||
| 543 | 1137 | bool Traverse(const TraversalType type = Base::kBreadthFirst) { | |
| 544 |
1/2✓ Branch 1 taken 1137 times.
✗ Branch 2 not taken.
|
1137 | const shash::Any root_catalog_hash = this->GetRepositoryRootCatalogHash(); |
| 545 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1137 times.
|
1137 | if (root_catalog_hash.IsNull()) { |
| 546 | ✗ | return false; | |
| 547 | } | ||
| 548 |
1/2✓ Branch 1 taken 1137 times.
✗ Branch 2 not taken.
|
1137 | return Traverse(root_catalog_hash, type); |
| 549 | } | ||
| 550 | |||
| 551 | 1527 | bool Traverse(const shash::Any &root_catalog_hash, | |
| 552 | const TraversalType type = Base::kBreadthFirst) { | ||
| 553 | // add the root catalog of the repository as the first element on the job | ||
| 554 | // stack | ||
| 555 | 1527 | TraversalContext ctx(this->default_history_depth_, | |
| 556 |
1/2✓ Branch 1 taken 1527 times.
✗ Branch 2 not taken.
|
1527 | this->default_timestamp_threshold_, type); |
| 557 |
1/2✓ Branch 1 taken 1527 times.
✗ Branch 2 not taken.
|
1527 | Push(root_catalog_hash, &ctx); |
| 558 |
1/2✓ Branch 1 taken 1527 times.
✗ Branch 2 not taken.
|
3054 | return DoTraverse(&ctx); |
| 559 | 1527 | } | |
| 560 | |||
| 561 | 1344 | bool TraverseList(const std::vector<shash::Any> &catalog_list, | |
| 562 | const TraversalType type = Base::kBreadthFirst) { | ||
| 563 | 1344 | std::vector<shash::Any>::const_iterator i = catalog_list.begin(); | |
| 564 | 1344 | const std::vector<shash::Any>::const_iterator iend = catalog_list.end(); | |
| 565 | 1344 | bool success = true; | |
| 566 |
2/2✓ Branch 1 taken 2763 times.
✓ Branch 2 taken 1344 times.
|
4107 | for (; i != iend; ++i) { |
| 567 |
3/6✓ Branch 0 taken 2763 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 2763 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2763 times.
✗ Branch 7 not taken.
|
2763 | success = success && TraverseRevision(*i, type); |
| 568 | } | ||
| 569 | 1344 | return success; | |
| 570 | } | ||
| 571 | |||
| 572 | 2763 | bool TraverseRevision(const shash::Any &root_catalog_hash, | |
| 573 | const TraversalType type = Base::kBreadthFirst) { | ||
| 574 | // add the given root catalog as the first element on the job stack | ||
| 575 |
1/2✓ Branch 1 taken 2763 times.
✗ Branch 2 not taken.
|
2763 | TraversalContext ctx(Parameters::kNoHistory, |
| 576 | Parameters::kNoTimestampThreshold, type); | ||
| 577 |
1/2✓ Branch 1 taken 2763 times.
✗ Branch 2 not taken.
|
2763 | Push(root_catalog_hash, &ctx); |
| 578 |
1/2✓ Branch 1 taken 2763 times.
✗ Branch 2 not taken.
|
5526 | return DoTraverse(&ctx); |
| 579 | 2763 | } | |
| 580 | |||
| 581 | protected: | ||
| 582 | /** | ||
| 583 | * This controls the actual traversal. Using a stack to traverse down the | ||
| 584 | * catalog hierarchy. This method implements the traversal itself, but not | ||
| 585 | * in which way catalogs are handed out to the user code. | ||
| 586 | * | ||
| 587 | * Each catalog is processed in these steps: | ||
| 588 | * 1.) Pop the next catalog from the stack | ||
| 589 | * Catalogs are always traversed from latest to oldest revision and | ||
| 590 | * from root to leaf nested catalogs | ||
| 591 | * 2.) Prepare the catalog for traversing | ||
| 592 | * 2.1.) Check if it was visited before | ||
| 593 | * 2.2.) Fetch the catalog database from the backend storage | ||
| 594 | * This might fail and produce an error. For root catalogs this | ||
| 595 | * error can be ignored (might be garbage collected before) | ||
| 596 | * 2.3.) Open the catalog database | ||
| 597 | * 2.4.) Check if the catalog is older than the timestamp threshold | ||
| 598 | * After these steps the catalog is opened either opened and ready for | ||
| 599 | * the traversal to continue, or it was marked for ignore (job.ignore) | ||
| 600 | * 3.) Check if the catalog is marked to be ignored | ||
| 601 | * Catalog might not be loadable (sweeped root catalog) or is too old | ||
| 602 | * Note: ignored catalogs can still trigger postponed yields | ||
| 603 | * 4.) Mark the catalog as visited to be able to skip it later on | ||
| 604 | * 5.) Find and push referencing catalogs | ||
| 605 | * This pushes all descendents of the current catalog onto the stack. | ||
| 606 | * Note that this is dependent on the strategy (depth or breadth first) | ||
| 607 | * and on the history threshold (see history_depth). | ||
| 608 | * 6.) Hand the catalog out to the user code | ||
| 609 | * Depending on the traversal strategy (depth of breadths first) this | ||
| 610 | * might immediately yield zero to N catalogs to the user code. | ||
| 611 | * | ||
| 612 | * Note: If anything unexpected goes wrong during the traversal process, it | ||
| 613 | * is aborted immediately. | ||
| 614 | * | ||
| 615 | * @param ctx the traversal context that steers the whole traversal process | ||
| 616 | * @return true on successful traversal and false on abort | ||
| 617 | */ | ||
| 618 | 4290 | bool DoTraverse(TraversalContext *ctx) { | |
| 619 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4290 times.
|
4290 | assert(ctx->callback_stack.empty()); |
| 620 | |||
| 621 |
5/5✓ Branch 1 taken 52263 times.
✓ Branch 2 taken 30 times.
✓ Branch 3 taken 3273 times.
✓ Branch 5 taken 55566 times.
✓ Branch 6 taken 4260 times.
|
115392 | while (!ctx->catalog_stack.empty()) { |
| 622 | // Get the top most catalog for the next processing step | ||
| 623 |
1/2✓ Branch 1 taken 55566 times.
✗ Branch 2 not taken.
|
55566 | CatalogJob job = Pop(ctx); |
| 624 | |||
| 625 |
3/4✓ Branch 1 taken 55566 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3075 times.
✓ Branch 4 taken 52491 times.
|
55566 | if (ShouldBeSkipped(job)) { |
| 626 | 3075 | job.ignore = true; | |
| 627 | // download and open the catalog for processing | ||
| 628 |
3/4✓ Branch 1 taken 52491 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 30 times.
✓ Branch 4 taken 52461 times.
|
52491 | } else if (!this->PrepareCatalog(&job)) { |
| 629 | 30 | return false; | |
| 630 | } | ||
| 631 | |||
| 632 | // ignored catalogs don't need to be processed anymore but they might | ||
| 633 | // release postponed yields | ||
| 634 |
2/2✓ Branch 0 taken 3273 times.
✓ Branch 1 taken 52263 times.
|
55536 | if (job.ignore) { |
| 635 |
2/4✓ Branch 1 taken 3273 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3273 times.
|
3273 | if (!HandlePostponedYields(job, ctx)) { |
| 636 | ✗ | return false; | |
| 637 | } | ||
| 638 | 3273 | continue; | |
| 639 | } | ||
| 640 | |||
| 641 | // push catalogs referenced by the current catalog (onto stack) | ||
| 642 |
1/2✓ Branch 1 taken 52263 times.
✗ Branch 2 not taken.
|
52263 | this->MarkAsVisited(job); |
| 643 |
1/2✓ Branch 1 taken 52263 times.
✗ Branch 2 not taken.
|
52263 | PushReferencedCatalogs(&job, ctx); |
| 644 | |||
| 645 | // notify listeners | ||
| 646 |
2/4✓ Branch 1 taken 52263 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 52263 times.
|
52263 | if (!YieldToListeners(&job, ctx)) { |
| 647 | ✗ | return false; | |
| 648 | } | ||
| 649 | } | ||
| 650 | |||
| 651 | // invariant: after the traversal finished, there should be no more catalogs | ||
| 652 | // to traverse or to yield! | ||
| 653 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4260 times.
|
4260 | assert(ctx->catalog_stack.empty()); |
| 654 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4260 times.
|
4260 | assert(ctx->callback_stack.empty()); |
| 655 | 4260 | return true; | |
| 656 | } | ||
| 657 | |||
| 658 | 52263 | void PushReferencedCatalogs(CatalogJob *job, TraversalContext *ctx) { | |
| 659 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(!job->ignore); |
| 660 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(job->catalog != NULL); |
| 661 |
3/4✓ Branch 0 taken 12282 times.
✓ Branch 1 taken 39981 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12282 times.
|
52263 | assert(ctx->traversal_type == Base::kBreadthFirst |
| 662 | || ctx->traversal_type == Base::kDepthFirst); | ||
| 663 | |||
| 664 | // this differs, depending on the traversal strategy. | ||
| 665 | // | ||
| 666 | // Breadths First Traversal | ||
| 667 | // Catalogs are traversed from top (root catalog) to bottom (leaf | ||
| 668 | // catalogs) and from more recent (HEAD revision) to older (historic | ||
| 669 | // revisions) | ||
| 670 | // | ||
| 671 | // Depth First Traversal | ||
| 672 | // Catalogs are traversed from oldest revision (depends on the configured | ||
| 673 | // maximal history depth) to the HEAD revision and from bottom (leafs) to | ||
| 674 | // top (root catalogs) | ||
| 675 | 104526 | job->referenced_catalogs = (ctx->traversal_type == Base::kBreadthFirst) | |
| 676 |
2/2✓ Branch 0 taken 39981 times.
✓ Branch 1 taken 12282 times.
|
52263 | ? PushPreviousRevision(*job, ctx) |
| 677 | 39981 | + PushNestedCatalogs(*job, ctx) | |
| 678 | 12282 | : PushNestedCatalogs(*job, ctx) | |
| 679 | 12282 | + PushPreviousRevision(*job, ctx); | |
| 680 | 52263 | } | |
| 681 | |||
| 682 | /** | ||
| 683 | * Pushes the previous revision of a (root) catalog. | ||
| 684 | * @return the number of catalogs pushed on the processing stack | ||
| 685 | */ | ||
| 686 | 52263 | unsigned int PushPreviousRevision(const CatalogJob &job, | |
| 687 | TraversalContext *ctx) { | ||
| 688 | // only root catalogs are used for entering a previous revision (graph) | ||
| 689 |
2/2✓ Branch 1 taken 46803 times.
✓ Branch 2 taken 5460 times.
|
52263 | if (!job.catalog->IsRoot()) { |
| 690 | 46803 | return 0; | |
| 691 | } | ||
| 692 | |||
| 693 |
1/2✓ Branch 1 taken 5460 times.
✗ Branch 2 not taken.
|
5460 | const shash::Any previous_revision = job.catalog->GetPreviousRevision(); |
| 694 |
2/2✓ Branch 1 taken 534 times.
✓ Branch 2 taken 4926 times.
|
5460 | if (previous_revision.IsNull()) { |
| 695 | 534 | return 0; | |
| 696 | } | ||
| 697 | |||
| 698 | // check if the next deeper history level is actually requested | ||
| 699 | // Note: if the current catalog is below the timestamp threshold it will be | ||
| 700 | // traversed and only its ancestor revision will not be pushed anymore | ||
| 701 |
2/2✓ Branch 0 taken 2694 times.
✓ Branch 1 taken 2232 times.
|
4926 | if (this->IsBelowPruningThresholds(job, ctx->history_depth, |
| 702 |
1/2✓ Branch 1 taken 4926 times.
✗ Branch 2 not taken.
|
4926 | ctx->timestamp_threshold)) { |
| 703 | 2694 | return 0; | |
| 704 | } | ||
| 705 | |||
| 706 |
3/6✓ Branch 2 taken 2232 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2232 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 2232 times.
✗ Branch 9 not taken.
|
2232 | Push(CatalogJob("", previous_revision, 0, job.history_depth + 1, NULL), |
| 707 | ctx); | ||
| 708 | 2232 | return 1; | |
| 709 | } | ||
| 710 | |||
| 711 | /** | ||
| 712 | * Pushes all the referenced nested catalogs. | ||
| 713 | * @return the number of catalogs pushed on the processing stack | ||
| 714 | */ | ||
| 715 | 52263 | unsigned int PushNestedCatalogs(const CatalogJob &job, | |
| 716 | TraversalContext *ctx) { | ||
| 717 | typedef typename CatalogTN::NestedCatalogList NestedCatalogList; | ||
| 718 |
1/2✓ Branch 1 taken 52263 times.
✗ Branch 2 not taken.
|
52263 | const NestedCatalogList nested = job.catalog->ListOwnNestedCatalogs(); |
| 719 | 52263 | typename NestedCatalogList::const_iterator i = nested.begin(); | |
| 720 | 52263 | typename NestedCatalogList::const_iterator iend = nested.end(); | |
| 721 |
2/2✓ Branch 3 taken 49104 times.
✓ Branch 4 taken 52263 times.
|
101367 | for (; i != iend; ++i) { |
| 722 |
2/2✓ Branch 0 taken 2010 times.
✓ Branch 1 taken 47094 times.
|
49104 | CatalogTN *parent = (this->no_close_) ? job.catalog : NULL; |
| 723 |
2/4✓ Branch 1 taken 49104 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 49104 times.
✗ Branch 5 not taken.
|
49104 | const CatalogJob new_job(i->mountpoint.ToString(), |
| 724 | 49104 | i->hash, | |
| 725 | 49104 | job.tree_level + 1, | |
| 726 | 49104 | job.history_depth, | |
| 727 | parent); | ||
| 728 |
1/2✓ Branch 1 taken 49104 times.
✗ Branch 2 not taken.
|
49104 | Push(new_job, ctx); |
| 729 | } | ||
| 730 | |||
| 731 | 104526 | return nested.size(); | |
| 732 | 52263 | } | |
| 733 | |||
| 734 | 4290 | void Push(const shash::Any &root_catalog_hash, TraversalContext *ctx) { | |
| 735 |
3/6✓ Branch 2 taken 4290 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 4290 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 4290 times.
✗ Branch 9 not taken.
|
4290 | Push(CatalogJob("", root_catalog_hash, 0, 0), ctx); |
| 736 | 4290 | } | |
| 737 | |||
| 738 | 52263 | bool YieldToListeners(CatalogJob *job, TraversalContext *ctx) { | |
| 739 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(!job->ignore); |
| 740 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(job->catalog != NULL); |
| 741 |
3/4✓ Branch 0 taken 12282 times.
✓ Branch 1 taken 39981 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12282 times.
|
52263 | assert(ctx->traversal_type == Base::kBreadthFirst |
| 742 | || ctx->traversal_type == Base::kDepthFirst); | ||
| 743 | |||
| 744 | // in breadth first search mode, every catalog is simply handed out once | ||
| 745 | // it is visited. No extra magic required... | ||
| 746 |
2/2✓ Branch 0 taken 39981 times.
✓ Branch 1 taken 12282 times.
|
52263 | if (ctx->traversal_type == Base::kBreadthFirst) { |
| 747 | 39981 | return Yield(job); | |
| 748 | } | ||
| 749 | |||
| 750 | // in depth first search mode, catalogs might need to wait until all of | ||
| 751 | // their referenced catalogs are yielded (ctx.callback_stack)... | ||
| 752 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12282 times.
|
12282 | assert(ctx->traversal_type == Base::kDepthFirst); |
| 753 |
2/2✓ Branch 0 taken 4689 times.
✓ Branch 1 taken 7593 times.
|
12282 | if (job->referenced_catalogs > 0) { |
| 754 | 4689 | PostponeYield(job, ctx); | |
| 755 | 4689 | return true; | |
| 756 | } | ||
| 757 | |||
| 758 | // this catalog can be yielded | ||
| 759 |
2/4✓ Branch 1 taken 7593 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 7593 times.
✗ Branch 5 not taken.
|
7593 | return Yield(job) && HandlePostponedYields(*job, ctx); |
| 760 | } | ||
| 761 | |||
| 762 | /** | ||
| 763 | * Checks the traversal history if the given catalog was traversed or at least | ||
| 764 | * seen before. If 'no_repeat_history' is not set this is always 'false'. | ||
| 765 | * | ||
| 766 | * @param job the job to be checked against the traversal history | ||
| 767 | * @return true if the specified catalog was hit before | ||
| 768 | */ | ||
| 769 | 55566 | bool ShouldBeSkipped(const CatalogJob &job) { | |
| 770 |
4/4✓ Branch 0 taken 23196 times.
✓ Branch 1 taken 32370 times.
✓ Branch 3 taken 3075 times.
✓ Branch 4 taken 20121 times.
|
55566 | return this->no_repeat_history_ && (visited_catalogs_.count(job.hash) > 0); |
| 771 | } | ||
| 772 | |||
| 773 | 52263 | void MarkAsVisited(const CatalogJob &job) { | |
| 774 |
2/2✓ Branch 0 taken 19923 times.
✓ Branch 1 taken 32340 times.
|
52263 | if (this->no_repeat_history_) { |
| 775 | 19923 | visited_catalogs_.insert(job.hash); | |
| 776 | } | ||
| 777 | 52263 | } | |
| 778 | |||
| 779 | private: | ||
| 780 | /** | ||
| 781 | * This actually hands out a catalog to the user code | ||
| 782 | * It is not called by DoTraversa() directly but by wrapper functions in order | ||
| 783 | * to provide higher level yielding behaviour. | ||
| 784 | */ | ||
| 785 | 52263 | bool Yield(CatalogJob *job) { | |
| 786 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(!job->ignore); |
| 787 |
3/4✓ Branch 0 taken 4689 times.
✓ Branch 1 taken 47574 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4689 times.
|
52263 | assert(job->catalog != NULL || job->postponed); |
| 788 | |||
| 789 | // catalog was pushed on ctx.callback_stack before, it might need to be re- | ||
| 790 | // opened. If CatalogTraversal<> is configured with no_close, it was not | ||
| 791 | // closed before, hence does not need a re-open. | ||
| 792 |
5/8✓ Branch 0 taken 4689 times.
✓ Branch 1 taken 47574 times.
✓ Branch 2 taken 4689 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 4689 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 52263 times.
|
52263 | if (job->postponed && !this->no_close_ && !this->ReopenCatalog(job)) { |
| 793 | ✗ | return false; | |
| 794 | } | ||
| 795 | |||
| 796 | // hand the catalog out to the user code (see Observable<>) | ||
| 797 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 52263 times.
|
52263 | assert(job->catalog != NULL); |
| 798 |
1/2✓ Branch 2 taken 52263 times.
✗ Branch 3 not taken.
|
52263 | this->NotifyListeners(job->GetCallbackData()); |
| 799 | |||
| 800 | // skip the catalog closing procedure if asked for | ||
| 801 | // Note: In this case it is the user's responsibility to both delete the | ||
| 802 | // yielded catalog object and the underlying database temp file | ||
| 803 |
2/2✓ Branch 0 taken 2100 times.
✓ Branch 1 taken 50163 times.
|
52263 | if (this->no_close_) { |
| 804 | 2100 | return true; | |
| 805 | } | ||
| 806 | |||
| 807 | // we can close the catalog here and delete the temporary file | ||
| 808 | 50163 | const bool unlink_db = true; | |
| 809 | 50163 | return this->CloseCatalog(unlink_db, job); | |
| 810 | } | ||
| 811 | |||
| 812 | |||
| 813 | /** | ||
| 814 | * Pushes a catalog to the callback_stack for later yielding | ||
| 815 | * Note: this is only used for the Depth First Traversal strategy! | ||
| 816 | */ | ||
| 817 | 4689 | void PostponeYield(CatalogJob *job, TraversalContext *ctx) { | |
| 818 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4689 times.
|
4689 | assert(job->referenced_catalogs > 0); |
| 819 | |||
| 820 | 4689 | job->postponed = true; | |
| 821 |
1/2✓ Branch 0 taken 4689 times.
✗ Branch 1 not taken.
|
4689 | if (!this->no_close_) { |
| 822 | 4689 | const bool unlink_db = false; // will reopened just before yielding | |
| 823 | 4689 | this->CloseCatalog(unlink_db, job); | |
| 824 | } | ||
| 825 | 4689 | ctx->callback_stack.push(*job); | |
| 826 | 4689 | } | |
| 827 | |||
| 828 | /** | ||
| 829 | * Determines if there are postponed yields that can be set free based on | ||
| 830 | * the catalog currently being yielded | ||
| 831 | * | ||
| 832 | * Note: the CatalogJob being handed into this method does not necessarily | ||
| 833 | * have an open Catalog attached to it. | ||
| 834 | * | ||
| 835 | * @param ctx the TraversalContext | ||
| 836 | * @param job the catalog job that was just yielded | ||
| 837 | * @return true on successful execution | ||
| 838 | */ | ||
| 839 | 10866 | bool HandlePostponedYields(const CatalogJob &job, TraversalContext *ctx) { | |
| 840 |
2/2✓ Branch 0 taken 2412 times.
✓ Branch 1 taken 8454 times.
|
10866 | if (ctx->traversal_type == Base::kBreadthFirst) { |
| 841 | 2412 | return true; | |
| 842 | } | ||
| 843 | |||
| 844 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8454 times.
|
8454 | assert(ctx->traversal_type == Base::kDepthFirst); |
| 845 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8454 times.
|
8454 | assert(job.referenced_catalogs == 0); |
| 846 | |||
| 847 | // walk through the callback_stack and yield all catalogs that have no un- | ||
| 848 | // yielded referenced_catalogs anymore. Every time a CatalogJob in the | ||
| 849 | // callback_stack gets yielded it decrements the referenced_catalogs of the | ||
| 850 | // next top of the stack (it's parent CatalogJob waiting for yielding) | ||
| 851 | 8454 | CatalogJobStack &clbs = ctx->callback_stack; | |
| 852 |
2/2✓ Branch 1 taken 12084 times.
✓ Branch 2 taken 1059 times.
|
13143 | while (!clbs.empty()) { |
| 853 | 12084 | CatalogJob &postponed_job = clbs.top(); | |
| 854 |
2/2✓ Branch 0 taken 7395 times.
✓ Branch 1 taken 4689 times.
|
12084 | if (--postponed_job.referenced_catalogs > 0) { |
| 855 | 7395 | break; | |
| 856 | } | ||
| 857 | |||
| 858 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 4689 times.
|
4689 | if (!Yield(&postponed_job)) { |
| 859 | ✗ | return false; | |
| 860 | } | ||
| 861 | 4689 | clbs.pop(); | |
| 862 | } | ||
| 863 | |||
| 864 | 8454 | return true; | |
| 865 | } | ||
| 866 | |||
| 867 | 55626 | void Push(const CatalogJob &job, TraversalContext *ctx) { | |
| 868 | 55626 | ctx->catalog_stack.push(job); | |
| 869 | 55626 | } | |
| 870 | |||
| 871 | 55566 | CatalogJob Pop(TraversalContext *ctx) { | |
| 872 | 55566 | CatalogJob job = ctx->catalog_stack.top(); | |
| 873 | 55566 | ctx->catalog_stack.pop(); | |
| 874 | 55566 | return job; | |
| 875 | } | ||
| 876 | |||
| 877 | protected: | ||
| 878 | HashSet visited_catalogs_; | ||
| 879 | }; | ||
| 880 | |||
| 881 | template<class ObjectFetcherT> | ||
| 882 | const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters:: | ||
| 883 | kFullHistory = std::numeric_limits<uint64_t>::max(); | ||
| 884 | |||
| 885 | template<class ObjectFetcherT> | ||
| 886 | const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoHistory = 0; | ||
| 887 | |||
| 888 | template<class ObjectFetcherT> | ||
| 889 | const time_t | ||
| 890 | CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoTimestampThreshold = 0; | ||
| 891 | |||
| 892 | } // namespace swissknife | ||
| 893 | |||
| 894 | #endif // CVMFS_CATALOG_TRAVERSAL_H_ | ||
| 895 |