Directory: | cvmfs/ |
---|---|
File: | cvmfs/catalog_traversal.h |
Date: | 2025-06-22 02:36:02 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 246 | 264 | 93.2% |
Branches: | 146 | 263 | 55.5% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #ifndef CVMFS_CATALOG_TRAVERSAL_H_ | ||
6 | #define CVMFS_CATALOG_TRAVERSAL_H_ | ||
7 | |||
8 | #include <cassert> | ||
9 | #include <limits> | ||
10 | #include <set> | ||
11 | #include <stack> | ||
12 | #include <string> | ||
13 | #include <vector> | ||
14 | |||
15 | #include "catalog.h" | ||
16 | #include "compression/compression.h" | ||
17 | #include "crypto/signature.h" | ||
18 | #include "history_sqlite.h" | ||
19 | #include "manifest.h" | ||
20 | #include "object_fetcher.h" | ||
21 | #include "util/concurrency.h" | ||
22 | #include "util/logging.h" | ||
23 | |||
24 | namespace catalog { | ||
25 | class Catalog; | ||
26 | class WritableCatalog; | ||
27 | } // namespace catalog | ||
28 | |||
29 | namespace swissknife { | ||
30 | |||
31 | /** | ||
32 | * Callback data which has to be implemented by the registered callback | ||
33 | * functions/methods (see Observable<> for further details) | ||
34 | * @param catalog the catalog object which needs to be processed | ||
35 | * @param catalog_hash the content hash of the catalog | ||
36 | * @param tree_level the depth in the nested catalog tree | ||
37 | * (starting at zero) | ||
38 | * @param file_size the size of the downloaded catalog database file | ||
39 | * @param history_depth the distance from the current HEAD revision | ||
40 | * (current HEAD has history_depth 0) | ||
41 | */ | ||
42 | template<class CatalogT> | ||
43 | struct CatalogTraversalData { | ||
44 | 18076522 | CatalogTraversalData(const CatalogT *catalog, | |
45 | const shash::Any &catalog_hash, | ||
46 | const unsigned tree_level, | ||
47 | const size_t file_size, | ||
48 | const uint64_t history_depth) | ||
49 | 18076522 | : catalog(catalog) | |
50 | 18076522 | , catalog_hash(catalog_hash) | |
51 | 18076522 | , tree_level(tree_level) | |
52 | 18076522 | , file_size(file_size) | |
53 | 18076522 | , history_depth(history_depth) { } | |
54 | |||
55 | const CatalogT *catalog; | ||
56 | const shash::Any catalog_hash; | ||
57 | const unsigned int tree_level; | ||
58 | const size_t file_size; | ||
59 | const uint64_t history_depth; | ||
60 | }; | ||
61 | |||
62 | |||
63 | /** | ||
64 | * A layer to extract information from a catalog. Users of the catalog | ||
65 | * traversal can provide derived classes to overwrite behavior. Currently used | ||
66 | * to get the last-modified timestamp in configurable manner: for the garbage | ||
67 | * collection, the timestamp of the catalog hash in the reflog counts, which | ||
68 | * is the same or newer than the one stored in the catalog. | ||
69 | */ | ||
70 | template<class CatalogT> | ||
71 | class CatalogTraversalInfoShim { | ||
72 | public: | ||
73 | 8534 | virtual ~CatalogTraversalInfoShim() { } | |
74 | |||
75 | // Default implementation: use the catalog's timestamp | ||
76 | 9766 | virtual uint64_t GetLastModified(const CatalogT *catalog) { | |
77 | 9766 | return catalog->GetLastModified(); | |
78 | } | ||
79 | }; | ||
80 | |||
81 | |||
82 | /** | ||
83 | * A base class for CatalogTraversal and CatalogTraversalParallel implementing | ||
84 | * common functionality. Actual traversal classes inherit from this class. | ||
85 | * | ||
86 | */ | ||
87 | template<class ObjectFetcherT> | ||
88 | class CatalogTraversalBase | ||
89 | : public Observable< | ||
90 | CatalogTraversalData<typename ObjectFetcherT::CatalogTN> > { | ||
91 | public: | ||
92 | typedef ObjectFetcherT ObjectFetcherTN; | ||
93 | typedef typename ObjectFetcherT::CatalogTN CatalogTN; | ||
94 | typedef typename ObjectFetcherT::HistoryTN HistoryTN; | ||
95 | typedef CatalogTraversalData<CatalogTN> CallbackDataTN; | ||
96 | |||
97 | /** | ||
98 | * @param repo_url the path to the repository to be traversed: | ||
99 | * -> either absolute path to the local catalogs | ||
100 | * -> or an URL to a remote repository | ||
101 | * @param repo_name fully qualified repository name (used for | ||
102 | * remote repository signature check) (optional) | ||
103 | * @param repo_keys a comma separated list of public key file | ||
104 | * locations to verify the repository manifest | ||
105 | * file | ||
106 | * @param history depth of the desired catalog history traversal | ||
107 | * (default: 0 - only HEAD catalogs are traversed) | ||
108 | * @param timestamp timestamp of history traversal threshold | ||
109 | * (default: 0 - no threshold, traverse | ||
110 | * everything) | ||
111 | * @param no_repeat_history keep track of visited catalogs and don't | ||
112 | * re-visit them in previous revisions | ||
113 | * @param no_close do not close catalogs after they were attached | ||
114 | * (catalogs retain their parent/child pointers) | ||
115 | * @param ignore_load_failure suppressed an error message if a catalog file | ||
116 | * could not be loaded (i.e. was sweeped before by | ||
117 | * a garbage collection run) | ||
118 | * @param quiet silence messages that would go to stderr | ||
119 | * @param tmp_dir path to the temporary directory to be used | ||
120 | * (default: /tmp) | ||
121 | * | ||
122 | * ONLY FOR CatalogTraversalParallel | ||
123 | * @param num_threads number of threads concurrently traversing | ||
124 | * the catalog trees; hint: set up based on | ||
125 | * CPU utilization and optimal number of | ||
126 | * download threads | ||
127 | * (default: 8) | ||
128 | * | ||
129 | * @param serialize_callbacks do not call multiple catalog callbacks | ||
130 | * concurrently; ensures safe access to shared | ||
131 | * variables inside the callback; turn off if | ||
132 | * the registered callback is thread-safe | ||
133 | * (default: true) | ||
134 | */ | ||
135 | struct Parameters { | ||
136 | 3321 | Parameters() | |
137 | 3321 | : object_fetcher(NULL) | |
138 | 3321 | , history(kNoHistory) | |
139 | 3321 | , timestamp(kNoTimestampThreshold) | |
140 | 3321 | , no_repeat_history(false) | |
141 | 3321 | , no_close(false) | |
142 | 3321 | , ignore_load_failure(false) | |
143 | 3321 | , quiet(false) | |
144 | 3321 | , num_threads(8) | |
145 | 3321 | , serialize_callbacks(true) { } | |
146 | |||
147 | static const uint64_t kFullHistory; | ||
148 | static const uint64_t kNoHistory; | ||
149 | static const time_t kNoTimestampThreshold; | ||
150 | |||
151 | ObjectFetcherT *object_fetcher; | ||
152 | |||
153 | uint64_t history; | ||
154 | time_t timestamp; | ||
155 | bool no_repeat_history; | ||
156 | bool no_close; | ||
157 | bool ignore_load_failure; | ||
158 | bool quiet; | ||
159 | // These parameters only apply for CatalogTraversalParallel | ||
160 | unsigned int num_threads; | ||
161 | bool serialize_callbacks; | ||
162 | }; | ||
163 | |||
164 | enum TraversalType { | ||
165 | kBreadthFirst, | ||
166 | kDepthFirst | ||
167 | }; | ||
168 | |||
169 | 3321 | explicit CatalogTraversalBase(const Parameters ¶ms) | |
170 | 3321 | : object_fetcher_(params.object_fetcher) | |
171 | 3321 | , catalog_info_shim_(&catalog_info_default_shim_) | |
172 | 3321 | , default_history_depth_(params.history) | |
173 | 3321 | , default_timestamp_threshold_(params.timestamp) | |
174 | 3321 | , no_close_(params.no_close) | |
175 | 3321 | , ignore_load_failure_(params.ignore_load_failure) | |
176 | 3321 | , no_repeat_history_(params.no_repeat_history) | |
177 |
2/2✓ Branch 2 taken 1084 times.
✓ Branch 3 taken 2237 times.
|
3321 | , error_sink_((params.quiet) ? kLogDebug : kLogStderr) { |
178 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3321 times.
|
3321 | assert(object_fetcher_ != NULL); |
179 | 3321 | } | |
180 | |||
181 | /** | ||
182 | * Starts the traversal process. | ||
183 | * After calling this methods CatalogTraversal will go through all catalogs | ||
184 | * and call the registered callback methods for each found catalog. | ||
185 | * If something goes wrong in the process, the traversal will be cancelled. | ||
186 | * | ||
187 | * @param type breadths or depth first traversal | ||
188 | * @return true, when all catalogs were successfully processed. On | ||
189 | * failure the traversal is cancelled and false is returned. | ||
190 | */ | ||
191 | virtual bool Traverse(const TraversalType type = kBreadthFirst) = 0; | ||
192 | |||
193 | /** | ||
194 | * Starts the traversal process at the catalog pointed to by the given hash | ||
195 | * | ||
196 | * @param root_catalog_hash the entry point into the catalog traversal | ||
197 | * @param type breadths or depth first traversal | ||
198 | * @return true when catalogs were successfully traversed | ||
199 | */ | ||
200 | virtual bool Traverse(const shash::Any &root_catalog_hash, | ||
201 | const TraversalType type = kBreadthFirst) = 0; | ||
202 | |||
203 | /** | ||
204 | * Traverse a list of revisions represented by root catalogs from first | ||
205 | * to last. DO NOT traverse previous revisions based on history and | ||
206 | * timestamp threshold settings. | ||
207 | * | ||
208 | * @param catalog_list list of root catalog hashes | ||
209 | * @param type breadth- or depth- first traversal | ||
210 | * @return true on success | ||
211 | */ | ||
212 | virtual bool TraverseList(const std::vector<shash::Any> &catalog_list, | ||
213 | const TraversalType type = kBreadthFirst) = 0; | ||
214 | |||
215 | /** | ||
216 | * Starts the traversal process at the catalog pointed to by the given hash | ||
217 | * but doesn't traverse into predecessor catalog revisions. This overrides the | ||
218 | * TraversalParameter settings provided at construction. | ||
219 | * | ||
220 | * @param root_catalog_hash the entry point into the catalog traversal | ||
221 | * @param type breadths or depth first traversal | ||
222 | * @return true when catalogs were successfully traversed | ||
223 | */ | ||
224 | virtual bool TraverseRevision(const shash::Any &root_catalog_hash, | ||
225 | const TraversalType type = kBreadthFirst) = 0; | ||
226 | |||
227 | /** | ||
228 | * Figures out all named tags in a repository and uses all of them as entry | ||
229 | * points into the traversal process. | ||
230 | * | ||
231 | * @param type breadths or depth first traversal | ||
232 | * @return true when catalog traversal successfully finished | ||
233 | */ | ||
234 | 1455 | virtual bool TraverseNamedSnapshots( | |
235 | const TraversalType type = kBreadthFirst) { | ||
236 | typedef std::vector<shash::Any> HashList; | ||
237 | |||
238 |
1/2✓ Branch 1 taken 1455 times.
✗ Branch 2 not taken.
|
1455 | UniquePtr<HistoryTN> tag_db; |
239 | const typename ObjectFetcherT::Failures | ||
240 |
2/4✓ Branch 1 taken 1455 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1455 times.
✗ Branch 5 not taken.
|
1455 | retval = object_fetcher_->FetchHistory(&tag_db); |
241 |
2/3✓ Branch 0 taken 1386 times.
✓ Branch 1 taken 69 times.
✗ Branch 2 not taken.
|
1455 | switch (retval) { |
242 | 1386 | case ObjectFetcherT::kFailOk: | |
243 | 1386 | break; | |
244 | |||
245 | 69 | case ObjectFetcherT::kFailNotFound: | |
246 |
1/2✓ Branch 1 taken 69 times.
✗ Branch 2 not taken.
|
69 | LogCvmfs(kLogCatalogTraversal, kLogDebug, |
247 | "didn't find a history database to traverse"); | ||
248 | 69 | return true; | |
249 | |||
250 | ✗ | default: | |
251 | ✗ | LogCvmfs(kLogCatalogTraversal, kLogStderr, | |
252 | "failed to download history database (%d - %s)", retval, | ||
253 | Code2Ascii(retval)); | ||
254 | ✗ | return false; | |
255 | } | ||
256 | |||
257 | 1386 | HashList root_hashes; | |
258 |
1/2✓ Branch 2 taken 1386 times.
✗ Branch 3 not taken.
|
1386 | bool success = tag_db->GetHashes(&root_hashes); |
259 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1386 times.
|
1386 | assert(success); |
260 |
1/2✓ Branch 1 taken 1386 times.
✗ Branch 2 not taken.
|
1386 | return TraverseList(root_hashes, type); |
261 | 1455 | } | |
262 | |||
263 | 43 | void SetCatalogInfoShim(CatalogTraversalInfoShim<CatalogTN> *shim) { | |
264 | 43 | catalog_info_shim_ = shim; | |
265 | 43 | } | |
266 | |||
267 | protected: | ||
268 | typedef std::set<shash::Any> HashSet; | ||
269 | |||
270 | /** | ||
271 | * This struct keeps information about a catalog that still needs to be | ||
272 | * traversed by a currently running catalog traversal process. | ||
273 | */ | ||
274 | struct CatalogJob { | ||
275 | 18082937 | CatalogJob(const std::string &path, | |
276 | const shash::Any &hash, | ||
277 | const unsigned tree_level, | ||
278 | const uint64_t history_depth, | ||
279 | CatalogTN *parent = NULL) | ||
280 | 18082937 | : path(path) | |
281 | 18082937 | , hash(hash) | |
282 | 18082937 | , tree_level(tree_level) | |
283 | 18082937 | , history_depth(history_depth) | |
284 | 18082937 | , parent(parent) | |
285 | 18082937 | , catalog_file_size(0) | |
286 | 18082937 | , ignore(false) | |
287 | 18082937 | , catalog(NULL) | |
288 | 18082937 | , referenced_catalogs(0) | |
289 | 18082937 | , postponed(false) { } | |
290 | |||
291 | 36102426 | bool IsRootCatalog() const { return tree_level == 0; } | |
292 | |||
293 | 18076522 | CallbackDataTN GetCallbackData() const { | |
294 | 18076522 | return CallbackDataTN(catalog, hash, tree_level, catalog_file_size, | |
295 | 18076522 | history_depth); | |
296 | } | ||
297 | |||
298 | // initial state description | ||
299 | const std::string path; | ||
300 | const shash::Any hash; | ||
301 | const unsigned tree_level; | ||
302 | const uint64_t history_depth; | ||
303 | CatalogTN *parent; | ||
304 | |||
305 | // dynamic processing state (used internally) | ||
306 | std::string catalog_file_path; | ||
307 | size_t catalog_file_size; | ||
308 | bool ignore; | ||
309 | CatalogTN *catalog; | ||
310 | uint64_t referenced_catalogs; | ||
311 | bool postponed; | ||
312 | }; | ||
313 | |||
314 | 18075400 | bool PrepareCatalog(CatalogJob *job) { | |
315 | const typename ObjectFetcherT::Failures | ||
316 | 54174064 | retval = object_fetcher_->FetchCatalog(job->hash, | |
317 | 18075008 | job->path, | |
318 | &job->catalog, | ||
319 | 18075400 | !job->IsRootCatalog(), | |
320 | job->parent); | ||
321 |
3/3✓ Branch 0 taken 18020809 times.
✓ Branch 1 taken 446 times.
✓ Branch 2 taken 2401 times.
|
18023656 | switch (retval) { |
322 | 18020809 | case ObjectFetcherT::kFailOk: | |
323 | 18020809 | break; | |
324 | |||
325 | 446 | case ObjectFetcherT::kFailNotFound: | |
326 |
2/2✓ Branch 0 taken 377 times.
✓ Branch 1 taken 69 times.
|
446 | if (ignore_load_failure_) { |
327 |
1/2✓ Branch 3 taken 377 times.
✗ Branch 4 not taken.
|
377 | LogCvmfs(kLogCatalogTraversal, kLogDebug, |
328 | "ignoring missing catalog " | ||
329 | "%s (swept before?)", | ||
330 | job->hash.ToString().c_str()); | ||
331 | 377 | job->ignore = true; | |
332 | 377 | return true; | |
333 | } | ||
334 | |||
335 | default: | ||
336 |
1/12✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 4 taken 69 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
|
2470 | LogCvmfs(kLogCatalogTraversal, error_sink_, |
337 | "failed to load catalog %s " | ||
338 | "(%d - %s)", | ||
339 | job->hash.ToStringWithSuffix().c_str(), retval, | ||
340 | Code2Ascii(retval)); | ||
341 | 69 | return false; | |
342 | } | ||
343 | |||
344 | // catalogs returned by ObjectFetcher<> are managing their database files by | ||
345 | // default... we need to manage this file manually here | ||
346 | 18020809 | job->catalog->DropDatabaseFileOwnership(); | |
347 | |||
348 | 18016889 | job->catalog_file_path = job->catalog->database_path(); | |
349 |
1/2✓ Branch 2 taken 18074513 times.
✗ Branch 3 not taken.
|
18015713 | job->catalog_file_size = GetFileSize(job->catalog->database_path()); |
350 | |||
351 | 18074464 | return true; | |
352 | } | ||
353 | |||
354 | |||
355 | 18034238 | bool ReopenCatalog(CatalogJob *job) { | |
356 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18034238 times.
|
18034238 | assert(!job->ignore); |
357 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18034238 times.
|
18034238 | assert(job->catalog == NULL); |
358 | |||
359 | 36019378 | job->catalog = CatalogTN::AttachFreely(job->path, | |
360 | 18034140 | job->catalog_file_path, | |
361 | 18034140 | job->hash, | |
362 | ✗ | job->parent, | |
363 | 18034238 | !job->IsRootCatalog()); | |
364 | |||
365 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17985238 times.
|
17985238 | if (job->catalog == NULL) { |
366 | ✗ | LogCvmfs(kLogCatalogTraversal, error_sink_, | |
367 | "failed to re-open catalog %s", job->hash.ToString().c_str()); | ||
368 | ✗ | return false; | |
369 | } | ||
370 | |||
371 | 17985238 | return true; | |
372 | } | ||
373 | |||
374 | |||
375 | 18148214 | bool CloseCatalog(const bool unlink_db, CatalogJob *job) { | |
376 |
1/2✓ Branch 0 taken 18148214 times.
✗ Branch 1 not taken.
|
18148214 | delete job->catalog; |
377 | 18148214 | job->catalog = NULL; | |
378 |
2/6✗ Branch 1 not taken.
✓ Branch 2 taken 18148214 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 18148214 times.
|
18148214 | if (!job->catalog_file_path.empty() && unlink_db) { |
379 | ✗ | const int retval = unlink(job->catalog_file_path.c_str()); | |
380 | ✗ | if (retval != 0) { | |
381 | ✗ | LogCvmfs(kLogCatalogTraversal, error_sink_, "Failed to unlink %s - %d", | |
382 | ✗ | job->catalog_file_path.c_str(), errno); | |
383 | ✗ | return false; | |
384 | } | ||
385 | } | ||
386 | |||
387 | 18148214 | return true; | |
388 | } | ||
389 | |||
390 | 2214 | shash::Any GetRepositoryRootCatalogHash() { | |
391 | // get the manifest of the repository to learn about the entry point or the | ||
392 | // root catalog of the repository to be traversed | ||
393 |
1/2✓ Branch 1 taken 2214 times.
✗ Branch 2 not taken.
|
2214 | UniquePtr<manifest::Manifest> manifest; |
394 | const typename ObjectFetcherT::Failures | ||
395 |
1/2✓ Branch 1 taken 2214 times.
✗ Branch 2 not taken.
|
2214 | retval = object_fetcher_->FetchManifest(&manifest); |
396 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2214 times.
|
2214 | if (retval != ObjectFetcherT::kFailOk) { |
397 | ✗ | LogCvmfs(kLogCatalogTraversal, kLogStderr, | |
398 | "failed to load manifest " | ||
399 | "(%d - %s)", | ||
400 | retval, Code2Ascii(retval)); | ||
401 | ✗ | return shash::Any(); | |
402 | } | ||
403 | |||
404 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2214 times.
|
2214 | assert(manifest.IsValid()); |
405 | 2214 | return manifest->catalog_hash(); | |
406 | 2214 | } | |
407 | |||
408 | /** | ||
409 | * Checks if a root catalog is below one of the pruning thresholds. | ||
410 | * Pruning thresholds can be either the catalog's history depth or a timestamp | ||
411 | * threshold applied to the last modified timestamp of the catalog. | ||
412 | * | ||
413 | * @param ctx traversal context for traversal-specific state | ||
414 | * @param job the job defining the current catalog | ||
415 | * @return true if either history or timestamp threshold are satisfied | ||
416 | */ | ||
417 | 9938 | bool IsBelowPruningThresholds(const CatalogJob &job, | |
418 | const uint64_t history_depth, | ||
419 | const time_t timestamp_threshold) { | ||
420 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 9938 times.
|
9938 | assert(job.IsRootCatalog()); |
421 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9938 times.
|
9938 | assert(job.catalog != NULL); |
422 | |||
423 | 9938 | const bool h = job.history_depth >= history_depth; | |
424 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9938 times.
|
9938 | assert(timestamp_threshold >= 0); |
425 | 9938 | const bool t = catalog_info_shim_->GetLastModified(job.catalog) | |
426 | 9938 | < unsigned(timestamp_threshold); | |
427 | |||
428 |
4/4✓ Branch 0 taken 9387 times.
✓ Branch 1 taken 551 times.
✓ Branch 2 taken 4750 times.
✓ Branch 3 taken 4637 times.
|
9938 | return t || h; |
429 | } | ||
430 | |||
431 | ObjectFetcherT *object_fetcher_; | ||
432 | CatalogTraversalInfoShim<CatalogTN> catalog_info_default_shim_; | ||
433 | CatalogTraversalInfoShim<CatalogTN> *catalog_info_shim_; | ||
434 | const uint64_t default_history_depth_; | ||
435 | const time_t default_timestamp_threshold_; | ||
436 | const bool no_close_; | ||
437 | const bool ignore_load_failure_; | ||
438 | const bool no_repeat_history_; | ||
439 | LogFacilities error_sink_; | ||
440 | }; | ||
441 | |||
442 | /** | ||
443 | * This class traverses the catalog hierarchy of a CVMFS repository recursively. | ||
444 | * Also historic catalog trees can be traversed. The user needs to specify a | ||
445 | * callback which is called for each catalog on the way. | ||
446 | * | ||
447 | * CatalogTraversal<> can be configured and used in various ways: | ||
448 | * -> Historic catalog traversal | ||
449 | * -> Prune catalogs below a certain history level | ||
450 | * -> Prune catalogs older than a certain threshold timestamp | ||
451 | * -> Never traverse a certain catalog twice | ||
452 | * -> Breadth First Traversal or Depth First Traversal | ||
453 | * -> Optional catalog memory management (no_close) | ||
454 | * -> Use all Named Snapshots of a repository as traversal entry point | ||
455 | * -> Traverse starting from a provided catalog | ||
456 | * -> Traverse catalogs that were previously skipped | ||
457 | * -> Produce various flavours of catalogs (writable, mocked, ...) | ||
458 | * | ||
459 | * Breadth First Traversal Strategy | ||
460 | * Catalogs are handed out to the user identical as they are traversed. | ||
461 | * Say: From top to bottom. When you would simply print each received catalog | ||
462 | * the result would be a nice representation of the catalog tree. | ||
463 | * This method is more efficient, because catalogs are opened, processed and | ||
464 | * thrown away directly afterwards. | ||
465 | * | ||
466 | * Depth First Traversal Strategy | ||
467 | * The user gets the catalog tree starting from the leaf nodes. | ||
468 | * Say: From bottom to top. A user can assume that he got all children or | ||
469 | * historical ancestors of a catalog before. | ||
470 | * This method climbs down the full catalog tree and hands it out 'in reverse | ||
471 | * order'. Thus, catalogs on the way are opened, checked for their descendants | ||
472 | * and closed. Once all children and historical ancestors are processed, it is | ||
473 | * re-opened and handed out to the user. | ||
474 | * Note: This method needs more disk space to temporarily store downloaded but | ||
475 | * not yet processed catalogs. | ||
476 | * | ||
477 | * Note: Since all CVMFS catalog files together can grow to several gigabytes in | ||
478 | * file size, each catalog is loaded, processed and removed immediately | ||
479 | * afterwards. Except if no_close is specified, which allows the user to | ||
480 | * choose when a catalog should be closed. Keep in mind, that a user is | ||
481 | * responsible for both deletion of the delivered catalog objects as well | ||
482 | * as unlinking of the catalog database file. | ||
483 | * | ||
484 | * | ||
485 | * @param ObjectFetcherT Strategy Pattern implementation that defines how to | ||
486 | * retrieve catalogs from various backend storage types. | ||
487 | * Furthermore the ObjectFetcherT::CatalogTN is the type | ||
488 | * of catalog to be instantiated by CatalogTraversal<>. | ||
489 | * | ||
490 | * CAUTION: the CatalogTN* pointer passed into the callback becomes invalid | ||
491 | * directly after the callback method returns, unless you create the | ||
492 | * CatalogTraversal object with no_close = true. | ||
493 | */ | ||
494 | template<class ObjectFetcherT> | ||
495 | class CatalogTraversal : public CatalogTraversalBase<ObjectFetcherT> { | ||
496 | public: | ||
497 | typedef CatalogTraversalBase<ObjectFetcherT> Base; | ||
498 | typedef ObjectFetcherT ObjectFetcherTN; | ||
499 | typedef typename ObjectFetcherT::CatalogTN CatalogTN; | ||
500 | typedef typename ObjectFetcherT::HistoryTN HistoryTN; | ||
501 | typedef CatalogTraversalData<CatalogTN> CallbackDataTN; | ||
502 | typedef typename Base::Parameters Parameters; | ||
503 | typedef typename Base::TraversalType TraversalType; | ||
504 | typedef typename Base::CatalogJob CatalogJob; | ||
505 | |||
506 | protected: | ||
507 | typedef std::set<shash::Any> HashSet; | ||
508 | typedef std::stack<CatalogJob> CatalogJobStack; | ||
509 | |||
510 | /** | ||
511 | * This struct represents a catalog traversal context. It needs to be re- | ||
512 | * created for each catalog traversal process and contains information to this | ||
513 | * specific catalog traversal run. | ||
514 | * | ||
515 | * @param history_depth the history traversal threshold | ||
516 | * @param traversal_type either breadth or depth first traversal strategy | ||
517 | * @param catalog_stack the call stack for catalogs to be traversed | ||
518 | * @param callback_stack used in depth first traversal for deferred yielding | ||
519 | */ | ||
520 | struct TraversalContext { | ||
521 | 5160 | TraversalContext(const uint64_t history_depth, | |
522 | const time_t timestamp_threshold, | ||
523 | const TraversalType traversal_type) | ||
524 | 5160 | : history_depth(history_depth) | |
525 | 5160 | , timestamp_threshold(timestamp_threshold) | |
526 |
1/2✓ Branch 2 taken 5160 times.
✗ Branch 3 not taken.
|
5160 | , traversal_type(traversal_type) { } |
527 | |||
528 | const uint64_t history_depth; | ||
529 | const time_t timestamp_threshold; | ||
530 | const TraversalType traversal_type; | ||
531 | CatalogJobStack catalog_stack; | ||
532 | CatalogJobStack callback_stack; | ||
533 | }; | ||
534 | |||
535 | public: | ||
536 | /** | ||
537 | * Constructs a new catalog traversal engine based on the construction | ||
538 | * parameters described in struct ConstructionParams. | ||
539 | */ | ||
540 | 1562 | explicit CatalogTraversal(const Parameters ¶ms) | |
541 | 1562 | : CatalogTraversalBase<ObjectFetcherT>(params) { } | |
542 | |||
543 | 1241 | bool Traverse(const TraversalType type = Base::kBreadthFirst) { | |
544 |
1/2✓ Branch 1 taken 1241 times.
✗ Branch 2 not taken.
|
1241 | const shash::Any root_catalog_hash = this->GetRepositoryRootCatalogHash(); |
545 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1241 times.
|
1241 | if (root_catalog_hash.IsNull()) { |
546 | ✗ | return false; | |
547 | } | ||
548 |
1/2✓ Branch 1 taken 1241 times.
✗ Branch 2 not taken.
|
1241 | return Traverse(root_catalog_hash, type); |
549 | } | ||
550 | |||
551 | 1501 | bool Traverse(const shash::Any &root_catalog_hash, | |
552 | const TraversalType type = Base::kBreadthFirst) { | ||
553 | // add the root catalog of the repository as the first element on the job | ||
554 | // stack | ||
555 | 1501 | TraversalContext ctx( | |
556 |
1/2✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
|
1501 | this->default_history_depth_, this->default_timestamp_threshold_, type); |
557 |
1/2✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
|
1501 | Push(root_catalog_hash, &ctx); |
558 |
1/2✓ Branch 1 taken 1501 times.
✗ Branch 2 not taken.
|
3002 | return DoTraverse(&ctx); |
559 | 1501 | } | |
560 | |||
561 | 1862 | bool TraverseList(const std::vector<shash::Any> &catalog_list, | |
562 | const TraversalType type = Base::kBreadthFirst) { | ||
563 | 1862 | std::vector<shash::Any>::const_iterator i = catalog_list.begin(); | |
564 | 1862 | const std::vector<shash::Any>::const_iterator iend = catalog_list.end(); | |
565 | 1862 | bool success = true; | |
566 |
2/2✓ Branch 1 taken 3659 times.
✓ Branch 2 taken 1862 times.
|
5521 | for (; i != iend; ++i) { |
567 |
3/6✓ Branch 0 taken 3659 times.
✗ Branch 1 not taken.
✓ Branch 4 taken 3659 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 3659 times.
✗ Branch 7 not taken.
|
3659 | success = success && TraverseRevision(*i, type); |
568 | } | ||
569 | 1862 | return success; | |
570 | } | ||
571 | |||
572 | 3659 | bool TraverseRevision(const shash::Any &root_catalog_hash, | |
573 | const TraversalType type = Base::kBreadthFirst) { | ||
574 | // add the given root catalog as the first element on the job stack | ||
575 |
1/2✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
|
3659 | TraversalContext ctx( |
576 | Parameters::kNoHistory, Parameters::kNoTimestampThreshold, type); | ||
577 |
1/2✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
|
3659 | Push(root_catalog_hash, &ctx); |
578 |
1/2✓ Branch 1 taken 3659 times.
✗ Branch 2 not taken.
|
7318 | return DoTraverse(&ctx); |
579 | 3659 | } | |
580 | |||
581 | protected: | ||
582 | /** | ||
583 | * This controls the actual traversal. Using a stack to traverse down the | ||
584 | * catalog hierarchy. This method implements the traversal itself, but not | ||
585 | * in which way catalogs are handed out to the user code. | ||
586 | * | ||
587 | * Each catalog is processed in these steps: | ||
588 | * 1.) Pop the next catalog from the stack | ||
589 | * Catalogs are always traversed from latest to oldest revision and | ||
590 | * from root to leaf nested catalogs | ||
591 | * 2.) Prepare the catalog for traversing | ||
592 | * 2.1.) Check if it was visited before | ||
593 | * 2.2.) Fetch the catalog database from the backend storage | ||
594 | * This might fail and produce an error. For root catalogs this | ||
595 | * error can be ignored (might be garbage collected before) | ||
596 | * 2.3.) Open the catalog database | ||
597 | * 2.4.) Check if the catalog is older than the timestamp threshold | ||
598 | * After these steps the catalog is opened either opened and ready for | ||
599 | * the traversal to continue, or it was marked for ignore (job.ignore) | ||
600 | * 3.) Check if the catalog is marked to be ignored | ||
601 | * Catalog might not be loadable (sweeped root catalog) or is too old | ||
602 | * Note: ignored catalogs can still trigger postponed yields | ||
603 | * 4.) Mark the catalog as visited to be able to skip it later on | ||
604 | * 5.) Find and push referencing catalogs | ||
605 | * This pushes all descendents of the current catalog onto the stack. | ||
606 | * Note that this is dependent on the strategy (depth or breadth first) | ||
607 | * and on the history threshold (see history_depth). | ||
608 | * 6.) Hand the catalog out to the user code | ||
609 | * Depending on the traversal strategy (depth of breadths first) this | ||
610 | * might immediately yield zero to N catalogs to the user code. | ||
611 | * | ||
612 | * Note: If anything unexpected goes wrong during the traversal process, it | ||
613 | * is aborted immediately. | ||
614 | * | ||
615 | * @param ctx the traversal context that steers the whole traversal process | ||
616 | * @return true on successful traversal and false on abort | ||
617 | */ | ||
618 | 5160 | bool DoTraverse(TraversalContext *ctx) { | |
619 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5160 times.
|
5160 | assert(ctx->callback_stack.empty()); |
620 | |||
621 |
5/5✓ Branch 1 taken 41719 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 3309 times.
✓ Branch 5 taken 45048 times.
✓ Branch 6 taken 5140 times.
|
95236 | while (!ctx->catalog_stack.empty()) { |
622 | // Get the top most catalog for the next processing step | ||
623 |
1/2✓ Branch 1 taken 45048 times.
✗ Branch 2 not taken.
|
45048 | CatalogJob job = Pop(ctx); |
624 | |||
625 |
3/4✓ Branch 1 taken 45048 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3085 times.
✓ Branch 4 taken 41963 times.
|
45048 | if (ShouldBeSkipped(job)) { |
626 | 3085 | job.ignore = true; | |
627 | // download and open the catalog for processing | ||
628 |
3/4✓ Branch 1 taken 41963 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 41943 times.
|
41963 | } else if (!this->PrepareCatalog(&job)) { |
629 | 20 | return false; | |
630 | } | ||
631 | |||
632 | // ignored catalogs don't need to be processed anymore but they might | ||
633 | // release postponed yields | ||
634 |
2/2✓ Branch 0 taken 3309 times.
✓ Branch 1 taken 41719 times.
|
45028 | if (job.ignore) { |
635 |
2/4✓ Branch 1 taken 3309 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 3309 times.
|
3309 | if (!HandlePostponedYields(job, ctx)) { |
636 | ✗ | return false; | |
637 | } | ||
638 | 3309 | continue; | |
639 | } | ||
640 | |||
641 | // push catalogs referenced by the current catalog (onto stack) | ||
642 |
1/2✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
|
41719 | this->MarkAsVisited(job); |
643 |
1/2✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
|
41719 | PushReferencedCatalogs(&job, ctx); |
644 | |||
645 | // notify listeners | ||
646 |
2/4✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 41719 times.
|
41719 | if (!YieldToListeners(&job, ctx)) { |
647 | ✗ | return false; | |
648 | } | ||
649 | } | ||
650 | |||
651 | // invariant: after the traversal finished, there should be no more catalogs | ||
652 | // to traverse or to yield! | ||
653 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5140 times.
|
5140 | assert(ctx->catalog_stack.empty()); |
654 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5140 times.
|
5140 | assert(ctx->callback_stack.empty()); |
655 | 5140 | return true; | |
656 | } | ||
657 | |||
658 | 41719 | void PushReferencedCatalogs(CatalogJob *job, TraversalContext *ctx) { | |
659 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(!job->ignore); |
660 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(job->catalog != NULL); |
661 |
3/4✓ Branch 0 taken 9706 times.
✓ Branch 1 taken 32013 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9706 times.
|
41719 | assert(ctx->traversal_type == Base::kBreadthFirst |
662 | || ctx->traversal_type == Base::kDepthFirst); | ||
663 | |||
664 | // this differs, depending on the traversal strategy. | ||
665 | // | ||
666 | // Breadths First Traversal | ||
667 | // Catalogs are traversed from top (root catalog) to bottom (leaf | ||
668 | // catalogs) and from more recent (HEAD revision) to older (historic | ||
669 | // revisions) | ||
670 | // | ||
671 | // Depth First Traversal | ||
672 | // Catalogs are traversed from oldest revision (depends on the configured | ||
673 | // maximal history depth) to the HEAD revision and from bottom (leafs) to | ||
674 | // top (root catalogs) | ||
675 | 83438 | job->referenced_catalogs = (ctx->traversal_type == Base::kBreadthFirst) | |
676 |
2/2✓ Branch 0 taken 32013 times.
✓ Branch 1 taken 9706 times.
|
41719 | ? PushPreviousRevision(*job, ctx) |
677 | 32013 | + PushNestedCatalogs(*job, ctx) | |
678 | 9706 | : PushNestedCatalogs(*job, ctx) | |
679 | 9706 | + PushPreviousRevision(*job, ctx); | |
680 | 41719 | } | |
681 | |||
682 | /** | ||
683 | * Pushes the previous revision of a (root) catalog. | ||
684 | * @return the number of catalogs pushed on the processing stack | ||
685 | */ | ||
686 | 41719 | unsigned int PushPreviousRevision(const CatalogJob &job, | |
687 | TraversalContext *ctx) { | ||
688 | // only root catalogs are used for entering a previous revision (graph) | ||
689 |
2/2✓ Branch 1 taken 36009 times.
✓ Branch 2 taken 5710 times.
|
41719 | if (!job.catalog->IsRoot()) { |
690 | 36009 | return 0; | |
691 | } | ||
692 | |||
693 |
1/2✓ Branch 1 taken 5710 times.
✗ Branch 2 not taken.
|
5710 | const shash::Any previous_revision = job.catalog->GetPreviousRevision(); |
694 |
2/2✓ Branch 1 taken 632 times.
✓ Branch 2 taken 5078 times.
|
5710 | if (previous_revision.IsNull()) { |
695 | 632 | return 0; | |
696 | } | ||
697 | |||
698 | // check if the next deeper history level is actually requested | ||
699 | // Note: if the current catalog is below the timestamp threshold it will be | ||
700 | // traversed and only its ancestor revision will not be pushed anymore | ||
701 |
2/2✓ Branch 0 taken 2992 times.
✓ Branch 1 taken 2086 times.
|
5078 | if (this->IsBelowPruningThresholds(job, ctx->history_depth, |
702 |
1/2✓ Branch 1 taken 5078 times.
✗ Branch 2 not taken.
|
5078 | ctx->timestamp_threshold)) { |
703 | 2992 | return 0; | |
704 | } | ||
705 | |||
706 |
3/6✓ Branch 2 taken 2086 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 2086 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 2086 times.
✗ Branch 9 not taken.
|
2086 | Push(CatalogJob("", previous_revision, 0, job.history_depth + 1, NULL), |
707 | ctx); | ||
708 | 2086 | return 1; | |
709 | } | ||
710 | |||
711 | /** | ||
712 | * Pushes all the referenced nested catalogs. | ||
713 | * @return the number of catalogs pushed on the processing stack | ||
714 | */ | ||
715 | 41719 | unsigned int PushNestedCatalogs(const CatalogJob &job, | |
716 | TraversalContext *ctx) { | ||
717 | typedef typename CatalogTN::NestedCatalogList NestedCatalogList; | ||
718 |
1/2✓ Branch 1 taken 41719 times.
✗ Branch 2 not taken.
|
41719 | const NestedCatalogList nested = job.catalog->ListOwnNestedCatalogs(); |
719 | 41719 | typename NestedCatalogList::const_iterator i = nested.begin(); | |
720 | 41719 | typename NestedCatalogList::const_iterator iend = nested.end(); | |
721 |
2/2✓ Branch 3 taken 37842 times.
✓ Branch 4 taken 41719 times.
|
79561 | for (; i != iend; ++i) { |
722 |
2/2✓ Branch 0 taken 1340 times.
✓ Branch 1 taken 36502 times.
|
37842 | CatalogTN *parent = (this->no_close_) ? job.catalog : NULL; |
723 |
2/4✓ Branch 1 taken 37842 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 37842 times.
✗ Branch 5 not taken.
|
37842 | const CatalogJob new_job(i->mountpoint.ToString(), |
724 | 37842 | i->hash, | |
725 | 37842 | job.tree_level + 1, | |
726 | 37842 | job.history_depth, | |
727 | parent); | ||
728 |
1/2✓ Branch 1 taken 37842 times.
✗ Branch 2 not taken.
|
37842 | Push(new_job, ctx); |
729 | } | ||
730 | |||
731 | 83438 | return nested.size(); | |
732 | 41719 | } | |
733 | |||
734 | 5160 | void Push(const shash::Any &root_catalog_hash, TraversalContext *ctx) { | |
735 |
3/6✓ Branch 2 taken 5160 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 5160 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 5160 times.
✗ Branch 9 not taken.
|
5160 | Push(CatalogJob("", root_catalog_hash, 0, 0), ctx); |
736 | 5160 | } | |
737 | |||
738 | 41719 | bool YieldToListeners(CatalogJob *job, TraversalContext *ctx) { | |
739 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(!job->ignore); |
740 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(job->catalog != NULL); |
741 |
3/4✓ Branch 0 taken 9706 times.
✓ Branch 1 taken 32013 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9706 times.
|
41719 | assert(ctx->traversal_type == Base::kBreadthFirst |
742 | || ctx->traversal_type == Base::kDepthFirst); | ||
743 | |||
744 | // in breadth first search mode, every catalog is simply handed out once | ||
745 | // it is visited. No extra magic required... | ||
746 |
2/2✓ Branch 0 taken 32013 times.
✓ Branch 1 taken 9706 times.
|
41719 | if (ctx->traversal_type == Base::kBreadthFirst) { |
747 | 32013 | return Yield(job); | |
748 | } | ||
749 | |||
750 | // in depth first search mode, catalogs might need to wait until all of | ||
751 | // their referenced catalogs are yielded (ctx.callback_stack)... | ||
752 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9706 times.
|
9706 | assert(ctx->traversal_type == Base::kDepthFirst); |
753 |
2/2✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 5959 times.
|
9706 | if (job->referenced_catalogs > 0) { |
754 | 3747 | PostponeYield(job, ctx); | |
755 | 3747 | return true; | |
756 | } | ||
757 | |||
758 | // this catalog can be yielded | ||
759 |
2/4✓ Branch 1 taken 5959 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5959 times.
✗ Branch 5 not taken.
|
5959 | return Yield(job) && HandlePostponedYields(*job, ctx); |
760 | } | ||
761 | |||
762 | /** | ||
763 | * Checks the traversal history if the given catalog was traversed or at least | ||
764 | * seen before. If 'no_repeat_history' is not set this is always 'false'. | ||
765 | * | ||
766 | * @param job the job to be checked against the traversal history | ||
767 | * @return true if the specified catalog was hit before | ||
768 | */ | ||
769 | 45048 | bool ShouldBeSkipped(const CatalogJob &job) { | |
770 |
4/4✓ Branch 0 taken 23468 times.
✓ Branch 1 taken 21580 times.
✓ Branch 3 taken 3085 times.
✓ Branch 4 taken 20383 times.
|
45048 | return this->no_repeat_history_ && (visited_catalogs_.count(job.hash) > 0); |
771 | } | ||
772 | |||
773 | 41719 | void MarkAsVisited(const CatalogJob &job) { | |
774 |
2/2✓ Branch 0 taken 20159 times.
✓ Branch 1 taken 21560 times.
|
41719 | if (this->no_repeat_history_) { |
775 | 20159 | visited_catalogs_.insert(job.hash); | |
776 | } | ||
777 | 41719 | } | |
778 | |||
779 | private: | ||
780 | /** | ||
781 | * This actually hands out a catalog to the user code | ||
782 | * It is not called by DoTraversa() directly but by wrapper functions in order | ||
783 | * to provide higher level yielding behaviour. | ||
784 | */ | ||
785 | 41719 | bool Yield(CatalogJob *job) { | |
786 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(!job->ignore); |
787 |
3/4✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 37972 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 3747 times.
|
41719 | assert(job->catalog != NULL || job->postponed); |
788 | |||
789 | // catalog was pushed on ctx.callback_stack before, it might need to be re- | ||
790 | // opened. If CatalogTraversal<> is configured with no_close, it was not | ||
791 | // closed before, hence does not need a re-open. | ||
792 |
5/8✓ Branch 0 taken 3747 times.
✓ Branch 1 taken 37972 times.
✓ Branch 2 taken 3747 times.
✗ Branch 3 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 3747 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 41719 times.
|
41719 | if (job->postponed && !this->no_close_ && !this->ReopenCatalog(job)) { |
793 | ✗ | return false; | |
794 | } | ||
795 | |||
796 | // hand the catalog out to the user code (see Observable<>) | ||
797 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 41719 times.
|
41719 | assert(job->catalog != NULL); |
798 |
1/2✓ Branch 2 taken 41719 times.
✗ Branch 3 not taken.
|
41719 | this->NotifyListeners(job->GetCallbackData()); |
799 | |||
800 | // skip the catalog closing procedure if asked for | ||
801 | // Note: In this case it is the user's responsibility to both delete the | ||
802 | // yielded catalog object and the underlying database temp file | ||
803 |
2/2✓ Branch 0 taken 1400 times.
✓ Branch 1 taken 40319 times.
|
41719 | if (this->no_close_) { |
804 | 1400 | return true; | |
805 | } | ||
806 | |||
807 | // we can close the catalog here and delete the temporary file | ||
808 | 40319 | const bool unlink_db = true; | |
809 | 40319 | return this->CloseCatalog(unlink_db, job); | |
810 | } | ||
811 | |||
812 | |||
813 | /** | ||
814 | * Pushes a catalog to the callback_stack for later yielding | ||
815 | * Note: this is only used for the Depth First Traversal strategy! | ||
816 | */ | ||
817 | 3747 | void PostponeYield(CatalogJob *job, TraversalContext *ctx) { | |
818 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3747 times.
|
3747 | assert(job->referenced_catalogs > 0); |
819 | |||
820 | 3747 | job->postponed = true; | |
821 |
1/2✓ Branch 0 taken 3747 times.
✗ Branch 1 not taken.
|
3747 | if (!this->no_close_) { |
822 | 3747 | const bool unlink_db = false; // will reopened just before yielding | |
823 | 3747 | this->CloseCatalog(unlink_db, job); | |
824 | } | ||
825 | 3747 | ctx->callback_stack.push(*job); | |
826 | 3747 | } | |
827 | |||
828 | /** | ||
829 | * Determines if there are postponed yields that can be set free based on | ||
830 | * the catalog currently being yielded | ||
831 | * | ||
832 | * Note: the CatalogJob being handed into this method does not necessarily | ||
833 | * have an open Catalog attached to it. | ||
834 | * | ||
835 | * @param ctx the TraversalContext | ||
836 | * @param job the catalog job that was just yielded | ||
837 | * @return true on successful execution | ||
838 | */ | ||
839 | 9268 | bool HandlePostponedYields(const CatalogJob &job, TraversalContext *ctx) { | |
840 |
2/2✓ Branch 0 taken 2436 times.
✓ Branch 1 taken 6832 times.
|
9268 | if (ctx->traversal_type == Base::kBreadthFirst) { |
841 | 2436 | return true; | |
842 | } | ||
843 | |||
844 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6832 times.
|
6832 | assert(ctx->traversal_type == Base::kDepthFirst); |
845 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6832 times.
|
6832 | assert(job.referenced_catalogs == 0); |
846 | |||
847 | // walk through the callback_stack and yield all catalogs that have no un- | ||
848 | // yielded referenced_catalogs anymore. Every time a CatalogJob in the | ||
849 | // callback_stack gets yielded it decrements the referenced_catalogs of the | ||
850 | // next top of the stack (it's parent CatalogJob waiting for yielding) | ||
851 | 6832 | CatalogJobStack &clbs = ctx->callback_stack; | |
852 |
2/2✓ Branch 1 taken 9252 times.
✓ Branch 2 taken 1327 times.
|
10579 | while (!clbs.empty()) { |
853 | 9252 | CatalogJob &postponed_job = clbs.top(); | |
854 |
2/2✓ Branch 0 taken 5505 times.
✓ Branch 1 taken 3747 times.
|
9252 | if (--postponed_job.referenced_catalogs > 0) { |
855 | 5505 | break; | |
856 | } | ||
857 | |||
858 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3747 times.
|
3747 | if (!Yield(&postponed_job)) { |
859 | ✗ | return false; | |
860 | } | ||
861 | 3747 | clbs.pop(); | |
862 | } | ||
863 | |||
864 | 6832 | return true; | |
865 | } | ||
866 | |||
867 | 45088 | void Push(const CatalogJob &job, TraversalContext *ctx) { | |
868 | 45088 | ctx->catalog_stack.push(job); | |
869 | 45088 | } | |
870 | |||
871 | 45048 | CatalogJob Pop(TraversalContext *ctx) { | |
872 | 45048 | CatalogJob job = ctx->catalog_stack.top(); | |
873 | 45048 | ctx->catalog_stack.pop(); | |
874 | 45048 | return job; | |
875 | } | ||
876 | |||
877 | protected: | ||
878 | HashSet visited_catalogs_; | ||
879 | }; | ||
880 | |||
881 | template<class ObjectFetcherT> | ||
882 | const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters:: | ||
883 | kFullHistory = std::numeric_limits<uint64_t>::max(); | ||
884 | |||
885 | template<class ObjectFetcherT> | ||
886 | const uint64_t CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoHistory = 0; | ||
887 | |||
888 | template<class ObjectFetcherT> | ||
889 | const time_t | ||
890 | CatalogTraversalBase<ObjectFetcherT>::Parameters::kNoTimestampThreshold = 0; | ||
891 | |||
892 | } // namespace swissknife | ||
893 | |||
894 | #endif // CVMFS_CATALOG_TRAVERSAL_H_ | ||
895 |