GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_check.cc
Date: 2026-06-28 02:36:10
Exec Total Coverage
Lines: 0 624 0.0%
Branches: 0 462 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This tool checks a cvmfs repository for file catalog errors.
5 */
6
7 #include "swissknife_check.h"
8
9 #include <inttypes.h>
10 #include <unistd.h>
11
12 #include <cassert>
13 #include <map>
14 #include <set>
15 #include <string>
16 #include <vector>
17
18 #include "catalog_sql.h"
19 #include "compression/compression.h"
20 #include "file_chunk.h"
21 #include "history_sqlite.h"
22 #include "manifest.h"
23 #include "network/download.h"
24 #include "network/sink_path.h"
25 #include "reflog.h"
26 #include "sanitizer.h"
27 #include "shortstring.h"
28 #include "util/exception.h"
29 #include "util/logging.h"
30 #include "util/pointer.h"
31 #include "util/posix.h"
32
33 using namespace std; // NOLINT
34
35 // for map of duplicate entries; as in kvstore.cc
36 static inline uint32_t hasher_any(const shash::Any &key) {
37 // We'll just do the same thing as hasher_md5, since every hash is at
38 // least as large.
39 return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(key.digest)
40 + 1);
41 }
42
43
44 namespace swissknife {
45
46 CommandCheck::CommandCheck()
47 : check_chunks_(false), no_duplicates_map_(false), is_remote_(false),
48 inclusion_spec_(NULL) {
49 const shash::Any hash_null;
50 duplicates_map_.Init(16, hash_null, hasher_any);
51 }
52
53 bool CommandCheck::CompareEntries(const catalog::DirectoryEntry &a,
54 const catalog::DirectoryEntry &b,
55 const bool compare_names,
56 const bool is_transition_point) {
57 typedef catalog::DirectoryEntry::Difference Difference;
58
59 const catalog::DirectoryEntry::Differences diffs = a.CompareTo(b);
60 if (diffs == Difference::kIdentical) {
61 return true;
62 }
63
64 // in case of a nested catalog transition point the controlling flags are
65 // supposed to differ. If this is the only difference we are done...
66 if (is_transition_point
67 && (diffs ^ Difference::kNestedCatalogTransitionFlags) == 0) {
68 return true;
69 }
70
71 bool retval = true;
72 if (compare_names) {
73 if (diffs & Difference::kName) {
74 LogCvmfs(kLogCvmfs, kLogStderr, "names differ: %s / %s", a.name().c_str(),
75 b.name().c_str());
76 retval = false;
77 }
78 }
79 if (diffs & Difference::kLinkcount) {
80 LogCvmfs(kLogCvmfs, kLogStderr, "linkcounts differ: %u / %u", a.linkcount(),
81 b.linkcount());
82 retval = false;
83 }
84 if (diffs & Difference::kHardlinkGroup) {
85 LogCvmfs(kLogCvmfs, kLogStderr, "hardlink groups differ: %u / %u",
86 a.hardlink_group(), b.hardlink_group());
87 retval = false;
88 }
89 if (diffs & Difference::kSize) {
90 LogCvmfs(kLogCvmfs, kLogStderr, "sizes differ: %" PRIu64 " / %" PRIu64,
91 a.size(), b.size());
92 retval = false;
93 }
94 if (diffs & Difference::kMode) {
95 LogCvmfs(kLogCvmfs, kLogStderr, "modes differ: %u / %u", a.mode(),
96 b.mode());
97 retval = false;
98 }
99 if (diffs & Difference::kMtime) {
100 LogCvmfs(kLogCvmfs, kLogStderr, "timestamps differ: %lu / %lu", a.mtime(),
101 b.mtime());
102 retval = false;
103 }
104 if (diffs & Difference::kChecksum) {
105 LogCvmfs(kLogCvmfs, kLogStderr, "content hashes differ: %s / %s",
106 a.checksum().ToString().c_str(), b.checksum().ToString().c_str());
107 retval = false;
108 }
109 if (diffs & Difference::kSymlink) {
110 LogCvmfs(kLogCvmfs, kLogStderr, "symlinks differ: %s / %s",
111 a.symlink().c_str(), b.symlink().c_str());
112 retval = false;
113 }
114 if (diffs & Difference::kExternalFileFlag) {
115 LogCvmfs(kLogCvmfs, kLogStderr,
116 "external file flag differs: %d / %d "
117 "(%s / %s)",
118 a.IsExternalFile(), b.IsExternalFile(), a.name().c_str(),
119 b.name().c_str());
120 retval = false;
121 }
122 if (diffs & Difference::kHasXattrsFlag) {
123 LogCvmfs(kLogCvmfs, kLogStderr,
124 "extended attributes differ: %d / %d "
125 "(%s / %s)",
126 a.HasXattrs(), b.HasXattrs(), a.name().c_str(), b.name().c_str());
127 retval = false;
128 }
129 if (!is_transition_point) {
130 if (diffs & Difference::kUid) {
131 LogCvmfs(kLogCvmfs, kLogStderr, "uids differ: %d / %d (%s / %s)", a.uid(),
132 b.uid(), a.name().c_str(), b.name().c_str());
133 retval = false;
134 }
135 if (diffs & Difference::kGid) {
136 LogCvmfs(kLogCvmfs, kLogStderr, "gids differ: %d / %d (%s / %s)", a.gid(),
137 b.gid(), a.name().c_str(), b.name().c_str());
138 retval = false;
139 }
140 }
141
142 return retval;
143 }
144
145
146 bool CommandCheck::CompareCounters(const catalog::Counters &a,
147 const catalog::Counters &b) {
148 const catalog::Counters::FieldsMap map_a = a.GetFieldsMap();
149 const catalog::Counters::FieldsMap map_b = b.GetFieldsMap();
150
151 bool retval = true;
152 catalog::Counters::FieldsMap::const_iterator i = map_a.begin();
153 const catalog::Counters::FieldsMap::const_iterator iend = map_a.end();
154 for (; i != iend; ++i) {
155 const catalog::Counters::FieldsMap::const_iterator comp = map_b.find(
156 i->first);
157 assert(comp != map_b.end());
158
159 if (*(i->second) != *(comp->second)) {
160 LogCvmfs(kLogCvmfs, kLogStderr,
161 "catalog statistics mismatch: %s (expected: %" PRIu64 " / "
162 "in catalog: %" PRIu64 ")",
163 comp->first.c_str(), *(i->second), *(comp->second));
164 retval = false;
165 }
166 }
167
168 return retval;
169 }
170
171
172 /**
173 * Checks for existence of a file either locally or via HTTP head
174 */
175 bool CommandCheck::Exists(const string &file) {
176 if (!is_remote_) {
177 return FileExists(file) || SymlinkExists(file);
178 } else {
179 const string url = repo_base_path_ + "/" + file;
180 LogCvmfs(kLogCvmfs, kLogVerboseMsg, "[Exists::url] %s", url.c_str());
181 download::JobInfo head(&url, false);
182 return download_manager()->Fetch(&head) == download::kFailOk;
183 }
184 }
185
186
187 /**
188 * Copies a file from the repository into a temporary file.
189 */
190 string CommandCheck::FetchPath(const string &path) {
191 string tmp_path;
192 FILE *f = CreateTempFile(temp_directory_ + "/cvmfstmp", kDefaultFileMode,
193 "w+", &tmp_path);
194 assert(f != NULL);
195
196 const string url = repo_base_path_ + "/" + path;
197 if (is_remote_) {
198 cvmfs::FileSink filesink(f);
199 download::JobInfo download_job(&url, false, false, NULL, &filesink);
200 const download::Failures retval = download_manager()->Fetch(&download_job);
201 if (retval != download::kFailOk) {
202 PANIC(kLogStderr, "failed to read %s", url.c_str());
203 }
204 } else {
205 const bool retval = CopyPath2File(url, f);
206 if (!retval) {
207 PANIC(kLogStderr, "failed to read %s", url.c_str());
208 }
209 }
210
211 fclose(f);
212 return tmp_path;
213 }
214
215
216 /**
217 * Verifies reflog checksum and looks for presence of the entry points
218 * referenced in the manifest.
219 */
220 bool CommandCheck::InspectReflog(const shash::Any &reflog_hash,
221 manifest::Manifest *manifest) {
222 LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting log of references");
223 const string reflog_path = FetchPath(".cvmfsreflog");
224 shash::Any computed_hash(reflog_hash.algorithm);
225 manifest::Reflog::HashDatabase(reflog_path, &computed_hash);
226 if (computed_hash != reflog_hash) {
227 LogCvmfs(kLogCvmfs, kLogStderr,
228 "The .cvmfsreflog has unexpected content hash %s (expected %s)",
229 computed_hash.ToString().c_str(), reflog_hash.ToString().c_str());
230 unlink(reflog_path.c_str());
231 return false;
232 }
233
234 const UniquePtr<manifest::Reflog> reflog(manifest::Reflog::Open(reflog_path));
235 assert(reflog.IsValid());
236 reflog->TakeDatabaseFileOwnership();
237
238 if (!reflog->ContainsCatalog(manifest->catalog_hash())) {
239 LogCvmfs(kLogCvmfs, kLogStderr,
240 "failed to find catalog root hash %s in .cvmfsreflog",
241 manifest->catalog_hash().ToString().c_str());
242 return false;
243 }
244
245 if (!reflog->ContainsCertificate(manifest->certificate())) {
246 LogCvmfs(kLogCvmfs, kLogStderr,
247 "failed to find certificate hash %s in .cvmfsreflog",
248 manifest->certificate().ToString().c_str());
249 return false;
250 }
251
252 if (!manifest->history().IsNull()
253 && !reflog->ContainsHistory(manifest->history())) {
254 LogCvmfs(kLogCvmfs, kLogStderr,
255 "failed to find tag database's hash %s in .cvmfsreflog",
256 manifest->history().ToString().c_str());
257 return false;
258 }
259
260 if (!manifest->meta_info().IsNull()
261 && !reflog->ContainsMetainfo(manifest->meta_info())) {
262 LogCvmfs(kLogCvmfs, kLogStderr,
263 "failed to find meta info hash %s in .cvmfsreflog",
264 manifest->meta_info().ToString().c_str());
265 return false;
266 }
267
268 return true;
269 }
270
271
272 /**
273 * Verifies the logical consistency of the tag database.
274 */
275 bool CommandCheck::InspectHistory(history::History *history) {
276 LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting tag database");
277 bool retval;
278 vector<history::History::Tag> tags;
279 retval = history->List(&tags);
280 if (!retval) {
281 LogCvmfs(kLogCvmfs, kLogStderr, "failed to enumerate tags");
282 return false;
283 }
284 vector<history::History::Branch> branches;
285 retval = history->ListBranches(&branches);
286 if (!retval) {
287 LogCvmfs(kLogCvmfs, kLogStderr, "failed to enumerate branches");
288 return false;
289 }
290
291 bool result = true;
292
293 map<string, uint64_t> initial_revisions;
294 const sanitizer::BranchSanitizer sanitizer;
295 for (unsigned i = 0; i < branches.size(); ++i) {
296 if (!sanitizer.IsValid(branches[i].branch)) {
297 LogCvmfs(kLogCvmfs, kLogStderr, "invalid branch name: %s",
298 branches[i].branch.c_str());
299 result = false;
300 }
301 initial_revisions[branches[i].branch] = branches[i].initial_revision;
302 }
303
304 set<string> used_branches; // all branches referenced in tag db
305 // TODO(jblomer): same root hash implies same size and revision
306 for (unsigned i = 0; i < tags.size(); ++i) {
307 used_branches.insert(tags[i].branch);
308 const map<string, uint64_t>::const_iterator iter = initial_revisions.find(
309 tags[i].branch);
310 if (iter == initial_revisions.end()) {
311 LogCvmfs(kLogCvmfs, kLogStderr, "invalid branch %s in tag %s",
312 tags[i].branch.c_str(), tags[i].name.c_str());
313 result = false;
314 } else {
315 if (tags[i].revision < iter->second) {
316 LogCvmfs(kLogCvmfs, kLogStderr,
317 "invalid revision %" PRIu64 " of tag %s", tags[i].revision,
318 tags[i].name.c_str());
319 result = false;
320 }
321 }
322 }
323
324 if (used_branches.size() != branches.size()) {
325 LogCvmfs(kLogCvmfs, kLogStderr, "unused, dangling branches stored");
326 result = false;
327 }
328
329 return result;
330 }
331
332
333 /**
334 * Recursive catalog walk-through
335 *
336 * TODO(vavolkl): This method is large and does a lot of checks
337 * that could be split into smaller ones.
338 *
339 */
340 bool CommandCheck::Find(const catalog::Catalog *catalog,
341 const PathString &path,
342 catalog::DeltaCounters *computed_counters,
343 set<PathString> *bind_mountpoints) {
344 catalog::DirectoryEntryList entries;
345 catalog::DirectoryEntry this_directory;
346
347 if (!catalog->LookupPath(path, &this_directory)) {
348 LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup %s", path.c_str());
349 return false;
350 }
351 if (!catalog->ListingPath(path, &entries)) {
352 LogCvmfs(kLogCvmfs, kLogStderr, "failed to list %s", path.c_str());
353 return false;
354 }
355
356 uint32_t num_subdirs = 0;
357 bool retval = true;
358 typedef map<uint32_t, vector<catalog::DirectoryEntry> > HardlinkMap;
359 HardlinkMap hardlinks;
360 bool found_nested_marker = false;
361
362 for (unsigned i = 0; i < entries.size(); ++i) {
363 // for performance reasons, keep track of files already checked
364 // and only run requests once per hash
365 const bool entry_needs_check = !entries[i].checksum().IsNull() &&
366 !entries[i].IsExternalFile() &&
367 !(catalog::g_ignore_legacy_bulk_hashes &&
368 entries[i].IsChunkedFile()) &&
369 // fallback cli option can force the entry to
370 // be checked
371 (no_duplicates_map_
372 || !duplicates_map_.Contains(
373 entries[i].checksum()));
374 if (entry_needs_check && !no_duplicates_map_)
375 duplicates_map_.Insert(entries[i].checksum(), 1);
376
377 PathString full_path(path);
378 full_path.Append("/", 1);
379 full_path.Append(entries[i].name().GetChars(),
380 entries[i].name().GetLength());
381 LogCvmfs(kLogCvmfs, kLogVerboseMsg, "[path] %s [needs check] %i",
382 full_path.c_str(), entry_needs_check);
383
384
385 // Name must not be empty
386 if (entries[i].name().IsEmpty()) {
387 LogCvmfs(kLogCvmfs, kLogStderr, "empty path at %s", full_path.c_str());
388 retval = false;
389 }
390
391 // Catalog markers should indicate nested catalogs
392 if (entries[i].name() == NameString(string(".cvmfscatalog"))) {
393 if (catalog->mountpoint() != path) {
394 LogCvmfs(kLogCvmfs, kLogStderr,
395 "found abandoned nested catalog marker at %s",
396 full_path.c_str());
397 retval = false;
398 }
399 found_nested_marker = true;
400 }
401
402 // Check if checksum is not null
403 if (entries[i].IsRegular() && !entries[i].IsChunkedFile()
404 && entries[i].checksum().IsNull()) {
405 LogCvmfs(kLogCvmfs, kLogStderr,
406 "regular file pointing to zero-hash: '%s'", full_path.c_str());
407 retval = false;
408 }
409
410 // Check if the chunk is there
411 if (check_chunks_ && entry_needs_check) {
412 string chunk_path = "data/" + entries[i].checksum().MakePath();
413 if (entries[i].IsDirectory())
414 chunk_path += shash::kSuffixMicroCatalog;
415 if (!Exists(chunk_path)) {
416 LogCvmfs(kLogCvmfs, kLogStderr, "data chunk %s (%s) missing",
417 entries[i].checksum().ToString().c_str(), full_path.c_str());
418 retval = false;
419 }
420 }
421
422 // Add hardlinks to counting map
423 if ((entries[i].linkcount() > 1) && !entries[i].IsDirectory()) {
424 if (entries[i].hardlink_group() == 0) {
425 LogCvmfs(kLogCvmfs, kLogStderr, "invalid hardlink group for %s",
426 full_path.c_str());
427 retval = false;
428 } else {
429 const HardlinkMap::iterator hardlink_group = hardlinks.find(
430 entries[i].hardlink_group());
431 if (hardlink_group == hardlinks.end()) {
432 hardlinks[entries[i].hardlink_group()];
433 hardlinks[entries[i].hardlink_group()].push_back(entries[i]);
434 } else {
435 if (!CompareEntries(entries[i], (hardlink_group->second)[0], false)) {
436 LogCvmfs(kLogCvmfs, kLogStderr, "hardlink %s doesn't match",
437 full_path.c_str());
438 retval = false;
439 }
440 hardlink_group->second.push_back(entries[i]);
441 } // Hardlink added to map
442 } // Hardlink group > 0
443 } // Hardlink found
444
445 // For any kind of entry, the linkcount should be > 0
446 if (entries[i].linkcount() == 0) {
447 LogCvmfs(kLogCvmfs, kLogStderr, "Entry %s has linkcount 0.",
448 entries[i].name().c_str());
449 retval = false;
450 }
451
452 // Checks depending of entry type
453 if (!entries[i].IsRegular()) {
454 if (entries[i].IsDirectIo()) {
455 LogCvmfs(kLogCvmfs, kLogStderr, "invalid direct i/o flag found: %s",
456 full_path.c_str());
457 retval = false;
458 }
459 }
460 if (entries[i].IsDirectory()) {
461 computed_counters->self.directories++;
462 num_subdirs++;
463 // Directory size
464 // if (entries[i].size() < 4096) {
465 // LogCvmfs(kLogCvmfs, kLogStderr, "invalid file size for %s",
466 // full_path.c_str());
467 // retval = false;
468 // }
469 // No directory hardlinks
470 if (entries[i].hardlink_group() != 0) {
471 LogCvmfs(kLogCvmfs, kLogStderr, "directory hardlink found at %s",
472 full_path.c_str());
473 retval = false;
474 }
475 if (entries[i].IsNestedCatalogMountpoint()
476 || entries[i].IsBindMountpoint()) {
477 // Find transition point
478 if (entries[i].IsNestedCatalogMountpoint())
479 computed_counters->self.nested_catalogs++;
480 shash::Any tmp;
481 uint64_t tmp2;
482 const PathString mountpoint(full_path);
483 if (!catalog->FindNested(mountpoint, &tmp, &tmp2)) {
484 LogCvmfs(kLogCvmfs, kLogStderr, "nested catalog at %s not registered",
485 full_path.c_str());
486 retval = false;
487 }
488
489 // check that the nested mountpoint is empty in the current catalog
490 catalog::DirectoryEntryList nested_entries;
491 if (catalog->ListingPath(full_path, &nested_entries)
492 && !nested_entries.empty()) {
493 LogCvmfs(kLogCvmfs, kLogStderr,
494 "non-empty nested catalog mountpoint "
495 "at %s.",
496 full_path.c_str());
497 retval = false;
498 }
499
500 if (entries[i].IsBindMountpoint()) {
501 bind_mountpoints->insert(full_path);
502 if (entries[i].IsNestedCatalogMountpoint()) {
503 LogCvmfs(kLogCvmfs, kLogStderr,
504 "bind mountpoint and nested mountpoint mutually exclusive"
505 " at %s.",
506 full_path.c_str());
507 retval = false;
508 }
509 }
510 } else {
511 // Recurse
512 if (!Find(catalog, full_path, computed_counters, bind_mountpoints))
513 retval = false;
514 }
515 } else if (entries[i].IsLink()) {
516 computed_counters->self.symlinks++;
517 // No hash for symbolics links
518 if (!entries[i].checksum().IsNull()) {
519 LogCvmfs(kLogCvmfs, kLogStderr, "symbolic links with hash at %s",
520 full_path.c_str());
521 retval = false;
522 }
523 // Right size of symbolic link?
524 if (entries[i].size() != entries[i].symlink().GetLength()) {
525 LogCvmfs(kLogCvmfs, kLogStderr,
526 "wrong symbolic link size for %s; "
527 "expected %u, got %lu",
528 full_path.c_str(), entries[i].symlink().GetLength(),
529 entries[i].size());
530 retval = false;
531 }
532 } else if (entries[i].IsRegular()) {
533 computed_counters->self.regular_files++;
534 computed_counters->self.file_size += entries[i].size();
535 } else if (entries[i].IsSpecial()) {
536 computed_counters->self.specials++;
537 // Size zero for special files
538 if (entries[i].size() != 0) {
539 LogCvmfs(kLogCvmfs, kLogStderr,
540 "unexpected non-zero special file size %s", full_path.c_str());
541 retval = false;
542 }
543 // No hash for special files
544 if (!entries[i].checksum().IsNull()) {
545 LogCvmfs(kLogCvmfs, kLogStderr, "special file with hash at %s",
546 full_path.c_str());
547 retval = false;
548 }
549 // No symlink
550 if (entries[i].symlink().GetLength() > 0) {
551 LogCvmfs(kLogCvmfs, kLogStderr,
552 "special file with non-zero symlink at %s", full_path.c_str());
553 retval = false;
554 }
555 } else {
556 LogCvmfs(kLogCvmfs, kLogStderr, "unknown file type %s",
557 full_path.c_str());
558 retval = false;
559 }
560
561 if (entries[i].HasXattrs()) {
562 computed_counters->self.xattrs++;
563 }
564
565 if (entries[i].IsExternalFile()) {
566 computed_counters->self.externals++;
567 computed_counters->self.external_file_size += entries[i].size();
568 if (!entries[i].IsRegular()) {
569 LogCvmfs(kLogCvmfs, kLogStderr,
570 "only regular files can be external: %s", full_path.c_str());
571 retval = false;
572 }
573 }
574
575 // checking file chunk integrity
576 if (entries[i].IsChunkedFile()) {
577 FileChunkList chunks;
578 catalog->ListPathChunks(full_path, entries[i].hash_algorithm(), &chunks);
579
580 computed_counters->self.chunked_files++;
581 computed_counters->self.chunked_file_size += entries[i].size();
582 computed_counters->self.file_chunks += chunks.size();
583
584 // do we find file chunks for the chunked file in this catalog?
585 if (chunks.size() == 0) {
586 LogCvmfs(kLogCvmfs, kLogStderr, "no file chunks found for big file %s",
587 full_path.c_str());
588 retval = false;
589 }
590
591 size_t aggregated_file_size = 0;
592 off_t next_offset = 0;
593
594 for (unsigned j = 0; j < chunks.size(); ++j) {
595 const FileChunk this_chunk = chunks.At(j);
596 // check if the chunk boundaries fit together...
597 if (next_offset != this_chunk.offset()) {
598 LogCvmfs(kLogCvmfs, kLogStderr, "misaligned chunk offsets for %s",
599 full_path.c_str());
600 retval = false;
601 }
602 next_offset = this_chunk.offset() + this_chunk.size();
603 aggregated_file_size += this_chunk.size();
604
605 // are all data chunks in the data store?
606 if (check_chunks_ && !entries[i].IsExternalFile()) {
607 const shash::Any &chunk_hash = this_chunk.content_hash();
608 // for performance reasons, only perform the check once
609 // and skip if the hash has been checked before
610 bool chunk_needs_check = true;
611 if (!no_duplicates_map_ && !duplicates_map_.Contains(chunk_hash)) {
612 duplicates_map_.Insert(chunk_hash, 1);
613 } else if (!no_duplicates_map_) {
614 chunk_needs_check = false;
615 }
616 if (chunk_needs_check) {
617 const string chunk_path = "data/" + chunk_hash.MakePath();
618 if (!Exists(chunk_path)) {
619 LogCvmfs(kLogCvmfs, kLogStderr,
620 "partial data chunk %s (%s -> "
621 "offset: %ld | size: %lu) missing",
622 this_chunk.content_hash().ToStringWithSuffix().c_str(),
623 full_path.c_str(), this_chunk.offset(),
624 this_chunk.size());
625 retval = false;
626 }
627 }
628 }
629 }
630
631 // is the aggregated chunk size equal to the actual file size?
632 if (aggregated_file_size != entries[i].size()) {
633 LogCvmfs(kLogCvmfs, kLogStderr,
634 "chunks of file %s produce a size "
635 "mismatch. Calculated %zu bytes | %lu "
636 "bytes expected",
637 full_path.c_str(), aggregated_file_size, entries[i].size());
638 retval = false;
639 }
640 }
641 } // Loop through entries
642
643 // Check if nested catalog marker has been found
644 if (!path.IsEmpty() && (path == catalog->mountpoint())
645 && !found_nested_marker) {
646 LogCvmfs(kLogCvmfs, kLogStderr, "nested catalog without marker at %s",
647 path.c_str());
648 retval = false;
649 }
650
651 // Check directory linkcount
652 if (this_directory.linkcount() != num_subdirs + 2) {
653 LogCvmfs(kLogCvmfs, kLogStderr,
654 "wrong linkcount for %s; "
655 "expected %u, got %u",
656 path.c_str(), num_subdirs + 2, this_directory.linkcount());
657 retval = false;
658 }
659
660 // Check hardlink linkcounts
661 for (HardlinkMap::const_iterator i = hardlinks.begin(),
662 iEnd = hardlinks.end();
663 i != iEnd;
664 ++i) {
665 if (i->second[0].linkcount() != i->second.size()) {
666 LogCvmfs(kLogCvmfs, kLogStderr,
667 "hardlink linkcount wrong for %s, "
668 "expected %lu, got %u",
669 (path.ToString() + "/" + i->second[0].name().ToString()).c_str(),
670 i->second.size(), i->second[0].linkcount());
671 retval = false;
672 }
673 }
674
675 return retval;
676 }
677
678
679 string CommandCheck::DownloadPiece(const shash::Any catalog_hash) {
680 const string source = "data/" + catalog_hash.MakePath();
681 const string dest = temp_directory_ + "/" + catalog_hash.ToString();
682 const string url = repo_base_path_ + "/" + source;
683
684 cvmfs::PathSink pathsink(dest);
685 download::JobInfo download_catalog(&url, true, false, &catalog_hash,
686 &pathsink);
687 const download::Failures retval = download_manager()->Fetch(
688 &download_catalog);
689 if (retval != download::kFailOk) {
690 LogCvmfs(kLogCvmfs, kLogStderr, "failed to download object %s (%d)",
691 catalog_hash.ToString().c_str(), retval);
692 return "";
693 }
694
695 return dest;
696 }
697
698
699 string CommandCheck::DecompressPiece(const shash::Any catalog_hash) {
700 const string source = "data/" + catalog_hash.MakePath();
701 const string dest = temp_directory_ + "/" + catalog_hash.ToString();
702 if (!zlib::DecompressPath2Path(source, dest))
703 return "";
704
705 return dest;
706 }
707
708
709 catalog::Catalog *CommandCheck::FetchCatalog(const string &path,
710 const shash::Any &catalog_hash,
711 const uint64_t catalog_size) {
712 string tmp_file;
713 if (!is_remote_)
714 tmp_file = DecompressPiece(catalog_hash);
715 else
716 tmp_file = DownloadPiece(catalog_hash);
717
718 if (tmp_file == "") {
719 LogCvmfs(kLogCvmfs, kLogStderr, "failed to load catalog %s",
720 catalog_hash.ToString().c_str());
721 return NULL;
722 }
723
724 catalog::Catalog *catalog = catalog::Catalog::AttachFreely(path, tmp_file,
725 catalog_hash);
726 const int64_t catalog_file_size = GetFileSize(tmp_file);
727 if (catalog_file_size <= 0) {
728 LogCvmfs(kLogCvmfs, kLogStderr, "Error downloading catalog %s at %s %s",
729 catalog_hash.ToString().c_str(), path.c_str(), tmp_file.c_str());
730 assert(catalog_file_size > 0);
731 }
732 unlink(tmp_file.c_str());
733
734 if ((catalog_size > 0) && (uint64_t(catalog_file_size) != catalog_size)) {
735 LogCvmfs(kLogCvmfs, kLogStderr,
736 "catalog file size mismatch, "
737 "expected %" PRIu64 ", got %" PRIu64,
738 catalog_size, catalog_file_size);
739 delete catalog;
740 return NULL;
741 }
742
743 return catalog;
744 }
745
746
747 bool CommandCheck::FindSubtreeRootCatalog(const string &subtree_path,
748 shash::Any *root_hash,
749 uint64_t *root_size) {
750 catalog::Catalog *current_catalog = FetchCatalog("", *root_hash);
751 if (current_catalog == NULL) {
752 return false;
753 }
754
755 typedef vector<string> Tokens;
756 const Tokens path_tokens = SplitString(subtree_path, '/');
757
758 string current_path = "";
759
760 Tokens::const_iterator i = path_tokens.begin();
761 const Tokens::const_iterator iend = path_tokens.end();
762 for (; i != iend; ++i) {
763 if (i->empty()) {
764 continue;
765 }
766
767 current_path += "/" + *i;
768 if (current_catalog->FindNested(PathString(current_path), root_hash,
769 root_size)) {
770 delete current_catalog;
771
772 if (current_path.length() < subtree_path.length()) {
773 current_catalog = FetchCatalog(current_path, *root_hash);
774 if (current_catalog == NULL) {
775 break;
776 }
777 } else {
778 return true;
779 }
780 }
781 }
782 return false;
783 }
784
785
786 /**
787 * Recursion on nested catalog level. No ownership of computed_counters.
788 */
789 bool CommandCheck::InspectTree(const string &path,
790 const shash::Any &catalog_hash,
791 const uint64_t catalog_size,
792 const bool is_nested_catalog,
793 const catalog::DirectoryEntry *transition_point,
794 catalog::DeltaCounters *computed_counters,
795 bool *pruned_subtree) {
796 LogCvmfs(kLogCvmfs, kLogStdout | kLogInform, "[inspecting catalog] %s at %s",
797 catalog_hash.ToString().c_str(), path == "" ? "/" : path.c_str());
798
799 const catalog::Catalog *catalog = FetchCatalog(path, catalog_hash,
800 catalog_size);
801 if (catalog == NULL) {
802 LogCvmfs(kLogCvmfs, kLogStderr, "failed to open catalog %s",
803 catalog_hash.ToString().c_str());
804 return false;
805 }
806
807 int retval = true;
808
809 if (catalog->root_prefix() != PathString(path.data(), path.length())) {
810 LogCvmfs(kLogCvmfs, kLogStderr,
811 "root prefix mismatch; "
812 "expected %s, got %s",
813 path.c_str(), catalog->root_prefix().c_str());
814 retval = false;
815 }
816
817 // Check transition point
818 catalog::DirectoryEntry root_entry;
819 if (!catalog->LookupPath(catalog->root_prefix(), &root_entry)) {
820 LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup root entry (%s)",
821 path.c_str());
822 retval = false;
823 }
824 if (!root_entry.IsDirectory()) {
825 LogCvmfs(kLogCvmfs, kLogStderr, "root entry not a directory (%s)",
826 path.c_str());
827 retval = false;
828 }
829 if (is_nested_catalog) {
830 if (transition_point != NULL
831 && !CompareEntries(*transition_point, root_entry, true, true)) {
832 LogCvmfs(kLogCvmfs, kLogStderr,
833 "transition point and root entry differ (%s)", path.c_str());
834 retval = false;
835 }
836 if (!root_entry.IsNestedCatalogRoot()) {
837 LogCvmfs(kLogCvmfs, kLogStderr,
838 "nested catalog root expected but not found (%s)", path.c_str());
839 retval = false;
840 }
841 } else {
842 if (root_entry.IsNestedCatalogRoot()) {
843 LogCvmfs(kLogCvmfs, kLogStderr,
844 "nested catalog root found but not expected (%s)", path.c_str());
845 retval = false;
846 }
847 }
848
849 // Partial replication: excluded subtrees are not replicated at all (their
850 // catalogs are pruned during snapshot), so InspectTree is only ever reached
851 // for catalogs that are present. Pruning of the recursion happens in the
852 // nested-catalog loop below; here we just track whether anything was pruned
853 // so the aggregate counter comparison can be relaxed accordingly.
854 bool local_pruned = false;
855
856 // Traverse the catalog
857 set<PathString> bind_mountpoints;
858 if (!Find(catalog, PathString(path.data(), path.length()), computed_counters,
859 &bind_mountpoints)) {
860 retval = false;
861 }
862
863 // Check number of entries
864 if (root_entry.HasXattrs())
865 computed_counters->self.xattrs++;
866 const uint64_t num_found_entries = 1 + computed_counters->self.regular_files
867 + computed_counters->self.symlinks
868 + computed_counters->self.specials
869 + computed_counters->self.directories;
870 if (num_found_entries != catalog->GetNumEntries()) {
871 LogCvmfs(kLogCvmfs, kLogStderr,
872 "dangling entries in catalog, "
873 "expected %" PRIu64 ", got %" PRIu64,
874 catalog->GetNumEntries(), num_found_entries);
875 retval = false;
876 }
877
878 // Recurse into nested catalogs
879 const catalog::Catalog::NestedCatalogList
880 &nested_catalogs = catalog->ListNestedCatalogs();
881 const catalog::Catalog::NestedCatalogList
882 own_nested_catalogs = catalog->ListOwnNestedCatalogs();
883 if (own_nested_catalogs.size()
884 != static_cast<uint64_t>(computed_counters->self.nested_catalogs)) {
885 LogCvmfs(kLogCvmfs, kLogStderr,
886 "number of nested catalogs does not match;"
887 " expected %lu, got %lu",
888 computed_counters->self.nested_catalogs,
889 own_nested_catalogs.size());
890 retval = false;
891 }
892 set<PathString> nested_catalog_paths;
893 for (catalog::Catalog::NestedCatalogList::const_iterator
894 i = nested_catalogs.begin(),
895 iEnd = nested_catalogs.end();
896 i != iEnd;
897 ++i) {
898 nested_catalog_paths.insert(i->mountpoint);
899 }
900 if (nested_catalog_paths.size() != nested_catalogs.size()) {
901 LogCvmfs(kLogCvmfs, kLogStderr,
902 "duplicates among nested catalogs and bind mountpoints");
903 retval = false;
904 }
905
906 for (catalog::Catalog::NestedCatalogList::const_iterator
907 i = nested_catalogs.begin(),
908 iEnd = nested_catalogs.end();
909 i != iEnd;
910 ++i) {
911 if (bind_mountpoints.find(i->mountpoint) != bind_mountpoints.end()) {
912 catalog::DirectoryEntry bind_mountpoint;
913 const PathString mountpoint("/" + i->mountpoint.ToString().substr(1));
914 if (!catalog->LookupPath(mountpoint, &bind_mountpoint)) {
915 LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup bind mountpoint %s",
916 mountpoint.c_str());
917 retval = false;
918 }
919 LogCvmfs(kLogCvmfs, kLogDebug, "skipping bind mountpoint %s",
920 i->mountpoint.c_str());
921 continue;
922 }
923 // Partial replication: a subtree that contains no included path is pruned
924 // on the Stratum-1 (catalog + objects absent), so we must not descend into
925 // it. Skipping recursion here also means the aggregate counters for this
926 // catalog will be lower than the stored ones; record that so the counter
927 // comparison can be relaxed below.
928 if (inclusion_spec_ != NULL
929 && inclusion_spec_->IsExcluded(i->mountpoint.ToString())) {
930 LogCvmfs(kLogCvmfs, kLogStdout,
931 " Skipping pruned (excluded) subtree at %s",
932 i->mountpoint.c_str());
933 local_pruned = true;
934 continue;
935 }
936 catalog::DirectoryEntry nested_transition_point;
937 if (!catalog->LookupPath(i->mountpoint, &nested_transition_point)) {
938 LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup transition point %s",
939 i->mountpoint.c_str());
940 retval = false;
941 } else {
942 catalog::DeltaCounters nested_counters;
943 const bool is_nested = true;
944 bool nested_pruned = false;
945 if (!InspectTree(i->mountpoint.ToString(), i->hash, i->size, is_nested,
946 &nested_transition_point, &nested_counters,
947 &nested_pruned))
948 retval = false;
949 if (nested_pruned)
950 local_pruned = true;
951 nested_counters.PopulateToParent(computed_counters);
952 }
953 }
954
955 if (pruned_subtree != NULL && local_pruned)
956 *pruned_subtree = true;
957
958 // Check statistics counters
959 // Additionally account for root directory
960 computed_counters->self.directories++;
961 // Partial replication: when descendant subtrees have been pruned, the
962 // aggregate (subtree) counters stored in this catalog necessarily exceed
963 // what we could recompute, so the comparison would always fail. Skip it for
964 // affected catalogs; self counters and structure are still fully verified.
965 if (local_pruned) {
966 LogCvmfs(kLogCvmfs, kLogStdout,
967 " Skipping aggregate counter check at %s (pruned subtree)",
968 path == "" ? "/" : path.c_str());
969 } else {
970 catalog::Counters compare_counters;
971 compare_counters.ApplyDelta(*computed_counters);
972 const catalog::Counters stored_counters = catalog->GetCounters();
973 if (!CompareCounters(compare_counters, stored_counters)) {
974 LogCvmfs(kLogCvmfs, kLogStderr, "statistics counter mismatch [%s]",
975 catalog_hash.ToString().c_str());
976 retval = false;
977 }
978 }
979
980 delete catalog;
981 return retval;
982 }
983
984
985 int CommandCheck::Main(const swissknife::ArgumentList &args) {
986 string tag_name;
987 string subtree_path = "";
988 string pubkey_path = "";
989 string repo_name = "";
990 string reflog_chksum_path = "";
991
992 temp_directory_ = (args.find('t') != args.end()) ? *args.find('t')->second
993 : "/tmp";
994 if (args.find('n') != args.end())
995 tag_name = *args.find('n')->second;
996 if (args.find('c') != args.end())
997 check_chunks_ = true;
998 if (args.find('d') != args.end())
999 no_duplicates_map_ = true;
1000 if (args.find('l') != args.end()) {
1001 const unsigned log_level = kLogLevel0
1002 << String2Uint64(*args.find('l')->second);
1003 if (log_level > kLogNone) {
1004 LogCvmfs(kLogCvmfs, kLogStderr, "invalid log level");
1005 return 1;
1006 }
1007 SetLogVerbosity(static_cast<LogLevels>(log_level));
1008 }
1009 if (args.find('k') != args.end())
1010 pubkey_path = *args.find('k')->second;
1011 if (DirectoryExists(pubkey_path))
1012 pubkey_path = JoinStrings(FindFilesBySuffix(pubkey_path, ".pub"), ":");
1013 if (args.find('N') != args.end())
1014 repo_name = *args.find('N')->second;
1015
1016 if (args.find('E') != args.end()) {
1017 inclusion_spec_ =
1018 catalog::InclusionSpec::Create(*args.find('E')->second);
1019 if (inclusion_spec_ == NULL || !inclusion_spec_->IsValid()) {
1020 LogCvmfs(kLogCvmfs, kLogStderr,
1021 "Failed to parse inclusion spec from '%s'",
1022 args.find('E')->second->c_str());
1023 return 1;
1024 }
1025 LogCvmfs(kLogCvmfs, kLogStdout,
1026 "Partial replication: will skip pruned (excluded) subtrees");
1027 }
1028
1029 repo_base_path_ = MakeCanonicalPath(*args.find('r')->second);
1030 if (args.find('s') != args.end())
1031 subtree_path = MakeCanonicalPath(*args.find('s')->second);
1032 if (args.find('R') != args.end())
1033 reflog_chksum_path = *args.find('R')->second;
1034
1035 // Repository can be HTTP address or on local file system
1036 is_remote_ = IsHttpUrl(repo_base_path_);
1037
1038 // initialize the (swissknife global) download and signature managers
1039 if (is_remote_) {
1040 const bool follow_redirects = (args.count('L') > 0);
1041 const string proxy = (args.count('@') > 0) ? *args.find('@')->second : "";
1042 if (!this->InitDownloadManager(follow_redirects, proxy)) {
1043 return 1;
1044 }
1045
1046 if (pubkey_path.empty() || repo_name.empty()) {
1047 LogCvmfs(kLogCvmfs, kLogStderr,
1048 "please provide pubkey and repo name for "
1049 "remote repositories");
1050 return 1;
1051 }
1052
1053 if (!this->InitSignatureManager(pubkey_path)) {
1054 return 1;
1055 }
1056 }
1057
1058 // Load Manifest
1059 UniquePtr<manifest::Manifest> manifest;
1060 bool successful = true;
1061
1062 if (is_remote_) {
1063 manifest = FetchRemoteManifest(repo_base_path_, repo_name);
1064 } else {
1065 if (chdir(repo_base_path_.c_str()) != 0) {
1066 LogCvmfs(kLogCvmfs, kLogStderr, "failed to switch to directory %s",
1067 repo_base_path_.c_str());
1068 return 1;
1069 }
1070 manifest = OpenLocalManifest(".cvmfspublished");
1071 }
1072
1073 if (!manifest.IsValid()) {
1074 LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest");
1075 return 1;
1076 }
1077
1078 // Check meta-info object
1079 if (!manifest->meta_info().IsNull()) {
1080 string tmp_file;
1081 if (!is_remote_)
1082 tmp_file = DecompressPiece(manifest->meta_info());
1083 else
1084 tmp_file = DownloadPiece(manifest->meta_info());
1085 if (tmp_file == "") {
1086 LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository metainfo %s",
1087 manifest->meta_info().ToString().c_str());
1088 return 1;
1089 }
1090 unlink(tmp_file.c_str());
1091 }
1092
1093 shash::Any reflog_hash;
1094 if (!reflog_chksum_path.empty()) {
1095 if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
1096 LogCvmfs(kLogCvmfs, kLogStderr, "failed to read reflog checksum file");
1097 return 1;
1098 }
1099 } else {
1100 reflog_hash = manifest->reflog_hash();
1101 }
1102
1103 if (Exists(".cvmfsreflog")) {
1104 if (reflog_hash.IsNull()) {
1105 // If there is a reflog, we want to check it
1106 LogCvmfs(kLogCvmfs, kLogStderr,
1107 ".cvmfsreflog present but no checksum provided, aborting");
1108 return 1;
1109 }
1110 const bool retval = InspectReflog(reflog_hash, manifest.weak_ref());
1111 if (!retval) {
1112 LogCvmfs(kLogCvmfs, kLogStderr, "failed to verify reflog");
1113 return 1;
1114 }
1115 } else {
1116 if (!reflog_hash.IsNull()) {
1117 // There is a checksum but no reflog; possibly the checksum is for the
1118 // from the manifest for the stratum 0 reflog
1119 if (!reflog_chksum_path.empty()) {
1120 LogCvmfs(kLogCvmfs, kLogStderr,
1121 "local reflog checksum set but reflog itself is missing, "
1122 "aborting");
1123 return 1;
1124 }
1125 }
1126 }
1127
1128 // Load history
1129 UniquePtr<history::History> tag_db;
1130 if (!manifest->history().IsNull()) {
1131 string tmp_file;
1132 if (!is_remote_)
1133 tmp_file = DecompressPiece(manifest->history());
1134 else
1135 tmp_file = DownloadPiece(manifest->history());
1136 if (tmp_file == "") {
1137 LogCvmfs(kLogCvmfs, kLogStderr, "failed to load history database %s",
1138 manifest->history().ToString().c_str());
1139 return 1;
1140 }
1141 tag_db = history::SqliteHistory::Open(tmp_file);
1142 if (!tag_db.IsValid()) {
1143 LogCvmfs(kLogCvmfs, kLogStderr, "failed to open history database %s",
1144 manifest->history().ToString().c_str());
1145 return 1;
1146 }
1147 tag_db->TakeDatabaseFileOwnership();
1148 successful = InspectHistory(tag_db.weak_ref()) && successful;
1149 }
1150
1151 if (manifest->has_alt_catalog_path()) {
1152 if (!Exists(manifest->certificate().MakeAlternativePath())) {
1153 LogCvmfs(kLogCvmfs, kLogStderr,
1154 "failed to find alternative certificate link %s",
1155 manifest->certificate().MakeAlternativePath().c_str());
1156 return 1;
1157 }
1158 if (!Exists(manifest->catalog_hash().MakeAlternativePath())) {
1159 LogCvmfs(kLogCvmfs, kLogStderr,
1160 "failed to find alternative catalog link %s",
1161 manifest->catalog_hash().MakeAlternativePath().c_str());
1162 return 1;
1163 }
1164 }
1165
1166 shash::Any root_hash = manifest->catalog_hash();
1167 uint64_t root_size = manifest->catalog_size();
1168 if (tag_name != "") {
1169 if (!tag_db.IsValid()) {
1170 LogCvmfs(kLogCvmfs, kLogStderr, "no history");
1171 return 1;
1172 }
1173 history::History::Tag tag;
1174 const bool retval = tag_db->GetByName(tag_name, &tag);
1175 if (!retval) {
1176 LogCvmfs(kLogCvmfs, kLogStderr, "no such tag: %s", tag_name.c_str());
1177 return 1;
1178 }
1179 root_hash = tag.root_hash;
1180 root_size = tag.size;
1181 LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting repository tag %s",
1182 tag_name.c_str());
1183 }
1184
1185 const bool is_nested_catalog = (!subtree_path.empty());
1186 if (is_nested_catalog
1187 && !FindSubtreeRootCatalog(subtree_path, &root_hash, &root_size)) {
1188 LogCvmfs(kLogCvmfs, kLogStderr, "cannot find nested catalog at %s",
1189 subtree_path.c_str());
1190 return 1;
1191 }
1192
1193
1194 catalog::DeltaCounters computed_counters;
1195 successful = InspectTree(subtree_path,
1196 root_hash,
1197 root_size,
1198 is_nested_catalog,
1199 NULL,
1200 &computed_counters)
1201 && successful;
1202
1203 if (!successful) {
1204 LogCvmfs(kLogCvmfs, kLogStderr, "CATALOG PROBLEMS OR OTHER ERRORS FOUND");
1205 return 1;
1206 }
1207
1208 LogCvmfs(kLogCvmfs, kLogStdout, "no problems found");
1209 delete inclusion_spec_;
1210 inclusion_spec_ = NULL;
1211 return 0;
1212 }
1213
1214 } // namespace swissknife
1215