CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_check.cc
Go to the documentation of this file.
1 
7 #define __STDC_FORMAT_MACROS
8 
9 
10 #include "swissknife_check.h"
11 
12 #include <inttypes.h>
13 #include <unistd.h>
14 
15 #include <cassert>
16 #include <map>
17 #include <queue>
18 #include <set>
19 #include <string>
20 #include <vector>
21 
22 #include "catalog_sql.h"
24 #include "file_chunk.h"
25 #include "history_sqlite.h"
26 #include "manifest.h"
27 #include "network/download.h"
28 #include "reflog.h"
29 #include "sanitizer.h"
30 #include "shortstring.h"
31 #include "util/exception.h"
32 #include "util/logging.h"
33 #include "util/pointer.h"
34 #include "util/posix.h"
35 
36 using namespace std; // NOLINT
37 
38 // for map of duplicate entries; as in kvstore.cc
39 static inline uint32_t hasher_any(const shash::Any &key) {
40  // We'll just do the same thing as hasher_md5, since every hash is at
41  // least as large.
42  return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(key.digest)
43  + 1);
44 }
45 
46 
47 namespace swissknife {
48 
49 CommandCheck::CommandCheck()
50  : check_chunks_(false), no_duplicates_map_(false), is_remote_(false) {
51  const shash::Any hash_null;
52  duplicates_map_.Init(16, hash_null, hasher_any);
53 }
54 
56  const catalog::DirectoryEntry &b,
57  const bool compare_names,
58  const bool is_transition_point) {
59  typedef catalog::DirectoryEntry::Difference Difference;
60 
62  if (diffs == Difference::kIdentical) {
63  return true;
64  }
65 
66  // in case of a nested catalog transition point the controlling flags are
67  // supposed to differ. If this is the only difference we are done...
68  if (is_transition_point
69  && (diffs ^ Difference::kNestedCatalogTransitionFlags) == 0) {
70  return true;
71  }
72 
73  bool retval = true;
74  if (compare_names) {
75  if (diffs & Difference::kName) {
76  LogCvmfs(kLogCvmfs, kLogStderr, "names differ: %s / %s", a.name().c_str(),
77  b.name().c_str());
78  retval = false;
79  }
80  }
81  if (diffs & Difference::kLinkcount) {
82  LogCvmfs(kLogCvmfs, kLogStderr, "linkcounts differ: %u / %u", a.linkcount(),
83  b.linkcount());
84  retval = false;
85  }
86  if (diffs & Difference::kHardlinkGroup) {
87  LogCvmfs(kLogCvmfs, kLogStderr, "hardlink groups differ: %u / %u",
89  retval = false;
90  }
91  if (diffs & Difference::kSize) {
92  LogCvmfs(kLogCvmfs, kLogStderr, "sizes differ: %" PRIu64 " / %" PRIu64,
93  a.size(), b.size());
94  retval = false;
95  }
96  if (diffs & Difference::kMode) {
97  LogCvmfs(kLogCvmfs, kLogStderr, "modes differ: %u / %u", a.mode(),
98  b.mode());
99  retval = false;
100  }
101  if (diffs & Difference::kMtime) {
102  LogCvmfs(kLogCvmfs, kLogStderr, "timestamps differ: %lu / %lu", a.mtime(),
103  b.mtime());
104  retval = false;
105  }
106  if (diffs & Difference::kChecksum) {
107  LogCvmfs(kLogCvmfs, kLogStderr, "content hashes differ: %s / %s",
108  a.checksum().ToString().c_str(), b.checksum().ToString().c_str());
109  retval = false;
110  }
111  if (diffs & Difference::kSymlink) {
112  LogCvmfs(kLogCvmfs, kLogStderr, "symlinks differ: %s / %s",
113  a.symlink().c_str(), b.symlink().c_str());
114  retval = false;
115  }
116  if (diffs & Difference::kExternalFileFlag) {
118  "external file flag differs: %d / %d "
119  "(%s / %s)",
120  a.IsExternalFile(), b.IsExternalFile(), a.name().c_str(),
121  b.name().c_str());
122  retval = false;
123  }
124  if (diffs & Difference::kHasXattrsFlag) {
126  "extended attributes differ: %d / %d "
127  "(%s / %s)",
128  a.HasXattrs(), b.HasXattrs(), a.name().c_str(), b.name().c_str());
129  retval = false;
130  }
131  if (!is_transition_point) {
132  if (diffs & Difference::kUid) {
133  LogCvmfs(kLogCvmfs, kLogStderr, "uids differ: %d / %d (%s / %s)", a.uid(),
134  b.uid(), a.name().c_str(), b.name().c_str());
135  retval = false;
136  }
137  if (diffs & Difference::kGid) {
138  LogCvmfs(kLogCvmfs, kLogStderr, "gids differ: %d / %d (%s / %s)", a.gid(),
139  b.gid(), a.name().c_str(), b.name().c_str());
140  retval = false;
141  }
142  }
143 
144  return retval;
145 }
146 
147 
149  const catalog::Counters &b) {
150  const catalog::Counters::FieldsMap map_a = a.GetFieldsMap();
151  const catalog::Counters::FieldsMap map_b = b.GetFieldsMap();
152 
153  bool retval = true;
154  catalog::Counters::FieldsMap::const_iterator i = map_a.begin();
155  const catalog::Counters::FieldsMap::const_iterator iend = map_a.end();
156  for (; i != iend; ++i) {
157  const catalog::Counters::FieldsMap::const_iterator comp =
158  map_b.find(i->first);
159  assert(comp != map_b.end());
160 
161  if (*(i->second) != *(comp->second)) {
163  "catalog statistics mismatch: %s (expected: %" PRIu64 " / "
164  "in catalog: %" PRIu64 ")",
165  comp->first.c_str(), *(i->second), *(comp->second));
166  retval = false;
167  }
168  }
169 
170  return retval;
171 }
172 
173 
177 bool CommandCheck::Exists(const string &file) {
178  if (!is_remote_) {
179  return FileExists(file) || SymlinkExists(file);
180  } else {
181  const string url = repo_base_path_ + "/" + file;
182  LogCvmfs(kLogCvmfs, kLogVerboseMsg, "[Exists::url] %s", url.c_str());
183  download::JobInfo head(&url, false);
184  return download_manager()->Fetch(&head) == download::kFailOk;
185  }
186 }
187 
188 
192 string CommandCheck::FetchPath(const string &path) {
193  string tmp_path;
194  FILE *f = CreateTempFile(temp_directory_ + "/cvmfstmp", kDefaultFileMode,
195  "w+", &tmp_path);
196  assert(f != NULL);
197 
198  const string url = repo_base_path_ + "/" + path;
199  if (is_remote_) {
200  cvmfs::FileSink filesink(f);
201  download::JobInfo download_job(&url, false, false, NULL, &filesink);
202  const download::Failures retval = download_manager()->Fetch(&download_job);
203  if (retval != download::kFailOk) {
204  PANIC(kLogStderr, "failed to read %s", url.c_str());
205  }
206  } else {
207  const bool retval = CopyPath2File(url, f);
208  if (!retval) {
209  PANIC(kLogStderr, "failed to read %s", url.c_str());
210  }
211  }
212 
213  fclose(f);
214  return tmp_path;
215 }
216 
217 
222 bool CommandCheck::InspectReflog(const shash::Any &reflog_hash,
224  LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting log of references");
225  const string reflog_path = FetchPath(".cvmfsreflog");
226  shash::Any computed_hash(reflog_hash.algorithm);
227  manifest::Reflog::HashDatabase(reflog_path, &computed_hash);
228  if (computed_hash != reflog_hash) {
230  "The .cvmfsreflog has unexpected content hash %s (expected %s)",
231  computed_hash.ToString().c_str(), reflog_hash.ToString().c_str());
232  unlink(reflog_path.c_str());
233  return false;
234  }
235 
237  assert(reflog.IsValid());
238  reflog->TakeDatabaseFileOwnership();
239 
240  if (!reflog->ContainsCatalog(manifest->catalog_hash())) {
242  "failed to find catalog root hash %s in .cvmfsreflog",
243  manifest->catalog_hash().ToString().c_str());
244  return false;
245  }
246 
247  if (!reflog->ContainsCertificate(manifest->certificate())) {
249  "failed to find certificate hash %s in .cvmfsreflog",
250  manifest->certificate().ToString().c_str());
251  return false;
252  }
253 
254  if (!manifest->history().IsNull()
255  && !reflog->ContainsHistory(manifest->history())) {
257  "failed to find tag database's hash %s in .cvmfsreflog",
258  manifest->history().ToString().c_str());
259  return false;
260  }
261 
262  if (!manifest->meta_info().IsNull()
263  && !reflog->ContainsMetainfo(manifest->meta_info())) {
265  "failed to find meta info hash %s in .cvmfsreflog",
266  manifest->meta_info().ToString().c_str());
267  return false;
268  }
269 
270  return true;
271 }
272 
273 
278  LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting tag database");
279  bool retval;
280  vector<history::History::Tag> tags;
281  retval = history->List(&tags);
282  if (!retval) {
283  LogCvmfs(kLogCvmfs, kLogStderr, "failed to enumerate tags");
284  return false;
285  }
286  vector<history::History::Branch> branches;
287  retval = history->ListBranches(&branches);
288  if (!retval) {
289  LogCvmfs(kLogCvmfs, kLogStderr, "failed to enumerate branches");
290  return false;
291  }
292 
293  bool result = true;
294 
295  map<string, uint64_t> initial_revisions;
296  const sanitizer::BranchSanitizer sanitizer;
297  for (unsigned i = 0; i < branches.size(); ++i) {
298  if (!sanitizer.IsValid(branches[i].branch)) {
299  LogCvmfs(kLogCvmfs, kLogStderr, "invalid branch name: %s",
300  branches[i].branch.c_str());
301  result = false;
302  }
303  initial_revisions[branches[i].branch] = branches[i].initial_revision;
304  }
305 
306  set<string> used_branches; // all branches referenced in tag db
307  // TODO(jblomer): same root hash implies same size and revision
308  for (unsigned i = 0; i < tags.size(); ++i) {
309  used_branches.insert(tags[i].branch);
310  const map<string, uint64_t>::const_iterator iter = initial_revisions.find(
311  tags[i].branch);
312  if (iter == initial_revisions.end()) {
313  LogCvmfs(kLogCvmfs, kLogStderr, "invalid branch %s in tag %s",
314  tags[i].branch.c_str(), tags[i].name.c_str());
315  result = false;
316  } else {
317  if (tags[i].revision < iter->second) {
319  "invalid revision %" PRIu64 " of tag %s", tags[i].revision,
320  tags[i].name.c_str());
321  result = false;
322  }
323  }
324  }
325 
326  if (used_branches.size() != branches.size()) {
327  LogCvmfs(kLogCvmfs, kLogStderr, "unused, dangling branches stored");
328  result = false;
329  }
330 
331  return result;
332 }
333 
334 
343  const PathString &path,
344  catalog::DeltaCounters *computed_counters,
345  set<PathString> *bind_mountpoints) {
347  catalog::DirectoryEntry this_directory;
348 
349  if (!catalog->LookupPath(path, &this_directory)) {
350  LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup %s", path.c_str());
351  return false;
352  }
353  if (!catalog->ListingPath(path, &entries)) {
354  LogCvmfs(kLogCvmfs, kLogStderr, "failed to list %s", path.c_str());
355  return false;
356  }
357 
358  uint32_t num_subdirs = 0;
359  bool retval = true;
360  typedef map<uint32_t, vector<catalog::DirectoryEntry> > HardlinkMap;
361  HardlinkMap hardlinks;
362  bool found_nested_marker = false;
363 
364  for (unsigned i = 0; i < entries.size(); ++i) {
365  // for performance reasons, keep track of files already checked
366  // and only run requests once per hash
367  const bool entry_needs_check = !entries[i].checksum().IsNull()
368  && !entries[i].IsExternalFile() &&
369  // fallback cli option can force the entry to
370  // be checked
373  entries[i].checksum()));
374  if (entry_needs_check && !no_duplicates_map_)
375  duplicates_map_.Insert(entries[i].checksum(), 1);
376 
377  PathString full_path(path);
378  full_path.Append("/", 1);
379  full_path.Append(entries[i].name().GetChars(),
380  entries[i].name().GetLength());
381  LogCvmfs(kLogCvmfs, kLogVerboseMsg, "[path] %s [needs check] %i",
382  full_path.c_str(), entry_needs_check);
383 
384 
385  // Name must not be empty
386  if (entries[i].name().IsEmpty()) {
387  LogCvmfs(kLogCvmfs, kLogStderr, "empty path at %s", full_path.c_str());
388  retval = false;
389  }
390 
391  // Catalog markers should indicate nested catalogs
392  if (entries[i].name() == NameString(string(".cvmfscatalog"))) {
393  if (catalog->mountpoint() != path) {
395  "found abandoned nested catalog marker at %s",
396  full_path.c_str());
397  retval = false;
398  }
399  found_nested_marker = true;
400  }
401 
402  // Check if checksum is not null
403  if (entries[i].IsRegular() && !entries[i].IsChunkedFile()
404  && entries[i].checksum().IsNull()) {
406  "regular file pointing to zero-hash: '%s'", full_path.c_str());
407  retval = false;
408  }
409 
410  // Check if the chunk is there
411  if (check_chunks_ && entry_needs_check) {
412  string chunk_path = "data/" + entries[i].checksum().MakePath();
413  if (entries[i].IsDirectory())
414  chunk_path += shash::kSuffixMicroCatalog;
415  if (!Exists(chunk_path)) {
416  LogCvmfs(kLogCvmfs, kLogStderr, "data chunk %s (%s) missing",
417  entries[i].checksum().ToString().c_str(), full_path.c_str());
418  retval = false;
419  }
420  }
421 
422  // Add hardlinks to counting map
423  if ((entries[i].linkcount() > 1) && !entries[i].IsDirectory()) {
424  if (entries[i].hardlink_group() == 0) {
425  LogCvmfs(kLogCvmfs, kLogStderr, "invalid hardlink group for %s",
426  full_path.c_str());
427  retval = false;
428  } else {
429  const HardlinkMap::iterator hardlink_group =
430  hardlinks.find(entries[i].hardlink_group());
431  if (hardlink_group == hardlinks.end()) {
432  hardlinks[entries[i].hardlink_group()];
433  hardlinks[entries[i].hardlink_group()].push_back(entries[i]);
434  } else {
435  if (!CompareEntries(entries[i], (hardlink_group->second)[0], false)) {
436  LogCvmfs(kLogCvmfs, kLogStderr, "hardlink %s doesn't match",
437  full_path.c_str());
438  retval = false;
439  }
440  hardlink_group->second.push_back(entries[i]);
441  } // Hardlink added to map
442  } // Hardlink group > 0
443  } // Hardlink found
444 
445  // For any kind of entry, the linkcount should be > 0
446  if (entries[i].linkcount() == 0) {
447  LogCvmfs(kLogCvmfs, kLogStderr, "Entry %s has linkcount 0.",
448  entries[i].name().c_str());
449  retval = false;
450  }
451 
452  // Checks depending of entry type
453  if (!entries[i].IsRegular()) {
454  if (entries[i].IsDirectIo()) {
455  LogCvmfs(kLogCvmfs, kLogStderr, "invalid direct i/o flag found: %s",
456  full_path.c_str());
457  retval = false;
458  }
459  }
460  if (entries[i].IsDirectory()) {
461  computed_counters->self.directories++;
462  num_subdirs++;
463  // Directory size
464  // if (entries[i].size() < 4096) {
465  // LogCvmfs(kLogCvmfs, kLogStderr, "invalid file size for %s",
466  // full_path.c_str());
467  // retval = false;
468  // }
469  // No directory hardlinks
470  if (entries[i].hardlink_group() != 0) {
471  LogCvmfs(kLogCvmfs, kLogStderr, "directory hardlink found at %s",
472  full_path.c_str());
473  retval = false;
474  }
475  if (entries[i].IsNestedCatalogMountpoint()
476  || entries[i].IsBindMountpoint()) {
477  // Find transition point
478  if (entries[i].IsNestedCatalogMountpoint())
479  computed_counters->self.nested_catalogs++;
480  shash::Any tmp;
481  uint64_t tmp2;
482  const PathString mountpoint(full_path);
483  if (!catalog->FindNested(mountpoint, &tmp, &tmp2)) {
484  LogCvmfs(kLogCvmfs, kLogStderr, "nested catalog at %s not registered",
485  full_path.c_str());
486  retval = false;
487  }
488 
489  // check that the nested mountpoint is empty in the current catalog
490  catalog::DirectoryEntryList nested_entries;
491  if (catalog->ListingPath(full_path, &nested_entries)
492  && !nested_entries.empty()) {
494  "non-empty nested catalog mountpoint "
495  "at %s.",
496  full_path.c_str());
497  retval = false;
498  }
499 
500  if (entries[i].IsBindMountpoint()) {
501  bind_mountpoints->insert(full_path);
502  if (entries[i].IsNestedCatalogMountpoint()) {
504  "bind mountpoint and nested mountpoint mutually exclusive"
505  " at %s.",
506  full_path.c_str());
507  retval = false;
508  }
509  }
510  } else {
511  // Recurse
512  if (!Find(catalog, full_path, computed_counters, bind_mountpoints))
513  retval = false;
514  }
515  } else if (entries[i].IsLink()) {
516  computed_counters->self.symlinks++;
517  // No hash for symbolics links
518  if (!entries[i].checksum().IsNull()) {
519  LogCvmfs(kLogCvmfs, kLogStderr, "symbolic links with hash at %s",
520  full_path.c_str());
521  retval = false;
522  }
523  // Right size of symbolic link?
524  if (entries[i].size() != entries[i].symlink().GetLength()) {
526  "wrong symbolic link size for %s; "
527  "expected %u, got %lu",
528  full_path.c_str(), entries[i].symlink().GetLength(),
529  entries[i].size());
530  retval = false;
531  }
532  } else if (entries[i].IsRegular()) {
533  computed_counters->self.regular_files++;
534  computed_counters->self.file_size += entries[i].size();
535  } else if (entries[i].IsSpecial()) {
536  computed_counters->self.specials++;
537  // Size zero for special files
538  if (entries[i].size() != 0) {
540  "unexpected non-zero special file size %s", full_path.c_str());
541  retval = false;
542  }
543  // No hash for special files
544  if (!entries[i].checksum().IsNull()) {
545  LogCvmfs(kLogCvmfs, kLogStderr, "special file with hash at %s",
546  full_path.c_str());
547  retval = false;
548  }
549  // No symlink
550  if (entries[i].symlink().GetLength() > 0) {
552  "special file with non-zero symlink at %s", full_path.c_str());
553  retval = false;
554  }
555  } else {
556  LogCvmfs(kLogCvmfs, kLogStderr, "unknown file type %s",
557  full_path.c_str());
558  retval = false;
559  }
560 
561  if (entries[i].HasXattrs()) {
562  computed_counters->self.xattrs++;
563  }
564 
565  if (entries[i].IsExternalFile()) {
566  computed_counters->self.externals++;
567  computed_counters->self.external_file_size += entries[i].size();
568  if (!entries[i].IsRegular()) {
570  "only regular files can be external: %s", full_path.c_str());
571  retval = false;
572  }
573  }
574 
575  // checking file chunk integrity
576  if (entries[i].IsChunkedFile()) {
577  FileChunkList chunks;
578  catalog->ListPathChunks(full_path, entries[i].hash_algorithm(), &chunks);
579 
580  computed_counters->self.chunked_files++;
581  computed_counters->self.chunked_file_size += entries[i].size();
582  computed_counters->self.file_chunks += chunks.size();
583 
584  // do we find file chunks for the chunked file in this catalog?
585  if (chunks.size() == 0) {
586  LogCvmfs(kLogCvmfs, kLogStderr, "no file chunks found for big file %s",
587  full_path.c_str());
588  retval = false;
589  }
590 
591  size_t aggregated_file_size = 0;
592  off_t next_offset = 0;
593 
594  for (unsigned j = 0; j < chunks.size(); ++j) {
595  const FileChunk this_chunk = chunks.At(j);
596  // check if the chunk boundaries fit together...
597  if (next_offset != this_chunk.offset()) {
598  LogCvmfs(kLogCvmfs, kLogStderr, "misaligned chunk offsets for %s",
599  full_path.c_str());
600  retval = false;
601  }
602  next_offset = this_chunk.offset() + this_chunk.size();
603  aggregated_file_size += this_chunk.size();
604 
605  // are all data chunks in the data store?
606  if (check_chunks_ && !entries[i].IsExternalFile()) {
607  const shash::Any &chunk_hash = this_chunk.content_hash();
608  // for performance reasons, only perform the check once
609  // and skip if the hash has been checked before
610  bool chunk_needs_check = true;
611  if (!no_duplicates_map_ && !duplicates_map_.Contains(chunk_hash)) {
612  duplicates_map_.Insert(chunk_hash, 1);
613  } else if (!no_duplicates_map_) {
614  chunk_needs_check = false;
615  }
616  if (chunk_needs_check) {
617  const string chunk_path = "data/" + chunk_hash.MakePath();
618  if (!Exists(chunk_path)) {
620  "partial data chunk %s (%s -> "
621  "offset: %ld | size: %lu) missing",
622  this_chunk.content_hash().ToStringWithSuffix().c_str(),
623  full_path.c_str(), this_chunk.offset(),
624  this_chunk.size());
625  retval = false;
626  }
627  }
628  }
629  }
630 
631  // is the aggregated chunk size equal to the actual file size?
632  if (aggregated_file_size != entries[i].size()) {
634  "chunks of file %s produce a size "
635  "mismatch. Calculated %zu bytes | %lu "
636  "bytes expected",
637  full_path.c_str(), aggregated_file_size, entries[i].size());
638  retval = false;
639  }
640  }
641  } // Loop through entries
642 
643  // Check if nested catalog marker has been found
644  if (!path.IsEmpty() && (path == catalog->mountpoint())
645  && !found_nested_marker) {
646  LogCvmfs(kLogCvmfs, kLogStderr, "nested catalog without marker at %s",
647  path.c_str());
648  retval = false;
649  }
650 
651  // Check directory linkcount
652  if (this_directory.linkcount() != num_subdirs + 2) {
654  "wrong linkcount for %s; "
655  "expected %u, got %u",
656  path.c_str(), num_subdirs + 2, this_directory.linkcount());
657  retval = false;
658  }
659 
660  // Check hardlink linkcounts
661  for (HardlinkMap::const_iterator i = hardlinks.begin(),
662  iEnd = hardlinks.end();
663  i != iEnd;
664  ++i) {
665  if (i->second[0].linkcount() != i->second.size()) {
667  "hardlink linkcount wrong for %s, "
668  "expected %lu, got %u",
669  (path.ToString() + "/" + i->second[0].name().ToString()).c_str(),
670  i->second.size(), i->second[0].linkcount());
671  retval = false;
672  }
673  }
674 
675  return retval;
676 }
677 
678 
679 string CommandCheck::DownloadPiece(const shash::Any catalog_hash) {
680  const string source = "data/" + catalog_hash.MakePath();
681  const string dest = temp_directory_ + "/" + catalog_hash.ToString();
682  const string url = repo_base_path_ + "/" + source;
683 
684  cvmfs::PathSink pathsink(dest);
685  download::JobInfo download_catalog(&url, true, false, &catalog_hash,
686  &pathsink);
687  const download::Failures retval =
688  download_manager()->Fetch(&download_catalog);
689  if (retval != download::kFailOk) {
690  LogCvmfs(kLogCvmfs, kLogStderr, "failed to download object %s (%d)",
691  catalog_hash.ToString().c_str(), retval);
692  return "";
693  }
694 
695  return dest;
696 }
697 
698 
699 string CommandCheck::DecompressPiece(const shash::Any catalog_hash) {
700  const string source = "data/" + catalog_hash.MakePath();
701  const string dest = temp_directory_ + "/" + catalog_hash.ToString();
702  if (!zlib::DecompressPath2Path(source, dest))
703  return "";
704 
705  return dest;
706 }
707 
708 
710  const shash::Any &catalog_hash,
711  const uint64_t catalog_size) {
712  string tmp_file;
713  if (!is_remote_)
714  tmp_file = DecompressPiece(catalog_hash);
715  else
716  tmp_file = DownloadPiece(catalog_hash);
717 
718  if (tmp_file == "") {
719  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load catalog %s",
720  catalog_hash.ToString().c_str());
721  return NULL;
722  }
723 
724  catalog::Catalog *catalog = catalog::Catalog::AttachFreely(path, tmp_file,
725  catalog_hash);
726  const int64_t catalog_file_size = GetFileSize(tmp_file);
727  if (catalog_file_size <= 0) {
728  LogCvmfs(kLogCvmfs, kLogStderr, "Error downloading catalog %s at %s %s",
729  catalog_hash.ToString().c_str(), path.c_str(), tmp_file.c_str());
730  assert(catalog_file_size > 0);
731  }
732  unlink(tmp_file.c_str());
733 
734  if ((catalog_size > 0) && (uint64_t(catalog_file_size) != catalog_size)) {
736  "catalog file size mismatch, "
737  "expected %" PRIu64 ", got %" PRIu64,
738  catalog_size, catalog_file_size);
739  delete catalog;
740  return NULL;
741  }
742 
743  return catalog;
744 }
745 
746 
747 bool CommandCheck::FindSubtreeRootCatalog(const string &subtree_path,
748  shash::Any *root_hash,
749  uint64_t *root_size) {
750  catalog::Catalog *current_catalog = FetchCatalog("", *root_hash);
751  if (current_catalog == NULL) {
752  return false;
753  }
754 
755  typedef vector<string> Tokens;
756  const Tokens path_tokens = SplitString(subtree_path, '/');
757 
758  string current_path = "";
759 
760  Tokens::const_iterator i = path_tokens.begin();
761  const Tokens::const_iterator iend = path_tokens.end();
762  for (; i != iend; ++i) {
763  if (i->empty()) {
764  continue;
765  }
766 
767  current_path += "/" + *i;
768  if (current_catalog->FindNested(
769  PathString(current_path), root_hash, root_size)) {
770  delete current_catalog;
771 
772  if (current_path.length() < subtree_path.length()) {
773  current_catalog = FetchCatalog(current_path, *root_hash);
774  if (current_catalog == NULL) {
775  break;
776  }
777  } else {
778  return true;
779  }
780  }
781  }
782  return false;
783 }
784 
785 
789 bool CommandCheck::InspectTree(const string &path,
790  const shash::Any &catalog_hash,
791  const uint64_t catalog_size,
792  const bool is_nested_catalog,
793  const catalog::DirectoryEntry *transition_point,
794  catalog::DeltaCounters *computed_counters) {
795  LogCvmfs(kLogCvmfs, kLogStdout | kLogInform, "[inspecting catalog] %s at %s",
796  catalog_hash.ToString().c_str(), path == "" ? "/" : path.c_str());
797 
798  const catalog::Catalog *catalog = FetchCatalog(
799  path, catalog_hash, catalog_size);
800  if (catalog == NULL) {
801  LogCvmfs(kLogCvmfs, kLogStderr, "failed to open catalog %s",
802  catalog_hash.ToString().c_str());
803  return false;
804  }
805 
806  int retval = true;
807 
808  if (catalog->root_prefix() != PathString(path.data(), path.length())) {
810  "root prefix mismatch; "
811  "expected %s, got %s",
812  path.c_str(), catalog->root_prefix().c_str());
813  retval = false;
814  }
815 
816  // Check transition point
817  catalog::DirectoryEntry root_entry;
818  if (!catalog->LookupPath(catalog->root_prefix(), &root_entry)) {
819  LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup root entry (%s)",
820  path.c_str());
821  retval = false;
822  }
823  if (!root_entry.IsDirectory()) {
824  LogCvmfs(kLogCvmfs, kLogStderr, "root entry not a directory (%s)",
825  path.c_str());
826  retval = false;
827  }
828  if (is_nested_catalog) {
829  if (transition_point != NULL
830  && !CompareEntries(*transition_point, root_entry, true, true)) {
832  "transition point and root entry differ (%s)", path.c_str());
833  retval = false;
834  }
835  if (!root_entry.IsNestedCatalogRoot()) {
837  "nested catalog root expected but not found (%s)", path.c_str());
838  retval = false;
839  }
840  } else {
841  if (root_entry.IsNestedCatalogRoot()) {
843  "nested catalog root found but not expected (%s)", path.c_str());
844  retval = false;
845  }
846  }
847 
848  // Traverse the catalog
849  set<PathString> bind_mountpoints;
850  if (!Find(catalog, PathString(path.data(), path.length()), computed_counters,
851  &bind_mountpoints)) {
852  retval = false;
853  }
854 
855  // Check number of entries
856  if (root_entry.HasXattrs())
857  computed_counters->self.xattrs++;
858  const uint64_t num_found_entries = 1 + computed_counters->self.regular_files
859  + computed_counters->self.symlinks
860  + computed_counters->self.specials
861  + computed_counters->self.directories;
862  if (num_found_entries != catalog->GetNumEntries()) {
864  "dangling entries in catalog, "
865  "expected %" PRIu64 ", got %" PRIu64,
866  catalog->GetNumEntries(), num_found_entries);
867  retval = false;
868  }
869 
870  // Recurse into nested catalogs
872  &nested_catalogs = catalog->ListNestedCatalogs();
874  own_nested_catalogs = catalog->ListOwnNestedCatalogs();
875  if (own_nested_catalogs.size()
876  != static_cast<uint64_t>(computed_counters->self.nested_catalogs)) {
878  "number of nested catalogs does not match;"
879  " expected %lu, got %lu",
880  computed_counters->self.nested_catalogs,
881  own_nested_catalogs.size());
882  retval = false;
883  }
884  set<PathString> nested_catalog_paths;
885  for (catalog::Catalog::NestedCatalogList::const_iterator
886  i = nested_catalogs.begin(),
887  iEnd = nested_catalogs.end();
888  i != iEnd; ++i) {
889  nested_catalog_paths.insert(i->mountpoint);
890  }
891  if (nested_catalog_paths.size() != nested_catalogs.size()) {
893  "duplicates among nested catalogs and bind mountpoints");
894  retval = false;
895  }
896 
897  for (catalog::Catalog::NestedCatalogList::const_iterator
898  i = nested_catalogs.begin(),
899  iEnd = nested_catalogs.end();
900  i != iEnd; ++i) {
901  if (bind_mountpoints.find(i->mountpoint) != bind_mountpoints.end()) {
902  catalog::DirectoryEntry bind_mountpoint;
903  const PathString mountpoint("/" + i->mountpoint.ToString().substr(1));
904  if (!catalog->LookupPath(mountpoint, &bind_mountpoint)) {
905  LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup bind mountpoint %s",
906  mountpoint.c_str());
907  retval = false;
908  }
909  LogCvmfs(kLogCvmfs, kLogDebug, "skipping bind mountpoint %s",
910  i->mountpoint.c_str());
911  continue;
912  }
913  catalog::DirectoryEntry nested_transition_point;
914  if (!catalog->LookupPath(i->mountpoint, &nested_transition_point)) {
915  LogCvmfs(kLogCvmfs, kLogStderr, "failed to lookup transition point %s",
916  i->mountpoint.c_str());
917  retval = false;
918  } else {
919  catalog::DeltaCounters nested_counters;
920  const bool is_nested = true;
921  if (!InspectTree(i->mountpoint.ToString(), i->hash, i->size, is_nested,
922  &nested_transition_point, &nested_counters))
923  retval = false;
924  nested_counters.PopulateToParent(computed_counters);
925  }
926  }
927 
928  // Check statistics counters
929  // Additionally account for root directory
930  computed_counters->self.directories++;
931  catalog::Counters compare_counters;
932  compare_counters.ApplyDelta(*computed_counters);
933  const catalog::Counters stored_counters = catalog->GetCounters();
934  if (!CompareCounters(compare_counters, stored_counters)) {
935  LogCvmfs(kLogCvmfs, kLogStderr, "statistics counter mismatch [%s]",
936  catalog_hash.ToString().c_str());
937  retval = false;
938  }
939 
940  delete catalog;
941  return retval;
942 }
943 
944 
946  string tag_name;
947  string subtree_path = "";
948  string pubkey_path = "";
949  string repo_name = "";
950  string reflog_chksum_path = "";
951 
952  temp_directory_ = (args.find('t') != args.end()) ? *args.find('t')->second
953  : "/tmp";
954  if (args.find('n') != args.end())
955  tag_name = *args.find('n')->second;
956  if (args.find('c') != args.end())
957  check_chunks_ = true;
958  if (args.find('d') != args.end())
959  no_duplicates_map_ = true;
960  if (args.find('l') != args.end()) {
961  const unsigned log_level = kLogLevel0
962  << String2Uint64(*args.find('l')->second);
963  if (log_level > kLogNone) {
964  LogCvmfs(kLogCvmfs, kLogStderr, "invalid log level");
965  return 1;
966  }
967  SetLogVerbosity(static_cast<LogLevels>(log_level));
968  }
969  if (args.find('k') != args.end())
970  pubkey_path = *args.find('k')->second;
971  if (DirectoryExists(pubkey_path))
972  pubkey_path = JoinStrings(FindFilesBySuffix(pubkey_path, ".pub"), ":");
973  if (args.find('N') != args.end())
974  repo_name = *args.find('N')->second;
975 
976  repo_base_path_ = MakeCanonicalPath(*args.find('r')->second);
977  if (args.find('s') != args.end())
978  subtree_path = MakeCanonicalPath(*args.find('s')->second);
979  if (args.find('R') != args.end())
980  reflog_chksum_path = *args.find('R')->second;
981 
982  // Repository can be HTTP address or on local file system
984 
985  // initialize the (swissknife global) download and signature managers
986  if (is_remote_) {
987  const bool follow_redirects = (args.count('L') > 0);
988  const string proxy = (args.count('@') > 0) ? *args.find('@')->second : "";
989  if (!this->InitDownloadManager(follow_redirects, proxy)) {
990  return 1;
991  }
992 
993  if (pubkey_path.empty() || repo_name.empty()) {
995  "please provide pubkey and repo name for "
996  "remote repositories");
997  return 1;
998  }
999 
1000  if (!this->InitSignatureManager(pubkey_path)) {
1001  return 1;
1002  }
1003  }
1004 
1005  // Load Manifest
1007  bool successful = true;
1008 
1009  if (is_remote_) {
1010  manifest = FetchRemoteManifest(repo_base_path_, repo_name);
1011  } else {
1012  if (chdir(repo_base_path_.c_str()) != 0) {
1013  LogCvmfs(kLogCvmfs, kLogStderr, "failed to switch to directory %s",
1014  repo_base_path_.c_str());
1015  return 1;
1016  }
1017  manifest = OpenLocalManifest(".cvmfspublished");
1018  }
1019 
1020  if (!manifest.IsValid()) {
1021  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository manifest");
1022  return 1;
1023  }
1024 
1025  // Check meta-info object
1026  if (!manifest->meta_info().IsNull()) {
1027  string tmp_file;
1028  if (!is_remote_)
1029  tmp_file = DecompressPiece(manifest->meta_info());
1030  else
1031  tmp_file = DownloadPiece(manifest->meta_info());
1032  if (tmp_file == "") {
1033  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load repository metainfo %s",
1034  manifest->meta_info().ToString().c_str());
1035  return 1;
1036  }
1037  unlink(tmp_file.c_str());
1038  }
1039 
1040  shash::Any reflog_hash;
1041  if (!reflog_chksum_path.empty()) {
1042  if (!manifest::Reflog::ReadChecksum(reflog_chksum_path, &reflog_hash)) {
1043  LogCvmfs(kLogCvmfs, kLogStderr, "failed to read reflog checksum file");
1044  return 1;
1045  }
1046  } else {
1047  reflog_hash = manifest->reflog_hash();
1048  }
1049 
1050  if (Exists(".cvmfsreflog")) {
1051  if (reflog_hash.IsNull()) {
1052  // If there is a reflog, we want to check it
1054  ".cvmfsreflog present but no checksum provided, aborting");
1055  return 1;
1056  }
1057  const bool retval = InspectReflog(reflog_hash, manifest.weak_ref());
1058  if (!retval) {
1059  LogCvmfs(kLogCvmfs, kLogStderr, "failed to verify reflog");
1060  return 1;
1061  }
1062  } else {
1063  if (!reflog_hash.IsNull()) {
1064  // There is a checksum but no reflog; possibly the checksum is for the
1065  // from the manifest for the stratum 0 reflog
1066  if (!reflog_chksum_path.empty()) {
1068  "local reflog checksum set but reflog itself is missing, "
1069  "aborting");
1070  return 1;
1071  }
1072  }
1073  }
1074 
1075  // Load history
1077  if (!manifest->history().IsNull()) {
1078  string tmp_file;
1079  if (!is_remote_)
1080  tmp_file = DecompressPiece(manifest->history());
1081  else
1082  tmp_file = DownloadPiece(manifest->history());
1083  if (tmp_file == "") {
1084  LogCvmfs(kLogCvmfs, kLogStderr, "failed to load history database %s",
1085  manifest->history().ToString().c_str());
1086  return 1;
1087  }
1088  tag_db = history::SqliteHistory::Open(tmp_file);
1089  if (!tag_db.IsValid()) {
1090  LogCvmfs(kLogCvmfs, kLogStderr, "failed to open history database %s",
1091  manifest->history().ToString().c_str());
1092  return 1;
1093  }
1094  tag_db->TakeDatabaseFileOwnership();
1095  successful = InspectHistory(tag_db.weak_ref()) && successful;
1096  }
1097 
1098  if (manifest->has_alt_catalog_path()) {
1099  if (!Exists(manifest->certificate().MakeAlternativePath())) {
1101  "failed to find alternative certificate link %s",
1102  manifest->certificate().MakeAlternativePath().c_str());
1103  return 1;
1104  }
1105  if (!Exists(manifest->catalog_hash().MakeAlternativePath())) {
1107  "failed to find alternative catalog link %s",
1108  manifest->catalog_hash().MakeAlternativePath().c_str());
1109  return 1;
1110  }
1111  }
1112 
1113  shash::Any root_hash = manifest->catalog_hash();
1114  uint64_t root_size = manifest->catalog_size();
1115  if (tag_name != "") {
1116  if (!tag_db.IsValid()) {
1117  LogCvmfs(kLogCvmfs, kLogStderr, "no history");
1118  return 1;
1119  }
1121  const bool retval = tag_db->GetByName(tag_name, &tag);
1122  if (!retval) {
1123  LogCvmfs(kLogCvmfs, kLogStderr, "no such tag: %s", tag_name.c_str());
1124  return 1;
1125  }
1126  root_hash = tag.root_hash;
1127  root_size = tag.size;
1128  LogCvmfs(kLogCvmfs, kLogStdout, "Inspecting repository tag %s",
1129  tag_name.c_str());
1130  }
1131 
1132  const bool is_nested_catalog = (!subtree_path.empty());
1133  if (is_nested_catalog
1134  && !FindSubtreeRootCatalog(subtree_path, &root_hash, &root_size)) {
1135  LogCvmfs(kLogCvmfs, kLogStderr, "cannot find nested catalog at %s",
1136  subtree_path.c_str());
1137  return 1;
1138  }
1139 
1140 
1141  catalog::DeltaCounters computed_counters;
1142  successful = InspectTree(subtree_path,
1143  root_hash,
1144  root_size,
1145  is_nested_catalog,
1146  NULL,
1147  &computed_counters)
1148  && successful;
1149 
1150  if (!successful) {
1151  LogCvmfs(kLogCvmfs, kLogStderr, "CATALOG PROBLEMS OR OTHER ERRORS FOUND");
1152  return 1;
1153  }
1154 
1155  LogCvmfs(kLogCvmfs, kLogStdout, "no problems found");
1156  return 0;
1157 }
1158 
1159 } // namespace swissknife
uint32_t linkcount() const
void SetLogVerbosity(const LogLevels max_level)
Definition: logging.cc:258
void TakeDatabaseFileOwnership()
Definition: reflog.cc:294
bool IsExternalFile() const
bool ContainsHistory(const shash::Any &history) const
Definition: reflog.cc:232
bool InspectHistory(history::History *history)
bool IsNull() const
Definition: hash.h:371
Differences CompareTo(const DirectoryEntry &other) const
const manifest::Manifest * manifest() const
Definition: repository.h:125
ShortString< kDefaultMaxName, 1 > NameString
Definition: shortstring.h:214
Item At(const size_t index) const
Definition: bigvector.h:48
time_t mtime() const
bool IsDirectory() const
static SqliteHistory * Open(const std::string &file_name)
bool ListPathChunks(const PathString &path, const shash::Algorithms interpret_hashes_as, FileChunkList *chunks) const
Definition: catalog.h:144
T * weak_ref() const
Definition: pointer.h:46
const int kDefaultFileMode
Definition: posix.h:32
static bool ReadChecksum(const std::string &path, shash::Any *checksum)
Definition: reflog.cc:48
#define PANIC(...)
Definition: exception.h:29
CVMFS_EXPORT const LogSource source
Definition: exception.h:33
FILE * CreateTempFile(const std::string &path_prefix, const int mode, const char *open_flags, std::string *final_path)
Definition: posix.cc:1014
uint64_t size() const
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:356
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:241
const history::History * history() const
std::string ToStringWithSuffix() const
Definition: hash.h:296
bool LookupPath(const PathString &path, DirectoryEntry *dirent) const
Definition: catalog.h:124
bool CompareEntries(const catalog::DirectoryEntry &a, const catalog::DirectoryEntry &b, const bool compare_names, const bool is_transition_point=false)
void ApplyDelta(const DeltaCounters &delta)
bool IsHttpUrl(const std::string &path)
Definition: posix.cc:167
bool ListingPath(const PathString &path, DirectoryEntryList *listing, const bool expand_symlink=true) const
Definition: catalog.h:132
std::map< std::string, const Counters_t * > FieldsMap
const char kSuffixMicroCatalog
Definition: hash.h:56
manifest::Manifest * FetchRemoteManifest(const std::string &repository_url, const std::string &repository_name, const shash::Any &base_hash=shash::Any()) const
Definition: server_tool.cc:121
const shash::Any & content_hash() const
Definition: file_chunk.h:37
catalog::Catalog * FetchCatalog(const std::string &path, const shash::Any &catalog_hash, const uint64_t catalog_size=0)
assert((mem||(size==0))&&"Out Of Memory")
bool InspectReflog(const shash::Any &reflog_hash, manifest::Manifest *manifest)
Algorithms algorithm
Definition: hash.h:122
shash::Any checksum() const
static uint32_t hasher_any(const shash::Any &key)
unsigned int mode() const
unsigned char digest[digest_size_]
Definition: hash.h:121
bool Find(const catalog::Catalog *catalog, const PathString &path, catalog::DeltaCounters *computed_counters, std::set< PathString > *bind_mountpoints)
bool ContainsMetainfo(const shash::Any &metainfo) const
Definition: reflog.cc:238
bool CopyPath2File(const std::string &src, FILE *fdest)
Definition: compression.cc:46
bool SymlinkExists(const std::string &path)
Definition: posix.cc:833
static Reflog * Open(const std::string &database_path)
Definition: reflog.cc:17
bool IsValid(const std::string &input) const
Definition: sanitizer.cc:112
bool IsNestedCatalogRoot() const
bool FileExists(const std::string &path)
Definition: posix.cc:803
std::string DownloadPiece(const shash::Any catalog_hash)
std::vector< DirectoryEntry > DirectoryEntryList
NameString name() const
int Main(const ArgumentList &args)
virtual bool ListBranches(std::vector< Branch > *branches) const =0
std::string DecompressPiece(const shash::Any catalog_hash)
bool HasXattrs() const
download::DownloadManager * download_manager() const
Definition: server_tool.cc:96
virtual bool List(std::vector< Tag > *tags) const =0
vector< string > SplitString(const string &str, char delim)
Definition: string.cc:306
bool Exists(const std::string &file)
off_t offset() const
Definition: file_chunk.h:38
shash::Any certificate() const
Definition: manifest.h:127
void PopulateToParent(DeltaCounters *parent) const
std::string FetchPath(const std::string &path)
LinkString symlink() const
void Insert(const Key &key, const Value &value)
Definition: smallhash.h:106
PathString mountpoint() const
Definition: catalog.h:175
static void HashDatabase(const std::string &database_path, shash::Any *hash_reflog)
Definition: reflog.cc:309
shash::Any catalog_hash() const
Definition: manifest.h:125
void Append(const char *chars, const unsigned length)
Definition: shortstring.h:80
SmallHashDynamic< shash::Any, char > duplicates_map_
bool FindSubtreeRootCatalog(const std::string &subtree_path, shash::Any *root_hash, uint64_t *root_size)
bool DirectoryExists(const std::string &path)
Definition: posix.cc:824
bool CompareCounters(const catalog::Counters &a, const catalog::Counters &b)
bool Contains(const Key &key) const
Definition: smallhash.h:99
manifest::Manifest * OpenLocalManifest(const std::string path) const
Definition: server_tool.cc:106
bool InitSignatureManager(const std::string &pubkey_path, const std::string &certificate_path="", const std::string &private_key_path="")
Definition: server_tool.cc:44
std::string ToString() const
Definition: shortstring.h:139
std::vector< NestedCatalog > NestedCatalogList
Definition: catalog.h:204
bool ContainsCatalog(const shash::Any &catalog) const
Definition: reflog.cc:217
bool IsEmpty() const
Definition: shortstring.h:137
uint64_t String2Uint64(const string &value)
Definition: string.cc:240
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
ShortString< kDefaultMaxPath, 0 > PathString
Definition: shortstring.h:213
Failures Fetch(JobInfo *info)
Definition: download.cc:1984
size_t size() const
Definition: file_chunk.h:39
shash::Any history() const
Definition: manifest.h:128
bool InspectTree(const std::string &path, const shash::Any &catalog_hash, const uint64_t catalog_size, const bool is_nested_catalog, const catalog::DirectoryEntry *transition_point, catalog::DeltaCounters *computed_counters)
shash::Any root_hash
Definition: history.h:93
static Catalog * AttachFreely(const std::string &imaginary_mountpoint, const std::string &file, const shash::Any &catalog_hash, Catalog *parent=NULL, const bool is_nested=false)
Definition: catalog.cc:29
bool FindNested(const PathString &mountpoint, shash::Any *hash, uint64_t *size) const
Definition: catalog.cc:680
bool DecompressPath2Path(const string &src, const string &dest)
Definition: compression.cc:383
int64_t GetFileSize(const std::string &path)
Definition: posix.cc:812
const int kLogVerboseMsg
uid_t uid() const
gid_t gid() const
std::string MakePath() const
Definition: hash.h:306
std::string MakeCanonicalPath(const std::string &path)
Definition: posix.cc:98
void Init(uint32_t expected_size, Key empty, uint32_t(*hasher)(const Key &key))
Definition: smallhash.h:58
bool InitDownloadManager(const bool follow_redirects, const std::string &proxy, const unsigned max_pool_handles=1)
Definition: server_tool.cc:17
const char * c_str() const
Definition: shortstring.h:143
uint32_t hardlink_group() const
bool ContainsCertificate(const shash::Any &certificate) const
Definition: reflog.cc:210
static void size_t size
Definition: smalloc.h:54
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
Definition: posix.cc:1129
unsigned int Differences
shash::Any meta_info() const
Definition: manifest.h:132
size_t size() const
Definition: bigvector.h:117
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545