CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_scrub.cc
Go to the documentation of this file.
1 
5 #define __STDC_FORMAT_MACROS
6 
7 #include "swissknife_scrub.h"
8 
9 #include "util/fs_traversal.h"
10 #include "util/logging.h"
11 #include "util/posix.h"
12 #include "util/smalloc.h"
13 #include "util/string.h"
14 
15 using namespace std; // NOLINT
16 
17 namespace swissknife {
18 
19 const size_t kHashSubtreeLength = 2;
20 const std::string kTxnDirectoryName = "txn";
21 
22 CommandScrub::CommandScrub() : machine_readable_output_(false), alerts_(0) {
23  int retval = pthread_mutex_init(&alerts_mutex_, NULL);
24  assert(retval == 0);
25 }
26 
27 
28 CommandScrub::~CommandScrub() { pthread_mutex_destroy(&alerts_mutex_); }
29 
30 
33  r.push_back(Parameter::Mandatory('r', "repository directory"));
34  r.push_back(Parameter::Switch('m', "machine readable output"));
35  return r;
36 }
37 
39  switch (t) {
41  return "unexpected regular file";
43  return "unexpected symlink";
45  return "unexpected subdir in CAS subdir";
47  return "unknown object modifier";
49  return "malformed content hash";
51  return "malformed CAS subdir length";
53  return "mismatch of file name and content hash";
54  default:
55  return "unknown alert";
56  }
57 }
58 
// Handles one regular file: reports files in unexpected places, derives the
// content hash encoded in the file's path and name, and prepares an ingestion
// source for the file.
//
// relative_path: directory of the file, relative to repo_path_ (empty for
//                files directly below repo_path_)
// file_name:     name of the file; must not be empty (asserted)
//
// NOTE(review): this listing omits original line 87, so the call that
// receives the arguments shown on line 88 is not visible here.
59 void CommandScrub::FileCallback(const std::string &relative_path,
60  const std::string &file_name) {
61  assert(!file_name.empty());
62 
// A file directly below repo_path_ is reported as an unexpected file.
63  if (relative_path.empty()) {
64  PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
65  return;
66  }
67  if (relative_path == kTxnDirectoryName) {
68  // transaction directory should be ignored
69  return;
70  }
71 
// An empty hash string from CheckPathAndExtractHash() ends processing of
// this file without any further alert being raised here.
72  const string full_path = MakeFullPath(relative_path, file_name);
73  const std::string hash_string = CheckPathAndExtractHash(relative_path,
74  file_name, full_path);
75  if (hash_string.empty()) {
76  return;
77  }
78 
// Reject strings that are not valid hex representations of a hash.
79  if (!shash::HexPtr(hash_string).IsValid()) {
80  PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
81  return;
82  }
83 
// The hash parsed from the name supplies the algorithm and suffix that are
// passed on together with the ingestion source.
// NOTE(review): the source is allocated with a raw new and not freed in the
// visible code; presumably the callee takes ownership -- TODO confirm.
84  shash::Any hash_from_name = shash::MkFromSuffixedHexPtr(
85  shash::HexPtr(hash_string));
86  IngestionSource *full_path_source = new FileIngestionSource(full_path);
88  full_path_source, hash_from_name.algorithm, hash_from_name.suffix);
89 }
90 
91 
// Handles one directory: skips the whitelisted receiver directory, then
// checks the nesting depth and the length of the directory name.
//
// relative_path: parent of the directory, relative to repo_path_
// dir_name:      name of the directory
//
// NOTE(review): this listing omits original lines 103 and 110, i.e. the
// statements executed inside the two checks below are not fully visible.
92 void CommandScrub::DirCallback(const std::string &relative_path,
93  const std::string &dir_name) {
94  const string full_path = MakeFullPath(relative_path, dir_name);
95 
96  // The directory "/srv/cvmfs/<REPO_NAME>/data/txn/receiver" is whitelisted
97  if (HasSuffix(full_path, "data/txn/receiver", false)) {
98  return;
99  }
100 
101  // Check for nested subdirs
// A non-empty relative_path means this directory is not directly below
// repo_path_; handling stops here in that case.
102  if (relative_path.size() > 0) {
104  return;
105  }
106 
107  // Check CAS hash subdirectory name length
// Names are accepted if they are empty, exactly kHashSubtreeLength characters
// long, or equal to kTxnDirectoryName.
108  if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength
109  && dir_name != kTxnDirectoryName) {
111  }
112 }
113 
// Handles one symbolic link by building its full path below repo_path_.
//
// relative_path: parent of the symlink, relative to repo_path_
// symlink_name:  name of the symlink
//
// NOTE(review): this listing omits original line 117, so the statement that
// consumes full_path is not visible here.
114 void CommandScrub::SymlinkCallback(const std::string &relative_path,
115  const std::string &symlink_name) {
116  const string full_path = MakeFullPath(relative_path, symlink_name);
118 }
119 
// Compares the hash carried in a scrubbing result with the hash encoded in
// the path and name of the scrubbed file.
//
// scrubbing_result: provides the file's full path (path) and a hash (hash)
//
// NOTE(review): this listing omits original line 135, so the opening of the
// statement executed on a mismatch is not visible here.
120 void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
// Split the full path back into the pieces CheckPathAndExtractHash() expects.
121  const string full_path = scrubbing_result.path;
122  const string file_name = GetFileName(full_path);
123  const string parent_path = GetParentPath(full_path);
124  const string relative_path = MakeRelativePath(parent_path);
125  assert(!file_name.empty());
126 
// The hash string is asserted to be non-empty and valid hex at this point.
127  const std::string hash_string = CheckPathAndExtractHash(relative_path,
128  file_name, full_path);
129  assert(!hash_string.empty());
130  assert(shash::HexPtr(hash_string).IsValid());
131 
132 
// On a mismatch, the string form of the result's hash is passed along.
133  if (scrubbing_result.hash
134  != shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string))) {
136  scrubbing_result.hash.ToString());
137  }
138 }
139 
141  const std::string &relative_path,
142  const std::string &file_name,
143  const std::string &full_path) const {
144  // check for a valid object modifier on the end of the file name
145  const char last_character = *(file_name.end() - 1);
146  bool has_object_modifier = false;
147  if (std::isupper(last_character)) {
148  has_object_modifier = true;
149  }
150  if (has_object_modifier && last_character != shash::kSuffixHistory
151  && last_character != shash::kSuffixCatalog
152  && last_character != shash::kSuffixPartial
153  && last_character != shash::kSuffixCertificate
154  && last_character != shash::kSuffixMicroCatalog
155  && last_character != shash::kSuffixMetainfo) {
157  return "";
158  }
159 
160  const string hash_string = GetFileName(GetParentPath(full_path))
161  + (has_object_modifier
162  ? file_name.substr(0,
163  file_name.length() - 1)
164  : file_name);
165  return hash_string;
166 }
167 
168 
170  repo_path_ = MakeCanonicalPath(*args.find('r')->second);
171  machine_readable_output_ = (args.find('m') != args.end());
172 
175 
176  // initialize file system recursion engine
177  FileSystemTraversal<CommandScrub> traverser(this, repo_path_, true);
181  traverser.Recurse(repo_path_);
182 
183  // wait for reader to finish all jobs
185 
186  return (alerts_ == 0) ? 0 : 1;
187 }
188 
190  const std::string &path,
191  const std::string &affected_hash) const {
193 
194  const char *msg = Alerts::ToString(type);
196  LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
197  ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
198  path.c_str());
199  } else {
200  LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
201  }
202 
203  ++alerts_;
204 }
205 
206 std::string CommandScrub::MakeFullPath(const std::string &relative_path,
207  const std::string &file_name) const {
208  return (relative_path.empty())
209  ? repo_path_ + "/" + file_name
210  : repo_path_ + "/" + relative_path + "/" + file_name;
211 }
212 
213 std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
214  assert(HasPrefix(full_path, repo_path_ + "/", false));
215  return full_path.substr(repo_path_.length() + 1);
216 }
217 
219  LogCvmfs(kLogUtility, kLogStdout, "to come...");
220 }
221 
222 } // namespace swissknife
const size_t kHashSubtreeLength
CallbackPtr RegisterListener(typename BoundClosure< ParamT, DelegateT, ClosureDataT >::CallbackMethod method, DelegateT *delegate, ClosureDataT data)
shash::Any hash
Definition: pipeline.h:97
void FileCallback(const std::string &relative_path, const std::string &file_name)
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
NameString GetFileName(const PathString &path)
Definition: shortstring.cc:28
VoidCallback fn_new_symlink
Definition: fs_traversal.h:47
void DirCallback(const std::string &relative_path, const std::string &dir_name)
void Recurse(const std::string &dir_path) const
Definition: fs_traversal.h:110
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
const char kSuffixCertificate
Definition: hash.h:59
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:241
virtual ParameterList GetParams() const
void OnFileHashed(const ScrubbingResult &scrubbing_result)
const char kSuffixMicroCatalog
Definition: hash.h:56
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
Definition: fs_traversal.h:36
void Process(IngestionSource *source, shash::Algorithms hash_algorithm, shash::Suffix hash_suffix)
Definition: pipeline.cc:239
void SymlinkCallback(const std::string &relative_path, const std::string &symlink_name)
assert((mem||(size==0))&&"Out Of Memory")
std::string MakeFullPath(const std::string &relative_path, const std::string &file_name) const
Algorithms algorithm
Definition: hash.h:122
void PrintAlert(const Alerts::Type type, const std::string &path, const std::string &affected_hash="") const
VoidCallback fn_new_file
Definition: fs_traversal.h:46
const char kSuffixPartial
Definition: hash.h:57
VoidCallback fn_enter_dir
Definition: fs_traversal.h:44
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
bool HasSuffix(const std::string &str, const std::string &suffix, const bool ignore_case)
Definition: string.cc:296
const char kSuffixCatalog
Definition: hash.h:54
std::string MakeRelativePath(const std::string &full_path)
ScrubbingPipeline pipeline_scrubbing_
pthread_mutex_t alerts_mutex_
const char kSuffixMetainfo
Definition: hash.h:60
bool HasPrefix(const string &str, const string &prefix, const bool ignore_case)
Definition: string.cc:279
const char kSuffixHistory
Definition: hash.h:55
std::string CheckPathAndExtractHash(const std::string &relative_path, const std::string &file_name, const std::string &full_path) const
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
Any MkFromSuffixedHexPtr(const HexPtr hex)
Definition: hash.cc:104
Definition: mutex.h:42
PathString GetParentPath(const PathString &path)
Definition: shortstring.cc:14
static const char * ToString(const Type t)
int Main(const ArgumentList &args)
Suffix suffix
Definition: hash.h:123
std::string MakeCanonicalPath(const std::string &path)
Definition: posix.cc:98
bool IsValid() const
Definition: hash.cc:36
const std::string kTxnDirectoryName
std::string path
Definition: pipeline.h:96
void ShowAlertsHelpMessage() const
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545