CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_scrub.cc
Go to the documentation of this file.
1 
5 #define __STDC_FORMAT_MACROS
6 
7 #include "swissknife_scrub.h"
8 #include "cvmfs_config.h"
9 
10 #include "util/fs_traversal.h"
11 #include "util/logging.h"
12 #include "util/posix.h"
13 #include "util/smalloc.h"
14 #include "util/string.h"
15 
16 using namespace std; // NOLINT
17 
18 namespace swissknife {
19 
// Length that DirCallback compares top-level directory names against when
// checking CAS hash subdirectory names.
20 const size_t kHashSubtreeLength = 2;
// Name of the transaction directory: FileCallback ignores files whose relative
// path equals it, and DirCallback excludes it from the name-length condition.
21 const std::string kTxnDirectoryName = "txn";
22 
23 CommandScrub::CommandScrub()
24  : machine_readable_output_(false)
25  , alerts_(0)
26 {
27  int retval = pthread_mutex_init(&alerts_mutex_, NULL);
28  assert(retval == 0);
29 }
30 
31 
33  pthread_mutex_destroy(&alerts_mutex_);
34 }
35 
36 
39  r.push_back(Parameter::Mandatory('r', "repository directory"));
40  r.push_back(Parameter::Switch('m', "machine readable output"));
41  return r;
42 }
43 
45  switch (t) {
47  return "unexpected regular file";
49  return "unexpected symlink";
51  return "unexpected subdir in CAS subdir";
53  return "unknown object modifier";
55  return "malformed content hash";
57  return "malformed CAS subdir length";
59  return "mismatch of file name and content hash";
60  default:
61  return "unknown alert";
62  }
63 }
64 
66  const std::string &relative_path,
67  const std::string &file_name)
68 {
69  assert(!file_name.empty());
70 
71  if (relative_path.empty()) {
72  PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
73  return;
74  }
75  if (relative_path == kTxnDirectoryName) {
76  // transaction directory should be ignored
77  return;
78  }
79 
80  const string full_path = MakeFullPath(relative_path, file_name);
81  const std::string hash_string =
82  CheckPathAndExtractHash(relative_path, file_name, full_path);
83  if (hash_string.empty()) {
84  return;
85  }
86 
87  if (!shash::HexPtr(hash_string).IsValid()) {
88  PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
89  return;
90  }
91 
92  shash::Any hash_from_name =
94  IngestionSource* full_path_source = new FileIngestionSource(full_path);
96  full_path_source,
97  hash_from_name.algorithm,
98  hash_from_name.suffix);
99 }
100 
101 
103  const std::string &relative_path,
104  const std::string &dir_name)
105 {
106  const string full_path = MakeFullPath(relative_path, dir_name);
107 
108  // The directory "/srv/cvmfs/<REPO_NAME>/data/txn/receiver" is whitelisted
109  if (HasSuffix(full_path, "data/txn/receiver", false)) {
110  return;
111  }
112 
113  // Check for nested subdirs
114  if (relative_path.size() > 0) {
116  return;
117  }
118 
119  // Check CAS hash subdirectory name length
120  if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength &&
121  dir_name != kTxnDirectoryName) {
123  }
124 }
125 
// Callback taking the relative directory and the name of a symlink.
// Builds the symlink's full path via MakeFullPath().
// NOTE(review): the statement that consumes full_path (listing line 129) is
// not visible in this extract -- presumably it reports the symlink through
// PrintAlert(); confirm against the complete source.
126 void CommandScrub::SymlinkCallback(const std::string &relative_path,
127  const std::string &symlink_name) {
128  const string full_path = MakeFullPath(relative_path, symlink_name);
130 }
131 
// Handles a ScrubbingResult, which carries the path of a file and a hash for
// it. The hash string is re-derived from the path and checked by assertion.
// NOTE(review): listing lines 146 and 148 are missing from this extract, so
// the right-hand side of the final comparison and the call that receives
// scrubbing_result.hash.ToString() are not visible -- presumably the hash
// parsed from the file name and a PrintAlert() call; confirm against the
// complete source.
132 void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
// Split the reported path into file name, parent directory and the parent's
// path relative to repo_path_.
133  const string full_path = scrubbing_result.path;
134  const string file_name = GetFileName(full_path);
135  const string parent_path = GetParentPath(full_path);
136  const string relative_path = MakeRelativePath(parent_path);
137  assert(!file_name.empty());
138 
// Same extraction FileCallback performs; here a non-empty, valid hex string is
// required by assertion rather than handled as an error.
139  const std::string hash_string =
140  CheckPathAndExtractHash(relative_path, file_name, full_path);
141  assert(!hash_string.empty());
142  assert(shash::HexPtr(hash_string).IsValid());
143 
144 
// Compare the hash from the scrubbing result with a value on the elided line.
145  if (scrubbing_result.hash !=
147  {
149  scrubbing_result.hash.ToString());
150  }
151 }
152 
154  const std::string &relative_path,
155  const std::string &file_name,
156  const std::string &full_path) const
157 {
158  // check for a valid object modifier on the end of the file name
159  const char last_character = *(file_name.end() - 1);
160  bool has_object_modifier = false;
161  if (std::isupper(last_character)) {
162  has_object_modifier = true;
163  }
164  if (has_object_modifier && last_character != shash::kSuffixHistory &&
165  last_character != shash::kSuffixCatalog &&
166  last_character != shash::kSuffixPartial &&
167  last_character != shash::kSuffixCertificate &&
168  last_character != shash::kSuffixMicroCatalog &&
169  last_character != shash::kSuffixMetainfo) {
171  return "";
172  }
173 
174  const string hash_string =
175  GetFileName(GetParentPath(full_path)) +
176  (has_object_modifier ? file_name.substr(0, file_name.length() - 1)
177  : file_name);
178  return hash_string;
179 }
180 
181 
183  repo_path_ = MakeCanonicalPath(*args.find('r')->second);
184  machine_readable_output_ = (args.find('m') != args.end());
185 
188 
189  // initialize file system recursion engine
190  FileSystemTraversal<CommandScrub> traverser(this, repo_path_, true);
194  traverser.Recurse(repo_path_);
195 
196  // wait for reader to finish all jobs
198 
199  return (alerts_ == 0) ? 0 : 1;
200 }
201 
203  const Alerts::Type type,
204  const std::string &path,
205  const std::string &affected_hash) const
206 {
208 
209  const char *msg = Alerts::ToString(type);
211  LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
212  ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
213  path.c_str());
214  } else {
215  LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
216  }
217 
218  ++alerts_;
219 }
220 
221 std::string CommandScrub::MakeFullPath(const std::string &relative_path,
222  const std::string &file_name) const {
223  return (relative_path.empty())
224  ? repo_path_ + "/" + file_name
225  : repo_path_ + "/" + relative_path + "/" + file_name;
226 }
227 
228 std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
229  assert(HasPrefix(full_path, repo_path_ + "/", false));
230  return full_path.substr(repo_path_.length() + 1);
231 }
232 
234  LogCvmfs(kLogUtility, kLogStdout, "to come...");
235 }
236 
237 } // namespace swissknife
const size_t kHashSubtreeLength
CallbackPtr RegisterListener(typename BoundClosure< ParamT, DelegateT, ClosureDataT >::CallbackMethod method, DelegateT *delegate, ClosureDataT data)
shash::Any hash
Definition: pipeline.h:97
void FileCallback(const std::string &relative_path, const std::string &file_name)
static Parameter Switch(const char key, const std::string &desc)
Definition: swissknife.h:44
NameString GetFileName(const PathString &path)
Definition: shortstring.cc:29
VoidCallback fn_new_symlink
Definition: fs_traversal.h:48
void DirCallback(const std::string &relative_path, const std::string &dir_name)
void Recurse(const std::string &dir_path) const
Definition: fs_traversal.h:112
std::vector< Parameter > ParameterList
Definition: swissknife.h:71
const char kSuffixCertificate
Definition: hash.h:59
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:249
virtual ParameterList GetParams() const
void OnFileHashed(const ScrubbingResult &scrubbing_result)
const char kSuffixMicroCatalog
Definition: hash.h:56
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
Definition: fs_traversal.h:37
void Process(IngestionSource *source, shash::Algorithms hash_algorithm, shash::Suffix hash_suffix)
Definition: pipeline.cc:249
void SymlinkCallback(const std::string &relative_path, const std::string &symlink_name)
assert((mem||(size==0))&&"Out Of Memory")
std::string MakeFullPath(const std::string &relative_path, const std::string &file_name) const
Algorithms algorithm
Definition: hash.h:125
void PrintAlert(const Alerts::Type type, const std::string &path, const std::string &affected_hash="") const
VoidCallback fn_new_file
Definition: fs_traversal.h:47
const char kSuffixPartial
Definition: hash.h:57
VoidCallback fn_enter_dir
Definition: fs_traversal.h:45
static Parameter Mandatory(const char key, const std::string &desc)
Definition: swissknife.h:38
bool HasSuffix(const std::string &str, const std::string &suffix, const bool ignore_case)
Definition: string.cc:281
const char kSuffixCatalog
Definition: hash.h:54
std::string MakeRelativePath(const std::string &full_path)
ScrubbingPipeline pipeline_scrubbing_
pthread_mutex_t alerts_mutex_
const char kSuffixMetainfo
Definition: hash.h:60
bool HasPrefix(const string &str, const string &prefix, const bool ignore_case)
Definition: string.cc:267
const char kSuffixHistory
Definition: hash.h:55
std::string CheckPathAndExtractHash(const std::string &relative_path, const std::string &file_name, const std::string &full_path) const
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
Any MkFromSuffixedHexPtr(const HexPtr hex)
Definition: hash.cc:105
Definition: mutex.h:42
PathString GetParentPath(const PathString &path)
Definition: shortstring.cc:15
static const char * ToString(const Type t)
int Main(const ArgumentList &args)
Suffix suffix
Definition: hash.h:126
std::string MakeCanonicalPath(const std::string &path)
Definition: posix.cc:98
bool IsValid() const
Definition: hash.cc:37
const std::string kTxnDirectoryName
std::string path
Definition: pipeline.h:96
void ShowAlertsHelpMessage() const
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:528