GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/swissknife_scrub.cc Lines: 0 94 0.0 %
Date: 2019-02-03 02:48:13 Branches: 0 80 0.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#define __STDC_FORMAT_MACROS
6
7
#include "swissknife_scrub.h"
8
#include "cvmfs_config.h"
9
10
#include "fs_traversal.h"
11
#include "logging.h"
12
#include "smalloc.h"
13
#include "util/posix.h"
14
#include "util/string.h"
15
16
using namespace std;  // NOLINT
17
18
namespace swissknife {
19
20
const size_t kHashSubtreeLength = 2;  // name length of a CAS subdirectory
21
const std::string kTxnDirectoryName = "txn";  // directory skipped by the scrub
22
23
/**
 * Starts with human readable output and a zeroed alert counter and
 * initializes the mutex that PrintAlert() locks while reporting.
 */
CommandScrub::CommandScrub()
  : machine_readable_output_(false)
  , alerts_(0)
{
  const int rc = pthread_mutex_init(&alerts_mutex_, NULL);
  assert(rc == 0);
}
30
31
32
/**
 * Destroys the mutex that was initialized in the constructor.
 */
CommandScrub::~CommandScrub()
{
  pthread_mutex_destroy(&alerts_mutex_);
}
35
36
37
/**
 * Lists the parameters understood by the scrub command: the mandatory
 * repository directory ('r') and the machine readable output switch ('m').
 */
swissknife::ParameterList CommandScrub::GetParams() const {
  swissknife::ParameterList params;
  params.push_back(Parameter::Mandatory('r', "repository directory"));
  params.push_back(Parameter::Switch('m', "machine readable output"));
  return params;
}
43
44
/**
 * Maps an alert type to its human readable description.
 *
 * @param t  the alert type to describe
 * @return   a static string; "unknown alert" for any unlisted value
 */
const char *CommandScrub::Alerts::ToString(const CommandScrub::Alerts::Type t) {
  if (t == Alerts::kUnexpectedFile) {
    return "unexpected regular file";
  }
  if (t == Alerts::kUnexpectedSymlink) {
    return "unexpected symlink";
  }
  if (t == Alerts::kUnexpectedSubdir) {
    return "unexpected subdir in CAS subdir";
  }
  if (t == Alerts::kUnexpectedModifier) {
    return "unknown object modifier";
  }
  if (t == Alerts::kMalformedHash) {
    return "malformed content hash";
  }
  if (t == Alerts::kMalformedCasSubdir) {
    return "malformed CAS subdir length";
  }
  if (t == Alerts::kContentHashMismatch) {
    return "mismatch of file name and content hash";
  }
  return "unknown alert";
}
64
65
void CommandScrub::FileCallback(
66
  const std::string &relative_path,
67
  const std::string &file_name)
68
{
69
  assert(!file_name.empty());
70
71
  if (relative_path.empty()) {
72
    PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
73
    return;
74
  }
75
  if (relative_path == kTxnDirectoryName) {
76
    // transaction directory should be ignored
77
    return;
78
  }
79
80
  const string full_path = MakeFullPath(relative_path, file_name);
81
  const std::string hash_string =
82
      CheckPathAndExtractHash(relative_path, file_name, full_path);
83
  if (hash_string.empty()) {
84
    return;
85
  }
86
87
  if (!shash::HexPtr(hash_string).IsValid()) {
88
    PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
89
    return;
90
  }
91
92
  shash::Any hash_from_name =
93
    shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string));
94
  IngestionSource* full_path_source = new FileIngestionSource(full_path);
95
  pipeline_scrubbing_.Process(
96
    full_path_source,
97
    hash_from_name.algorithm,
98
    hash_from_name.suffix);
99
}
100
101
102
void CommandScrub::DirCallback(
103
  const std::string &relative_path,
104
  const std::string &dir_name)
105
{
106
  const string full_path = MakeFullPath(relative_path, dir_name);
107
  // Check for nested subdirs
108
  if (relative_path.size() > 0) {
109
    PrintAlert(Alerts::kUnexpectedSubdir, full_path);
110
    return;
111
  }
112
113
  // Check CAS hash subdirectory name length
114
  if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength &&
115
      dir_name != kTxnDirectoryName) {
116
    PrintAlert(Alerts::kMalformedCasSubdir, full_path);
117
  }
118
}
119
120
void CommandScrub::SymlinkCallback(const std::string &relative_path,
121
                                   const std::string &symlink_name) {
122
  const string full_path = MakeFullPath(relative_path, symlink_name);
123
  PrintAlert(Alerts::kUnexpectedSymlink, full_path);
124
}
125
126
void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
127
  const string full_path = scrubbing_result.path;
128
  const string file_name = GetFileName(full_path);
129
  const string parent_path = GetParentPath(full_path);
130
  const string relative_path = MakeRelativePath(parent_path);
131
  assert(!file_name.empty());
132
133
  const std::string hash_string =
134
    CheckPathAndExtractHash(relative_path, file_name, full_path);
135
  assert(!hash_string.empty());
136
  assert(shash::HexPtr(hash_string).IsValid());
137
138
139
  if (scrubbing_result.hash !=
140
      shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string)))
141
  {
142
    PrintAlert(Alerts::kContentHashMismatch, full_path,
143
               scrubbing_result.hash.ToString());
144
  }
145
}
146
147
std::string CommandScrub::CheckPathAndExtractHash(
148
    const std::string &relative_path,
149
    const std::string &file_name,
150
    const std::string &full_path) const
151
{
152
  // check for a valid object modifier on the end of the file name
153
  const char last_character = *(file_name.end() - 1);
154
  bool has_object_modifier = false;
155
  if (std::isupper(last_character)) {
156
    has_object_modifier = true;
157
  }
158
  if (has_object_modifier && last_character != shash::kSuffixHistory &&
159
      last_character != shash::kSuffixCatalog &&
160
      last_character != shash::kSuffixPartial &&
161
      last_character != shash::kSuffixCertificate &&
162
      last_character != shash::kSuffixMicroCatalog &&
163
      last_character != shash::kSuffixMetainfo) {
164
    PrintAlert(Alerts::kUnexpectedModifier, full_path);
165
    return "";
166
  }
167
168
  const string hash_string =
169
      GetFileName(GetParentPath(full_path)) +
170
      (has_object_modifier ? file_name.substr(0, file_name.length() - 1)
171
                           : file_name);
172
  return hash_string;
173
}
174
175
176
/**
 * Entry point of the scrub command: walks the repository directory, checks
 * all entries through the traversal callbacks and waits until the scrubbing
 * pipeline has processed all files.
 *
 * @param args  parsed command line arguments ('r' mandatory, 'm' optional)
 * @return      0 if no alert was raised, 1 otherwise
 */
int CommandScrub::Main(const swissknife::ArgumentList &args) {
  repo_path_ = MakeCanonicalPath(*args.find('r')->second);
  machine_readable_output_ = (args.find('m') != args.end());

  // results of the pipeline are delivered to OnFileHashed()
  pipeline_scrubbing_.RegisterListener(&CommandScrub::OnFileHashed, this);
  pipeline_scrubbing_.Spawn();

  // initialize file system recursion engine
  FileSystemTraversal<CommandScrub> walker(this, repo_path_, true);
  walker.fn_new_file = &CommandScrub::FileCallback;
  walker.fn_enter_dir = &CommandScrub::DirCallback;
  walker.fn_new_symlink = &CommandScrub::SymlinkCallback;
  walker.Recurse(repo_path_);

  // wait for reader to finish all jobs
  pipeline_scrubbing_.WaitFor();

  if (alerts_ != 0) {
    return 1;
  }
  return 0;
}
195
196
void CommandScrub::PrintAlert(
197
  const Alerts::Type type,
198
  const std::string &path,
199
  const std::string &affected_hash) const
200
{
201
  MutexLockGuard l(alerts_mutex_);
202
203
  const char *msg = Alerts::ToString(type);
204
  if (machine_readable_output_) {
205
    LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
206
             ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
207
             path.c_str());
208
  } else {
209
    LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
210
  }
211
212
  ++alerts_;
213
}
214
215
std::string CommandScrub::MakeFullPath(const std::string &relative_path,
216
                                       const std::string &file_name) const {
217
  return (relative_path.empty())
218
             ? repo_path_ + "/" + file_name
219
             : repo_path_ + "/" + relative_path + "/" + file_name;
220
}
221
222
/**
 * Strips the repository directory and the following slash from a path.
 *
 * @param full_path  path that must start with repo_path_ + "/" (asserted)
 * @return           the remainder of full_path after that prefix
 */
std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
  const std::string repo_prefix = repo_path_ + "/";
  assert(HasPrefix(full_path, repo_prefix, false));
  return full_path.substr(repo_prefix.length());
}
226
227
void CommandScrub::ShowAlertsHelpMessage() const {
228
  LogCvmfs(kLogUtility, kLogStdout, "to come...");
229
}
230
231
}  // namespace swissknife