GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_scrub.cc
Date: 2026-05-19 11:45:12
Exec Total Coverage
Lines: 0 117 0.0%
Branches: 0 80 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #include "swissknife_scrub.h"
6
7 #include "util/fs_traversal.h"
8 #include "util/logging.h"
9 #include "util/posix.h"
10 #include "util/string.h"
11
12 using namespace std; // NOLINT
13
14 namespace swissknife {
15
// Expected length of the name of a CAS hash subdirectory below the repository
// root (checked in DirCallback).
const size_t kHashSubtreeLength = 2;
// Name of the transaction directory, whose content is ignored by the scrubber.
const std::string kTxnDirectoryName("txn");
18
19 CommandScrub::CommandScrub() : machine_readable_output_(false), alerts_(0) {
20 const int retval = pthread_mutex_init(&alerts_mutex_, NULL);
21 assert(retval == 0);
22 }
23
24
25 CommandScrub::~CommandScrub() { pthread_mutex_destroy(&alerts_mutex_); }
26
27
28 swissknife::ParameterList CommandScrub::GetParams() const {
29 swissknife::ParameterList r;
30 r.push_back(Parameter::Mandatory('r', "repository directory"));
31 r.push_back(Parameter::Switch('m', "machine readable output"));
32 return r;
33 }
34
35 const char *CommandScrub::Alerts::ToString(const CommandScrub::Alerts::Type t) {
36 switch (t) {
37 case Alerts::kUnexpectedFile:
38 return "unexpected regular file";
39 case Alerts::kUnexpectedSymlink:
40 return "unexpected symlink";
41 case Alerts::kUnexpectedSubdir:
42 return "unexpected subdir in CAS subdir";
43 case Alerts::kUnexpectedModifier:
44 return "unknown object modifier";
45 case Alerts::kMalformedHash:
46 return "malformed content hash";
47 case Alerts::kMalformedCasSubdir:
48 return "malformed CAS subdir length";
49 case Alerts::kContentHashMismatch:
50 return "mismatch of file name and content hash";
51 default:
52 return "unknown alert";
53 }
54 }
55
56 void CommandScrub::FileCallback(const std::string &relative_path,
57 const std::string &file_name) {
58 assert(!file_name.empty());
59
60 if (relative_path.empty()) {
61 PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
62 return;
63 }
64 if (relative_path == kTxnDirectoryName) {
65 // transaction directory should be ignored
66 return;
67 }
68
69 const string full_path = MakeFullPath(relative_path, file_name);
70 const std::string hash_string = CheckPathAndExtractHash(relative_path,
71 file_name, full_path);
72 if (hash_string.empty()) {
73 return;
74 }
75
76 if (!shash::HexPtr(hash_string).IsValid()) {
77 PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
78 return;
79 }
80
81 const shash::Any hash_from_name = shash::MkFromSuffixedHexPtr(
82 shash::HexPtr(hash_string));
83 IngestionSource *full_path_source = new FileIngestionSource(full_path);
84 pipeline_scrubbing_.Process(full_path_source, hash_from_name.algorithm,
85 hash_from_name.suffix);
86 }
87
88
89 void CommandScrub::DirCallback(const std::string &relative_path,
90 const std::string &dir_name) {
91 const string full_path = MakeFullPath(relative_path, dir_name);
92
93 // The directory "/srv/cvmfs/<REPO_NAME>/data/txn/receiver" is whitelisted
94 if (HasSuffix(full_path, "data/txn/receiver", false)) {
95 return;
96 }
97
98 // Check for nested subdirs
99 if (relative_path.size() > 0) {
100 PrintAlert(Alerts::kUnexpectedSubdir, full_path);
101 return;
102 }
103
104 // Check CAS hash subdirectory name length
105 if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength
106 && dir_name != kTxnDirectoryName) {
107 PrintAlert(Alerts::kMalformedCasSubdir, full_path);
108 }
109 }
110
111 void CommandScrub::SymlinkCallback(const std::string &relative_path,
112 const std::string &symlink_name) {
113 const string full_path = MakeFullPath(relative_path, symlink_name);
114 PrintAlert(Alerts::kUnexpectedSymlink, full_path);
115 }
116
117 void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
118 const string full_path = scrubbing_result.path;
119 const string file_name = GetFileName(full_path);
120 const string parent_path = GetParentPath(full_path);
121 const string relative_path = MakeRelativePath(parent_path);
122 assert(!file_name.empty());
123
124 const std::string hash_string = CheckPathAndExtractHash(relative_path,
125 file_name, full_path);
126 assert(!hash_string.empty());
127 assert(shash::HexPtr(hash_string).IsValid());
128
129
130 if (scrubbing_result.hash
131 != shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string))) {
132 PrintAlert(Alerts::kContentHashMismatch, full_path,
133 scrubbing_result.hash.ToString());
134 }
135 }
136
137 std::string CommandScrub::CheckPathAndExtractHash(
138 const std::string &relative_path,
139 const std::string &file_name,
140 const std::string &full_path) const {
141 // check for a valid object modifier on the end of the file name
142 const char last_character = *(file_name.end() - 1);
143 bool has_object_modifier = false;
144 if (std::isupper(last_character)) {
145 has_object_modifier = true;
146 }
147 if (has_object_modifier && last_character != shash::kSuffixHistory
148 && last_character != shash::kSuffixCatalog
149 && last_character != shash::kSuffixPartial
150 && last_character != shash::kSuffixCertificate
151 && last_character != shash::kSuffixMicroCatalog
152 && last_character != shash::kSuffixMetainfo) {
153 PrintAlert(Alerts::kUnexpectedModifier, full_path);
154 return "";
155 }
156
157 const string hash_string = GetFileName(GetParentPath(full_path))
158 + (has_object_modifier
159 ? file_name.substr(0,
160 file_name.length() - 1)
161 : file_name);
162 return hash_string;
163 }
164
165
166 int CommandScrub::Main(const swissknife::ArgumentList &args) {
167 repo_path_ = MakeCanonicalPath(*args.find('r')->second);
168 machine_readable_output_ = (args.find('m') != args.end());
169
170 pipeline_scrubbing_.RegisterListener(&CommandScrub::OnFileHashed, this);
171 pipeline_scrubbing_.Spawn();
172
173 // initialize file system recursion engine
174 FileSystemTraversal<CommandScrub> traverser(this, repo_path_, true);
175 traverser.fn_new_file = &CommandScrub::FileCallback;
176 traverser.fn_enter_dir = &CommandScrub::DirCallback;
177 traverser.fn_new_symlink = &CommandScrub::SymlinkCallback;
178 traverser.Recurse(repo_path_);
179
180 // wait for reader to finish all jobs
181 pipeline_scrubbing_.WaitFor();
182
183 return (alerts_ == 0) ? 0 : 1;
184 }
185
186 void CommandScrub::PrintAlert(const Alerts::Type type,
187 const std::string &path,
188 const std::string &affected_hash) const {
189 const MutexLockGuard l(alerts_mutex_);
190
191 const char *msg = Alerts::ToString(type);
192 if (machine_readable_output_) {
193 LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
194 ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
195 path.c_str());
196 } else {
197 LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
198 }
199
200 ++alerts_;
201 }
202
203 std::string CommandScrub::MakeFullPath(const std::string &relative_path,
204 const std::string &file_name) const {
205 return (relative_path.empty())
206 ? repo_path_ + "/" + file_name
207 : repo_path_ + "/" + relative_path + "/" + file_name;
208 }
209
210 std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
211 assert(HasPrefix(full_path, repo_path_ + "/", false));
212 return full_path.substr(repo_path_.length() + 1);
213 }
214
215 void CommandScrub::ShowAlertsHelpMessage() const {
216 LogCvmfs(kLogUtility, kLogStdout, "to come...");
217 }
218
219 } // namespace swissknife
220