GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_scrub.cc
Date: 2026-04-26 02:35:59
Exec Total Coverage
Lines: 0 117 0.0%
Branches: 0 80 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "swissknife_scrub.h"
8
9 #include "util/fs_traversal.h"
10 #include "util/logging.h"
11 #include "util/posix.h"
12 #include "util/string.h"
13
14 using namespace std; // NOLINT
15
16 namespace swissknife {
17
// Expected length of the name of a CAS hash subdirectory
const size_t kHashSubtreeLength(2);
// Name of the transaction directory, whose files are not scrubbed
const std::string kTxnDirectoryName("txn");
20
21 CommandScrub::CommandScrub() : machine_readable_output_(false), alerts_(0) {
22 const int retval = pthread_mutex_init(&alerts_mutex_, NULL);
23 assert(retval == 0);
24 }
25
26
27 CommandScrub::~CommandScrub() { pthread_mutex_destroy(&alerts_mutex_); }
28
29
30 swissknife::ParameterList CommandScrub::GetParams() const {
31 swissknife::ParameterList r;
32 r.push_back(Parameter::Mandatory('r', "repository directory"));
33 r.push_back(Parameter::Switch('m', "machine readable output"));
34 return r;
35 }
36
37 const char *CommandScrub::Alerts::ToString(const CommandScrub::Alerts::Type t) {
38 switch (t) {
39 case Alerts::kUnexpectedFile:
40 return "unexpected regular file";
41 case Alerts::kUnexpectedSymlink:
42 return "unexpected symlink";
43 case Alerts::kUnexpectedSubdir:
44 return "unexpected subdir in CAS subdir";
45 case Alerts::kUnexpectedModifier:
46 return "unknown object modifier";
47 case Alerts::kMalformedHash:
48 return "malformed content hash";
49 case Alerts::kMalformedCasSubdir:
50 return "malformed CAS subdir length";
51 case Alerts::kContentHashMismatch:
52 return "mismatch of file name and content hash";
53 default:
54 return "unknown alert";
55 }
56 }
57
58 void CommandScrub::FileCallback(const std::string &relative_path,
59 const std::string &file_name) {
60 assert(!file_name.empty());
61
62 if (relative_path.empty()) {
63 PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
64 return;
65 }
66 if (relative_path == kTxnDirectoryName) {
67 // transaction directory should be ignored
68 return;
69 }
70
71 const string full_path = MakeFullPath(relative_path, file_name);
72 const std::string hash_string = CheckPathAndExtractHash(relative_path,
73 file_name, full_path);
74 if (hash_string.empty()) {
75 return;
76 }
77
78 if (!shash::HexPtr(hash_string).IsValid()) {
79 PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
80 return;
81 }
82
83 const shash::Any hash_from_name = shash::MkFromSuffixedHexPtr(
84 shash::HexPtr(hash_string));
85 IngestionSource *full_path_source = new FileIngestionSource(full_path);
86 pipeline_scrubbing_.Process(full_path_source, hash_from_name.algorithm,
87 hash_from_name.suffix);
88 }
89
90
91 void CommandScrub::DirCallback(const std::string &relative_path,
92 const std::string &dir_name) {
93 const string full_path = MakeFullPath(relative_path, dir_name);
94
95 // The directory "/srv/cvmfs/<REPO_NAME>/data/txn/receiver" is whitelisted
96 if (HasSuffix(full_path, "data/txn/receiver", false)) {
97 return;
98 }
99
100 // Check for nested subdirs
101 if (relative_path.size() > 0) {
102 PrintAlert(Alerts::kUnexpectedSubdir, full_path);
103 return;
104 }
105
106 // Check CAS hash subdirectory name length
107 if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength
108 && dir_name != kTxnDirectoryName) {
109 PrintAlert(Alerts::kMalformedCasSubdir, full_path);
110 }
111 }
112
113 void CommandScrub::SymlinkCallback(const std::string &relative_path,
114 const std::string &symlink_name) {
115 const string full_path = MakeFullPath(relative_path, symlink_name);
116 PrintAlert(Alerts::kUnexpectedSymlink, full_path);
117 }
118
119 void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
120 const string full_path = scrubbing_result.path;
121 const string file_name = GetFileName(full_path);
122 const string parent_path = GetParentPath(full_path);
123 const string relative_path = MakeRelativePath(parent_path);
124 assert(!file_name.empty());
125
126 const std::string hash_string = CheckPathAndExtractHash(relative_path,
127 file_name, full_path);
128 assert(!hash_string.empty());
129 assert(shash::HexPtr(hash_string).IsValid());
130
131
132 if (scrubbing_result.hash
133 != shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string))) {
134 PrintAlert(Alerts::kContentHashMismatch, full_path,
135 scrubbing_result.hash.ToString());
136 }
137 }
138
139 std::string CommandScrub::CheckPathAndExtractHash(
140 const std::string &relative_path,
141 const std::string &file_name,
142 const std::string &full_path) const {
143 // check for a valid object modifier on the end of the file name
144 const char last_character = *(file_name.end() - 1);
145 bool has_object_modifier = false;
146 if (std::isupper(last_character)) {
147 has_object_modifier = true;
148 }
149 if (has_object_modifier && last_character != shash::kSuffixHistory
150 && last_character != shash::kSuffixCatalog
151 && last_character != shash::kSuffixPartial
152 && last_character != shash::kSuffixCertificate
153 && last_character != shash::kSuffixMicroCatalog
154 && last_character != shash::kSuffixMetainfo) {
155 PrintAlert(Alerts::kUnexpectedModifier, full_path);
156 return "";
157 }
158
159 const string hash_string = GetFileName(GetParentPath(full_path))
160 + (has_object_modifier
161 ? file_name.substr(0,
162 file_name.length() - 1)
163 : file_name);
164 return hash_string;
165 }
166
167
168 int CommandScrub::Main(const swissknife::ArgumentList &args) {
169 repo_path_ = MakeCanonicalPath(*args.find('r')->second);
170 machine_readable_output_ = (args.find('m') != args.end());
171
172 pipeline_scrubbing_.RegisterListener(&CommandScrub::OnFileHashed, this);
173 pipeline_scrubbing_.Spawn();
174
175 // initialize file system recursion engine
176 FileSystemTraversal<CommandScrub> traverser(this, repo_path_, true);
177 traverser.fn_new_file = &CommandScrub::FileCallback;
178 traverser.fn_enter_dir = &CommandScrub::DirCallback;
179 traverser.fn_new_symlink = &CommandScrub::SymlinkCallback;
180 traverser.Recurse(repo_path_);
181
182 // wait for reader to finish all jobs
183 pipeline_scrubbing_.WaitFor();
184
185 return (alerts_ == 0) ? 0 : 1;
186 }
187
188 void CommandScrub::PrintAlert(const Alerts::Type type,
189 const std::string &path,
190 const std::string &affected_hash) const {
191 const MutexLockGuard l(alerts_mutex_);
192
193 const char *msg = Alerts::ToString(type);
194 if (machine_readable_output_) {
195 LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
196 ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
197 path.c_str());
198 } else {
199 LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
200 }
201
202 ++alerts_;
203 }
204
205 std::string CommandScrub::MakeFullPath(const std::string &relative_path,
206 const std::string &file_name) const {
207 return (relative_path.empty())
208 ? repo_path_ + "/" + file_name
209 : repo_path_ + "/" + relative_path + "/" + file_name;
210 }
211
212 std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
213 assert(HasPrefix(full_path, repo_path_ + "/", false));
214 return full_path.substr(repo_path_.length() + 1);
215 }
216
217 void CommandScrub::ShowAlertsHelpMessage() const {
218 LogCvmfs(kLogUtility, kLogStdout, "to come...");
219 }
220
221 } // namespace swissknife
222