GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_scrub.cc
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 0 117 0.0%
Branches: 0 80 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #define __STDC_FORMAT_MACROS
6
7 #include "swissknife_scrub.h"
8
9 #include "util/fs_traversal.h"
10 #include "util/logging.h"
11 #include "util/posix.h"
12 #include "util/smalloc.h"
13 #include "util/string.h"
14
15 using namespace std; // NOLINT
16
17 namespace swissknife {
18
/// Expected length of the name of a CAS subdirectory below the repository root.
const size_t kHashSubtreeLength(2);
/// Name of the top-level directory that the scrubber does not check.
const std::string kTxnDirectoryName("txn");
21
22 CommandScrub::CommandScrub() : machine_readable_output_(false), alerts_(0) {
23 const int retval = pthread_mutex_init(&alerts_mutex_, NULL);
24 assert(retval == 0);
25 }
26
27
28 CommandScrub::~CommandScrub() { pthread_mutex_destroy(&alerts_mutex_); }
29
30
31 swissknife::ParameterList CommandScrub::GetParams() const {
32 swissknife::ParameterList r;
33 r.push_back(Parameter::Mandatory('r', "repository directory"));
34 r.push_back(Parameter::Switch('m', "machine readable output"));
35 return r;
36 }
37
38 const char *CommandScrub::Alerts::ToString(const CommandScrub::Alerts::Type t) {
39 switch (t) {
40 case Alerts::kUnexpectedFile:
41 return "unexpected regular file";
42 case Alerts::kUnexpectedSymlink:
43 return "unexpected symlink";
44 case Alerts::kUnexpectedSubdir:
45 return "unexpected subdir in CAS subdir";
46 case Alerts::kUnexpectedModifier:
47 return "unknown object modifier";
48 case Alerts::kMalformedHash:
49 return "malformed content hash";
50 case Alerts::kMalformedCasSubdir:
51 return "malformed CAS subdir length";
52 case Alerts::kContentHashMismatch:
53 return "mismatch of file name and content hash";
54 default:
55 return "unknown alert";
56 }
57 }
58
59 void CommandScrub::FileCallback(const std::string &relative_path,
60 const std::string &file_name) {
61 assert(!file_name.empty());
62
63 if (relative_path.empty()) {
64 PrintAlert(Alerts::kUnexpectedFile, repo_path_ + "/" + file_name);
65 return;
66 }
67 if (relative_path == kTxnDirectoryName) {
68 // transaction directory should be ignored
69 return;
70 }
71
72 const string full_path = MakeFullPath(relative_path, file_name);
73 const std::string hash_string = CheckPathAndExtractHash(relative_path,
74 file_name, full_path);
75 if (hash_string.empty()) {
76 return;
77 }
78
79 if (!shash::HexPtr(hash_string).IsValid()) {
80 PrintAlert(Alerts::kMalformedHash, full_path, hash_string);
81 return;
82 }
83
84 const shash::Any hash_from_name =
85 shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string));
86 IngestionSource *full_path_source = new FileIngestionSource(full_path);
87 pipeline_scrubbing_.Process(
88 full_path_source, hash_from_name.algorithm, hash_from_name.suffix);
89 }
90
91
92 void CommandScrub::DirCallback(const std::string &relative_path,
93 const std::string &dir_name) {
94 const string full_path = MakeFullPath(relative_path, dir_name);
95
96 // The directory "/srv/cvmfs/<REPO_NAME>/data/txn/receiver" is whitelisted
97 if (HasSuffix(full_path, "data/txn/receiver", false)) {
98 return;
99 }
100
101 // Check for nested subdirs
102 if (relative_path.size() > 0) {
103 PrintAlert(Alerts::kUnexpectedSubdir, full_path);
104 return;
105 }
106
107 // Check CAS hash subdirectory name length
108 if (!dir_name.empty() && dir_name.size() != kHashSubtreeLength
109 && dir_name != kTxnDirectoryName) {
110 PrintAlert(Alerts::kMalformedCasSubdir, full_path);
111 }
112 }
113
114 void CommandScrub::SymlinkCallback(const std::string &relative_path,
115 const std::string &symlink_name) {
116 const string full_path = MakeFullPath(relative_path, symlink_name);
117 PrintAlert(Alerts::kUnexpectedSymlink, full_path);
118 }
119
120 void CommandScrub::OnFileHashed(const ScrubbingResult &scrubbing_result) {
121 const string full_path = scrubbing_result.path;
122 const string file_name = GetFileName(full_path);
123 const string parent_path = GetParentPath(full_path);
124 const string relative_path = MakeRelativePath(parent_path);
125 assert(!file_name.empty());
126
127 const std::string hash_string = CheckPathAndExtractHash(relative_path,
128 file_name, full_path);
129 assert(!hash_string.empty());
130 assert(shash::HexPtr(hash_string).IsValid());
131
132
133 if (scrubbing_result.hash
134 != shash::MkFromSuffixedHexPtr(shash::HexPtr(hash_string))) {
135 PrintAlert(Alerts::kContentHashMismatch, full_path,
136 scrubbing_result.hash.ToString());
137 }
138 }
139
140 std::string CommandScrub::CheckPathAndExtractHash(
141 const std::string &relative_path,
142 const std::string &file_name,
143 const std::string &full_path) const {
144 // check for a valid object modifier on the end of the file name
145 const char last_character = *(file_name.end() - 1);
146 bool has_object_modifier = false;
147 if (std::isupper(last_character)) {
148 has_object_modifier = true;
149 }
150 if (has_object_modifier && last_character != shash::kSuffixHistory
151 && last_character != shash::kSuffixCatalog
152 && last_character != shash::kSuffixPartial
153 && last_character != shash::kSuffixCertificate
154 && last_character != shash::kSuffixMicroCatalog
155 && last_character != shash::kSuffixMetainfo) {
156 PrintAlert(Alerts::kUnexpectedModifier, full_path);
157 return "";
158 }
159
160 const string hash_string = GetFileName(GetParentPath(full_path))
161 + (has_object_modifier
162 ? file_name.substr(0,
163 file_name.length() - 1)
164 : file_name);
165 return hash_string;
166 }
167
168
169 int CommandScrub::Main(const swissknife::ArgumentList &args) {
170 repo_path_ = MakeCanonicalPath(*args.find('r')->second);
171 machine_readable_output_ = (args.find('m') != args.end());
172
173 pipeline_scrubbing_.RegisterListener(&CommandScrub::OnFileHashed, this);
174 pipeline_scrubbing_.Spawn();
175
176 // initialize file system recursion engine
177 FileSystemTraversal<CommandScrub> traverser(this, repo_path_, true);
178 traverser.fn_new_file = &CommandScrub::FileCallback;
179 traverser.fn_enter_dir = &CommandScrub::DirCallback;
180 traverser.fn_new_symlink = &CommandScrub::SymlinkCallback;
181 traverser.Recurse(repo_path_);
182
183 // wait for reader to finish all jobs
184 pipeline_scrubbing_.WaitFor();
185
186 return (alerts_ == 0) ? 0 : 1;
187 }
188
189 void CommandScrub::PrintAlert(const Alerts::Type type,
190 const std::string &path,
191 const std::string &affected_hash) const {
192 const MutexLockGuard l(alerts_mutex_);
193
194 const char *msg = Alerts::ToString(type);
195 if (machine_readable_output_) {
196 LogCvmfs(kLogUtility, kLogStderr, "%d %s %s", type,
197 ((affected_hash.empty()) ? "-" : affected_hash.c_str()),
198 path.c_str());
199 } else {
200 LogCvmfs(kLogUtility, kLogStderr, "%s | at: %s", msg, path.c_str());
201 }
202
203 ++alerts_;
204 }
205
206 std::string CommandScrub::MakeFullPath(const std::string &relative_path,
207 const std::string &file_name) const {
208 return (relative_path.empty())
209 ? repo_path_ + "/" + file_name
210 : repo_path_ + "/" + relative_path + "/" + file_name;
211 }
212
213 std::string CommandScrub::MakeRelativePath(const std::string &full_path) {
214 assert(HasPrefix(full_path, repo_path_ + "/", false));
215 return full_path.substr(repo_path_.length() + 1);
216 }
217
218 void CommandScrub::ShowAlertsHelpMessage() const {
219 LogCvmfs(kLogUtility, kLogStdout, "to come...");
220 }
221
222 } // namespace swissknife
223