GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/cvmfs_fsck.cc
Date: 2024-04-28 02:33:07
Exec Total Coverage
Lines: 0 175 0.0%
Branches: 0 120 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This tool checks a cvmfs cache directory for consistency.
5 * If necessary, the managed cache db is removed so that
6 * it will be rebuilt on next mount.
7 */
8
9 #define _FILE_OFFSET_BITS 64
10
11 #include "cvmfs_config.h"
12
13 #include <dirent.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <pthread.h>
17 #include <stdint.h>
18 #include <sys/stat.h>
19 #include <unistd.h>
20
21 #include <cstdio>
22 #include <cstdlib>
23 #include <cstring>
24 #include <string>
25
26 #include "compression.h"
27 #include "crypto/hash.h"
28 #include "util/atomic.h"
29 #include "util/concurrency.h"
30 #include "util/logging.h"
31 #include "util/platform.h"
32 #include "util/posix.h"
33 #include "util/smalloc.h"
34
35 using namespace std; // NOLINT
36
/**
 * Exit status flags of the checker.  Apart from kErrorOk, every value is a
 * distinct bit so that several conditions can be OR-ed into one exit code.
 */
enum Errors {
  kErrorOk          = 0,
  kErrorFixed       = 1 << 0,
  kErrorReboot      = 1 << 1,
  kErrorUnfixed     = 1 << 2,
  kErrorOperational = 1 << 3,
  kErrorUsage       = 1 << 4,
};
45
46 string *g_cache_dir;
47 atomic_int32 g_num_files;
48 atomic_int32 g_num_err_fixed;
49 atomic_int32 g_num_err_unfixed;
50 atomic_int32 g_num_err_operational;
51 atomic_int32 g_num_tmp_catalog;
52 /**
53 * Traversal of the file system tree is serialized.
54 */
55 pthread_mutex_t g_lock_traverse = PTHREAD_MUTEX_INITIALIZER;
56 DIR *g_DIRP_current = NULL;
57 int g_num_dirs = -1; /**< Number of cache directories already examined. */
58 string *g_current_dir; /**< Current cache sub directory */
59
60 int g_num_threads = 1;
61 bool g_fix_errors = false;
62 bool g_verbose = false;
63 atomic_int32 g_force_rebuild;
64 atomic_int32 g_modified_cache;
65
66
67 static void Usage() {
68 LogCvmfs(kLogCvmfs, kLogStdout,
69 "CernVM File System consistency checker, version %s\n\n"
70 "This tool checks a cvmfs cache directory for consistency.\n"
71 "If necessary, the managed cache db is removed so that\n"
72 "it will be rebuilt on next mount.\n\n"
73 "Usage: cvmfs_fsck [-v] [-p] [-f] [-j #threads] <cache directory>\n"
74 "Options:\n"
75 " -v verbose output\n"
76 " -p try to fix automatically\n"
77 " -f force rebuild of managed cache db on next mount\n"
78 " -j number of concurrent integrity check worker threads\n",
79 VERSION);
80 }
81
82
83 static bool GetNextFile(string *relative_path, string *hash_name) {
84 platform_dirent64 *d = NULL;
85
86 MutexLockGuard m(&g_lock_traverse);
87 get_next_file_again:
88 while (g_DIRP_current && ((d = platform_readdir(g_DIRP_current)) != NULL)) {
89 const string name = d->d_name;
90 if ((name == ".") || (name == "..")) continue;
91
92 platform_stat64 info;
93 *relative_path = *g_current_dir + "/" + name;
94 *hash_name = *g_current_dir + name;
95 const string path = *g_cache_dir + "/" + *relative_path;
96 if (platform_lstat(relative_path->c_str(), &info) != 0) {
97 LogCvmfs(kLogCvmfs, kLogStdout, "Warning: failed to stat() %s (%d)",
98 path.c_str(), errno);
99 continue;
100 }
101
102 if (!S_ISREG(info.st_mode)) {
103 LogCvmfs(kLogCvmfs, kLogStdout, "Warning: %s is not a regular file",
104 path.c_str());
105 continue;
106 }
107
108 break;
109 }
110
111 if (!d) {
112 if (g_DIRP_current) {
113 closedir(g_DIRP_current);
114 g_DIRP_current = NULL;
115 }
116 g_num_dirs++;
117 if (g_num_dirs < 256) {
118 char hex[3];
119 snprintf(hex, sizeof(hex), "%02x", g_num_dirs);
120 *g_current_dir = string(hex, 2);
121
122 if (g_verbose)
123 LogCvmfs(kLogCvmfs, kLogStdout, "Entering %s",
124 g_current_dir->c_str());
125 if ((g_DIRP_current = opendir(hex)) == NULL) {
126 LogCvmfs(kLogCvmfs, kLogStderr,
127 "Invalid cache directory, %s/%s does not exist",
128 g_cache_dir->c_str(), g_current_dir->c_str());
129 exit(kErrorUnfixed);
130 }
131 goto get_next_file_again;
132 }
133 }
134
135 return d != NULL;
136 }
137
138
139 static void *MainCheck(void *data __attribute__((unused))) {
140 string relative_path;
141 string hash_name;
142
143 while (GetNextFile(&relative_path, &hash_name)) {
144 const string path = *g_cache_dir + "/" + relative_path;
145
146 int n = atomic_xadd32(&g_num_files, 1);
147 if (g_verbose)
148 LogCvmfs(kLogCvmfs, kLogStdout, "Checking file %s", path.c_str());
149 if (!g_verbose && ((n % 1000) == 0))
150 LogCvmfs(kLogCvmfs, kLogStdout | kLogNoLinebreak, ".");
151
152 if (relative_path[relative_path.length()-1] == 'T') {
153 LogCvmfs(kLogCvmfs, kLogStdout,
154 "Warning: temporary file catalog %s found", path.c_str());
155 atomic_inc32(&g_num_tmp_catalog);
156 if (g_fix_errors) {
157 if (unlink(relative_path.c_str()) == 0) {
158 LogCvmfs(kLogCvmfs, kLogStdout, "Fix: %s unlinked", path.c_str());
159 atomic_inc32(&g_num_err_fixed);
160 } else {
161 LogCvmfs(kLogCvmfs, kLogStdout, "Error: failed to unlink %s",
162 path.c_str());
163 atomic_inc32(&g_num_err_unfixed);
164 }
165 }
166 continue;
167 }
168
169 int fd_src = open(relative_path.c_str() , O_RDONLY);
170 if (fd_src < 0) {
171 LogCvmfs(kLogCvmfs, kLogStdout, "Error: cannot open %s", path.c_str());
172 atomic_inc32(&g_num_err_operational);
173 continue;
174 }
175 // Don't thrash kernel buffers
176 platform_disable_kcache(fd_src);
177
178 // Compress every file and calculate SHA-1 of stream
179 shash::Any expected_hash = shash::MkFromHexPtr(shash::HexPtr(hash_name));
180 shash::Any hash(expected_hash.algorithm);
181 if (!zlib::CompressFd2Null(fd_src, &hash)) {
182 LogCvmfs(kLogCvmfs, kLogStdout, "Error: could not compress %s",
183 path.c_str());
184 atomic_inc32(&g_num_err_operational);
185 } else {
186 if (hash != expected_hash) {
187 // If the hashes don't match, try hashing the uncompressed file
188 if (!shash::HashFile(relative_path, &hash)) {
189 LogCvmfs(kLogCvmfs, kLogStdout, "Error: could not hash %s",
190 path.c_str());
191 atomic_inc32(&g_num_err_operational);
192 }
193 if (hash != expected_hash) {
194 if (g_fix_errors) {
195 const string quarantaine_path = "./quarantaine/" + hash_name;
196 bool fixed = false;
197 if (rename(relative_path.c_str(), quarantaine_path.c_str()) == 0) {
198 LogCvmfs(kLogCvmfs, kLogStdout,
199 "Fix: %s is corrupted, moved to quarantaine folder",
200 path.c_str());
201 fixed = true;
202 } else {
203 LogCvmfs(kLogCvmfs, kLogStdout,
204 "Warning: failed to move %s into quarantaine folder",
205 path.c_str());
206 if (unlink(relative_path.c_str()) == 0) {
207 LogCvmfs(kLogCvmfs, kLogStdout,
208 "Fix: %s is corrupted, file unlinked", path.c_str());
209 fixed = true;
210 } else {
211 LogCvmfs(kLogCvmfs, kLogStdout,
212 "Error: %s is corrupted, could not unlink",
213 path.c_str());
214 }
215 }
216
217 if (fixed) {
218 atomic_inc32(&g_num_err_fixed);
219
220 // Changes made, we have to rebuild the managed cache db
221 atomic_cas32(&g_force_rebuild, 0, 1);
222 atomic_cas32(&g_modified_cache, 0, 1);
223 } else {
224 atomic_inc32(&g_num_err_unfixed);
225 }
226 } else {
227 LogCvmfs(kLogCvmfs, kLogStdout,
228 "Error: %s has compressed checksum %s, "
229 "delete this file from cache directory!",
230 path.c_str(), hash.ToString().c_str());
231 atomic_inc32(&g_num_err_unfixed);
232 }
233 }
234 }
235 }
236 close(fd_src);
237 }
238
239 return NULL;
240 }
241
242
243 int main(int argc, char **argv) {
244 atomic_init32(&g_force_rebuild);
245 atomic_init32(&g_modified_cache);
246 g_current_dir = new string();
247
248 int c;
249 while ((c = getopt(argc, argv, "hvpfj:")) != -1) {
250 switch (c) {
251 case 'h':
252 Usage();
253 return kErrorOk;
254 case 'v':
255 g_verbose = true;
256 break;
257 case 'p':
258 g_fix_errors = true;
259 break;
260 case 'f':
261 atomic_cas32(&g_force_rebuild, 0, 1);
262 break;
263 case 'j':
264 g_num_threads = atoi(optarg);
265 if (g_num_threads < 1) {
266 LogCvmfs(kLogCvmfs, kLogStdout,
267 "There is at least one worker thread required");
268 return kErrorUsage;
269 }
270 break;
271 case '?':
272 default:
273 Usage();
274 return kErrorUsage;
275 }
276 }
277
278 // Switch to cache directory
279 if (optind >= argc) {
280 Usage();
281 return kErrorUsage;
282 }
283 g_cache_dir = new string(MakeCanonicalPath(argv[optind]));
284 if (chdir(g_cache_dir->c_str()) != 0) {
285 LogCvmfs(kLogCvmfs, kLogStderr, "Could not chdir to %s",
286 g_cache_dir->c_str());
287 return kErrorOperational;
288 }
289
290 // Check if txn directory is empty
291 DIR *dirp_txn;
292 if ((dirp_txn = opendir("txn")) == NULL) {
293 LogCvmfs(kLogCvmfs, kLogStderr,
294 "Invalid cache directory, %s/txn does not exist",
295 g_cache_dir->c_str());
296 return kErrorOperational;
297 }
298 platform_dirent64 *d;
299 while ((d = platform_readdir(dirp_txn)) != NULL) {
300 const string name = d->d_name;
301 if ((name == ".") || (name == "..")) continue;
302
303 LogCvmfs(kLogCvmfs, kLogStdout,
304 "Warning: temporary directory %s/txn is not empty\n"
305 "If this repository is currently _not_ mounted, "
306 "you can remove its contents", g_cache_dir->c_str());
307 break;
308 }
309 closedir(dirp_txn);
310
311 // Run workers to recalculate checksums
312 atomic_init32(&g_num_files);
313 atomic_init32(&g_num_err_fixed);
314 atomic_init32(&g_num_err_unfixed);
315 atomic_init32(&g_num_err_operational);
316 atomic_init32(&g_num_tmp_catalog);
317 pthread_t *workers = reinterpret_cast<pthread_t *>(
318 smalloc(g_num_threads * sizeof(pthread_t)));
319 if (!g_verbose)
320 LogCvmfs(kLogCvmfs, kLogStdout | kLogNoLinebreak, "Verifying: ");
321 for (int i = 0; i < g_num_threads; ++i) {
322 if (g_verbose)
323 LogCvmfs(kLogCvmfs, kLogStdout, "Starting worker %d", i+1);
324 if (pthread_create(&workers[i], NULL, MainCheck, NULL) != 0) {
325 LogCvmfs(kLogCvmfs, kLogStdout, "Fatal: could not create worker thread");
326 return kErrorOperational;
327 }
328 }
329 for (int i = g_num_threads-1; i >= 0; --i) {
330 pthread_join(workers[i], NULL);
331 if (g_verbose)
332 LogCvmfs(kLogCvmfs, kLogStdout, "Stopping worker %d", i+1);
333 }
334 free(workers);
335 if (!g_verbose)
336 LogCvmfs(kLogCvmfs, kLogStdout | kLogNoLinebreak, "\n");
337 LogCvmfs(kLogCvmfs, kLogStdout, "Verified %d files",
338 atomic_read32(&g_num_files));
339
340 if (atomic_read32(&g_num_tmp_catalog) > 0)
341 LogCvmfs(kLogCvmfs, kLogStdout, "Temporary file catalogs were found.");
342
343 if (atomic_read32(&g_force_rebuild)) {
344 if (unlink("cachedb") == 0) {
345 LogCvmfs(kLogCvmfs, kLogStdout,
346 "Fix: managed cache db unlinked, will be rebuilt on next mount");
347 atomic_inc32(&g_num_err_fixed);
348 } else {
349 if (errno != ENOENT) {
350 LogCvmfs(kLogCvmfs, kLogStdout,
351 "Error: could not unlink managed cache database (%d)", errno);
352 atomic_inc32(&g_num_err_unfixed);
353 }
354 }
355 }
356
357 if (atomic_read32(&g_modified_cache)) {
358 LogCvmfs(kLogCvmfs, kLogStdout, "\n"
359 "WARNING: There might by corrupted files in the kernel buffers.\n"
360 "Remount CernVM-FS or run 'echo 3 > /proc/sys/vm/drop_caches'"
361 "\n\n");
362 }
363
364 int retval = 0;
365 if (atomic_read32(&g_num_err_fixed) > 0)
366 retval |= kErrorFixed;
367 if (atomic_read32(&g_num_err_unfixed) > 0)
368 retval |= kErrorUnfixed;
369 if (atomic_read32(&g_num_err_operational) > 0)
370 retval |= kErrorOperational;
371
372 return retval;
373 }
374