GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/monitor.cc
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 85 320 26.6%
Branches: 60 500 12.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This module forks a watchdog process that listens on
5 * a pipe and prints a stacktrace into syslog, when cvmfs
6 * fails.
7 *
8 * Also, it handles getting and setting the maximum number of file descriptors.
9 */
10
11
12 #include "monitor.h"
13
14 #include <errno.h>
15 #include <execinfo.h>
16 #include <poll.h>
17 #include <pthread.h>
18 #include <signal.h>
19 #include <sys/resource.h>
20 #include <sys/types.h>
21 #ifdef __APPLE__
22 #include <sys/ucontext.h>
23 #else
24 #include <ucontext.h>
25 #endif
26 #include <sys/uio.h>
27 #include <sys/wait.h>
28 #include <syslog.h>
29 #include <time.h>
30 #include <unistd.h>
31
32 #include <cassert>
33 #include <cstdio>
34 #include <cstdlib>
35 #include <cstring>
36 #include <map>
37 #include <set>
38 #include <string>
39 #include <vector>
40
41 #if defined(CVMFS_FUSE_MODULE)
42 #include "cvmfs.h"
43 #endif
44 #include "util/exception.h"
45 #include "util/logging.h"
46 #include "util/platform.h"
47 #include "util/posix.h"
48 #include "util/smalloc.h"
49 #include "util/string.h"
50
51 // Used for address offset calculation
52 #if defined(CVMFS_FUSE_MODULE)
53 extern loader::CvmfsExports *g_cvmfs_exports;
54 #endif
55
56 using namespace std; // NOLINT
57
58 Watchdog *Watchdog::instance_ = NULL;
59
60 int Watchdog::g_suppressed_signals[] = {
61 SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGABRT, SIGBUS, SIGFPE,
62 SIGUSR1, SIGSEGV, SIGUSR2, SIGTERM, SIGXCPU, SIGXFSZ};
63
64 int Watchdog::g_crash_signals[] = {SIGQUIT, SIGILL, SIGABRT, SIGFPE,
65 SIGSEGV, SIGBUS, SIGPIPE, SIGXFSZ};
66
67 49 Watchdog *Watchdog::Create(FnOnCrash on_crash) {
68
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49 times.
49 assert(instance_ == NULL);
69
1/2
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
49 instance_ = new Watchdog(on_crash);
70 49 instance_->Fork();
71 49 return instance_;
72 }
73
74
75 /**
76 * Uses an external shell and gdb to create a full stack trace of the dying
77 * process. The same shell is used to force-quit the client afterwards.
78 */
79 string Watchdog::GenerateStackTrace(pid_t pid) {
80 int retval;
81 string result = "";
82
83 // re-gain root permissions to allow for ptrace of died cvmfs2 process
84 const bool retrievable = true;
85 if (!SwitchCredentials(0, getgid(), retrievable)) {
86 result += "failed to re-gain root permissions... still give it a try\n";
87 }
88
89 // run gdb and attach to the dying process
90 int fd_stdin;
91 int fd_stdout;
92 int fd_stderr;
93 vector<string> argv;
94 argv.push_back("-p");
95 argv.push_back(StringifyInt(pid));
96 pid_t gdb_pid = 0;
97 const bool double_fork = false;
98 retval = ExecuteBinary(&fd_stdin,
99 &fd_stdout,
100 &fd_stderr,
101 #ifdef __APPLE__
102 "lldb",
103 #else
104 "gdb",
105 #endif
106 argv,
107 double_fork,
108 &gdb_pid);
109 assert(retval);
110
111
112 // Skip the gdb startup output
113 ReadUntilGdbPrompt(fd_stdout);
114
115 // Send stacktrace command to gdb
116 #ifdef __APPLE__
117 const string gdb_cmd = "bt all\n"
118 "quit\n";
119 #else
120 const string gdb_cmd = "thread apply all bt\n"
121 "quit\n";
122 #endif
123 // The execve can have failed, which can't be detected in ExecuteBinary.
124 // Instead, writing to the pipe will fail.
125 const ssize_t nbytes = write(fd_stdin, gdb_cmd.data(), gdb_cmd.length());
126 if ((nbytes < 0) || (static_cast<unsigned>(nbytes) != gdb_cmd.length())) {
127 result += "failed to start gdb/lldb (" + StringifyInt(nbytes)
128 + " bytes "
129 "written, errno "
130 + StringifyInt(errno) + ")\n";
131 return result;
132 }
133
134 // Read the stack trace from the stdout of our gdb process
135 #ifdef __APPLE__
136 // lldb has one more prompt
137 result += ReadUntilGdbPrompt(fd_stdout);
138 #endif
139 result += ReadUntilGdbPrompt(fd_stdout) + "\n\n";
140
141 // Check for output on stderr
142 string result_err;
143 Block2Nonblock(fd_stderr);
144 char cbuf;
145 while (read(fd_stderr, &cbuf, 1) == 1)
146 result_err.push_back(cbuf);
147 if (!result_err.empty())
148 result += "\nError output:\n" + result_err + "\n";
149
150 // Close the connection to the terminated gdb process
151 close(fd_stderr);
152 close(fd_stdout);
153 close(fd_stdin);
154
155 // Make sure gdb has terminated (wait for it for a short while)
156 unsigned int timeout = 15;
157 int statloc;
158 while (timeout > 0 && waitpid(gdb_pid, &statloc, WNOHANG) != gdb_pid) {
159 --timeout;
160 SafeSleepMs(1000);
161 }
162
163 // when the timeout expired, gdb probably hangs... we need to kill it
164 if (timeout == 0) {
165 result += "gdb did not exit as expected. sending SIGKILL... ";
166 result += (kill(gdb_pid, SIGKILL) != 0) ? "failed\n" : "okay\n";
167 }
168
169 return result;
170 }
171
172
173 pid_t Watchdog::GetPid() {
174 if (instance_ != NULL) {
175 return instance_->watchdog_pid_;
176 }
177 return getpid();
178 }
179
180 /**
181 * Log a string to syslog and into the crash dump file.
182 * We expect ideally nothing to be logged, so that file is created on demand.
183 */
184 void Watchdog::LogEmergency(string msg) {
185 char ctime_buffer[32];
186
187 if (!crash_dump_path_.empty()) {
188 FILE *fp = fopen(crash_dump_path_.c_str(), "a");
189 if (fp) {
190 const time_t now = time(NULL);
191 msg += "\nTimestamp: " + string(ctime_r(&now, ctime_buffer));
192 if (fwrite(&msg[0], 1, msg.length(), fp) != msg.length()) {
193 msg += " (failed to report into crash dump file " + crash_dump_path_
194 + ")";
195 } else {
196 msg += "\n Crash logged also on file: " + crash_dump_path_ + "\n";
197 }
198 fclose(fp);
199 } else {
200 msg += " (failed to open crash dump file " + crash_dump_path_ + ")";
201 }
202 }
203 LogCvmfs(kLogMonitor, kLogSyslogErr, "%s", msg.c_str());
204 }
205
206 /**
207 * Reads from the file descriptor until the specific gdb prompt is reached or
208 * the pipe gets broken.
209 *
210 * @param fd_pipe the file descriptor of the pipe to be read
211 * @return the data read from the pipe
212 */
213 string Watchdog::ReadUntilGdbPrompt(int fd_pipe) {
214 #ifdef __APPLE__
215 static const string gdb_prompt = "(lldb)";
216 #else
217 static const string gdb_prompt = "\n(gdb) ";
218 #endif
219
220 string result;
221 char mini_buffer;
222 int chars_io;
223 unsigned int ring_buffer_pos = 0;
224
225 // read from stdout of gdb until gdb prompt occurs --> (gdb)
226 while (1) {
227 chars_io = read(fd_pipe, &mini_buffer, 1);
228
229 // in case something goes wrong...
230 if (chars_io <= 0)
231 break;
232
233 result += mini_buffer;
234
235 // find the gdb_prompt in the stdout data
236 if (mini_buffer == gdb_prompt[ring_buffer_pos]) {
237 ++ring_buffer_pos;
238 if (ring_buffer_pos == gdb_prompt.size()) {
239 break;
240 }
241 } else {
242 ring_buffer_pos = 0;
243 }
244 }
245
246 return result;
247 }
248
249
250 /**
251 * Generates useful information from the backtrace log in the pipe.
252 */
253 string Watchdog::ReportStacktrace() {
254 CrashData crash_data;
255 if (!pipe_watchdog_->TryRead<CrashData>(&crash_data)) {
256 return "failed to read crash data (" + StringifyInt(errno) + ")";
257 }
258
259 string debug = "--\n";
260 debug += "Signal: " + StringifyInt(crash_data.signal);
261 debug += ", errno: " + StringifyInt(crash_data.sys_errno);
262 debug += ", version: " + string(CVMFS_VERSION);
263 debug += ", PID: " + StringifyInt(crash_data.pid) + "\n";
264 debug += "Executable path: " + exe_path_ + "\n";
265
266 debug += GenerateStackTrace(crash_data.pid);
267
268 // Give the dying process the finishing stroke
269 if (kill(crash_data.pid, SIGKILL) != 0) {
270 debug += "Failed to kill cvmfs client! (";
271 switch (errno) {
272 case EINVAL:
273 debug += "invalid signal";
274 break;
275 case EPERM:
276 debug += "permission denied";
277 break;
278 case ESRCH:
279 debug += "no such process";
280 break;
281 default:
282 debug += "unknown error " + StringifyInt(errno);
283 }
284 debug += ")\n\n";
285 }
286
287 return debug;
288 }
289
290
291 void Watchdog::ReportSignalAndContinue(int sig, siginfo_t *siginfo,
292 void * /* context */) {
293 LogCvmfs(kLogMonitor, kLogSyslogErr,
294 "watchdog: received unexpected signal %d from PID %d / UID %d", sig,
295 siginfo->si_pid, siginfo->si_uid);
296 }
297
298
299 void Watchdog::SendTrace(int sig, siginfo_t *siginfo, void *context) {
300 const int send_errno = errno;
301 if (platform_spinlock_trylock(&Me()->lock_handler_) != 0) {
302 // Concurrent call, wait for the first one to exit the process
303 while (true) {
304 }
305 }
306
307 // Set the original signal handler for the raised signal in
308 // SIGQUIT (watchdog process will raise SIGQUIT)
309 (void)sigaction(SIGQUIT, &(Me()->old_signal_handlers_[sig]), NULL);
310
311 // Inform the watchdog that CernVM-FS crashed
312 if (!Me()->pipe_watchdog_->Write(ControlFlow::kProduceStacktrace)) {
313 _exit(1);
314 }
315
316 // Send crash information to the watchdog
317 CrashData crash_data;
318 crash_data.signal = sig;
319 crash_data.sys_errno = send_errno;
320 crash_data.pid = getpid();
321 if (!Me()->pipe_watchdog_->Write<CrashData>(crash_data)) {
322 _exit(1);
323 }
324
325 // Do not die before the stack trace was generated
326 // kill -SIGQUIT <pid> will finish this
327 int counter = 0;
328 while (true) {
329 SafeSleepMs(100);
330 // quit anyway after 30 seconds
331 if (++counter == 300) {
332 LogCvmfs(kLogCvmfs, kLogSyslogErr, "stack trace generation failed");
333 // Last attempt to log something useful
334 #if defined(CVMFS_FUSE_MODULE)
335 LogCvmfs(kLogCvmfs, kLogSyslogErr, "Signal %d, errno %d", sig,
336 send_errno);
337 void *addr[kMaxBacktrace];
338 // Note: this doesn't work due to the signal stack on OS X (it works on
339 // Linux). Since anyway lldb is supposed to produce the backtrace, we
340 // consider it more important to protect cvmfs against stack overflows.
341 const int num_addr = backtrace(addr, kMaxBacktrace);
342 char **symbols = backtrace_symbols(addr, num_addr);
343 string backtrace = "Backtrace (" + StringifyInt(num_addr)
344 + " symbols):\n";
345 for (int i = 0; i < num_addr; ++i)
346 backtrace += string(symbols[i]) + "\n";
347 LogCvmfs(kLogCvmfs, kLogSyslogErr, "%s", backtrace.c_str());
348 LogCvmfs(kLogCvmfs, kLogSyslogErr, "address of g_cvmfs_exports: %p",
349 &g_cvmfs_exports);
350 #endif
351
352 _exit(1);
353 }
354 }
355
356 _exit(1);
357 }
358
359
360 /**
361 * Sets the signal handlers of the current process according to the ones
362 * defined in the given SigactionMap.
363 *
364 * @param signal_handlers a map of SIGNAL -> struct sigaction
365 * @return a SigactionMap containing the old handlers
366 */
367 49 Watchdog::SigactionMap Watchdog::SetSignalHandlers(
368 const SigactionMap &signal_handlers) {
369 49 SigactionMap old_signal_handlers;
370 49 SigactionMap::const_iterator i = signal_handlers.begin();
371 49 const SigactionMap::const_iterator iend = signal_handlers.end();
372
2/2
✓ Branch 1 taken 637 times.
✓ Branch 2 taken 49 times.
686 for (; i != iend; ++i) {
373 struct sigaction old_signal_handler;
374
1/2
✗ Branch 3 not taken.
✓ Branch 4 taken 637 times.
637 if (sigaction(i->first, &i->second, &old_signal_handler) != 0) {
375 PANIC(NULL);
376 }
377
1/2
✓ Branch 2 taken 637 times.
✗ Branch 3 not taken.
637 old_signal_handlers[i->first] = old_signal_handler;
378 }
379
380 98 return old_signal_handlers;
381 }
382
383
384 /**
385 * Fork the watchdog process and put it on hold until Spawn() is called.
386 */
387 49 void Watchdog::Fork() {
388
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 Pipe<kPipeWatchdogPid> pipe_pid;
389
3/6
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 49 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 49 times.
✗ Branch 8 not taken.
49 pipe_watchdog_ = new Pipe<kPipeWatchdog>();
390
3/6
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 49 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 49 times.
✗ Branch 8 not taken.
49 pipe_listener_ = new Pipe<kPipeWatchdogSupervisor>();
391
392 pid_t pid;
393 int statloc;
394
1/3
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 49 times.
49 switch (pid = fork()) {
395 case -1:
396 PANIC(NULL);
397 case 0:
398 // Double fork to avoid zombie
399 switch (fork()) {
400 case -1:
401 _exit(1);
402 case 0: {
403 pipe_watchdog_->CloseWriteFd();
404 Daemonize();
405 // send the watchdog PID to the supervisee
406 49 const pid_t watchdog_pid = getpid();
407
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 pipe_pid.Write(watchdog_pid);
408
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 pipe_pid.CloseWriteFd();
409 // Close all unused file descriptors
410 // close also usyslog, only get it back if necessary
411 // string usyslog_save = GetLogMicroSyslog();
412
1/3
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
49 const string debuglog_save = GetLogDebugFile();
413
2/4
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 49 times.
✗ Branch 6 not taken.
49 SetLogDebugFile("");
414
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 const string usyslog_save = GetLogMicroSyslog();
415
2/4
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 49 times.
✗ Branch 6 not taken.
49 SetLogMicroSyslog("");
416 // Gracefully close the syslog before closing all fds. The next call
417 // to syslog will reopen it.
418
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 closelog();
419 // Let's keep stdin, stdout, stderr open at /dev/null (daemonized)
420 // in order to prevent accidental outputs from messing with another
421 // file descriptor
422 49 std::set<int> preserve_fds;
423
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 preserve_fds.insert(0);
424
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 preserve_fds.insert(1);
425
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 preserve_fds.insert(2);
426
1/2
✓ Branch 3 taken 49 times.
✗ Branch 4 not taken.
49 preserve_fds.insert(pipe_watchdog_->GetReadFd());
427
1/2
✓ Branch 3 taken 49 times.
✗ Branch 4 not taken.
49 preserve_fds.insert(pipe_listener_->GetWriteFd());
428
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 CloseAllFildes(preserve_fds);
429
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 SetLogMicroSyslog(usyslog_save); // no-op if usyslog not used
430
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 SetLogDebugFile(debuglog_save); // no-op if debug log not used
431
432
2/4
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 49 times.
49 if (WaitForSupervisee())
433 Supervise();
434
435
1/2
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
49 pipe_watchdog_->CloseReadFd();
436
1/2
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
49 pipe_listener_->CloseWriteFd();
437 49 exit(0);
438 }
439 default:
440 _exit(0);
441 }
442 49 default:
443
1/2
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
49 pipe_watchdog_->CloseReadFd();
444
1/2
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
49 pipe_listener_->CloseWriteFd();
445
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 pipe_pid.CloseWriteFd();
446
2/4
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 49 times.
49 if (waitpid(pid, &statloc, 0) != pid)
447 PANIC(NULL);
448
2/4
✓ Branch 0 taken 49 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 49 times.
49 if (!WIFEXITED(statloc) || WEXITSTATUS(statloc))
449 PANIC(NULL);
450 }
451
452 // retrieve the watchdog PID from the pipe
453
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 pipe_pid.Read(&watchdog_pid_);
454
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 pipe_pid.CloseReadFd();
455 49 }
456
457
458 49 bool Watchdog::WaitForSupervisee() {
459 // We want broken pipes not to raise a signal but handle the error in the
460 // read/write code
461 49 platform_sighandler_t rv_sig = signal(SIGPIPE, SIG_IGN);
462
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49 times.
49 assert(rv_sig != SIG_ERR);
463
464 // The watchdog is not supposed to receive signals. If it does, report it.
465 struct sigaction sa;
466 49 memset(&sa, 0, sizeof(sa));
467 49 sa.sa_sigaction = ReportSignalAndContinue;
468 49 sa.sa_flags = SA_SIGINFO;
469 49 sigfillset(&sa.sa_mask);
470
471 49 SigactionMap signal_handlers;
472
2/2
✓ Branch 0 taken 637 times.
✓ Branch 1 taken 49 times.
686 for (size_t i = 0; i < sizeof(g_suppressed_signals) / sizeof(int); i++) {
473
1/2
✓ Branch 1 taken 637 times.
✗ Branch 2 not taken.
637 signal_handlers[g_suppressed_signals[i]] = sa;
474 }
475
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 SetSignalHandlers(signal_handlers);
476
477 49 ControlFlow::Flags control_flow = ControlFlow::kUnknown;
478
479
3/4
✓ Branch 2 taken 49 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
✓ Branch 5 taken 1 times.
49 if (!pipe_watchdog_->TryRead(&control_flow)) {
480
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 LogCvmfs(kLogMonitor, kLogDebug, "supervisee canceled watchdog");
481 48 return false;
482 }
483
484
1/3
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
1 switch (control_flow) {
485 1 case ControlFlow::kQuit:
486 1 return false;
487 case ControlFlow::kSupervise:
488 break;
489 default:
490 LogEmergency("Internal error: invalid control flow");
491 return false;
492 }
493
494 size_t size;
495 pipe_watchdog_->Read(&size);
496 crash_dump_path_.resize(size);
497 if (size > 0) {
498 pipe_watchdog_->Read(&crash_dump_path_[0], size);
499
500 const int retval = chdir(GetParentPath(crash_dump_path_).c_str());
501 if (retval != 0) {
502 LogEmergency(std::string("Cannot change to crash dump directory: ")
503 + crash_dump_path_);
504 return false;
505 }
506 crash_dump_path_ = GetFileName(crash_dump_path_);
507 }
508 return true;
509 49 }
510
511 /**
512 * Set up the signal handling and kick off the supervision.
513 */
514 void Watchdog::Spawn(const std::string &crash_dump_path) {
515 // lower restrictions for ptrace
516 if (!platform_allow_ptrace(watchdog_pid_)) {
517 LogCvmfs(kLogMonitor, kLogSyslogWarn,
518 "failed to allow ptrace() for watchdog (PID: %d). "
519 "Post crash stacktrace might not work",
520 watchdog_pid_);
521 }
522
523 // Extra stack for signal handlers
524 const int stack_size = kSignalHandlerStacksize; // 2 MB
525 sighandler_stack_.ss_sp = smalloc(stack_size);
526 sighandler_stack_.ss_size = stack_size;
527 sighandler_stack_.ss_flags = 0;
528 if (sigaltstack(&sighandler_stack_, NULL) != 0)
529 PANIC(NULL);
530
531 // define our crash signal handler
532 struct sigaction sa;
533 memset(&sa, 0, sizeof(sa));
534 sa.sa_sigaction = SendTrace;
535 sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
536 sigfillset(&sa.sa_mask);
537
538 SigactionMap signal_handlers;
539 for (size_t i = 0; i < sizeof(g_crash_signals) / sizeof(int); i++) {
540 signal_handlers[g_crash_signals[i]] = sa;
541 }
542 old_signal_handlers_ = SetSignalHandlers(signal_handlers);
543
544 pipe_terminate_ = new Pipe<kPipeThreadTerminator>();
545 const int retval =
546 pthread_create(&thread_listener_, NULL, MainWatchdogListener, this);
547 assert(retval == 0);
548
549 pipe_watchdog_->Write(ControlFlow::kSupervise);
550 const size_t path_size = crash_dump_path.size();
551 pipe_watchdog_->Write(path_size);
552 if (path_size > 0) {
553 pipe_watchdog_->Write(crash_dump_path.data(), path_size);
554 }
555
556 spawned_ = true;
557 }
558
559
560 void *Watchdog::MainWatchdogListener(void *data) {
561 Watchdog *watchdog = static_cast<Watchdog *>(data);
562 LogCvmfs(kLogMonitor, kLogDebug, "starting watchdog listener");
563
564 struct pollfd watch_fds[2];
565 watch_fds[0].fd = watchdog->pipe_listener_->GetReadFd();
566 watch_fds[0].events = 0; // Only check for POLL[ERR,HUP,NVAL] in revents
567 watch_fds[0].revents = 0;
568 watch_fds[1].fd = watchdog->pipe_terminate_->GetReadFd();
569 watch_fds[1].events = POLLIN | POLLPRI;
570 watch_fds[1].revents = 0;
571 while (true) {
572 const int retval = poll(watch_fds, 2, -1);
573 if (retval < 0) {
574 continue;
575 }
576
577 // Terminate I/O thread
578 if (watch_fds[1].revents)
579 break;
580
581 if (watch_fds[0].revents) {
582 if ((watch_fds[0].revents & POLLERR) || (watch_fds[0].revents & POLLHUP)
583 || (watch_fds[0].revents & POLLNVAL)) {
584 LogCvmfs(kLogMonitor, kLogDebug | kLogSyslogErr,
585 "watchdog disappeared, disabling stack trace reporting "
586 "(revents: %d / %d|%d|%d)",
587 watch_fds[0].revents, POLLERR, POLLHUP, POLLNVAL);
588 watchdog->SetSignalHandlers(watchdog->old_signal_handlers_);
589 PANIC(kLogDebug | kLogSyslogErr, "watchdog disappeared, aborting");
590 }
591 PANIC(NULL);
592 }
593 }
594
595 LogCvmfs(kLogMonitor, kLogDebug, "stopping watchdog listener");
596 return NULL;
597 }
598
599
600 void Watchdog::Supervise() {
601 ControlFlow::Flags control_flow = ControlFlow::kUnknown;
602
603 if (!pipe_watchdog_->TryRead<ControlFlow::Flags>(&control_flow)) {
604 LogEmergency("watchdog: unexpected termination ("
605 + StringifyInt(control_flow) + ")");
606 if (on_crash_)
607 on_crash_();
608 } else {
609 switch (control_flow) {
610 case ControlFlow::kProduceStacktrace:
611 LogEmergency(ReportStacktrace());
612 if (on_crash_)
613 on_crash_();
614 break;
615
616 case ControlFlow::kQuit:
617 break;
618
619 default:
620 LogEmergency("watchdog: unexpected error");
621 break;
622 }
623 }
624 }
625
626
627 49 Watchdog::Watchdog(FnOnCrash on_crash)
628 49 : spawned_(false)
629
1/2
✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
49 , exe_path_(string(platform_getexepath()))
630 49 , watchdog_pid_(0)
631
3/6
✓ Branch 3 taken 49 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 49 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 49 times.
✗ Branch 10 not taken.
98 , on_crash_(on_crash) {
632 49 const int retval = platform_spinlock_init(&lock_handler_, 0);
633
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 49 times.
49 assert(retval == 0);
634 49 memset(&sighandler_stack_, 0, sizeof(sighandler_stack_));
635 49 }
636
637
638 1 Watchdog::~Watchdog() {
639
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (spawned_) {
640 // Reset signal handlers
641 signal(SIGQUIT, SIG_DFL);
642 signal(SIGILL, SIG_DFL);
643 signal(SIGABRT, SIG_DFL);
644 signal(SIGFPE, SIG_DFL);
645 signal(SIGSEGV, SIG_DFL);
646 signal(SIGBUS, SIG_DFL);
647 signal(SIGPIPE, SIG_DFL);
648 signal(SIGXFSZ, SIG_DFL);
649 free(sighandler_stack_.ss_sp);
650 sighandler_stack_.ss_size = 0;
651
652 pipe_terminate_->Write(ControlFlow::kQuit);
653 pthread_join(thread_listener_, NULL);
654 pipe_terminate_->Close();
655 }
656
657 1 pipe_watchdog_->Write(ControlFlow::kQuit);
658 1 pipe_watchdog_->CloseWriteFd();
659 1 pipe_listener_->CloseReadFd();
660
661 1 platform_spinlock_destroy(&lock_handler_);
662 1 LogCvmfs(kLogMonitor, kLogDebug, "monitor stopped");
663 1 instance_ = NULL;
664 1 }
665