GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/monitor.cc
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 85 313 27.2%
Branches: 60 500 12.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * This module forks a watchdog process that listens on
4 * a pipe and prints a stack trace into syslog when cvmfs
6 * fails.
7 *
8 * Also, it handles getting and setting the maximum number of file descriptors.
9 */
10
11 #include "cvmfs_config.h"
12 #include "monitor.h"
13
14 #include <errno.h>
15 #include <execinfo.h>
16 #include <poll.h>
17 #include <pthread.h>
18 #include <signal.h>
19 #include <sys/resource.h>
20 #include <sys/types.h>
21 #ifdef __APPLE__
22 #include <sys/ucontext.h>
23 #else
24 #include <ucontext.h>
25 #endif
26 #include <sys/uio.h>
27 #include <sys/wait.h>
28 #include <syslog.h>
29 #include <time.h>
30 #include <unistd.h>
31
32 #include <cassert>
33 #include <cstdio>
34 #include <cstdlib>
35 #include <cstring>
36 #include <map>
37 #include <set>
38 #include <string>
39 #include <vector>
40
41 #if defined(CVMFS_FUSE_MODULE)
42 #include "cvmfs.h"
43 #endif
44 #include "util/exception.h"
45 #include "util/logging.h"
46 #include "util/platform.h"
47 #include "util/posix.h"
48 #include "util/smalloc.h"
49 #include "util/string.h"
50
51 // Used for address offset calculation
52 #if defined(CVMFS_FUSE_MODULE)
53 extern loader::CvmfsExports *g_cvmfs_exports;
54 #endif
55
56 using namespace std; // NOLINT
57
58 Watchdog *Watchdog::instance_ = NULL;
59
60 int Watchdog::g_suppressed_signals[] = { SIGHUP, SIGINT, SIGQUIT,
61 SIGILL, SIGABRT, SIGBUS,
62 SIGFPE, SIGUSR1, SIGSEGV,
63 SIGUSR2, SIGTERM, SIGXCPU,
64 SIGXFSZ};
65
66 int Watchdog::g_crash_signals[] = { SIGQUIT, SIGILL, SIGABRT,
67 SIGFPE, SIGSEGV, SIGBUS,
68 SIGPIPE, SIGXFSZ };
69
70 1 Watchdog *Watchdog::Create(FnOnCrash on_crash) {
71
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 assert(instance_ == NULL);
72
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 instance_ = new Watchdog(on_crash);
73 1 instance_->Fork();
74 1 return instance_;
75 }
76
77
78 /**
79 * Uses an external shell and gdb to create a full stack trace of the dying
80 * process. The same shell is used to force-quit the client afterwards.
81 */
82 string Watchdog::GenerateStackTrace(pid_t pid) {
83 int retval;
84 string result = "";
85
86 // re-gain root permissions to allow ptrace of the dying cvmfs2 process
87 const bool retrievable = true;
88 if (!SwitchCredentials(0, getgid(), retrievable)) {
89 result += "failed to re-gain root permissions... still give it a try\n";
90 }
91
92 // run gdb and attach to the dying process
93 int fd_stdin;
94 int fd_stdout;
95 int fd_stderr;
96 vector<string> argv;
97 argv.push_back("-p");
98 argv.push_back(StringifyInt(pid));
99 pid_t gdb_pid = 0;
100 const bool double_fork = false;
101 retval = ExecuteBinary(&fd_stdin,
102 &fd_stdout,
103 &fd_stderr,
104 #ifdef __APPLE__
105 "lldb",
106 #else
107 "gdb",
108 #endif
109 argv,
110 double_fork,
111 &gdb_pid);
112 assert(retval);
113
114
115 // Skip the gdb startup output
116 ReadUntilGdbPrompt(fd_stdout);
117
118 // Send stacktrace command to gdb
119 #ifdef __APPLE__
120 const string gdb_cmd = "bt all\n" "quit\n";
121 #else
122 const string gdb_cmd = "thread apply all bt\n" "quit\n";
123 #endif
124 // The execve can have failed, which can't be detected in ExecuteBinary.
125 // Instead, writing to the pipe will fail.
126 ssize_t nbytes = write(fd_stdin, gdb_cmd.data(), gdb_cmd.length());
127 if ((nbytes < 0) || (static_cast<unsigned>(nbytes) != gdb_cmd.length())) {
128 result += "failed to start gdb/lldb (" + StringifyInt(nbytes) + " bytes "
129 "written, errno " + StringifyInt(errno) + ")\n";
130 return result;
131 }
132
133 // Read the stack trace from the stdout of our gdb process
134 #ifdef __APPLE__
135 // lldb has one more prompt
136 result += ReadUntilGdbPrompt(fd_stdout);
137 #endif
138 result += ReadUntilGdbPrompt(fd_stdout) + "\n\n";
139
140 // Check for output on stderr
141 string result_err;
142 Block2Nonblock(fd_stderr);
143 char cbuf;
144 while (read(fd_stderr, &cbuf, 1) == 1)
145 result_err.push_back(cbuf);
146 if (!result_err.empty())
147 result += "\nError output:\n" + result_err + "\n";
148
149 // Close the connection to the terminated gdb process
150 close(fd_stderr);
151 close(fd_stdout);
152 close(fd_stdin);
153
154 // Make sure gdb has terminated (wait for it for a short while)
155 unsigned int timeout = 15;
156 int statloc;
157 while (timeout > 0 && waitpid(gdb_pid, &statloc, WNOHANG) != gdb_pid) {
158 --timeout;
159 SafeSleepMs(1000);
160 }
161
162 // if the timeout has expired, gdb is probably hanging... we need to kill it
163 if (timeout == 0) {
164 result += "gdb did not exit as expected. sending SIGKILL... ";
165 result += (kill(gdb_pid, SIGKILL) != 0) ? "failed\n" : "okay\n";
166 }
167
168 return result;
169 }
170
171
172 pid_t Watchdog::GetPid() {
173 if (instance_ != NULL) {
174 return instance_->watchdog_pid_;
175 }
176 return getpid();
177 }
178
179 /**
180 * Log a string to syslog and into the crash dump file.
181 * We expect ideally nothing to be logged, so that file is created on demand.
182 */
183 void Watchdog::LogEmergency(string msg) {
184 char ctime_buffer[32];
185
186 if (!crash_dump_path_.empty()) {
187 FILE *fp = fopen(crash_dump_path_.c_str(), "a");
188 if (fp) {
189 time_t now = time(NULL);
190 msg += "\nTimestamp: " + string(ctime_r(&now, ctime_buffer));
191 if (fwrite(&msg[0], 1, msg.length(), fp) != msg.length()) {
192 msg +=
193 " (failed to report into crash dump file " + crash_dump_path_ + ")";
194 } else {
195 msg += "\n Crash logged also on file: " + crash_dump_path_ + "\n";
196 }
197 fclose(fp);
198 } else {
199 msg += " (failed to open crash dump file " + crash_dump_path_ + ")";
200 }
201 }
202 LogCvmfs(kLogMonitor, kLogSyslogErr, "%s", msg.c_str());
203 }
204
205 /**
206 * Reads from the file descriptor until the specific gdb prompt is reached or
207 * the pipe gets broken.
208 *
209 * @param fd_pipe the file descriptor of the pipe to be read
210 * @return the data read from the pipe
211 */
212 string Watchdog::ReadUntilGdbPrompt(int fd_pipe) {
213 #ifdef __APPLE__
214 static const string gdb_prompt = "(lldb)";
215 #else
216 static const string gdb_prompt = "\n(gdb) ";
217 #endif
218
219 string result;
220 char mini_buffer;
221 int chars_io;
222 unsigned int ring_buffer_pos = 0;
223
224 // read from stdout of gdb until gdb prompt occurs --> (gdb)
225 while (1) {
226 chars_io = read(fd_pipe, &mini_buffer, 1);
227
228 // in case something goes wrong...
229 if (chars_io <= 0) break;
230
231 result += mini_buffer;
232
233 // find the gdb_prompt in the stdout data
234 if (mini_buffer == gdb_prompt[ring_buffer_pos]) {
235 ++ring_buffer_pos;
236 if (ring_buffer_pos == gdb_prompt.size()) {
237 break;
238 }
239 } else {
240 ring_buffer_pos = 0;
241 }
242 }
243
244 return result;
245 }
246
247
248 /**
249 * Generates useful information from the backtrace log in the pipe.
250 */
251 string Watchdog::ReportStacktrace() {
252 CrashData crash_data;
253 if (!pipe_watchdog_->TryRead<CrashData>(&crash_data)) {
254 return "failed to read crash data (" + StringifyInt(errno) + ")";
255 }
256
257 string debug = "--\n";
258 debug += "Signal: " + StringifyInt(crash_data.signal);
259 debug += ", errno: " + StringifyInt(crash_data.sys_errno);
260 debug += ", version: " + string(VERSION);
261 debug += ", PID: " + StringifyInt(crash_data.pid) + "\n";
262 debug += "Executable path: " + exe_path_ + "\n";
263
264 debug += GenerateStackTrace(crash_data.pid);
265
266 // Give the dying process the finishing stroke
267 if (kill(crash_data.pid, SIGKILL) != 0) {
268 debug += "Failed to kill cvmfs client! (";
269 switch (errno) {
270 case EINVAL:
271 debug += "invalid signal";
272 break;
273 case EPERM:
274 debug += "permission denied";
275 break;
276 case ESRCH:
277 debug += "no such process";
278 break;
279 default:
280 debug += "unknown error " + StringifyInt(errno);
281 }
282 debug += ")\n\n";
283 }
284
285 return debug;
286 }
287
288
289 void Watchdog::ReportSignalAndTerminate(
290 int sig, siginfo_t *siginfo, void * /* context */)
291 {
292 LogCvmfs(kLogMonitor, kLogSyslogErr,
293 "watchdog: received unexpected signal %d from PID %d / UID %d",
294 sig, siginfo->si_pid, siginfo->si_uid);
295 _exit(1);
296 }
297
298
299 void Watchdog::SendTrace(int sig, siginfo_t *siginfo, void *context) {
300 int send_errno = errno;
301 if (platform_spinlock_trylock(&Me()->lock_handler_) != 0) {
302 // Concurrent call, wait for the first one to exit the process
303 while (true) {}
304 }
305
306 // Set the original signal handler for the raised signal in
307 // SIGQUIT (watchdog process will raise SIGQUIT)
308 (void) sigaction(SIGQUIT, &(Me()->old_signal_handlers_[sig]), NULL);
309
310 // Inform the watchdog that CernVM-FS crashed
311 if (!Me()->pipe_watchdog_->Write(ControlFlow::kProduceStacktrace)) {
312 _exit(1);
313 }
314
315 // Send crash information to the watchdog
316 CrashData crash_data;
317 crash_data.signal = sig;
318 crash_data.sys_errno = send_errno;
319 crash_data.pid = getpid();
320 if (!Me()->pipe_watchdog_->Write<CrashData>(crash_data)) {
321 _exit(1);
322 }
323
324 // Do not die before the stack trace has been generated
325 // kill -SIGQUIT <pid> will finish this
326 int counter = 0;
327 while (true) {
328 SafeSleepMs(100);
329 // quit anyway after 30 seconds
330 if (++counter == 300) {
331 LogCvmfs(kLogCvmfs, kLogSyslogErr, "stack trace generation failed");
332 // Last attempt to log something useful
333 #if defined(CVMFS_FUSE_MODULE)
334 LogCvmfs(kLogCvmfs, kLogSyslogErr, "Signal %d, errno %d",
335 sig, send_errno);
336 void *addr[kMaxBacktrace];
337 // Note: this doesn't work due to the signal stack on OS X (it works on
338 // Linux). Since anyway lldb is supposed to produce the backtrace, we
339 // consider it more important to protect cvmfs against stack overflows.
340 int num_addr = backtrace(addr, kMaxBacktrace);
341 char **symbols = backtrace_symbols(addr, num_addr);
342 string backtrace = "Backtrace (" + StringifyInt(num_addr) +
343 " symbols):\n";
344 for (int i = 0; i < num_addr; ++i)
345 backtrace += string(symbols[i]) + "\n";
346 LogCvmfs(kLogCvmfs, kLogSyslogErr, "%s", backtrace.c_str());
347 LogCvmfs(kLogCvmfs, kLogSyslogErr, "address of g_cvmfs_exports: %p",
348 &g_cvmfs_exports);
349 #endif
350
351 _exit(1);
352 }
353 }
354
355 _exit(1);
356 }
357
358
359 /**
360 * Sets the signal handlers of the current process according to the ones
361 * defined in the given SigactionMap.
362 *
363 * @param signal_handlers a map of SIGNAL -> struct sigaction
364 * @return a SigactionMap containing the old handlers
365 */
366 48 Watchdog::SigactionMap Watchdog::SetSignalHandlers(
367 const SigactionMap &signal_handlers)
368 {
369 48 SigactionMap old_signal_handlers;
370 48 SigactionMap::const_iterator i = signal_handlers.begin();
371 48 SigactionMap::const_iterator iend = signal_handlers.end();
372
2/2
✓ Branch 1 taken 624 times.
✓ Branch 2 taken 48 times.
672 for (; i != iend; ++i) {
373 struct sigaction old_signal_handler;
374
1/2
✗ Branch 3 not taken.
✓ Branch 4 taken 624 times.
624 if (sigaction(i->first, &i->second, &old_signal_handler) != 0) {
375 PANIC(NULL);
376 }
377
1/2
✓ Branch 2 taken 624 times.
✗ Branch 3 not taken.
624 old_signal_handlers[i->first] = old_signal_handler;
378 }
379
380 96 return old_signal_handlers;
381 }
382
383
384 /**
385 * Fork the watchdog process and put it on hold until Spawn() is called.
386 */
387 1 void Watchdog::Fork() {
388
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 Pipe<kPipeWatchdogPid> pipe_pid;
389
3/6
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 1 times.
✗ Branch 8 not taken.
1 pipe_watchdog_ = new Pipe<kPipeWatchdog>();
390
3/6
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 1 times.
✗ Branch 8 not taken.
1 pipe_listener_ = new Pipe<kPipeWatchdogSupervisor>();
391
392 pid_t pid;
393 int statloc;
394
1/3
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1 switch (pid = fork()) {
395 case -1: PANIC(NULL);
396 case 0:
397 // Double fork to avoid zombie
398 switch (fork()) {
399 case -1: _exit(1);
400 case 0: {
401 pipe_watchdog_->CloseWriteFd();
402 Daemonize();
403 // send the watchdog PID to the supervisee
404 48 pid_t watchdog_pid = getpid();
405
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 pipe_pid.Write(watchdog_pid);
406
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 pipe_pid.CloseWriteFd();
407 // Close all unused file descriptors
408 // close also usyslog, only get it back if necessary
409 // string usyslog_save = GetLogMicroSyslog();
410
1/3
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 string debuglog_save = GetLogDebugFile();
411
2/4
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 48 times.
✗ Branch 6 not taken.
48 SetLogDebugFile("");
412
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 string usyslog_save = GetLogMicroSyslog();
413
2/4
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 48 times.
✗ Branch 6 not taken.
48 SetLogMicroSyslog("");
414 // Gracefully close the syslog before closing all fds. The next call
415 // to syslog will reopen it.
416
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 closelog();
417 // Let's keep stdin, stdout, stderr open at /dev/null (daemonized)
418 // in order to prevent accidental outputs from messing with another
419 // file descriptor
420 48 std::set<int> preserve_fds;
421
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 preserve_fds.insert(0);
422
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 preserve_fds.insert(1);
423
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 preserve_fds.insert(2);
424
1/2
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
48 preserve_fds.insert(pipe_watchdog_->GetReadFd());
425
1/2
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
48 preserve_fds.insert(pipe_listener_->GetWriteFd());
426
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 CloseAllFildes(preserve_fds);
427
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 SetLogMicroSyslog(usyslog_save); // no-op if usyslog not used
428
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 SetLogDebugFile(debuglog_save); // no-op if debug log not used
429
430
2/4
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
48 if (WaitForSupervisee())
431 Supervise();
432
433
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 pipe_watchdog_->CloseReadFd();
434
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 pipe_listener_->CloseWriteFd();
435 48 exit(0);
436 }
437 default:
438 _exit(0);
439 }
440 1 default:
441
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 pipe_watchdog_->CloseReadFd();
442
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 pipe_listener_->CloseWriteFd();
443
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 pipe_pid.CloseWriteFd();
444
2/4
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
1 if (waitpid(pid, &statloc, 0) != pid) PANIC(NULL);
445
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1 if (!WIFEXITED(statloc) || WEXITSTATUS(statloc)) PANIC(NULL);
446 }
447
448 // retrieve the watchdog PID from the pipe
449
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 pipe_pid.Read(&watchdog_pid_);
450
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 pipe_pid.CloseReadFd();
451 1 }
452
453
454 48 bool Watchdog::WaitForSupervisee() {
455 // We want broken pipes not to raise a signal but handle the error in the
456 // read/write code
457 48 platform_sighandler_t rv_sig = signal(SIGPIPE, SIG_IGN);
458
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
48 assert(rv_sig != SIG_ERR);
459
460 // The watchdog is not supposed to receive signals. If it does, report it.
461 struct sigaction sa;
462 48 memset(&sa, 0, sizeof(sa));
463 48 sa.sa_sigaction = ReportSignalAndTerminate;
464 48 sa.sa_flags = SA_SIGINFO;
465 48 sigfillset(&sa.sa_mask);
466
467 48 SigactionMap signal_handlers;
468
2/2
✓ Branch 0 taken 624 times.
✓ Branch 1 taken 48 times.
672 for (size_t i = 0; i < sizeof(g_suppressed_signals)/sizeof(int); i++) {
469
1/2
✓ Branch 1 taken 624 times.
✗ Branch 2 not taken.
624 signal_handlers[g_suppressed_signals[i]] = sa;
470 }
471
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 SetSignalHandlers(signal_handlers);
472
473 48 ControlFlow::Flags control_flow = ControlFlow::kUnknown;
474
475
3/4
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 47 times.
✓ Branch 5 taken 1 times.
48 if (!pipe_watchdog_->TryRead(&control_flow)) {
476
1/2
✓ Branch 1 taken 47 times.
✗ Branch 2 not taken.
47 LogCvmfs(kLogMonitor, kLogDebug, "supervisee canceled watchdog");
477 47 return false;
478 }
479
480
1/3
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
1 switch (control_flow) {
481 1 case ControlFlow::kQuit:
482 1 return false;
483 case ControlFlow::kSupervise:
484 break;
485 default:
486 LogEmergency("Internal error: invalid control flow");
487 return false;
488 }
489
490 size_t size;
491 pipe_watchdog_->Read(&size);
492 crash_dump_path_.resize(size);
493 if (size > 0) {
494 pipe_watchdog_->Read(&crash_dump_path_[0], size);
495
496 int retval = chdir(GetParentPath(crash_dump_path_).c_str());
497 if (retval != 0) {
498 LogEmergency(std::string("Cannot change to crash dump directory: ") +
499 crash_dump_path_);
500 return false;
501 }
502 crash_dump_path_ = GetFileName(crash_dump_path_);
503 }
504 return true;
505 48 }
506
507 /**
508 * Set up the signal handling and kick off the supervision.
509 */
510 void Watchdog::Spawn(const std::string &crash_dump_path) {
511 // lower restrictions for ptrace
512 if (!platform_allow_ptrace(watchdog_pid_)) {
513 LogCvmfs(kLogMonitor, kLogSyslogWarn,
514 "failed to allow ptrace() for watchdog (PID: %d). "
515 "Post crash stacktrace might not work",
516 watchdog_pid_);
517 }
518
519 // Extra stack for signal handlers
520 int stack_size = kSignalHandlerStacksize; // 2 MB
521 sighandler_stack_.ss_sp = smalloc(stack_size);
522 sighandler_stack_.ss_size = stack_size;
523 sighandler_stack_.ss_flags = 0;
524 if (sigaltstack(&sighandler_stack_, NULL) != 0)
525 PANIC(NULL);
526
527 // define our crash signal handler
528 struct sigaction sa;
529 memset(&sa, 0, sizeof(sa));
530 sa.sa_sigaction = SendTrace;
531 sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
532 sigfillset(&sa.sa_mask);
533
534 SigactionMap signal_handlers;
535 for (size_t i = 0; i < sizeof(g_crash_signals)/sizeof(int); i++) {
536 signal_handlers[g_crash_signals[i]] = sa;
537 }
538 old_signal_handlers_ = SetSignalHandlers(signal_handlers);
539
540 pipe_terminate_ = new Pipe<kPipeThreadTerminator>();
541 int retval =
542 pthread_create(&thread_listener_, NULL, MainWatchdogListener, this);
543 assert(retval == 0);
544
545 pipe_watchdog_->Write(ControlFlow::kSupervise);
546 size_t path_size = crash_dump_path.size();
547 pipe_watchdog_->Write(path_size);
548 if (path_size > 0) {
549 pipe_watchdog_->Write(crash_dump_path.data(), path_size);
550 }
551
552 spawned_ = true;
553 }
554
555
556 void *Watchdog::MainWatchdogListener(void *data) {
557 Watchdog *watchdog = static_cast<Watchdog *>(data);
558 LogCvmfs(kLogMonitor, kLogDebug, "starting watchdog listener");
559
560 struct pollfd watch_fds[2];
561 watch_fds[0].fd = watchdog->pipe_listener_->GetReadFd();
562 watch_fds[0].events = 0; // Only check for POLL[ERR,HUP,NVAL] in revents
563 watch_fds[0].revents = 0;
564 watch_fds[1].fd = watchdog->pipe_terminate_->GetReadFd();
565 watch_fds[1].events = POLLIN | POLLPRI;
566 watch_fds[1].revents = 0;
567 while (true) {
568 int retval = poll(watch_fds, 2, -1);
569 if (retval < 0) {
570 continue;
571 }
572
573 // Terminate I/O thread
574 if (watch_fds[1].revents)
575 break;
576
577 if (watch_fds[0].revents) {
578 if ((watch_fds[0].revents & POLLERR) ||
579 (watch_fds[0].revents & POLLHUP) ||
580 (watch_fds[0].revents & POLLNVAL))
581 {
582 LogCvmfs(kLogMonitor, kLogDebug | kLogSyslogErr,
583 "watchdog disappeared, disabling stack trace reporting "
584 "(revents: %d / %d|%d|%d)",
585 watch_fds[0].revents, POLLERR, POLLHUP, POLLNVAL);
586 watchdog->SetSignalHandlers(watchdog->old_signal_handlers_);
587 PANIC(kLogDebug | kLogSyslogErr, "watchdog disappeared, aborting");
588 }
589 PANIC(NULL);
590 }
591 }
592
593 LogCvmfs(kLogMonitor, kLogDebug, "stopping watchdog listener");
594 return NULL;
595 }
596
597
598 void Watchdog::Supervise() {
599 ControlFlow::Flags control_flow = ControlFlow::kUnknown;
600
601 if (!pipe_watchdog_->TryRead<ControlFlow::Flags>(&control_flow)) {
602 LogEmergency("watchdog: unexpected termination (" +
603 StringifyInt(control_flow) + ")");
604 if (on_crash_) on_crash_();
605 } else {
606 switch (control_flow) {
607 case ControlFlow::kProduceStacktrace:
608 LogEmergency(ReportStacktrace());
609 if (on_crash_) on_crash_();
610 break;
611
612 case ControlFlow::kQuit:
613 break;
614
615 default:
616 LogEmergency("watchdog: unexpected error");
617 break;
618 }
619 }
620 }
621
622
623 1 Watchdog::Watchdog(FnOnCrash on_crash)
624 1 : spawned_(false)
625
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 , exe_path_(string(platform_getexepath()))
626 1 , watchdog_pid_(0)
627
3/6
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 1 times.
✗ Branch 10 not taken.
2 , on_crash_(on_crash)
628 {
629 1 int retval = platform_spinlock_init(&lock_handler_, 0);
630
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 assert(retval == 0);
631 1 memset(&sighandler_stack_, 0, sizeof(sighandler_stack_));
632 1 }
633
634
635 1 Watchdog::~Watchdog() {
636
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (spawned_) {
637 // Reset signal handlers
638 signal(SIGQUIT, SIG_DFL);
639 signal(SIGILL, SIG_DFL);
640 signal(SIGABRT, SIG_DFL);
641 signal(SIGFPE, SIG_DFL);
642 signal(SIGSEGV, SIG_DFL);
643 signal(SIGBUS, SIG_DFL);
644 signal(SIGPIPE, SIG_DFL);
645 signal(SIGXFSZ, SIG_DFL);
646 free(sighandler_stack_.ss_sp);
647 sighandler_stack_.ss_size = 0;
648
649 pipe_terminate_->Write(ControlFlow::kQuit);
650 pthread_join(thread_listener_, NULL);
651 pipe_terminate_->Close();
652 }
653
654 1 pipe_watchdog_->Write(ControlFlow::kQuit);
655 1 pipe_watchdog_->CloseWriteFd();
656 1 pipe_listener_->CloseReadFd();
657
658 1 platform_spinlock_destroy(&lock_handler_);
659 1 LogCvmfs(kLogMonitor, kLogDebug, "monitor stopped");
660 1 instance_ = NULL;
661 1 }
662