11 #include "cvmfs_config.h"
19 #include <sys/resource.h>
20 #include <sys/types.h>
22 #include <sys/ucontext.h>
40 #if defined(CVMFS_FUSE_MODULE)
51 #if defined(CVMFS_FUSE_MODULE)
62 instance_ =
new Watchdog(crash_dump_path);
76 const bool retrievable =
true;
78 result +=
"failed to re-gain root permissions... still give it a try\n";
89 const bool double_fork =
false;
105 ReadUntilGdbPrompt(fd_stdout);
109 const string gdb_cmd =
"bt all\n" "quit\n";
111 const string gdb_cmd =
"thread apply all bt\n" "quit\n";
115 ssize_t nbytes = write(fd_stdin, gdb_cmd.data(), gdb_cmd.length());
116 if ((nbytes < 0) || (
static_cast<unsigned>(nbytes) != gdb_cmd.length())) {
117 result +=
"failed to start gdb/lldb (" +
StringifyInt(nbytes) +
" bytes "
125 result += ReadUntilGdbPrompt(fd_stdout);
127 result += ReadUntilGdbPrompt(fd_stdout) +
"\n\n";
133 while (read(fd_stderr, &cbuf, 1) == 1)
134 result_err.push_back(cbuf);
135 if (!result_err.empty())
136 result +=
"\nError output:\n" + result_err +
"\n";
144 unsigned int timeout = 15;
146 while (timeout > 0 && waitpid(gdb_pid, &statloc, WNOHANG) != gdb_pid) {
153 result +=
"gdb did not exit as expected. sending SIGKILL... ";
154 result += (kill(gdb_pid, SIGKILL) != 0) ?
"failed\n" :
"okay\n";
162 if (instance_ != NULL) {
163 if (!instance_->spawned_)
166 return instance_->watchdog_pid_;
176 char ctime_buffer[32];
178 if (!crash_dump_path_.empty()) {
179 FILE *fp = fopen(crash_dump_path_.c_str(),
"a");
181 time_t now = time(NULL);
182 msg +=
"\nTimestamp: " + string(ctime_r(&now, ctime_buffer));
183 if (fwrite(&msg[0], 1, msg.length(), fp) != msg.length()) {
185 " (failed to report into crash dump file " + crash_dump_path_ +
")";
187 msg +=
"\n Crash logged also on file: " + crash_dump_path_ +
"\n";
191 msg +=
" (failed to open crash dump file " + crash_dump_path_ +
")";
206 static const string gdb_prompt =
"(lldb)";
208 static const string gdb_prompt =
"\n(gdb) ";
214 unsigned int ring_buffer_pos = 0;
218 chars_io = read(fd_pipe, &mini_buffer, 1);
221 if (chars_io <= 0)
break;
223 result += mini_buffer;
226 if (mini_buffer == gdb_prompt[ring_buffer_pos]) {
228 if (ring_buffer_pos == gdb_prompt.size()) {
241 on_crash_ = CleanupOnCrash;
253 if (!pipe_watchdog_->TryRead(&crash_data)) {
254 return "failed to read crash data (" +
StringifyInt(errno) +
")";
257 string debug =
"--\n";
260 debug +=
", version: " + string(VERSION);
262 debug +=
"Executable path: " + exe_path_ +
"\n";
264 debug += GenerateStackTrace(crash_data.
pid);
267 if (kill(crash_data.
pid, SIGKILL) != 0) {
268 debug +=
"Failed to kill cvmfs client! (";
271 debug +=
"invalid signal";
274 debug +=
"permission denied";
277 debug +=
"no such process";
290 int sig, siginfo_t *siginfo,
void * )
293 "watchdog: received unexpected signal %d from PID %d / UID %d",
294 sig, siginfo->si_pid, siginfo->si_uid);
300 int send_errno = errno;
308 (void) sigaction(SIGQUIT, &(Me()->old_signal_handlers_[sig]), NULL);
311 if (!Me()->pipe_watchdog_->Write(ControlFlow::kProduceStacktrace)) {
319 crash_data.
pid = getpid();
320 if (!Me()->pipe_watchdog_->Write(crash_data)) {
330 if (++counter == 300) {
333 #if defined(CVMFS_FUSE_MODULE)
336 void *addr[kMaxBacktrace];
340 int num_addr = backtrace(addr, kMaxBacktrace);
341 char **symbols = backtrace_symbols(addr, num_addr);
342 string backtrace =
"Backtrace (" +
StringifyInt(num_addr) +
344 for (
int i = 0; i < num_addr; ++i)
345 backtrace +=
string(symbols[i]) +
"\n";
370 SigactionMap::const_iterator i = signal_handlers.begin();
371 SigactionMap::const_iterator iend = signal_handlers.end();
372 for (; i != iend; ++i) {
373 struct sigaction old_signal_handler;
374 if (sigaction(i->first, &i->second, &old_signal_handler) != 0) {
377 old_signal_handlers[i->first] = old_signal_handler;
380 return old_signal_handlers;
389 pipe_watchdog_ =
new Pipe();
390 pipe_listener_ =
new Pipe();
394 int max_fd = sysconf(_SC_OPEN_MAX);
396 switch (pid = fork()) {
397 case -1:
PANIC(NULL);
403 close(pipe_watchdog_->write_end);
406 pid_t watchdog_pid = getpid();
407 pipe_pid.
Write(watchdog_pid);
421 for (
int fd = 3; fd < max_fd; fd++) {
422 if (fd == pipe_watchdog_->read_end)
424 if (fd == pipe_listener_->write_end)
437 close(pipe_watchdog_->read_end);
438 close(pipe_listener_->write_end);
439 if (waitpid(pid, &statloc, 0) != pid)
PANIC(NULL);
440 if (!WIFEXITED(statloc) || WEXITSTATUS(statloc))
PANIC(NULL);
445 pipe_pid.
Read(&watchdog_pid_);
451 "failed to allow ptrace() for watchdog (PID: %d). "
452 "Post crash stacktrace might not work",
457 int stack_size = kSignalHandlerStacksize;
458 sighandler_stack_.ss_sp = smalloc(stack_size);
459 sighandler_stack_.ss_size = stack_size;
460 sighandler_stack_.ss_flags = 0;
461 if (sigaltstack(&sighandler_stack_, NULL) != 0)
466 memset(&sa, 0,
sizeof(sa));
467 sa.sa_sigaction = SendTrace;
468 sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
469 sigfillset(&sa.sa_mask);
472 signal_handlers[SIGQUIT] = sa;
473 signal_handlers[SIGILL] = sa;
474 signal_handlers[SIGABRT] = sa;
475 signal_handlers[SIGFPE] = sa;
476 signal_handlers[SIGSEGV] = sa;
477 signal_handlers[SIGBUS] = sa;
478 signal_handlers[SIGPIPE] = sa;
479 signal_handlers[SIGXFSZ] = sa;
480 old_signal_handlers_ = SetSignalHandlers(signal_handlers);
482 pipe_terminate_ =
new Pipe();
495 struct pollfd watch_fds[2];
497 watch_fds[0].events = 0;
498 watch_fds[0].revents = 0;
500 watch_fds[1].events = POLLIN | POLLPRI;
501 watch_fds[1].revents = 0;
503 int retval = poll(watch_fds, 2, -1);
509 if (watch_fds[1].revents)
512 if (watch_fds[0].revents) {
513 if ((watch_fds[0].revents & POLLERR) ||
514 (watch_fds[0].revents & POLLHUP) ||
515 (watch_fds[0].revents & POLLNVAL))
518 "watchdog disappeared, disabling stack trace reporting "
519 "(revents: %d / %d|%d|%d)",
520 watch_fds[0].revents, POLLERR, POLLHUP, POLLNVAL);
537 signal(SIGPIPE, SIG_IGN);
541 memset(&sa, 0,
sizeof(sa));
542 sa.sa_sigaction = ReportSignalAndTerminate;
543 sa.sa_flags = SA_SIGINFO;
544 sigfillset(&sa.sa_mask);
547 signal_handlers[SIGHUP] = sa;
548 signal_handlers[SIGINT] = sa;
549 signal_handlers[SIGQUIT] = sa;
550 signal_handlers[SIGILL] = sa;
551 signal_handlers[SIGABRT] = sa;
552 signal_handlers[SIGBUS] = sa;
553 signal_handlers[SIGFPE] = sa;
554 signal_handlers[SIGUSR1] = sa;
555 signal_handlers[SIGSEGV] = sa;
556 signal_handlers[SIGUSR2] = sa;
557 signal_handlers[SIGTERM] = sa;
558 signal_handlers[SIGXCPU] = sa;
559 signal_handlers[SIGXFSZ] = sa;
560 SetSignalHandlers(signal_handlers);
564 if (!pipe_watchdog_->TryRead(&control_flow)) {
567 LogEmergency(
"watchdog: unexpected termination (" +
569 if (on_crash_) on_crash_();
571 switch (control_flow) {
572 case ControlFlow::kProduceStacktrace:
573 LogEmergency(ReportStacktrace());
574 if (on_crash_) on_crash_();
577 case ControlFlow::kQuit:
583 LogEmergency(
"watchdog: unexpected error");
588 close(pipe_watchdog_->read_end);
589 close(pipe_listener_->write_end);
595 , crash_dump_path_(crash_dump_path)
598 , pipe_watchdog_(NULL)
599 , pipe_listener_(NULL)
600 , pipe_terminate_(NULL)
612 signal(SIGQUIT, SIG_DFL);
613 signal(SIGILL, SIG_DFL);
614 signal(SIGABRT, SIG_DFL);
615 signal(SIGFPE, SIG_DFL);
616 signal(SIGSEGV, SIG_DFL);
617 signal(SIGBUS, SIG_DFL);
618 signal(SIGPIPE, SIG_DFL);
619 signal(SIGXFSZ, SIG_DFL);
647 static unsigned max_open_files;
648 static bool already_done =
false;
652 unsigned soft_limit = 0;
653 unsigned hard_limit = 0;
658 "Warning: current limits for number of open files are "
660 "CernVM-FS is likely to run out of file descriptors, "
661 "set ulimit -n to at least %lu",
664 max_open_files = soft_limit;
668 return max_open_files;
#define LogCvmfs(source, mask,...)
Watchdog(const std::string &crash_dump_path)
std::map< int, struct sigaction > SigactionMap
std::string GenerateStackTrace(pid_t pid)
std::string ReportStacktrace()
static void ReportSignalAndTerminate(int sig, siginfo_t *siginfo, void *context)
SigactionMap old_signal_handlers_
static void SendTrace(int sig, siginfo_t *siginfo, void *context)
static Watchdog * Create(const std::string &crash_dump_path)
static void * MainWatchdogListener(void *data)
void LogEmergency(std::string msg)
assert((mem||(size==0))&&"Out Of Memory")
void SetLogMicroSyslog(const std::string &filename)
static Watchdog * instance_
bool Write(const T &data)
#define SetLogDebugFile(filename)
stack_t sighandler_stack_
pthread_t thread_listener_
void GetLimitNoFile(unsigned *soft_limit, unsigned *hard_limit)
#define GetLogDebugFile()
std::string ReadUntilGdbPrompt(int fd_pipe)
static void * MainWatchdogListener(void *data)
unsigned GetMaxOpenFiles()
Pipe * pipe_terminate_
Send the terminate signal to the listener.
SigactionMap SetSignalHandlers(const SigactionMap &signal_handlers)
string StringifyInt(const int64_t value)
std::string GetLogMicroSyslog()
bool ExecuteBinary(int *fd_stdin, int *fd_stdout, int *fd_stderr, const std::string &binary_path, const std::vector< std::string > &argv, const bool double_fork, pid_t *child_pid)
void RegisterOnCrash(void(*CleanupOnCrash)(void))
bool SwitchCredentials(const uid_t uid, const gid_t gid, const bool temporarily)
Pipe * pipe_listener_
The supervisee makes sure its watchdog does not die.
void SafeSleepMs(const unsigned ms)
void Block2Nonblock(int filedes)
const unsigned kMinOpenFiles
platform_spinlock lock_handler_