Fawkes API  Fawkes Development Version
ffwatchdog.cpp
1 
2 /***************************************************************************
3  * ffwatchdog.cpp - Fawkes process watchdog
4  *
5  * Created: Thu Mar 31 09:53:53 2011 (RoboCup German Open 2011)
6  * Copyright 2011 Tim Niemueller [www.niemueller.de]
7  *
8  ****************************************************************************/
9 
10 /* This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU Library General Public License for more details.
19  *
20  * Read the full text in the LICENSE.GPL file in the doc directory.
21  */
22 
23 #include <core/exception.h>
24 #include <libdaemon/dfork.h>
25 #include <libdaemon/dlog.h>
26 #include <libdaemon/dpid.h>
27 #include <sys/stat.h>
28 #include <sys/wait.h>
29 
30 #include <cerrno>
31 #include <csignal>
32 #include <cstdio>
33 #include <cstdlib>
34 #include <cstring>
35 #include <unistd.h>
36 
/** Quit request level, written by handle_signal() and read by main().
 * 0 means keep running; main() maps values <= 2 to SIGINT, 3 to SIGTERM and
 * anything higher to SIGKILL when stopping the child. Declared as
 * volatile sig_atomic_t because it is modified from a signal handler. */
volatile sig_atomic_t g_quit = 0;
/** Force-quit flag; not referenced by the watchdog code in this file. */
bool g_force_quit = false;
/** Number of the signal received most recently. */
volatile sig_atomic_t g_signum = SIGINT;

/** Get a fixed description for the signals the watchdog deals with.
 * Stand-in for strsignal(), which must not be called from a signal handler.
 * @param signum signal number
 * @return statically allocated description string
 */
static const char *
ffwatchdog_signal_description(int signum)
{
	switch (signum) {
	case SIGINT: return "Interrupt";
	case SIGTERM: return "Terminated";
	case SIGKILL: return "Killed";
	case SIGUSR1: return "User defined signal 1";
	case SIGUSR2: return "User defined signal 2";
	default: return "Unknown";
	}
}

/** Signal handler recording shutdown requests.
 * Stores the signal number in g_signum and updates g_quit: every SIGINT
 * raises the level by one, SIGTERM sets it to 3, SIGKILL to 4. Other
 * signals only update g_signum.
 * Only async-signal-safe facilities (write(), strlen(), memcpy()) are used
 * and errno is preserved for the interrupted code.
 * @param signum number of the received signal
 */
void
handle_signal(int signum)
{
	// write() may modify errno, but the interrupted code (e.g. a waitpid()
	// call) still needs to see its own error value afterwards.
	const int saved_errno = errno;

	// Assemble the message first so it is emitted with a single write().
	char              msg[64];
	size_t            len     = 0;
	const char *const parts[] = {"Received ", ffwatchdog_signal_description(signum), " signal\n"};
	for (size_t i = 0; i < sizeof(parts) / sizeof(parts[0]); ++i) {
		const size_t part_len = strlen(parts[i]);
		if (len + part_len > sizeof(msg)) {
			break;
		}
		memcpy(msg + len, parts[i], part_len);
		len += part_len;
	}
	const ssize_t written = write(STDOUT_FILENO, msg, len);
	(void)written; // nothing sensible can be done about a failed write here

	g_signum = signum;
	switch (signum) {
	case SIGINT: g_quit = g_quit + 1; break; // sigint escalates
	case SIGTERM: g_quit = 3; break;
	case SIGKILL: g_quit = 4; break; // SIGKILL cannot be caught; kept for completeness
	default: break;
	}

	errno = saved_errno;
}
53 
/** Print usage instructions.
 * Writes the command line synopsis and the list of all options understood
 * by main() to stdout.
 * @param progname program name, inserted into the synopsis line
 */
void
usage(const char *progname)
{
	printf("Usage: %s [options] <progfile> [args...]\n"
	       "progfile full absolute path to executable\n"
	       "args any number of arguments, passed to program as-is\n\n"
	       "where [options] passed in before <progfile> are one or more of:\n"
	       " -D[pid file] Run daemonized in the background, pid file is optional,\n"
	       " defaults to /var/run/ffwatchdog_basename.pid, must be absolute path.\n"
	       " -D[pid file] -k Kill a daemonized process running in the background,\n"
	       " pid file is optional as above.\n"
	       " -D[pid file] -s Check status of daemon.\n"
	       // -v is accepted by the option parser in main(), so list it here too
	       " -v Print the result of the status check (-s) to stdout.\n"
	       " -h Show help instructions.\n\n",
	       progname);
}
72 
73 pid_t
74 fork_and_exec(int argc, char **argv, int prog_start)
75 {
76  pid_t pid = fork();
77  if (pid == -1) {
78  // error
79  printf("Forking for new process failed: %s\n", strerror(errno));
80  throw fawkes::Exception(errno, "Forking for new process failed: %s");
81  } else if (pid == 0) {
82  // child
83  setsid();
84  signal(SIGINT, SIG_IGN);
85  if (execve(argv[prog_start], &argv[prog_start], environ) == -1) {
86  printf("Failed to execute %s, exited with %i: %s\n",
87  argv[prog_start],
88  errno,
89  strerror(errno));
90  exit(-1);
91  }
92  }
93 
94  return pid;
95 }
96 
97 void
98 daemonize_cleanup()
99 {
100  daemon_retval_send(-1);
101  daemon_retval_done();
102  daemon_pid_file_remove();
103 }
104 
105 pid_t
106 daemonize(int argc, char **argv)
107 {
108  pid_t pid;
109  mode_t old_umask = umask(0);
110 
111  // Prepare for return value passing
112  daemon_retval_init();
113 
114  // Do the fork
115  if ((pid = daemon_fork()) < 0) {
116  return -1;
117 
118  } else if (pid) { // the parent
119  int ret;
120 
121  // Wait for 20 seconds for the return value passed from the daemon process
122  if ((ret = daemon_retval_wait(20)) < 0) {
123  daemon_log(LOG_ERR, "Could not recieve return value from daemon process.");
124  return -1;
125  }
126 
127  if (ret != 0) {
128  daemon_log(LOG_ERR, "*** Daemon startup failed, see syslog for details. ***");
129  switch (ret) {
130  case 1: daemon_log(LOG_ERR, "Daemon failed to close file descriptors"); break;
131  case 2: daemon_log(LOG_ERR, "Daemon failed to create PID file"); break;
132  }
133  return -1;
134  } else {
135  return pid;
136  }
137 
138  } else { // the daemon
139 #ifdef DAEMON_CLOSE_ALL_AVAILABLE
140  if (daemon_close_all(-1) < 0) {
141  daemon_log(LOG_ERR, "Failed to close all file descriptors: %s", strerror(errno));
142  // Send the error condition to the parent process
143  daemon_retval_send(1);
144  return -1;
145  }
146 #endif
147 
148  // Create the PID file
149  if (daemon_pid_file_create() < 0) {
150  printf("Could not create PID file (%s).", strerror(errno));
151  daemon_log(LOG_ERR, "Could not create PID file (%s).", strerror(errno));
152 
153  // Send the error condition to the parent process
154  daemon_retval_send(2);
155  return -1;
156  }
157 
158  // Send OK to parent process
159  daemon_retval_send(0);
160 
161  daemon_log(LOG_INFO, "Sucessfully started");
162 
163  umask(old_umask);
164  return 0;
165  }
166 }
167 
/** Path of the PID file selected on the command line.
 * Kept at file scope because the libdaemon PID file callback takes no
 * arguments and therefore has to read the path from a global. */
const char *ffwatchdog_pid_file = NULL;

/** PID file name callback handed to libdaemon.
 * @return the path currently stored in ffwatchdog_pid_file
 */
const char *
ffwatchdog_daemon_pid_file_proc()
{
	const char *const configured_path = ffwatchdog_pid_file;
	return configured_path;
}
180 
181 /** Watchdog main.
182  * @param argc argument count
183  * @param argv arguments
184  */
185 int
186 main(int argc, char **argv)
187 {
188  if (argc < 2) {
189  usage(argv[0]);
190  exit(1);
191  }
192 
193  bool arg_verbose = false;
194  bool arg_daemonize = false;
195  bool arg_daemon_kill = false;
196  bool arg_daemon_status = false;
197  const char *daemon_pid_file = NULL;
198 
199  int prog_start;
200  for (prog_start = 1; prog_start < argc; ++prog_start) {
201  if (argv[prog_start][0] == '-') {
202  // argument starts
203  char param = argv[prog_start][1];
204  if (param == '-') {
205  ++prog_start;
206  break;
207  } else {
208  if (param == 'D') {
209  arg_daemonize = true;
210  daemon_pid_file = NULL;
211  if (strlen(&argv[prog_start][1]) > 1) {
212  daemon_pid_file = &argv[prog_start][2];
213  }
214  } else if (param == 'k') {
215  arg_daemon_kill = true;
216  } else if (param == 's') {
217  arg_daemon_status = true;
218  } else if (param == 'v') {
219  arg_verbose = true;
220  } else if (param == 'h') {
221  usage(argv[0]);
222  exit(0);
223  } else {
224  printf("Unknown argument '%c'\n", param);
225  usage(argv[0]);
226  exit(3);
227  }
228  }
229  } else {
230  break;
231  }
232  }
233 
234  if (prog_start >= argc) {
235  usage(argv[0]);
236  exit(1);
237  }
238 
239  if (access(argv[prog_start], X_OK) != 0) {
240  printf("Cannot execute '%s': %s\n\n", argv[1], strerror(errno));
241  usage(argv[0]);
242  exit(2);
243  }
244 
245  pid_t dpid;
246 
247  char *daemon_ident = NULL;
248 
249  if (arg_daemonize) {
250  // Set identification string for the daemon for both syslog and PID file
251 
252  char *argv_copy = strdup(argv[prog_start]);
253  if (asprintf(&daemon_ident, "ffwatchdog_%s", basename(argv_copy)) == -1) {
254  free(argv_copy);
255  printf("Failed to create daemon ident, not enough memory\n");
256  exit(5);
257  }
258  free(argv_copy);
259  daemon_pid_file_ident = daemon_log_ident = daemon_ident;
260  if (daemon_pid_file != NULL) {
261  ffwatchdog_pid_file = daemon_pid_file;
262  daemon_pid_file_proc = ffwatchdog_daemon_pid_file_proc;
263  }
264 
265  // We should daemonize, check if we were called to kill a daemonized copy
266  if (arg_daemon_kill) {
267  // Check that the daemon is not run twice a the same time
268  if ((dpid = daemon_pid_file_is_running()) < 0) {
269  daemon_log(LOG_ERR, "Watchdog daemon for %s not running.", argv[prog_start]);
270  return 1;
271  }
272 
273  // Kill daemon with SIGINT
274  int ret;
275  if ((ret = daemon_pid_file_kill_wait(SIGINT, 5)) < 0) {
276  daemon_log(LOG_WARNING, "Failed to kill watchdog daemon for %s", argv[prog_start]);
277  }
278  return (ret < 0) ? 1 : 0;
279  }
280 
281  if (arg_daemon_status) {
282  // Check daemon status
283  if (daemon_pid_file_is_running() < 0) {
284  if (arg_verbose) {
285  printf("Watchdog daemon for %s is not running\n", argv[prog_start]);
286  }
287  return 1;
288  } else {
289  if (arg_verbose) {
290  printf("Watchdog daemon for %s is running\n", argv[prog_start]);
291  }
292  return 0;
293  }
294  }
295 
296  // Check that the daemon is not run twice a the same time
297  if ((dpid = daemon_pid_file_is_running()) >= 0) {
298  daemon_log(LOG_ERR,
299  "Watchdog daemon for %s already running on (PID %u)",
300  argv[prog_start],
301  dpid);
302  return 201;
303  }
304 
305  dpid = daemonize(argc, argv);
306  if (dpid < 0) {
307  daemonize_cleanup();
308  return 201;
309  } else if (dpid) {
310  // parent
311  return 0;
312  } // else child, continue as usual
313  }
314 
315  struct sigaction sa;
316  sa.sa_handler = handle_signal;
317  sigemptyset(&sa.sa_mask);
318  sa.sa_flags = 0;
319  sigaction(SIGINT, &sa, NULL);
320  sigaction(SIGKILL, &sa, NULL);
321  sigaction(SIGTERM, &sa, NULL);
322  sigaction(SIGUSR1, &sa, NULL);
323  sigaction(SIGUSR2, &sa, NULL);
324 
325  pid_t pid = -1;
326  while (!g_quit) {
327  pid = fork_and_exec(argc, argv, prog_start);
328 
329  while (pid != -1 && !g_quit) {
330  int status = 0;
331  pid_t cpid = waitpid(pid, &status, WUNTRACED | WCONTINUED);
332  printf("Wait returned\n");
333 
334  if (cpid == -1) {
335  printf("Failed to wait for child: %s\n", strerror(errno));
336  } else if (WIFEXITED(status)) {
337  printf("%i|%s exited, status=%d\n", cpid, argv[prog_start], WEXITSTATUS(status));
338  pid = -1;
339  } else if (WIFSIGNALED(status)) {
340  printf("%i|%s killed by signal %s\n", cpid, argv[prog_start], strsignal(WTERMSIG(status)));
341  pid = -1;
342  } else if (WIFSTOPPED(status)) {
343  printf("%i|%s stopped by signal %s\n", cpid, argv[prog_start], strsignal(WSTOPSIG(status)));
344  pid = -1;
345  } else if (WIFCONTINUED(status)) {
346  printf("%i|%s continued\n", cpid, argv[prog_start]);
347  }
348  }
349  }
350 
351  if (pid != -1) {
352  int last_quit = 0;
353  printf("Stopping child. Press Ctrl-C again to escalate.\n");
354 
355  for (unsigned int i = 0; i < 600; ++i) {
356  if (last_quit != g_quit) {
357  int signum;
358  if (g_quit <= 2) {
359  signum = SIGINT;
360  } else if (g_quit == 3) {
361  signum = SIGTERM;
362  } else {
363  signum = SIGKILL;
364  }
365 
366  printf("Killing %s with signal %s\n", argv[prog_start], strsignal(signum));
367  if (kill(pid, signum) == -1) {
368  printf("Failed to kill %s: %s\n", argv[prog_start], strerror(errno));
369  }
370  }
371  last_quit = g_quit;
372 
373  usleep(10000);
374  int status;
375  int rv = waitpid(pid, &status, WNOHANG);
376  if (rv == -1) {
377  if (errno == EINTR)
378  continue;
379  if (errno == ECHILD) {
380  pid = -1;
381  break;
382  }
383  } else if (rv > 0) {
384  pid = -1;
385  break;
386  }
387  if (i >= 300)
388  g_quit = 2;
389  if (i >= 500)
390  g_quit = 3;
391  }
392  }
393 
394  if (arg_daemonize) {
395  daemonize_cleanup();
396  }
397 
398  return 0;
399 }
fawkes::Exception
Base class for exceptions in Fawkes.
Definition: exception.h:36