Mini Shell
/*
* Phusion Passenger - https://www.phusionpassenger.com/
* Copyright (c) 2010-2017 Phusion Holding B.V.
*
* "Passenger", "Phusion Passenger" and "Union Station" are registered
* trademarks of Phusion Holding B.V.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
 * Abstract base class for watching agent processes.
 *
 * Typical usage, as implied by the preconditions documented on the methods:
 * call start() to spawn the agent, then beginWatching() to launch a
 * background thread that waits for the agent to exit and restarts it when
 * it crashed. stopWatching() interrupts and joins the watcher threads.
 *
 * Subclasses supply the agent-specific pieces: the executable filename, the
 * startup arguments, and how to interpret the startup info that the agent
 * sends back over its feedback fd.
 */
class AgentWatcher: public boost::enable_shared_from_this<AgentWatcher> {
private:
    /** The watcher thread. NULL if no watcher thread has been created. */
    oxt::thread *thr;

    /**
     * Main function of the watcher thread.
     *
     * Loops until interruption is requested: waits for the agent process to
     * exit, logs why it exited and starts it again. The loop ends when the
     * agent exits with status 0, which is interpreted as a graceful shutdown.
     *
     * Exceptions other than thread interruption are recorded in
     * threadExceptionMessage (and threadExceptionBacktrace when available),
     * after which wo->errorEvent is notified.
     *
     * @param self A shared pointer to this object. It is not used inside the
     *             body; presumably it is only held so that the object stays
     *             alive for as long as the thread runs.
     */
    void threadMain(boost::shared_ptr<AgentWatcher> self) {
        try {
            pid_t pid, ret;
            int status, e;

            while (!boost::this_thread::interruption_requested()) {
                {
                    boost::lock_guard<boost::mutex> l(lock);
                    pid = this->pid;
                }

                // Process can be started before the watcher thread is launched.
                if (pid == 0) {
                    pid = start();
                }
                ret = syscalls::waitpid(pid, &status, 0);
                if (ret == -1 && errno == ECHILD) {
                    /* If the agent is attached to gdb then waitpid()
                     * here can return -1 with errno == ECHILD.
                     * Fallback to kill() polling for checking
                     * whether the agent is alive.
                     *
                     * NOTE(review): status is reset to 0 here, so the
                     * checks below will most likely see this as a clean
                     * exit and stop watching. Confirm that this is intended.
                     */
                    ret = pid;
                    status = 0;
                    P_WARN("waitpid() on " << name() << " (pid=" << pid <<
                        ") returned -1 with " <<
                        "errno = ECHILD, falling back to kill polling");
                    waitpidUsingKillPolling(pid);
                    e = 0;
                } else {
                    // Save errno before any other call can overwrite it.
                    e = errno;
                }

                {
                    boost::lock_guard<boost::mutex> l(lock);
                    this->pid = 0;
                }

                boost::this_thread::disable_interruption di;
                boost::this_thread::disable_syscall_interruption dsi;
                if (ret == -1) {
                    P_WARN(name() << " (pid=" << pid << ") crashed or killed for "
                        "an unknown reason (errno = " <<
                        strerror(e) << "), restarting it...");
                } else if (WIFEXITED(status)) {
                    if (WEXITSTATUS(status) == 0) {
                        /* When the web server is gracefully exiting, it will
                         * tell one or more agents to gracefully exit with exit
                         * status 0. If we see this then it means the watchdog
                         * is gracefully shutting down too and we should stop
                         * watching.
                         */
                        return;
                    } else {
                        P_WARN(name() << " (pid=" << pid <<
                            ") crashed with exit status " <<
                            WEXITSTATUS(status) << ", restarting it...");
                    }
                } else {
                    P_WARN(name() << " (pid=" << pid <<
                        ") crashed with signal " <<
                        getSignalName(WTERMSIG(status)) <<
                        ", restarting it...");
                }

                const char *sleepTime;
                if ((sleepTime = getenv("PASSENGER_AGENT_RESTART_SLEEP")) != NULL) {
                    // sleep() takes an unsigned value, so a negative setting
                    // would turn into an extremely long sleep. Only sleep for
                    // positive durations.
                    int seconds = atoi(sleepTime);
                    if (seconds > 0) {
                        sleep(seconds);
                    }
                }
            }
        } catch (const boost::thread_interrupted &) {
            // Interrupted on request; just let the thread end.
        } catch (const tracable_exception &e) {
            boost::lock_guard<boost::mutex> l(lock);
            threadExceptionMessage = e.what();
            threadExceptionBacktrace = e.backtrace();
            wo->errorEvent.notify();
        } catch (const std::exception &e) {
            boost::lock_guard<boost::mutex> l(lock);
            threadExceptionMessage = e.what();
            wo->errorEvent.notify();
        } catch (...) {
            boost::lock_guard<boost::mutex> l(lock);
            threadExceptionMessage = "Unknown error";
            wo->errorEvent.notify();
        }
    }

protected:
    /** PID of the process we're watching. 0 if no process is started at this time. */
    pid_t pid;

    /** If the watcher thread threw an uncaught exception then its information will
     * be stored here so that the main thread can check whether a watcher encountered
     * an error. These are empty strings if everything is OK.
     */
    string threadExceptionMessage;
    string threadExceptionBacktrace;

    /** The agent process's feedback fd. */
    FileDescriptor feedbackFd;

    /**
     * Lock for protecting the exchange of data between the main thread and
     * the watcher thread.
     */
    mutable boost::mutex lock;

    /** Shared working objects; its errorEvent is notified when the watcher thread fails. */
    WorkingObjectsPtr wo;

    /**
     * Returns the filename of the agent process's executable. This method may be
     * called in a forked child process and may therefore not allocate memory.
     */
    virtual string getExeFilename() const = 0;

    /**
     * This method is to exec() the agent with the right arguments.
     * It is called from within a forked child process, so don't do any dynamic
     * memory allocations in here. It must also not throw any exceptions.
     * It must also preserve the value of errno after exec() is called.
     */
    virtual void execProgram() const {
        // Query the filename only once so that at most one string is
        // created in the forked child.
        const string exeFilename = getExeFilename();
        execl(exeFilename.c_str(),
            exeFilename.c_str(),
            "3",  // feedback fd
            (char *) 0);
    }

    /**
     * This method is to send startup arguments to the agent process through
     * the given file descriptor, which is the agent process's feedback fd.
     * May throw arbitrary exceptions.
     */
    virtual void sendStartupArguments(pid_t pid, FileDescriptor &fd) = 0;

    /**
     * This method is to process the startup info that the agent process has
     * sent back. May throw arbitrary exceptions.
     *
     * @return false if the message in args was not recognized; start()
     *         turns that into a RuntimeException.
     */
    virtual bool processStartupInfo(pid_t pid, FileDescriptor &fd, const vector<string> &args) = 0;

    /**
     * Kill a process (but not its children) with SIGTERM.
     * Does not wait until it has quit.
     */
    static void killAndDontWait(pid_t pid) {
        boost::this_thread::disable_interruption di;
        boost::this_thread::disable_syscall_interruption dsi;
        syscalls::kill(pid, SIGTERM);
    }

    /**
     * Kill a process with SIGKILL, and attempt to kill its children too.
     * Then wait until it has quit.
     */
    static void killProcessGroupAndWait(pid_t pid) {
        boost::this_thread::disable_interruption di;
        boost::this_thread::disable_syscall_interruption dsi;
        // If the process is a process group leader then killing the
        // group will likely kill all its child processes too.
        // Fall back to killing just the process if that fails.
        if (syscalls::killpg(pid, SIGKILL) == -1) {
            syscalls::kill(pid, SIGKILL);
        }
        syscalls::waitpid(pid, NULL, 0);
    }

    /**
     * Behaves like <tt>waitpid(pid, status, WNOHANG)</tt>, but waits at most
     * <em>timeout</em> milliseconds for the process to exit. The process is
     * polled every 10 milliseconds.
     *
     * @return The waitpid() result if it was positive or -1, or 0 if the
     *         timeout elapsed first.
     */
    static int timedWaitPid(pid_t pid, int *status, unsigned long long timeout) {
        Timer<SystemTime::GRAN_10MSEC> timer;
        int ret;

        do {
            ret = syscalls::waitpid(pid, status, WNOHANG);
            if (ret > 0 || ret == -1) {
                return ret;
            } else {
                syscalls::usleep(10000);
            }
        } while (timer.elapsed() < timeout);
        return 0; // timed out
    }

    /**
     * Blocks until <tt>kill(pid, 0)</tt> returns -1, polling every
     * 20 milliseconds. Used as a substitute for waitpid() when waitpid()
     * cannot be used on the process.
     */
    static void waitpidUsingKillPolling(pid_t pid) {
        bool done = false;

        while (!done) {
            int ret = syscalls::kill(pid, 0);
            done = ret == -1;
            if (!done) {
                syscalls::usleep(20000);
            }
        }
    }

public:
    /**
     * Creates a watcher that has no agent process and no watcher thread yet.
     */
    AgentWatcher(const WorkingObjectsPtr &wo)
        : thr(NULL),
          pid(0),
          wo(wo)
        { }

    /** Deletes the watcher thread object, if there is one. */
    virtual ~AgentWatcher() {
        delete thr;
    }

    /**
     * Store information about the started agent process in the given report object.
     * May throw arbitrary exceptions.
     *
     * @pre start() has been called and succeeded.
     */
    virtual void reportAgentStartupResult(Json::Value &report) = 0;

    /** Returns the name of the agent that this class is watching. */
    virtual const char *name() const = 0;

    /**
     * Starts the agent process. May throw arbitrary exceptions.
     *
     * Forks a child that exec()s the agent with a Unix socket as its
     * feedback fd, sends the startup arguments over that socket and reads
     * back the agent's startup info. If anything fails after the fork, the
     * agent's process group is killed before the exception propagates.
     *
     * @return The PID of the started agent process.
     * @throws SystemException fork() failed, communicating with the agent
     *         failed, or the child reported a system error or exec() error.
     * @throws RuntimeException The agent died, froze, or sent an empty,
     *         malformed or unrecognized startup info message.
     */
    virtual pid_t start() {
        boost::this_thread::disable_interruption di;
        boost::this_thread::disable_syscall_interruption dsi;
        // Obtained before forking so that the child can use it in its
        // error message without calling getExeFilename() itself.
        string exeFilename = getExeFilename();
        SocketPair fds;
        int e, ret;
        pid_t pid;

        /* Create feedback fd for this agent process. We'll send some startup
         * arguments to this agent process through this fd, and we'll receive
         * startup information through it as well.
         */
        fds = createUnixSocketPair(__FILE__, __LINE__);

        pid = syscalls::fork();
        if (pid == 0) {
            // Child

            /* Make sure file descriptor FEEDBACK_FD refers to the newly created
             * feedback fd (fds[1]) and close all other file descriptors.
             * In this child process we don't care about the original FEEDBACK_FD
             * (which is the Watchdog's communication channel to the agents starter.)
             *
             * fds[1] is guaranteed to be != FEEDBACK_FD because the watchdog
             * is started with FEEDBACK_FD already assigned.
             */
            syscalls::close(fds[0]);

            if (syscalls::dup2(fds[1], FEEDBACK_FD) == -1) {
                /* Something went wrong, report error through feedback fd. */
                e = errno;
                try {
                    writeArrayMessage(fds[1],
                        "system error before exec",
                        "dup2() failed",
                        toString(e).c_str(),
                        NULL);
                    _exit(1);
                } catch (...) {
                    fprintf(stderr, "PassengerWatchdog: dup2() failed: %s (%d)\n",
                        strerror(e), e);
                    fflush(stderr);
                    _exit(1);
                }
            }

            resetSignalHandlersAndMask();
            closeAllFileDescriptors(FEEDBACK_FD);

            /* Become the process group leader so that the watchdog can kill the
             * agent as well as all its descendant processes, and so that a Ctrl-C
             * only affects the watchdog but not agents. */
            setpgid(getpid(), getpid());

            try {
                execProgram();
            } catch (...) {
                fprintf(stderr, "PassengerWatchdog: execProgram() threw an exception\n");
                fflush(stderr);
                _exit(1);
            }

            // execProgram() only returns if exec() failed.
            e = errno;
            try {
                writeArrayMessage(FEEDBACK_FD,
                    "exec error",
                    toString(e).c_str(),
                    NULL);
            } catch (...) {
                fprintf(stderr, "PassengerWatchdog: could not execute %s: %s (%d)\n",
                    exeFilename.c_str(), strerror(e), e);
                fflush(stderr);
            }
            _exit(1);
        } else if (pid == -1) {
            // Error
            e = errno;
            throw SystemException("Cannot fork a new process", e);
        } else {
            // Parent
            FileDescriptor feedbackFd(fds[0]);
            vector<string> args;

            fds[1].close();

            boost::this_thread::restore_interruption ri(di);
            boost::this_thread::restore_syscall_interruption rsi(dsi);
            // Kills the agent's process group unless cleared on success.
            ScopeGuard failGuard(boost::bind(killProcessGroupAndWait, pid));

            /* Send startup arguments. Ignore EPIPE and ECONNRESET here
             * because the child process might have sent a feedback message
             * without reading startup arguments.
             */
            try {
                sendStartupArguments(pid, feedbackFd);
            } catch (const SystemException &ex) {
                if (ex.code() != EPIPE && ex.code() != ECONNRESET) {
                    throw SystemException(string("Unable to start the ") + name() +
                        ": an error occurred while sending startup arguments",
                        ex.code());
                }
            }

            // Now read its feedback.
            try {
                ret = readArrayMessage(feedbackFd, args);
            } catch (const SystemException &ex) {
                if (ex.code() == ECONNRESET) {
                    ret = false;
                } else {
                    throw SystemException(string("Unable to start the ") + name() +
                        ": unable to read its startup information",
                        ex.code());
                }
            }
            if (!ret) {
                boost::this_thread::disable_interruption di2;
                boost::this_thread::disable_syscall_interruption dsi2;
                int status;

                /* The feedback fd was prematurely closed for an unknown reason.
                 * Did the agent process crash?
                 *
                 * We use timedWaitPid() here because if the process crashed
                 * because of an uncaught exception, the file descriptor
                 * might be closed before the process has printed an error
                 * message, so we give it some time to print the error
                 * before we kill it.
                 */
                ret = timedWaitPid(pid, &status, 5000);
                if (ret == 0) {
                    /* Doesn't look like it; it seems it's still running.
                     * We can't do anything without proper feedback so kill
                     * the agent process and throw an exception.
                     */
                    failGuard.runNow();
                    throw RuntimeException(string("Unable to start the ") + name() +
                        ": it froze and reported an unknown error during its startup");
                } else if (ret != -1 && WIFSIGNALED(status)) {
                    /* Looks like a crash which caused a signal. */
                    throw RuntimeException(string("Unable to start the ") + name() +
                        ": it seems to have been killed with signal " +
                        getSignalName(WTERMSIG(status)) + " during startup");
                } else if (ret == -1) {
                    /* Looks like it exited after detecting an error. */
                    throw RuntimeException(string("Unable to start the ") + name() +
                        ": it seems to have crashed during startup for an unknown reason");
                } else {
                    /* Looks like it exited after detecting an error, but has an exit code. */
                    throw RuntimeException(string("Unable to start the ") + name() +
                        ": it seems to have crashed during startup for an unknown reason, "
                        "with exit code " + toString(WEXITSTATUS(status)));
                }
            }

            /* The message comes from another process, so verify that it
             * has the expected number of fields before indexing into it.
             */
            if (args.empty()) {
                throw RuntimeException(string("The ") + name() +
                    " sent an empty startup info message");
            }

            if (args[0] == "system error before exec") {
                if (args.size() < 3) {
                    throw RuntimeException(string("The ") + name() +
                        " sent a malformed startup info message '" +
                        args[0] + "'");
                }
                throw SystemException(string("Unable to start the ") + name() +
                    ": " + args[1], atoi(args[2]));
            } else if (args[0] == "exec error") {
                if (args.size() < 2) {
                    throw RuntimeException(string("The ") + name() +
                        " sent a malformed startup info message '" +
                        args[0] + "'");
                }
                e = atoi(args[1]);
                if (e == ENOENT) {
                    throw RuntimeException(string("Unable to start the ") + name() +
                        " because its executable (" + getExeFilename() + ") "
                        "doesn't exist. This probably means that your "
                        PROGRAM_NAME " installation is broken or "
                        "incomplete. Please reinstall " PROGRAM_NAME);
                } else {
                    throw SystemException(string("Unable to start the ") + name() +
                        " because exec(\"" + getExeFilename() + "\") failed",
                        e);
                }
            } else if (!processStartupInfo(pid, feedbackFd, args)) {
                throw RuntimeException(string("The ") + name() +
                    " sent an unknown startup info message '" +
                    args[0] + "'");
            }

            boost::lock_guard<boost::mutex> l(lock);
            this->feedbackFd = feedbackFd;
            this->pid = pid;
            failGuard.clear();
            return pid;
        }
    }

    /**
     * Begin watching the agent process.
     *
     * @pre start() has been called and succeeded.
     * @pre This watcher isn't already watching.
     * @throws RuntimeException If a precondition failed.
     * @throws thread_interrupted
     * @throws thread_resource_error
     */
    virtual void beginWatching() {
        boost::lock_guard<boost::mutex> l(lock);
        if (pid == 0) {
            throw RuntimeException("start() hasn't been called yet");
        }
        if (thr != NULL) {
            throw RuntimeException("Already started watching.");
        }

        // The thread gets a shared pointer to this object in addition to
        // the raw 'this' pointer that the member function is bound to.
        thr = new oxt::thread(boost::bind(&AgentWatcher::threadMain, this, shared_from_this()),
            name(), 256 * 1024);
    }

    /**
     * Interrupts and joins the watcher threads of all given watchers, then
     * deletes the thread objects. Watchers without a watcher thread are
     * skipped, and an empty list is a no-op.
     */
    static void stopWatching(vector< boost::shared_ptr<AgentWatcher> > &watchers) {
        vector< boost::shared_ptr<AgentWatcher> >::const_iterator it;
        vector<oxt::thread *> threads;

        threads.reserve(watchers.size());
        for (it = watchers.begin(); it != watchers.end(); it++) {
            // A watcher on which beginWatching() was never called has no thread.
            if ((*it)->thr != NULL) {
                threads.push_back((*it)->thr);
            }
        }

        // &threads[0] is only valid when the vector is non-empty.
        if (!threads.empty()) {
            oxt::thread::interrupt_and_join_multiple(&threads[0], threads.size());
        }

        for (it = watchers.begin(); it != watchers.end(); it++) {
            delete (*it)->thr;
            (*it)->thr = NULL;
        }
    }

    /**
     * Tell the agent process to gracefully shut down. Returns true if it
     * was signaled, or false if it wasn't started.
     */
    virtual bool signalShutdown() {
        boost::lock_guard<boost::mutex> l(lock);
        if (pid == 0) {
            return false;
        } else {
            killAndDontWait(pid);
            return true;
        }
    }

    /**
     * Force the agent process to shut down. Returns true if it was shut down,
     * or false if it wasn't started.
     */
    virtual bool forceShutdown() {
        boost::lock_guard<boost::mutex> l(lock);
        if (pid == 0) {
            return false;
        } else {
            killProcessGroupAndWait(pid);
            this->pid = 0;
            return true;
        }
    }

    /**
     * If the watcher thread has encountered an error, then the error message
     * will be stored here. If the error message is empty then it means
     * everything is still OK.
     */
    string getErrorMessage() const {
        boost::lock_guard<boost::mutex> l(lock);
        return threadExceptionMessage;
    }

    /**
     * The error backtrace, if applicable.
     */
    string getErrorBacktrace() const {
        boost::lock_guard<boost::mutex> l(lock);
        return threadExceptionBacktrace;
    }

    /**
     * Returns the agent process feedback fd, or -1 if the agent process
     * hasn't been started yet. Can be used to check whether this agent process
     * has exited without using waitpid().
     */
    const FileDescriptor getFeedbackFd() const {
        boost::lock_guard<boost::mutex> l(lock);
        return feedbackFd;
    }
};
/** Shared-ownership pointer to an AgentWatcher. */
typedef boost::shared_ptr<AgentWatcher> AgentWatcherPtr;
Zerion Mini Shell 1.0