mirror of
https://git.kore.io/kore.git
synced 2024-11-16 15:06:37 +01:00
Add worker_death_policy setting.
By default kore will restart worker processes if they terminate unexpected. However in certain scenarios you may want to bring down an entire kore instance if a worker process fails. By setting worker_death_policy to "terminate" the Kore server will completely stop if a worker exits unexpected.
This commit is contained in:
parent
cd80685d9d
commit
4238431b9e
@ -53,6 +53,14 @@ workers 4
|
||||
# before releasing the lock to others.
|
||||
#worker_accept_threshold 16
|
||||
|
||||
# What should the Kore parent process do if a worker
|
||||
# process unexpectly exits. The default policy is that
|
||||
# the worker process is automatically restarted.
|
||||
#
|
||||
# If you want the kore server to exit if a worker dies
|
||||
# you can swap the policy to "terminate-server".
|
||||
#worker_death_policy restart-worker
|
||||
|
||||
# Workers bind themselves to a single CPU by default.
|
||||
# Turn this off by setting this option to 0
|
||||
#worker_set_affinity 1
|
||||
|
@ -486,7 +486,9 @@ struct kore_timer {
|
||||
TAILQ_ENTRY(kore_timer) list;
|
||||
};
|
||||
|
||||
#define KORE_WORKER_KEYMGR 0
|
||||
#define KORE_WORKER_KEYMGR 0
|
||||
#define KORE_WORKER_POLICY_RESTART 1
|
||||
#define KORE_WORKER_POLICY_TERMINATE 2
|
||||
|
||||
/* Reserved message ids, registered on workers. */
|
||||
#define KORE_MSG_WEBSOCKET 1
|
||||
@ -557,6 +559,7 @@ extern u_int8_t nlisteners;
|
||||
extern u_int16_t cpu_count;
|
||||
extern u_int8_t worker_count;
|
||||
extern const char *kore_version;
|
||||
extern int worker_policy;
|
||||
extern u_int8_t worker_set_affinity;
|
||||
extern u_int32_t worker_rlimit_nofiles;
|
||||
extern u_int32_t worker_max_connections;
|
||||
@ -575,7 +578,7 @@ extern struct kore_pool nb_pool;
|
||||
void kore_signal(int);
|
||||
void kore_shutdown(void);
|
||||
void kore_signal_setup(void);
|
||||
void kore_worker_wait(int);
|
||||
void kore_worker_reap(void);
|
||||
void kore_worker_init(void);
|
||||
void kore_worker_make_busy(void);
|
||||
void kore_worker_shutdown(void);
|
||||
|
17
src/config.c
17
src/config.c
@ -62,6 +62,7 @@ static int configure_pidfile(char *);
|
||||
static int configure_rlimit_nofiles(char *);
|
||||
static int configure_max_connections(char *);
|
||||
static int configure_accept_threshold(char *);
|
||||
static int configure_death_policy(char *);
|
||||
static int configure_set_affinity(char *);
|
||||
static int configure_socket_backlog(char *);
|
||||
|
||||
@ -146,6 +147,7 @@ static struct {
|
||||
{ "worker_max_connections", configure_max_connections },
|
||||
{ "worker_rlimit_nofiles", configure_rlimit_nofiles },
|
||||
{ "worker_accept_threshold", configure_accept_threshold },
|
||||
{ "worker_death_policy", configure_death_policy },
|
||||
{ "worker_set_affinity", configure_set_affinity },
|
||||
{ "pidfile", configure_pidfile },
|
||||
{ "socket_backlog", configure_socket_backlog },
|
||||
@ -1312,6 +1314,21 @@ configure_accept_threshold(char *option)
|
||||
return (KORE_RESULT_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
configure_death_policy(char *option)
|
||||
{
|
||||
if (!strcmp(option, "restart")) {
|
||||
worker_policy = KORE_WORKER_POLICY_RESTART;
|
||||
} else if (!strcmp(option, "terminate")) {
|
||||
worker_policy = KORE_WORKER_POLICY_TERMINATE;
|
||||
} else {
|
||||
printf("bad value for worker_death_policy: %s\n", option);
|
||||
return (KORE_RESULT_ERROR);
|
||||
}
|
||||
|
||||
return (KORE_RESULT_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
configure_set_affinity(char *option)
|
||||
{
|
||||
|
11
src/kore.c
11
src/kore.c
@ -596,9 +596,9 @@ static void
|
||||
kore_server_start(int argc, char *argv[])
|
||||
{
|
||||
u_int32_t tmp;
|
||||
int quit;
|
||||
struct kore_runtime_call *rcall;
|
||||
u_int64_t netwait;
|
||||
int quit, last_sig;
|
||||
|
||||
if (foreground == 0) {
|
||||
if (daemon(1, 0) == -1)
|
||||
@ -662,6 +662,8 @@ kore_server_start(int argc, char *argv[])
|
||||
|
||||
while (quit != 1) {
|
||||
if (sig_recv != 0) {
|
||||
last_sig = sig_recv;
|
||||
|
||||
switch (sig_recv) {
|
||||
case SIGHUP:
|
||||
kore_worker_dispatch_signal(sig_recv);
|
||||
@ -677,13 +679,16 @@ kore_server_start(int argc, char *argv[])
|
||||
kore_worker_dispatch_signal(sig_recv);
|
||||
break;
|
||||
case SIGCHLD:
|
||||
kore_worker_wait(0);
|
||||
kore_worker_reap();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
sig_recv = 0;
|
||||
if (sig_recv == last_sig)
|
||||
sig_recv = 0;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
|
||||
netwait = kore_timer_next_run(kore_time_ms());
|
||||
|
55
src/worker.c
55
src/worker.c
@ -94,6 +94,7 @@ u_int32_t worker_accept_threshold = 16;
|
||||
u_int32_t worker_rlimit_nofiles = 768;
|
||||
u_int32_t worker_max_connections = 512;
|
||||
u_int32_t worker_active_connections = 0;
|
||||
int worker_policy = KORE_WORKER_POLICY_RESTART;
|
||||
|
||||
void
|
||||
kore_worker_init(void)
|
||||
@ -191,6 +192,8 @@ void
|
||||
kore_worker_shutdown(void)
|
||||
{
|
||||
struct kore_worker *kw;
|
||||
pid_t pid;
|
||||
int status;
|
||||
u_int16_t id, done;
|
||||
|
||||
if (!kore_quiet) {
|
||||
@ -199,12 +202,20 @@ kore_worker_shutdown(void)
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
for (id = 0; id < worker_count; id++) {
|
||||
kw = WORKER(id);
|
||||
if (kw->pid != 0) {
|
||||
pid = waitpid(kw->pid, &status, 0);
|
||||
if (pid == -1)
|
||||
continue;
|
||||
kw->pid = 0;
|
||||
}
|
||||
}
|
||||
|
||||
done = 0;
|
||||
for (id = 0; id < worker_count; id++) {
|
||||
kw = WORKER(id);
|
||||
if (kw->pid != 0)
|
||||
kore_worker_wait(1);
|
||||
else
|
||||
if (kw->pid == 0)
|
||||
done++;
|
||||
}
|
||||
|
||||
@ -501,7 +512,7 @@ kore_worker_entry(struct kore_worker *kw)
|
||||
}
|
||||
|
||||
void
|
||||
kore_worker_wait(int final)
|
||||
kore_worker_reap(void)
|
||||
{
|
||||
u_int16_t id;
|
||||
pid_t pid;
|
||||
@ -509,14 +520,20 @@ kore_worker_wait(int final)
|
||||
const char *func;
|
||||
int status;
|
||||
|
||||
if (final)
|
||||
pid = waitpid(WAIT_ANY, &status, 0);
|
||||
else
|
||||
for (;;) {
|
||||
pid = waitpid(WAIT_ANY, &status, WNOHANG);
|
||||
|
||||
if (pid == -1) {
|
||||
kore_debug("waitpid(): %s", errno_s);
|
||||
return;
|
||||
if (pid == -1) {
|
||||
if (errno == ECHILD)
|
||||
return;
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
kore_log(LOG_ERR,
|
||||
"failed to wait for children: %s", errno_s);
|
||||
return;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (pid == 0)
|
||||
@ -527,16 +544,11 @@ kore_worker_wait(int final)
|
||||
if (kw->pid != pid)
|
||||
continue;
|
||||
|
||||
if (final == 0 || (final == 1 && !kore_quiet)) {
|
||||
if (!kore_quiet) {
|
||||
kore_log(LOG_NOTICE, "worker %d (%d)-> status %d",
|
||||
kw->id, pid, status);
|
||||
}
|
||||
|
||||
if (final) {
|
||||
kw->pid = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (WEXITSTATUS(status) || WTERMSIG(status) ||
|
||||
WCOREDUMP(status)) {
|
||||
func = "none";
|
||||
@ -574,6 +586,17 @@ kore_worker_wait(int final)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (worker_policy == KORE_WORKER_POLICY_TERMINATE) {
|
||||
kw->pid = 0;
|
||||
kore_log(LOG_NOTICE,
|
||||
"worker policy is 'terminate', stopping");
|
||||
if (raise(SIGTERM) != 0) {
|
||||
kore_log(LOG_WARNING,
|
||||
"failed to raise SIGTERM signal");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
kore_log(LOG_NOTICE, "restarting worker %d", kw->id);
|
||||
kw->restarted = 1;
|
||||
kore_msg_parent_remove(kw);
|
||||
|
Loading…
Reference in New Issue
Block a user