Kore will now keep track of page handlers that cause workers to die.

This is useful for tracking down issues in your module.

The log entries for a page handler causing issues look like:
Jul  7 14:44:30 devbook kore[18191]: [parent]: worker 1 (18193)-> status 11
Jul  7 14:44:30 devbook kore[18191]: [parent]: worker 1 (pid: 18193) (hdlr: 0x242d9c0) gone
Jul  7 14:44:30 devbook kore[18191]: [parent]: hdlr serve_intro has caused 2 error(s)
This commit is contained in:
Joris Vink 2013-07-07 14:48:32 +02:00
parent 7df5339c8d
commit 95bacb5690
4 changed files with 36 additions and 21 deletions

View File

@ -131,6 +131,7 @@ struct kore_module_handle {
char *func;
void *addr;
int type;
int errors;
regex_t rctx;
TAILQ_ENTRY(kore_module_handle) list;
@ -144,6 +145,7 @@ struct kore_worker {
u_int8_t has_lock;
u_int16_t accepted;
u_int16_t accept_treshold;
struct kore_module_handle *active_hdlr;
};
struct kore_domain {
@ -256,10 +258,10 @@ void kore_module_load(char *);
void kore_module_reload(void);
int kore_module_loaded(void);
void kore_domain_closelogs(void);
void *kore_module_handler_find(char *, char *);
void kore_domain_sslstart(struct kore_domain *);
int kore_module_handler_new(char *, char *, char *, int);
struct kore_domain *kore_domain_lookup(const char *);
struct kore_domain *kore_domain_lookup(const char *);
struct kore_module_handle *kore_module_handler_find(char *, char *);
void fatal(const char *, ...);
void kore_debug_internal(char *, int, const char *, ...);

View File

@ -92,8 +92,9 @@ http_request_new(struct connection *c, struct spdy_stream *s, char *host,
void
http_process(void)
{
struct http_request *req, *next;
int r, (*hdlr)(struct http_request *);
struct http_request *req, *next;
struct kore_module_handle *hdlr;
int r, (*cb)(struct http_request *);
for (req = TAILQ_FIRST(&http_requests); req != NULL; req = next) {
next = TAILQ_NEXT(req, list);
@ -110,10 +111,15 @@ http_process(void)
hdlr = kore_module_handler_find(req->host, req->path);
req->start = kore_time_ms();
if (hdlr == NULL)
if (hdlr == NULL) {
r = http_generic_404(req);
else
r = hdlr(req);
} else {
cb = hdlr->addr;
worker->active_hdlr = hdlr;
r = cb(req);
worker->active_hdlr = NULL;
}
req->end = kore_time_ms();
switch (r) {

View File

@ -71,6 +71,7 @@ kore_module_reload(void)
TAILQ_FOREACH(dom, &domains, list) {
TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
hdlr->errors = 0;
hdlr->addr = dlsym(mod_handle, hdlr->func);
if (hdlr->func == NULL)
fatal("no function '%s' found", hdlr->func);
@ -113,6 +114,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_ERROR);
hdlr = (struct kore_module_handle *)kore_malloc(sizeof(*hdlr));
hdlr->errors = 0;
hdlr->addr = addr;
hdlr->type = type;
hdlr->path = kore_strdup(path);
@ -132,7 +134,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_OK);
}
void *
struct kore_module_handle *
kore_module_handler_find(char *domain, char *path)
{
struct kore_domain *dom;
@ -144,10 +146,10 @@ kore_module_handler_find(char *domain, char *path)
TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
if (hdlr->type == HANDLER_TYPE_STATIC) {
if (!strcmp(hdlr->path, path))
return (hdlr->addr);
return (hdlr);
} else {
if (!regexec(&(hdlr->rctx), path, 0, NULL, 0))
return (hdlr->addr);
return (hdlr);
}
}

View File

@ -109,8 +109,10 @@ kore_worker_spawn(u_int16_t id, u_int16_t cpu)
kw->cpu = cpu;
kw->load = 0;
kw->accepted = 0;
kw->pid = fork();
kw->has_lock = 0;
kw->active_hdlr = NULL;
kw->pid = fork();
if (kw->pid == -1)
fatal("could not spawn worker child: %s", errno_s);
@ -170,7 +172,6 @@ kore_worker_entry(struct kore_worker *kw)
u_int64_t now, idle_check;
worker = kw;
kw->has_lock = 0;
if (chroot(chroot_path) == -1)
fatal("cannot chroot(): %s", errno_s);
@ -325,22 +326,26 @@ kore_worker_wait(int final)
if (WEXITSTATUS(status) || WTERMSIG(status) ||
WCOREDUMP(status)) {
kore_log(LOG_NOTICE,
"worker %d (pid: %d) gone, respawning new one",
kw->id, kw->pid);
if (kw->pid == accept_lock->lock) {
kore_log(LOG_NOTICE,
"worker %d owned accept lock, releasing",
kw->id);
"worker %d (pid: %d) (hdlr: %p) gone",
kw->id, kw->pid, kw->active_hdlr);
if (kw->pid == accept_lock->lock)
accept_lock->lock = accept_lock->next;
if (kw->active_hdlr != NULL) {
kw->active_hdlr->errors++;
kore_log(LOG_NOTICE,
"hdlr %s has caused %d error(s)",
kw->active_hdlr->func,
kw->active_hdlr->errors);
}
kore_log(LOG_NOTICE, "restarting worker %d", kw->id);
kore_worker_spawn(kw->id, kw->cpu);
} else {
kore_log(LOG_NOTICE,
"worker %d (pid: %d) signaled us",
kw->id, kw->pid);
"worker %d (pid: %d) signaled us (%d)",
kw->id, kw->pid, status);
}
break;