Kore will now keep track of page handlers that cause workers to die.

This is useful to track down any issues you might have in your module.

The log entries for a page handler that causes issues look like this:
Jul  7 14:44:30 devbook kore[18191]: [parent]: worker 1 (18193)-> status 11
Jul  7 14:44:30 devbook kore[18191]: [parent]: worker 1 (pid: 18193) (hdlr: 0x242d9c0) gone
Jul  7 14:44:30 devbook kore[18191]: [parent]: hdlr serve_intro has caused 2 error(s)
This commit is contained in:
Joris Vink 2013-07-07 14:48:32 +02:00
parent 7df5339c8d
commit 95bacb5690
4 changed files with 36 additions and 21 deletions

View File

@ -131,6 +131,7 @@ struct kore_module_handle {
char *func; char *func;
void *addr; void *addr;
int type; int type;
int errors;
regex_t rctx; regex_t rctx;
TAILQ_ENTRY(kore_module_handle) list; TAILQ_ENTRY(kore_module_handle) list;
@ -144,6 +145,7 @@ struct kore_worker {
u_int8_t has_lock; u_int8_t has_lock;
u_int16_t accepted; u_int16_t accepted;
u_int16_t accept_treshold; u_int16_t accept_treshold;
struct kore_module_handle *active_hdlr;
}; };
struct kore_domain { struct kore_domain {
@ -256,10 +258,10 @@ void kore_module_load(char *);
void kore_module_reload(void); void kore_module_reload(void);
int kore_module_loaded(void); int kore_module_loaded(void);
void kore_domain_closelogs(void); void kore_domain_closelogs(void);
void *kore_module_handler_find(char *, char *);
void kore_domain_sslstart(struct kore_domain *); void kore_domain_sslstart(struct kore_domain *);
int kore_module_handler_new(char *, char *, char *, int); int kore_module_handler_new(char *, char *, char *, int);
struct kore_domain *kore_domain_lookup(const char *); struct kore_domain *kore_domain_lookup(const char *);
struct kore_module_handle *kore_module_handler_find(char *, char *);
void fatal(const char *, ...); void fatal(const char *, ...);
void kore_debug_internal(char *, int, const char *, ...); void kore_debug_internal(char *, int, const char *, ...);

View File

@ -92,8 +92,9 @@ http_request_new(struct connection *c, struct spdy_stream *s, char *host,
void void
http_process(void) http_process(void)
{ {
struct http_request *req, *next; struct http_request *req, *next;
int r, (*hdlr)(struct http_request *); struct kore_module_handle *hdlr;
int r, (*cb)(struct http_request *);
for (req = TAILQ_FIRST(&http_requests); req != NULL; req = next) { for (req = TAILQ_FIRST(&http_requests); req != NULL; req = next) {
next = TAILQ_NEXT(req, list); next = TAILQ_NEXT(req, list);
@ -110,10 +111,15 @@ http_process(void)
hdlr = kore_module_handler_find(req->host, req->path); hdlr = kore_module_handler_find(req->host, req->path);
req->start = kore_time_ms(); req->start = kore_time_ms();
if (hdlr == NULL) if (hdlr == NULL) {
r = http_generic_404(req); r = http_generic_404(req);
else } else {
r = hdlr(req); cb = hdlr->addr;
worker->active_hdlr = hdlr;
r = cb(req);
worker->active_hdlr = NULL;
}
req->end = kore_time_ms(); req->end = kore_time_ms();
switch (r) { switch (r) {

View File

@ -71,6 +71,7 @@ kore_module_reload(void)
TAILQ_FOREACH(dom, &domains, list) { TAILQ_FOREACH(dom, &domains, list) {
TAILQ_FOREACH(hdlr, &(dom->handlers), list) { TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
hdlr->errors = 0;
hdlr->addr = dlsym(mod_handle, hdlr->func); hdlr->addr = dlsym(mod_handle, hdlr->func);
if (hdlr->func == NULL) if (hdlr->func == NULL)
fatal("no function '%s' found", hdlr->func); fatal("no function '%s' found", hdlr->func);
@ -113,6 +114,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_ERROR); return (KORE_RESULT_ERROR);
hdlr = (struct kore_module_handle *)kore_malloc(sizeof(*hdlr)); hdlr = (struct kore_module_handle *)kore_malloc(sizeof(*hdlr));
hdlr->errors = 0;
hdlr->addr = addr; hdlr->addr = addr;
hdlr->type = type; hdlr->type = type;
hdlr->path = kore_strdup(path); hdlr->path = kore_strdup(path);
@ -132,7 +134,7 @@ kore_module_handler_new(char *path, char *domain, char *func, int type)
return (KORE_RESULT_OK); return (KORE_RESULT_OK);
} }
void * struct kore_module_handle *
kore_module_handler_find(char *domain, char *path) kore_module_handler_find(char *domain, char *path)
{ {
struct kore_domain *dom; struct kore_domain *dom;
@ -144,10 +146,10 @@ kore_module_handler_find(char *domain, char *path)
TAILQ_FOREACH(hdlr, &(dom->handlers), list) { TAILQ_FOREACH(hdlr, &(dom->handlers), list) {
if (hdlr->type == HANDLER_TYPE_STATIC) { if (hdlr->type == HANDLER_TYPE_STATIC) {
if (!strcmp(hdlr->path, path)) if (!strcmp(hdlr->path, path))
return (hdlr->addr); return (hdlr);
} else { } else {
if (!regexec(&(hdlr->rctx), path, 0, NULL, 0)) if (!regexec(&(hdlr->rctx), path, 0, NULL, 0))
return (hdlr->addr); return (hdlr);
} }
} }

View File

@ -109,8 +109,10 @@ kore_worker_spawn(u_int16_t id, u_int16_t cpu)
kw->cpu = cpu; kw->cpu = cpu;
kw->load = 0; kw->load = 0;
kw->accepted = 0; kw->accepted = 0;
kw->pid = fork(); kw->has_lock = 0;
kw->active_hdlr = NULL;
kw->pid = fork();
if (kw->pid == -1) if (kw->pid == -1)
fatal("could not spawn worker child: %s", errno_s); fatal("could not spawn worker child: %s", errno_s);
@ -170,7 +172,6 @@ kore_worker_entry(struct kore_worker *kw)
u_int64_t now, idle_check; u_int64_t now, idle_check;
worker = kw; worker = kw;
kw->has_lock = 0;
if (chroot(chroot_path) == -1) if (chroot(chroot_path) == -1)
fatal("cannot chroot(): %s", errno_s); fatal("cannot chroot(): %s", errno_s);
@ -325,22 +326,26 @@ kore_worker_wait(int final)
if (WEXITSTATUS(status) || WTERMSIG(status) || if (WEXITSTATUS(status) || WTERMSIG(status) ||
WCOREDUMP(status)) { WCOREDUMP(status)) {
kore_log(LOG_NOTICE, kore_log(LOG_NOTICE,
"worker %d (pid: %d) gone, respawning new one", "worker %d (pid: %d) (hdlr: %p) gone",
kw->id, kw->pid); kw->id, kw->pid, kw->active_hdlr);
if (kw->pid == accept_lock->lock) {
kore_log(LOG_NOTICE,
"worker %d owned accept lock, releasing",
kw->id);
if (kw->pid == accept_lock->lock)
accept_lock->lock = accept_lock->next; accept_lock->lock = accept_lock->next;
if (kw->active_hdlr != NULL) {
kw->active_hdlr->errors++;
kore_log(LOG_NOTICE,
"hdlr %s has caused %d error(s)",
kw->active_hdlr->func,
kw->active_hdlr->errors);
} }
kore_log(LOG_NOTICE, "restarting worker %d", kw->id);
kore_worker_spawn(kw->id, kw->cpu); kore_worker_spawn(kw->id, kw->cpu);
} else { } else {
kore_log(LOG_NOTICE, kore_log(LOG_NOTICE,
"worker %d (pid: %d) signaled us", "worker %d (pid: %d) signaled us (%d)",
kw->id, kw->pid); kw->id, kw->pid, status);
} }
break; break;