[master] ddac7e41b Add a watchdog to worker pools.
Dridi Boukelmoune
dridi at varni.sh
Wed Oct 3 12:37:48 UTC 2018
On Wed, Oct 3, 2018 at 12:17 PM Poul-Henning Kamp <phk at freebsd.org> wrote:
>
>
> commit ddac7e41b64af09662305d07ef3be0376d6e2021
> Author: Poul-Henning Kamp <phk at FreeBSD.org>
> Date: Wed Oct 3 10:12:59 2018 +0000
>
> Add a watchdog to worker pools.
<snip>
> diff --git a/bin/varnishd/cache/cache_pool.h b/bin/varnishd/cache/cache_pool.h
> index e7a6a618b..9902e32ad 100644
> --- a/bin/varnishd/cache/cache_pool.h
> +++ b/bin/varnishd/cache/cache_pool.h
> @@ -53,6 +53,7 @@ struct pool {
> uintmax_t sdropped;
> uintmax_t rdropped;
> uintmax_t nqueued;
> + uintmax_t ndequeued;
> struct VSC_main_wrk *a_stat;
> struct VSC_main_wrk *b_stat;
>
> diff --git a/bin/varnishd/cache/cache_wrk.c b/bin/varnishd/cache/cache_wrk.c
> index 589ce68cb..f7f95cfc5 100644
> --- a/bin/varnishd/cache/cache_wrk.c
> +++ b/bin/varnishd/cache/cache_wrk.c
> @@ -341,6 +341,7 @@ Pool_Work_Thread(struct pool *pp, struct worker *wrk)
> tp = VTAILQ_FIRST(&pp->queues[i]);
> if (tp != NULL) {
> pp->lqueue--;
> + pp->ndequeued--;
I thought we'd increment pp->ndequeued above based on the field's name.
> VTAILQ_REMOVE(&pp->queues[i], tp, list);
> break;
> }
> @@ -487,6 +488,8 @@ pool_herder(void *priv)
> struct worker *wrk;
> double delay;
> int wthread_min;
> + uintmax_t dq = (1ULL << 31);
> + double dqt;
If we increment pp->ndequeued and let dq start at zero, and initialize
dqt here [...]
> CAST_OBJ_NOTNULL(pp, priv, POOL_MAGIC);
>
> @@ -494,6 +497,29 @@ pool_herder(void *priv)
> THR_Init();
>
> while (!pp->die || pp->nthr > 0) {
> + /*
> + * If the worker pool is configured too small, we can
> + * end up deadlocking it (see #2418 for details).
> + *
> + * Recovering from this would require a lot of complicated
> + * code, and fundamentally, either people configured their
> + * pools wrong, in which case we want them to notice, or
> + * they are under DoS, in which case recovering gracefully
> + * is unlikely be a major improvement.
> + *
> + * Instead we implement a watchdog and kill the worker if
> + * nothing has been dequeued for that long.
> + */
> + if (dq != pp->ndequeued) {
> + dq = pp->ndequeued;
> + dqt = VTIM_real();
[...] we could log the dequeue delta (both count and time) to let
users know how to sensibly set the parameter.
> + } else if (pp->lqueue &&
> + VTIM_real() - dqt > cache_param->wthread_watchdog) {
> + VSL(SLT_Error, 0,
> + "Pool Herder: Queue does not move ql=%u dt=%f",
> + pp->lqueue, VTIM_real() - dqt);
> + WRONG("Worker Pool Queue does not move");
> + }
> wthread_min = cache_param->wthread_min;
> if (pp->die)
> wthread_min = 0;
<snip>
Dridi
More information about the varnish-commit
mailing list