[master] beeaa19 take v1l memory from the thread workspace (again)

Nils Goroll nils.goroll at uplex.de
Mon Nov 13 11:32:06 UTC 2017


commit beeaa19cced3fe1ab79381b2b1b7b0b5594cbb18
Author: Nils Goroll <nils.goroll at uplex.de>
Date:   Sun Nov 12 14:47:45 2017 +0100

    take v1l memory from the thread workspace (again)
    
    ... as it was the case before 69d45413bb1fca69cb487b0098f1a7323114fdf7
    and as documented.
    
    The motivation is to remove the reservation from req->ws during
    delivery, but actually line delivery memory should not come from the
    request space - as originally designed:
    
    - We avoid requiring an obscure surplus of workspace_client for delivery
    
      - which is also allocated for every subrequest though not required there
    
    - We get predictable performance as the number of IO-vectors available
      is now only a function of workspace_thread or esi_iovs (see below)
      rather than the amount of memory which happens to be available on
      the request workspace.
    
    As a sensible side effect, we now also fail with an internal 500 error
    for workspace_session and workspace_thread overflows in addition to
    the existing check on workspace_client for completeness.
    
    For ESI requests, we run all of the client side processing, which uses
    the thread workspace, with V1L set up. Thus, V1L now needs its control
    structure together with a small amount of io vectors as an allocation
    on the workspace.
    
    Real world observation has shown that no more than five io vectors are
    normally in use during ESI, yet still we make this number configurable
    and have a default with some safety margin.
    
    For non-ESI requests and headers, we use all of the thread workspace
    (workspace_thread) for io vectors, as before.
    
    As V1L does not necessarily reserve workspace any more, functions have
    been renamed to better reflect the purpose:
    
    V1L_Reserve -> V1L_Open
    V1L_FlushRelease -> V1L_Close

diff --git a/bin/varnishd/cache/cache_shmlog.c b/bin/varnishd/cache/cache_shmlog.c
index cf72726..f663bff 100644
--- a/bin/varnishd/cache/cache_shmlog.c
+++ b/bin/varnishd/cache/cache_shmlog.c
@@ -379,7 +379,7 @@ VSLb_ts(struct vsl_log *vsl, const char *event, double first, double *pprev,
 
 	/* XXX: Make an option to turn off some unnecessary timestamp
 	   logging. This must be done carefully because some functions
-	   (e.g. V1L_Reserve) takes the last timestamp as its initial
+	   (e.g. V1L_Open) takes the last timestamp as its initial
 	   value for timeout calculation. */
 	vsl_sanity(vsl);
 	assert(!isnan(now) && now != 0.);
diff --git a/bin/varnishd/http1/cache_http1.h b/bin/varnishd/http1/cache_http1.h
index 8b4c592..ba010db 100644
--- a/bin/varnishd/http1/cache_http1.h
+++ b/bin/varnishd/http1/cache_http1.h
@@ -56,8 +56,8 @@ void V1P_Charge(struct req *, const struct v1p_acct *, struct VSC_vbe *);
 /* cache_http1_line.c */
 void V1L_Chunked(const struct worker *w);
 void V1L_EndChunk(const struct worker *w);
-void V1L_Reserve(struct worker *, struct ws *, int *fd, struct vsl_log *,
-    double t0);
+void V1L_Open(struct worker *, struct ws *, int *fd, struct vsl_log *,
+    double t0, unsigned niov);
 unsigned V1L_Flush(const struct worker *w);
-unsigned V1L_FlushRelease(struct worker *w);
+unsigned V1L_Close(struct worker *w);
 size_t V1L_Write(const struct worker *w, const void *ptr, ssize_t len);
diff --git a/bin/varnishd/http1/cache_http1_deliver.c b/bin/varnishd/http1/cache_http1_deliver.c
index 9bf5bf8..1ccf59d 100644
--- a/bin/varnishd/http1/cache_http1_deliver.c
+++ b/bin/varnishd/http1/cache_http1_deliver.c
@@ -114,31 +114,71 @@ V1D_Deliver(struct req *req, struct boc *boc, int sendbody)
 	} else if (!http_GetHdr(req->resp, H_Connection, NULL))
 		http_SetHeader(req->resp, "Connection: keep-alive");
 
-	if (sendbody && req->resp_len != 0)
+	if (WS_Overflowed(req->ws)) {
+		v1d_error(req, "workspace_client overflow");
+		return;
+	}
+
+	if (WS_Overflowed(req->sp->ws)) {
+		v1d_error(req, "workspace_session overflow");
+		return;
+	}
+
+	if (req->resp_len == 0)
+		sendbody = 0;
+
+	if (sendbody)
 		VDP_push(req, &v1d_vdp, NULL, 1);
 
 	AZ(req->wrk->v1l);
-	V1L_Reserve(req->wrk, req->ws, &req->sp->fd, req->vsl, req->t_prev);
+	V1L_Open(req->wrk, req->wrk->aws,
+		 &req->sp->fd, req->vsl, req->t_prev, 0);
 
-	if (WS_Overflowed(req->ws)) {
-		v1d_error(req, "workspace_client overflow");
+	if (WS_Overflowed(req->wrk->aws)) {
+		v1d_error(req, "workspace_thread overflow");
 		AZ(req->wrk->v1l);
 		return;
 	}
 
 	req->acct.resp_hdrbytes += HTTP1_Write(req->wrk, req->resp, HTTP1_Resp);
+
 	if (DO_DEBUG(DBG_FLUSH_HEAD))
 		(void)V1L_Flush(req->wrk);
 
-	if (sendbody && req->resp_len != 0) {
-		if (req->res_mode & RES_CHUNKED)
-			V1L_Chunked(req->wrk);
-		err = VDP_DeliverObj(req);
-		if (!err && (req->res_mode & RES_CHUNKED))
-			V1L_EndChunk(req->wrk);
+	if (! sendbody || req->res_mode & RES_ESI)
+		if (V1L_Close(req->wrk) && req->sp->fd >= 0) {
+			Req_Fail(req, SC_REM_CLOSE);
+			sendbody = 0;
+		}
+
+	if (! sendbody) {
+		AZ(req->wrk->v1l);
+		VDP_close(req);
+		return;
 	}
 
-	if ((V1L_FlushRelease(req->wrk) || err) && req->sp->fd >= 0)
+	AN(sendbody);
+	if (req->res_mode & RES_ESI) {
+		AZ(req->wrk->v1l);
+
+		V1L_Open(req->wrk, req->wrk->aws,
+			 &req->sp->fd, req->vsl, req->t_prev,
+			 cache_param->esi_iovs);
+
+		if (WS_Overflowed(req->wrk->aws)) {
+			v1d_error(req, "workspace_thread overflow");
+			AZ(req->wrk->v1l);
+			return;
+		}
+	}
+
+	if (req->res_mode & RES_CHUNKED)
+		V1L_Chunked(req->wrk);
+	err = VDP_DeliverObj(req);
+	if (!err && (req->res_mode & RES_CHUNKED))
+		V1L_EndChunk(req->wrk);
+
+	if ((V1L_Close(req->wrk) || err) && req->sp->fd >= 0)
 		Req_Fail(req, SC_REM_CLOSE);
 	AZ(req->wrk->v1l);
 	VDP_close(req);
diff --git a/bin/varnishd/http1/cache_http1_fetch.c b/bin/varnishd/http1/cache_http1_fetch.c
index 9efb938..81acbda 100644
--- a/bin/varnishd/http1/cache_http1_fetch.c
+++ b/bin/varnishd/http1/cache_http1_fetch.c
@@ -99,7 +99,7 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr,
 	VSLb(bo->vsl, SLT_BackendStart, "%s %s", abuf, pbuf);
 
 	(void)VTCP_blocking(*htc->rfd);	/* XXX: we should timeout instead */
-	V1L_Reserve(wrk, wrk->aws, htc->rfd, bo->vsl, bo->t_prev);
+	V1L_Open(wrk, wrk->aws, htc->rfd, bo->vsl, bo->t_prev, 0);
 	*ctr += HTTP1_Write(wrk, hp, HTTP1_Req);
 
 	/* Deal with any message-body the request might (still) have */
@@ -122,7 +122,7 @@ V1F_SendReq(struct worker *wrk, struct busyobj *bo, uint64_t *ctr,
 			V1L_EndChunk(wrk);
 	}
 
-	j = V1L_FlushRelease(wrk);
+	j = V1L_Close(wrk);
 	if (j != 0 || i < 0) {
 		VSLb(bo->vsl, SLT_FetchError, "backend write error: %d (%s)",
 		    errno, strerror(errno));
diff --git a/bin/varnishd/http1/cache_http1_line.c b/bin/varnishd/http1/cache_http1_line.c
index cb86c83..2ab0712 100644
--- a/bin/varnishd/http1/cache_http1_line.c
+++ b/bin/varnishd/http1/cache_http1_line.c
@@ -69,11 +69,13 @@ struct v1l {
 };
 
 /*--------------------------------------------------------------------
+ * for niov == 0, reserve the ws for max number of iovs
+ * otherwise, up to niov
  */
 
 void
-V1L_Reserve(struct worker *wrk, struct ws *ws, int *fd, struct vsl_log *vsl,
-    double t0)
+V1L_Open(struct worker *wrk, struct ws *ws, int *fd, struct vsl_log *vsl,
+    double t0, unsigned niov)
 {
 	struct v1l *v1l;
 	unsigned u;
@@ -84,7 +86,12 @@ V1L_Reserve(struct worker *wrk, struct ws *ws, int *fd, struct vsl_log *vsl,
 
 	if (WS_Overflowed(ws))
 		return;
+
+	if (niov != 0)
+		assert(niov >= 3);
+
 	res = WS_Snapshot(ws);
+
 	v1l = WS_Alloc(ws, sizeof *v1l);
 	if (v1l == NULL)
 		return;
@@ -102,6 +109,8 @@ V1L_Reserve(struct worker *wrk, struct ws *ws, int *fd, struct vsl_log *vsl,
 	}
 	if (u > IOV_MAX)
 		u = IOV_MAX;
+	if (niov != 0 && u > niov)
+		u = niov;
 	v1l->iov = (void*)ws->f;
 	v1l->siov = u;
 	v1l->ciov = u;
@@ -112,10 +121,13 @@ V1L_Reserve(struct worker *wrk, struct ws *ws, int *fd, struct vsl_log *vsl,
 	v1l->t0 = t0;
 	v1l->vsl = vsl;
 	wrk->v1l = v1l;
+
+	if (niov != 0)
+		WS_Release(ws, u * sizeof(struct iovec));
 }
 
 unsigned
-V1L_FlushRelease(struct worker *wrk)
+V1L_Close(struct worker *wrk)
 {
 	struct v1l *v1l;
 	unsigned u;
@@ -125,7 +137,8 @@ V1L_FlushRelease(struct worker *wrk)
 	v1l = wrk->v1l;
 	wrk->v1l = NULL;
 	CHECK_OBJ_NOTNULL(v1l, V1L_MAGIC);
-	WS_Release(v1l->ws, 0);
+	if (v1l->ws->r)
+		WS_Release(v1l->ws, 0);
 	WS_Reset(v1l->ws, v1l->res);
 	return (u);
 }
diff --git a/bin/varnishtest/tests/c00071.vtc b/bin/varnishtest/tests/c00071.vtc
index fdeb2f8..734c575 100644
--- a/bin/varnishtest/tests/c00071.vtc
+++ b/bin/varnishtest/tests/c00071.vtc
@@ -18,6 +18,7 @@ varnish v1 -vcl+backend {
 
 		if (req.url ~ "/bar") {
 			set resp.http.x-foo = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+			vtc.workspace_alloc(client, -10);
 		}
 		else if (req.url ~ "/baz") {
 			set resp.http.x-foo = regsub(req.url, "baz", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz");
diff --git a/bin/varnishtest/tests/r02275.vtc b/bin/varnishtest/tests/r02275.vtc
index a705169..812f871 100644
--- a/bin/varnishtest/tests/r02275.vtc
+++ b/bin/varnishtest/tests/r02275.vtc
@@ -1,5 +1,22 @@
 varnishtest "Chunked with no space for iov's on workspace"
 
+# For this test to work with iovecs on the thread workspace (where
+# they belong!), we would need to circumvent the very sensible vrt
+# check that vcl make no permanent reservations on the thread
+# workspace (see vcl_call_method()).
+#
+# One possible way would be to push a VDP just for this.
+#
+# For now, we consider this issue low priority because with v1l living
+# on the thread workspace, the case for hitting #2275 again is as
+# exotic as can be.
+#
+# The values used in this test have been carefully worked out and
+# tested with v1l on the session workspace, so they should work 1:1 on
+# the thread workspace.
+
+feature cmd false
+
 server s1 -repeat 2 {
 	rxreq
 	txresp -nolen -hdr "Transfer-encoding: chunked"
@@ -15,14 +32,19 @@ varnish v1 -vcl+backend {
 	import vtc;
 
 	sub vcl_deliver {
+		# struct v1l is 13 - 15 pointer-sizes,
+		# an iovec should be two pointer-sizes
+		#
+		# we want to hit the case where we got just not
+		# enough space for three iovecs
 		if (req.url == "/1") {
-			vtc.workspace_alloc(client,
-			    -1 * (32 + vtc.typesize("p") * 25));
+			vtc.workspace_alloc(thread,
+			    -1 * vtc.typesize("p") * (13 + 4));
 		} else {
-			vtc.workspace_alloc(client,
-			    -1 * (56 + vtc.typesize("p") * 25));
+			vtc.workspace_alloc(thread,
+			    -1 * vtc.typesize("p") * (15 + 6));
 		}
-		set resp.http.foo = vtc.workspace_free(client);
+		set resp.http.foo = vtc.workspace_free(thread);
 	}
 } -start
 
@@ -36,4 +58,3 @@ client c1 {
 	rxresp
 	expect resp.status == 200
 } -run
-
diff --git a/include/tbl/params.h b/include/tbl/params.h
index 7bf45e9..454021d 100644
--- a/include/tbl/params.h
+++ b/include/tbl/params.h
@@ -458,6 +458,21 @@ PARAM(
 	/* func */	NULL
 )
 
+PARAM(
+	/* name */	esi_iovs,
+	/* typ */	uint,
+	/* min */	"3",
+	/* max */	"1024",	// XXX stringify IOV_MAX
+	/* default */	"10",		// 5 should suffice, add headroom
+	/* units */	"struct iovec",
+	/* flags */	WIZARD,
+	/* s-text */
+	"Number of io vectors to allocate on the thread workspace for "
+	"ESI requests.",
+	/* l-text */	"",
+	/* func */	NULL
+)
+
 #if 0
 /* actual location mgt_param_bits.c*/
 /* See tbl/feature_bits.h */
@@ -1675,9 +1690,12 @@ PARAM(
 	"Bytes of auxiliary workspace per thread.\n"
 	"This workspace is used for certain temporary data structures "
 	"during the operation of a worker thread.\n"
-	"One use is for the io-vectors for writing requests and responses "
-	"to sockets, having too little space will result in more writev(2) "
-	"system calls, having too much just wastes the space.",
+	"One use is for the IO-vectors used during delivery. Setting "
+	"this parameter too low may increase the number of writev() "
+	"syscalls, setting it too high just wastes space.  ~0.1k + "
+	"UIO_MAXIOV * sizeof(struct iovec) (typically = ~16k for 64bit) "
+	"is considered the maximum sensible value under any known "
+	"circumstances (excluding exotic vmod use).",
 	/* l-text */	"",
 	/* func */	NULL
 )


More information about the varnish-commit mailing list