[master] 5ea8940c9 hash: Revert recent hash changes
Dridi Boukelmoune
dridi.boukelmoune at gmail.com
Tue Mar 2 16:11:05 UTC 2021
commit 5ea8940c9d937697e8563609ac3a921e8755255f
Author: Dridi Boukelmoune <dridi.boukelmoune at gmail.com>
Date: Tue Mar 2 16:02:48 2021 +0100
hash: Revert recent hash changes
This reverts the following commits:
- e98e8e6497b9bbcca3e709a5ea0e094f2ec4b930.
"Documentation updates for changed `vcl_hash{}` / `hash_data()`"
- 001279ebd6ee83fafbc7fc1d1805bf2224361dd9.
"Document proper design pattern for using hash_data() in vcl_recv,"
- e36573e255af2aa1ed40fb367fcb5257ef7e0288.
"Add a test-case for hash_data() in vcl_recv{}"
- 03fe0cee1abc3741a7652ba2cf9c1bf822d65ab1.
"Allow hash_data() in vcl_recv{}"
- 4ebc3cfec133586cd8c4a715d8de18efb76402f1.
"Make it possible to override the initial digest, and explain in"
- d6ad52f5ff95daa3668fdad28b4c97e59b4c49d3
"Change the way we calculate the hash key for the cache."
Conflicts:
doc/sphinx/reference/dp_vcl_recv_hash.rst
doc/sphinx/reference/index.rst
Concerns were raised regarding a change of the way we compute the hash
key outside of the dot-zero release where we would expect such breaking
changes (among other things, vmod_shard relies on hash stability).
There is also no definite consensus of how to handle hashing from
vcl_recv.
diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c
index cc1e44c26..d94ec2ad5 100644
--- a/bin/varnishd/cache/cache_hash.c
+++ b/bin/varnishd/cache/cache_hash.c
@@ -183,6 +183,19 @@ HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
FREE_OBJ(oh);
}
+void
+HSH_AddString(struct req *req, void *ctx, const char *str)
+{
+
+ CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+ AN(ctx);
+ if (str != NULL) {
+ VSHA256_Update(ctx, str, strlen(str));
+ VSLb(req->vsl, SLT_Hash, "%s", str);
+ } else
+ VSHA256_Update(ctx, &str, 1);
+}
+
/*---------------------------------------------------------------------
* This is a debugging hack to enable testing of boundary conditions
* in the hash algorithm.
diff --git a/bin/varnishd/cache/cache_objhead.h b/bin/varnishd/cache/cache_objhead.h
index af1797462..bc1782379 100644
--- a/bin/varnishd/cache/cache_objhead.h
+++ b/bin/varnishd/cache/cache_objhead.h
@@ -71,6 +71,7 @@ int HSH_DerefObjCore(struct worker *, struct objcore **, int rushmax);
enum lookup_e HSH_Lookup(struct req *, struct objcore **, struct objcore **);
void HSH_Ref(struct objcore *o);
+void HSH_AddString(struct req *, void *ctx, const char *str);
unsigned HSH_Purge(struct worker *, struct objhead *, vtim_real ttl_now,
vtim_dur ttl, vtim_dur grace, vtim_dur keep);
struct objcore *HSH_Private(const struct worker *wrk);
diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c
index ea09689f6..1dc349b99 100644
--- a/bin/varnishd/cache/cache_req_fsm.c
+++ b/bin/varnishd/cache/cache_req_fsm.c
@@ -50,6 +50,7 @@
#include "storage/storage.h"
#include "common/heritage.h"
#include "vcl.h"
+#include "vsha256.h"
#include "vtim.h"
#define REQ_STEPS \
@@ -76,14 +77,6 @@
REQ_STEPS
#undef REQ_STEP
-/*
- * In this specific context we use SHA256 only as a very good
- * hashing function. That renders most of the normal concerns
- * about salting & seeding moot. However, if for some reason
- * you want to salt your hashes, this is where you do it.
- */
-static const uint8_t initial_digest[DIGEST_LEN];
-
/*--------------------------------------------------------------------
* Handle "Expect:" and "Connection:" on incoming request
*/
@@ -898,6 +891,7 @@ static enum req_fsm_nxt v_matchproto_(req_state_f)
cnt_recv(struct worker *wrk, struct req *req)
{
unsigned recv_handling;
+ struct VSHA256Context sha256ctx;
const char *ci, *cp, *endpname;
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
@@ -927,8 +921,6 @@ cnt_recv(struct worker *wrk, struct req *req)
return (REQ_FSM_DONE);
}
- memcpy(req->digest, initial_digest, sizeof req->digest);
-
VCL_recv_method(req->vcl, wrk, req, NULL, NULL);
if (wrk->handling == VCL_RET_FAIL) {
@@ -970,13 +962,13 @@ cnt_recv(struct worker *wrk, struct req *req)
}
}
- if (!memcmp(req->digest, initial_digest, sizeof req->digest)) {
- VCL_hash_method(req->vcl, wrk, req, NULL, NULL);
- if (wrk->handling == VCL_RET_FAIL)
- recv_handling = wrk->handling;
- else
- assert(wrk->handling == VCL_RET_LOOKUP);
- }
+ VSHA256_Init(&sha256ctx);
+ VCL_hash_method(req->vcl, wrk, req, NULL, &sha256ctx);
+ if (wrk->handling == VCL_RET_FAIL)
+ recv_handling = wrk->handling;
+ else
+ assert(wrk->handling == VCL_RET_LOOKUP);
+ VSHA256_Final(req->digest, &sha256ctx);
switch (recv_handling) {
case VCL_RET_VCL:
diff --git a/bin/varnishd/cache/cache_vrt.c b/bin/varnishd/cache/cache_vrt.c
index 6d323a22c..0a729c0e4 100644
--- a/bin/varnishd/cache/cache_vrt.c
+++ b/bin/varnishd/cache/cache_vrt.c
@@ -689,29 +689,19 @@ VRT_fail(VRT_CTX, const char *fmt, ...)
VCL_VOID
VRT_hashdata(VRT_CTX, VCL_STRANDS s)
{
- struct VSHA256Context sha256ctx;
int i;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);
- AZ(ctx->specific);
- VSHA256_Init(&sha256ctx);
- VSHA256_Update(&sha256ctx, ctx->req->digest, sizeof ctx->req->digest);
+ AN(ctx->specific);
AN(s);
- for (i = 0; i < s->n; i++) {
- if (s->p[i] != NULL) {
- VSHA256_Update(&sha256ctx, s->p[i], strlen(s->p[i]));
- VSLb(ctx->req->vsl, SLT_Hash, "%s", s->p[i]);
- } else {
- VSHA256_Update(&sha256ctx, "", 1);
- }
- }
+ for (i = 0; i < s->n; i++)
+ HSH_AddString(ctx->req, ctx->specific, s->p[i]);
/*
* Add a 'field-separator' to make it more difficult to
* manipulate the hash.
*/
- VSHA256_Update(&sha256ctx, "", 1);
- VSHA256_Final(ctx->req->digest, &sha256ctx);
+ HSH_AddString(ctx->req, ctx->specific, NULL);
}
/*--------------------------------------------------------------------*/
diff --git a/bin/varnishtest/tests/b00051.vtc b/bin/varnishtest/tests/b00051.vtc
index 8119dc6b5..3681ad2b3 100644
--- a/bin/varnishtest/tests/b00051.vtc
+++ b/bin/varnishtest/tests/b00051.vtc
@@ -22,5 +22,5 @@ client c1 {
rxresp
expect resp.http.req_hash ~ "[[:xdigit:]]{64}"
expect resp.http.req_hash == resp.http.bereq_hash
- expect resp.http.req_hash-sf == ":0jkH41nfmD0PRFsKpM1m7ucOApnxFc62B//mQWxOnmQ=:"
+ expect resp.http.req_hash-sf == ":3k0f0yRKtKt7akzkyNsTGSDOJAZOQowTwKWhu5+kIu0=:"
} -run
diff --git a/bin/varnishtest/tests/b00076.vtc b/bin/varnishtest/tests/b00076.vtc
deleted file mode 100644
index 120339275..000000000
--- a/bin/varnishtest/tests/b00076.vtc
+++ /dev/null
@@ -1,67 +0,0 @@
-varnishtest "hash_data() in vcl_recv{}"
-
-server s1 {
- rxreq
- txresp -hdr "Same: One"
-} -start
-
-varnish v1 -vcl+backend {
- sub vcl_recv {
- if (req.url == "/2") {
- hash_data(req.http.foo);
- }
- }
- sub vcl_hash {
- hash_data(req.url);
- return (lookup);
- }
-} -start
-
-varnish v1 -cliok "param.set vsl_mask +Hash"
-
-client c1 {
- txreq -url /1
- rxresp
- expect resp.status == 200
- expect resp.http.same == One
- txreq -url /2 -hdr "foo: /1"
- rxresp
- expect resp.status == 200
- expect resp.http.same == One
-} -run
-
-server s1 {
- rxreq
- txresp -hdr "Second: One"
-} -start
-
-varnish v1 -vcl+backend {
- sub make_hash_key {
- hash_data("Documented Design Pattern");
- hash_data(req.url);
- }
-
- sub vcl_hash {
- call make_hash_key;
- return (lookup);
- }
-
- sub vcl_recv {
- if (req.http.early) {
- call make_hash_key;
- }
- }
-}
-
-client c1 {
- txreq
- rxresp
- expect resp.status == 200
- expect resp.http.second == One
-
- txreq -hdr "early: yes"
- rxresp
- expect resp.status == 200
- expect resp.http.second == One
-} -run
-
diff --git a/bin/varnishtest/tests/d00020.vtc b/bin/varnishtest/tests/d00020.vtc
index 9f70ff2d3..0c42285d7 100644
--- a/bin/varnishtest/tests/d00020.vtc
+++ b/bin/varnishtest/tests/d00020.vtc
@@ -218,9 +218,9 @@ client c1 {
client c2 {
txreq -url /b/def
rxresp
- expect resp.http.hash == "2e03d4fee2722154a84036faa3e4b6851e003830eadbe0d2874e3df09c8efe55"
+ expect resp.http.hash == "93d1c4ad76396c91dd97fa310f7f26445332662c89393dbeeb77fe49f9111ee4"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0x2e03d4fe
+ expect resp.http.key -eq 0x93d1c4ad
expect resp.http.alt == 0
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
@@ -228,9 +228,9 @@ client c2 {
txreq -url /b/hash
rxresp
- expect resp.http.hash == "49f33e9019891091d3dcf6edab6d9433b756678bdf5202dd41b8a667c89887b1"
+ expect resp.http.hash == "e47da20ea4db49d4f22acdadc69f02f445002be520a2865cd3351272add62540"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0x49f33e90
+ expect resp.http.key -eq 0xe47da20e
expect resp.http.alt == 1
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
@@ -268,9 +268,9 @@ client c2 {
client c3 {
txreq -url /b/c/hash/def
rxresp
- expect resp.http.hash == "dd70dcbbf385c398ee3b53a849f12d0d846fc21292349fb45d37b2d9d8eca25e"
+ expect resp.http.hash == "df9a465f8a0455c334b24c1638d3adda0f6e64fbe759029ab83602e3b9138884"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0xdd70dcbb
+ expect resp.http.key -eq 0xdf9a465f
expect resp.http.alt == 7
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
@@ -278,9 +278,9 @@ client c3 {
txreq -url /b/c/hash/hash
rxresp
- expect resp.http.hash == "41d09b9877cd0ac0eab888359b0ad54f0bf41da0ac03dc1b8ae12aff18465a8d"
+ expect resp.http.hash == "0eb35bc1fab5aad5902fd1bac86540bd13d43aa31c6c46f54e776b43392e66e6"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0x41d09b98
+ expect resp.http.key -eq 0x0eb35bc1
expect resp.http.alt == 8
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
@@ -288,9 +288,9 @@ client c3 {
txreq -url /b/c/hash/url
rxresp
- expect resp.http.hash == "dcac849e02b3322f5fd3dddf9b9f5fc26d295733e6f1c51b190dfb7239a56e28"
+ expect resp.http.hash == "1eb67b701ea07151cac5bea1f11b6267b9de15a3ff83cec995590480cbc2c750"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0xdcac849e
+ expect resp.http.key -eq 0x1eb67b70
expect resp.http.alt == 9
expect resp.http.warmup == "0.500"
expect resp.http.rampup == "true"
@@ -298,9 +298,9 @@ client c3 {
txreq -url /b/c/hash/key
rxresp
- expect resp.http.hash == "112393761506e85f0c700a5d669a48b54001c870eb2e8d95f4d2f6fdccbe80a3"
+ expect resp.http.hash == "a11b617e21aa7db22b6205d7612002e595b1b00d8c11602017f65456a1be3a35"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0x11239376
+ expect resp.http.key -eq 0xa11b617e
expect resp.http.alt == 10
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "false"
@@ -308,9 +308,9 @@ client c3 {
txreq -url /b/c/hash/blob
rxresp
- expect resp.http.hash == "5ef050c1185ac02a66d9f79703b8cd5f0636abf3b1f15b9f22e0fe64df985d28"
+ expect resp.http.hash == "d7eecc0ac83e1727332dcd8c7c8ae9f3114123abb2bf7e3fb15ecea8c84bb239"
expect resp.http.by == "HASH"
- expect resp.http.key -eq 0x5ef050c1
+ expect resp.http.key -eq 0xd7eecc0a
expect resp.http.alt == 11
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
diff --git a/doc/changes.rst b/doc/changes.rst
index 146ec9f60..c11ba2cf5 100644
--- a/doc/changes.rst
+++ b/doc/changes.rst
@@ -35,15 +35,6 @@ release process.
Varnish Cache Next (2021-03-15)
================================
-* `hash_data()` can be called from `vcl_recv`, in which case
- `vcl_hash` is not called. This allows `vcl_recv` and
- backends to take the object identity into account, for
- instance when choosing backend and grace periods.
-
-* `hash_data()` calculates the hash-key differently than previously.
- This means that persistent storage will be lost, and it may break
- very specific `*.vtc` test-scripts.
-
* counters MAIN.s_req_bodybytes and VBE.*.tools.beresp_bodybytes
are now always the number of bodybytes moved on the wire.
diff --git a/doc/sphinx/reference/dp_vcl_recv_hash.rst b/doc/sphinx/reference/dp_vcl_recv_hash.rst
deleted file mode 100644
index e63607e1e..000000000
--- a/doc/sphinx/reference/dp_vcl_recv_hash.rst
+++ /dev/null
@@ -1,29 +0,0 @@
-..
- Copyright (c) 2021 Varnish Software AS
- SPDX-License-Identifier: BSD-2-Clause
- See LICENSE file for full text of license
-
-.. _db_vcl_recv_hash:
-
-Hashing in `vcl_recv{}`
-=======================
-
-Calculating the `hash` used for cache lookup already in `vcl_recv{}`
-makes it possible for certain directors to offer targeted health status.
-
-To ensure consistent hashing, use this design pattern::
-
- sub make_hash_key {
- hash_data([…]);
- }
-
- sub vcl_hash {
- call make_hash_key;
- return (lookup);
- }
-
- sub vcl_recv {
- […]
- call make_hash_key;
- […]
- }
diff --git a/doc/sphinx/reference/index.rst b/doc/sphinx/reference/index.rst
index 11fe4737c..c5c751019 100644
--- a/doc/sphinx/reference/index.rst
+++ b/doc/sphinx/reference/index.rst
@@ -27,7 +27,6 @@ VCL Design Patterns
.. toctree::
:maxdepth: 1
- dp_vcl_recv_hash.rst
dp_vcl_resp_status.rst
Bundled VMODs
diff --git a/doc/sphinx/reference/vcl.rst b/doc/sphinx/reference/vcl.rst
index 02e2a64df..3dbae425b 100644
--- a/doc/sphinx/reference/vcl.rst
+++ b/doc/sphinx/reference/vcl.rst
@@ -355,9 +355,7 @@ hash_data(input)
~~~~~~~~~~~~~~~~
Adds an input to the hash input. In the built-in VCL ``hash_data()``
- is called on the host and URL of the request.
- Available in ``vcl_hash`` or ``vcl_recv``. If used in ``vcl_recv``
- ``vcl_hash`` will not be called.
+ is called on the host and URL of the request. Available in ``vcl_hash``.
synthetic(STRING)
~~~~~~~~~~~~~~~~~
diff --git a/doc/sphinx/users-guide/vcl-built-in-subs.rst b/doc/sphinx/users-guide/vcl-built-in-subs.rst
index 3c0990910..2f46d8b87 100644
--- a/doc/sphinx/users-guide/vcl-built-in-subs.rst
+++ b/doc/sphinx/users-guide/vcl-built-in-subs.rst
@@ -134,9 +134,8 @@ of the following keywords:
vcl_hash
~~~~~~~~
-Called after `vcl_recv` to create a hash value for the request,
-unless `vcl_recv` already did that.
-This is used as the key to store and look up objects in the cache.
+Called after `vcl_recv` to create a hash value for the request. This is
+used as a key to look up the object in Varnish.
The `vcl_hash` subroutine may terminate with calling ``return()`` with one
of the following keywords:
diff --git a/doc/sphinx/users-guide/vcl-hashing.rst b/doc/sphinx/users-guide/vcl-hashing.rst
index 1f50a5e63..c605f5589 100644
--- a/doc/sphinx/users-guide/vcl-hashing.rst
+++ b/doc/sphinx/users-guide/vcl-hashing.rst
@@ -6,10 +6,12 @@
Hashing
-------
-Internally, when Varnish stores content in the cache indexed by a hash
-key used to find the object again. In the default setup
-this key is calculated based on `URL`, the `Host:` header, or
-if there is none, the IP address of the server::
+Internally, when Varnish stores content in the cache it stores the object
+together with a hash key to find the object again. In the default setup
+this key is calculated based on the content of the *Host* header or the
+IP address of the server and the URL.
+
+Behold the `default vcl`::
sub vcl_hash {
hash_data(req.url);
@@ -21,7 +23,7 @@ if there is none, the IP address of the server::
return (lookup);
}
-As you can see it first hashes `req.url` and then `req.http.host` if
+As you can see it first checks in `req.url` then `req.http.host` if
it exists. It is worth pointing out that Varnish doesn't lowercase the
hostname or the URL before hashing it so in theory having "Varnish.org/"
and "varnish.org/" would result in different cache entries. Browsers
@@ -45,16 +47,7 @@ And then add a `vcl_hash`::
hash_data(req.http.X-Country-Code);
}
-Because there is no `return(lookup)`, the builtin VCL will take care
-of adding the URL, `Host:` or server IP# to the hash as usual.
-
-If `vcl_hash` did return, ie::
-
- sub vcl_hash {
- hash_data(req.http.X-Country-Code);
- return(lookup);
- }
-
-then *only* the country-code would matter, and Varnish would return
-seemingly random objects, ignoring the URL, (but they would always
-have the correct `X-Country-Code`).
+As the default VCL will take care of adding the host and URL to the hash
+we don't have to do anything else. Be careful calling ``return (lookup)``
+as this will abort the execution of the default VCL and Varnish can end
+up returning data based on more or less random inputs.
diff --git a/include/vrt.h b/include/vrt.h
index 70eb45179..01ca336c5 100644
--- a/include/vrt.h
+++ b/include/vrt.h
@@ -54,7 +54,6 @@
* binary/load-time compatible, increment MAJOR version
*
* 13.0 (2021-03-15)
- * VRT_hashdata() produces different hash-keys.
* Move VRT_synth_page() to deprecated status
* Add VRT_synth_strands() and VRT_synth_blob()
* struct vrt_type now produced by generate.py
diff --git a/lib/libvcc/vcc_action.c b/lib/libvcc/vcc_action.c
index 57a2f6daf..60fbdfa7b 100644
--- a/lib/libvcc/vcc_action.c
+++ b/lib/libvcc/vcc_action.c
@@ -452,7 +452,7 @@ vcc_Action_Init(struct vcc *tl)
ACT(ban, vcc_act_ban, 0);
ACT(call, vcc_act_call, 0);
ACT(hash_data, vcc_act_hash_data,
- VCL_MET_RECV | VCL_MET_HASH);
+ VCL_MET_HASH);
ACT(if, vcc_Act_If, 0);
ACT(new, vcc_Act_New,
VCL_MET_INIT);
More information about the varnish-commit
mailing list