From 1225a28d4158065f86f65f2aacf3e2e1f39c7678 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Tue, 13 Nov 2018 16:42:13 +0000 Subject: [PATCH 01/12] Reset proxy req size/needle on timeout --- src/proxy/proxy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/proxy/proxy.c b/src/proxy/proxy.c index 8781308..9f3591a 100644 --- a/src/proxy/proxy.c +++ b/src/proxy/proxy.c @@ -859,6 +859,8 @@ void proxy_session(struct proxier *proxy) proxy->init.needle = 0; proxy->rsp.size = 0; proxy->rsp.needle = 0; + proxy->req.size = 0; + proxy->req.needle = 0; } } } From 9b1518806d482b798850ec95391eead685689abe Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Tue, 13 Nov 2018 21:33:06 +0000 Subject: [PATCH 02/12] Move state-resetting to after before the init is read from upstream This removes repetition and ensures a constant state before the upstream init is read. --- src/proxy/proxy.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/proxy/proxy.c b/src/proxy/proxy.c index 9f3591a..f08d00d 100644 --- a/src/proxy/proxy.c +++ b/src/proxy/proxy.c @@ -507,6 +507,20 @@ int proxy_continue_connecting_to_upstream(struct proxier *proxy, int state) /* Data may have changed while we were disconnected */ prefetch_set_is_empty(proxy->prefetch); + /* Reset our needles and sizes. + * + * Don't zero the req buffer size in case there's an outstanding request + * waiting to be re-sent following a disconnection. The init and rsp + * buffers are used for reading from upstream. If we're in this state then + * any upstream reads will be re-requested. + */ + proxy->init.needle = 0; + proxy->init.size = 0; + proxy->rsp.needle = 0; + proxy->rsp.size = 0; + proxy->req.needle = 0; + /* Don't zero the req.size, as we may need to re-write it upstream */ + info("Connected to upstream on fd %i", proxy->upstream_fd); return READ_INIT_FROM_UPSTREAM; } @@ -540,18 +554,19 @@ int proxy_read_init_from_upstream(struct proxier *proxy, int state) proxy_finish_connect_to_upstream(proxy, upstream_size, upstream_flags); - /* Currently, we only get disconnected from upstream (so needing to come - * here) when we have an outstanding request. If that becomes false, - * we'll need to choose the right state to return to here */ + /* Finished reading the init response now, reset the needle. */ proxy->init.needle = 0; + + /* Currently, we only get disconnected from upstream (so needing to + * come here) when we have an outstanding request. If that becomes + * false, we'll need to choose the right state to return to here. + */ return WRITE_TO_UPSTREAM; } return state; disconnect: - proxy->init.needle = 0; - proxy->init.size = 0; return CONNECT_TO_UPSTREAM; } @@ -574,7 +589,6 @@ int proxy_write_to_upstream(struct proxier *proxy, int state) if (count == -1) { warn(SHOW_ERRNO("Failed to send request to upstream")); - proxy->req.needle = 0; // We're throwing the socket away so no need to uncork return CONNECT_TO_UPSTREAM; } @@ -637,8 +651,6 @@ int proxy_read_from_upstream(struct proxier *proxy, int state) return state; disconnect: - proxy->rsp.needle = 0; - proxy->rsp.size = 0; return CONNECT_TO_UPSTREAM; } @@ -852,15 +864,6 @@ void proxy_session(struct proxier *proxy) warn("Timed out in state %s while communicating with upstream", proxy_session_state_names[state] ); state = CONNECT_TO_UPSTREAM; - - /* Since we've timed out, we won't have gone through the timeout logic - * in the various state handlers that resets these appropriately... */ - proxy->init.size = 0; - proxy->init.needle = 0; - proxy->rsp.size = 0; - proxy->rsp.needle = 0; - proxy->req.size = 0; - proxy->req.needle = 0; } } } From 391a17bfccdca67e24c5693f748358efa03447c6 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Tue, 13 Nov 2018 21:38:15 +0000 Subject: [PATCH 03/12] Updated changelog --- debian/changelog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/debian/changelog b/debian/changelog index aaa837c..a8af0d9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +flexnbd (0.4.0) UNRELEASED; urgency=medium + + * Ensure proxy state is completely reset before upstream init is read, + ensuring any waiting requests are fully replayed (#39, !54) + + -- Patrick J Cherry Tue, 13 Nov 2018 21:35:00 +0000 + flexnbd (0.3.0) stable; urgency=medium * Force a msync after every write, ignoring FUA flag, or lack thereof (!51). From 3fe9f2c6a1a80e9ccb408bd3cf0dd53d657bebb0 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 10:23:05 +0000 Subject: [PATCH 04/12] Removed a couple of gotos. Hadn't you heard? They're considered harmful. --- Makefile | 3 ++- src/proxy/proxy.c | 17 +++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index e8869db..12b038d 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ VPATH=src:tests/unit DESTDIR?=/ PREFIX?=/usr/local/bin INSTALLDIR=$(DESTDIR)/$(PREFIX) - +DEBUG=1 + ifdef DEBUG CFLAGS_EXTRA=-g -DDEBUG LDFLAGS_EXTRA=-g diff --git a/src/proxy/proxy.c b/src/proxy/proxy.c index f08d00d..b17dd11 100644 --- a/src/proxy/proxy.c +++ b/src/proxy/proxy.c @@ -537,7 +537,7 @@ int proxy_read_init_from_upstream(struct proxier *proxy, int state) if (count == -1) { warn(SHOW_ERRNO("Failed to read init from upstream")); - goto disconnect; + return CONNECT_TO_UPSTREAM; } if (proxy->init.needle == proxy->init.size) { @@ -547,7 +547,7 @@ int proxy_read_init_from_upstream(struct proxier *proxy, int state) ((struct nbd_init_raw *) proxy->init.buf, &upstream_size, &upstream_flags)) { warn("Upstream sent invalid init"); - goto disconnect; + return CONNECT_TO_UPSTREAM; } /* record the flags, and log the reconnection, set TCP_NODELAY */ @@ -565,9 +565,6 @@ int proxy_read_init_from_upstream(struct proxier *proxy, int state) } return state; - - disconnect: - return CONNECT_TO_UPSTREAM; } int proxy_write_to_upstream(struct proxier *proxy, int state) @@ -625,7 +622,7 @@ int proxy_read_from_upstream(struct proxier *proxy, int state) if (count == -1) { warn(SHOW_ERRNO("Failed to get reply from upstream")); - goto disconnect; + return CONNECT_TO_UPSTREAM; } if (proxy->rsp.needle == NBD_REPLY_SIZE) { @@ -633,7 +630,7 @@ int proxy_read_from_upstream(struct proxier *proxy, int state) if (reply->magic != REPLY_MAGIC) { warn("Reply magic is incorrect"); - goto disconnect; + return CONNECT_TO_UPSTREAM; } if (proxy->req_hdr.type == REQUEST_READ) { @@ -649,9 +646,6 @@ int proxy_read_from_upstream(struct proxier *proxy, int state) } return state; - - disconnect: - return CONNECT_TO_UPSTREAM; } @@ -861,8 +855,7 @@ void proxy_session(struct proxier *proxy) */ if (old_state == state && proxy_state_upstream(state)) { if ((monotonic_time_ms()) - state_started > UPSTREAM_TIMEOUT) { - warn("Timed out in state %s while communicating with upstream", proxy_session_state_names[state] - ); + warn("Timed out in state %s while communicating with upstream", proxy_session_state_names[state]); state = CONNECT_TO_UPSTREAM; } } From 3e00a88d45d66ea4ba5cd774425137148c5e9fbf Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 10:23:58 +0000 Subject: [PATCH 05/12] Removed debug --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 12b038d..bcfa23b 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,6 @@ VPATH=src:tests/unit DESTDIR?=/ PREFIX?=/usr/local/bin INSTALLDIR=$(DESTDIR)/$(PREFIX) -DEBUG=1 ifdef DEBUG CFLAGS_EXTRA=-g -DDEBUG From b26b308e681f16913934e82f2e8538aae4331cfb Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 16:49:55 +0000 Subject: [PATCH 06/12] Add test to check when proxy times-out mid-write to upstream I've add to add code to allow the environment to specify the upstream tiemout so we don't have to wait 30s for this test to happen. --- src/proxy/proxy.c | 19 +++++++++- src/proxy/proxy.h | 14 ++++++-- tests/acceptance/flexnbd/fake_dest.rb | 8 +++++ tests/acceptance/proxy_tests.rb | 50 +++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/src/proxy/proxy.c b/src/proxy/proxy.c index b17dd11..0e277e1 100644 --- a/src/proxy/proxy.c +++ b/src/proxy/proxy.c @@ -58,6 +58,20 @@ struct proxier *proxy_create(char *s_downstream_address, out->downstream_fd = -1; out->upstream_fd = -1; + int upstream_timeout = UPSTREAM_TIMEOUT; + + char *env_upstream_timeout = getenv("FLEXNBD_UPSTREAM_TIMEOUT"); + if (NULL != env_upstream_timeout) { + int ut = atoi(env_upstream_timeout); + warn("Got %i from atoi\n", ut); + if (ut > 0) { + upstream_timeout = ut; + } + } + + out->upstream_timeout = upstream_timeout; + out->upstream_timeout_ms = (long unsigned int) upstream_timeout * 1000; + out->prefetch = NULL; if (s_cache_bytes) { int cache_bytes = atoi(s_cache_bytes); @@ -781,6 +795,9 @@ void proxy_session(struct proxier *proxy) }; if (select_timeout.tv_sec > 0) { + if (select_timeout.tv_sec > proxy->upstream_timeout) { + select_timeout.tv_sec = proxy->upstream_timeout; + } select_timeout_ptr = &select_timeout; } @@ -854,7 +871,7 @@ void proxy_session(struct proxier *proxy) /* In these states, we're interested in restarting after a timeout. */ if (old_state == state && proxy_state_upstream(state)) { - if ((monotonic_time_ms()) - state_started > UPSTREAM_TIMEOUT) { + if ((monotonic_time_ms()) - state_started > proxy->upstream_timeout_ms) { warn("Timed out in state %s while communicating with upstream", proxy_session_state_names[state]); state = CONNECT_TO_UPSTREAM; } diff --git a/src/proxy/proxy.h b/src/proxy/proxy.h index 3652350..43d12ce 100644 --- a/src/proxy/proxy.h +++ b/src/proxy/proxy.h @@ -14,10 +14,10 @@ #endif /** UPSTREAM_TIMEOUT - * How long ( in ms ) to allow for upstream to respond. If it takes longer + * How long (in s) to allow for upstream to respond. If it takes longer * than this, we will cancel the current request-response to them and resubmit */ -#define UPSTREAM_TIMEOUT 30 * 1000 +#define UPSTREAM_TIMEOUT 30 struct proxier { /** address/port to bind to */ @@ -72,6 +72,16 @@ struct proxier { uint64_t req_count; int hello_sent; + /* + * How long (in s) to allow for upstream to respond. If it takes longer + * than this, we will cancel the current request-response to them and + * resubmit + * + * Defaults to UPSTREAM_TIMEOUT but can be overridden in the environment. + */ + int upstream_timeout; + unsigned long int upstream_timeout_ms; + /** These are only used if we pass --cache on the command line */ /* While the in-flight request has been munged by prefetch, these two are diff --git a/tests/acceptance/flexnbd/fake_dest.rb b/tests/acceptance/flexnbd/fake_dest.rb index 26d82bf..0b39bdc 100644 --- a/tests/acceptance/flexnbd/fake_dest.rb +++ b/tests/acceptance/flexnbd/fake_dest.rb @@ -54,6 +54,10 @@ module FlexNBD write_reply(handle, 1) end + def nread + @sock.nread + end + def disconnected? Timeout.timeout(2) do @sock.read(1).nil? @@ -85,6 +89,10 @@ module FlexNBD @sock.write(len) end + def getsockopt(level, optname) + @sock.getsockopt(level, optname) + end + def self.parse_be64(str) raise "String is the wrong length: 8 bytes expected (#{str.length} received)" unless str.length == 8 diff --git a/tests/acceptance/proxy_tests.rb b/tests/acceptance/proxy_tests.rb index afd7a11..2296453 100644 --- a/tests/acceptance/proxy_tests.rb +++ b/tests/acceptance/proxy_tests.rb @@ -206,6 +206,56 @@ module ProxyTests end end + def test_write_request_retried_when_upstream_times_out_during_write_phase + ENV['FLEXNBD_UPSTREAM_TIMEOUT'] = '4' + maker = make_fake_server + + with_ld_preload('setsockopt_logger') do + with_proxied_client(4096) do |client| + server, sc1 = maker.value + + # Guess an approprate request size, based on the send buffer size. + sz = sc1.getsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF).int * 4 + data1 = (b * sz) + + # Send the read request to the proxy + client.write(0, data1) + + # ensure we're given the read request + req1 = sc1.read_request + assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic] + assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type] + assert_equal 0, req1[:from] + assert_equal data1.size, req1[:len] + + # Need to sleep longer than the timeout set above + sleep 5 + # Check the number of bytes that can be read from the socket without + # blocking. If this equal to the size of the original request, then + # the whole request has been buffered. If this is the case, then the + # proxy will not time-out in the WRITE_UPSTREAM statem which is what + # we're trying to test. + assert sc1.nread < sz, 'Request from proxy completely buffered. Test is useless' + + # Kill the server again, now we're sure the read request has been sent once + sc1.close + + # We expect the proxy to reconnect without our client doing anything. + sc2 = server.accept + sc2.write_hello + + # And once reconnected, it should resend an identical request. + req2 = sc2.read_request + assert_equal req1, req2 + data2 = sc2.read_data(req2[:len]) + assert_equal data1, data2 + + sc2.close + server.close + end + end + end + def test_only_one_client_can_connect_to_proxy_at_a_time with_proxied_client do |_client| c2 = nil From b7b50faa17d63d8d0935e82c9500ed7a825f79c8 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 16:57:24 +0000 Subject: [PATCH 07/12] Updated comments --- tests/acceptance/proxy_tests.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/acceptance/proxy_tests.rb b/tests/acceptance/proxy_tests.rb index 2296453..d181add 100644 --- a/tests/acceptance/proxy_tests.rb +++ b/tests/acceptance/proxy_tests.rb @@ -158,7 +158,7 @@ module ProxyTests # Send the read request to the proxy client.write(0, (b * 4096)) - # ensure we're given the read request + # ensure we're given the write request req1 = sc1.read_request assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic] assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type] @@ -221,15 +221,19 @@ module ProxyTests # Send the read request to the proxy client.write(0, data1) - # ensure we're given the read request + # ensure we're given the write request req1 = sc1.read_request assert_equal ::FlexNBD::REQUEST_MAGIC, req1[:magic] assert_equal ::FlexNBD::REQUEST_WRITE, req1[:type] assert_equal 0, req1[:from] assert_equal data1.size, req1[:len] + # We do not read it at this point, as we want the proxy to be waiting + # in the WRITE_UPSTREAM state. + # Need to sleep longer than the timeout set above sleep 5 + # Check the number of bytes that can be read from the socket without # blocking. If this equal to the size of the original request, then # the whole request has been buffered. If this is the case, then the @@ -237,7 +241,7 @@ module ProxyTests # we're trying to test. assert sc1.nread < sz, 'Request from proxy completely buffered. Test is useless' - # Kill the server again, now we're sure the read request has been sent once + # Kill the server now that the timeout has happened. sc1.close # We expect the proxy to reconnect without our client doing anything. @@ -247,6 +251,8 @@ module ProxyTests # And once reconnected, it should resend an identical request. req2 = sc2.read_request assert_equal req1, req2 + + # And now lets read the data to make sure we get it all. data2 = sc2.read_data(req2[:len]) assert_equal data1, data2 From bb5271cea31c4248af6fe80aee4000aeeb528f29 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 16:58:33 +0000 Subject: [PATCH 08/12] Remove jessie packaging :'( --- .gitlab-ci.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e5951fc..d9b80fc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,12 +1,10 @@ stages: - package - publish - -package:jessie: &package + +.package: &package stage: package image: $CI_REGISTRY/docker-images/layers:$DISTRO-deb - variables: - DISTRO: jessie script: - package artifacts: @@ -20,7 +18,7 @@ package:stretch: publish: stage: publish - tags: + tags: - shell script: - publish From 93308bbda1928b114a44c6ce3f822e73719cb0f9 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 17:20:33 +0000 Subject: [PATCH 09/12] Added jessie back. --- .gitlab-ci.yml | 5 +++++ tests/acceptance/flexnbd/fake_dest.rb | 1 + 2 files changed, 6 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d9b80fc..8b3a543 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,6 +11,11 @@ stages: paths: - pkg/ +package:stretch: + <<: *package + variables: + DISTRO: jessie + package:stretch: <<: *package variables: diff --git a/tests/acceptance/flexnbd/fake_dest.rb b/tests/acceptance/flexnbd/fake_dest.rb index 0b39bdc..ddc0437 100644 --- a/tests/acceptance/flexnbd/fake_dest.rb +++ b/tests/acceptance/flexnbd/fake_dest.rb @@ -1,5 +1,6 @@ require 'socket' require 'timeout' +require 'io/wait' # For IO#nread require 'flexnbd/constants' From 5d1b0472decc8dcd17e695b54820a573fc422470 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 17:21:08 +0000 Subject: [PATCH 10/12] Fixed typo --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8b3a543..b35cc05 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,7 +11,7 @@ stages: paths: - pkg/ -package:stretch: +package:jessie: <<: *package variables: DISTRO: jessie From 256cba79e35e039734624aedb39707b862517c39 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Wed, 14 Nov 2018 17:31:42 +0000 Subject: [PATCH 11/12] Added note about the new environment variable --- README.proxy.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.proxy.txt b/README.proxy.txt index 91ec590..c4ae69f 100644 --- a/README.proxy.txt +++ b/README.proxy.txt @@ -169,6 +169,11 @@ That is, the '=' is required. This is a limitation of getopt-long. If no cache size is given, a size of 4096 bytes is assumed. Caching can be explicitly disabled by setting a size of 0. +ENVIRONMENT + + FLEXNBD_UPSTREAM_TIMEOUT The timeout in seconds for the proxy communicating + with the upstream server defaults to 30 seconds. + BUGS Should be reported via GitHub. From 8bc6ebbb0f317f66610c8e5ec2c53469ff4e4706 Mon Sep 17 00:00:00 2001 From: Patrick J Cherry Date: Thu, 15 Nov 2018 14:24:59 +0000 Subject: [PATCH 12/12] Updated changelog for release --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index a8af0d9..f115fbf 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,9 @@ -flexnbd (0.4.0) UNRELEASED; urgency=medium +flexnbd (0.4.0) stable; urgency=medium * Ensure proxy state is completely reset before upstream init is read, ensuring any waiting requests are fully replayed (#39, !54) - -- Patrick J Cherry Tue, 13 Nov 2018 21:35:00 +0000 + -- Patrick J Cherry Thu, 15 Nov 2018 14:24:26 +0000 flexnbd (0.3.0) stable; urgency=medium