diff --git a/src/client.c b/src/client.c index 5ca192a..d9a4cca 100644 --- a/src/client.c +++ b/src/client.c @@ -366,7 +366,11 @@ void client_reply_to_read( struct client* client, struct nbd_request request ) client_write_reply( client, &request, 0); offset = request.from; - FATAL_IF_NEGATIVE( + + /* If we get cut off partway through this sendfile, we don't + * want to kill the server. This should be an error. + */ + ERROR_IF_NEGATIVE( sendfileloop( client->socket, client->fileno, diff --git a/src/serve.c b/src/serve.c index 0045e07..f408dbf 100644 --- a/src/serve.c +++ b/src/serve.c @@ -192,6 +192,62 @@ int server_port( struct server * server ) } +/* Try to bind to our serving socket, retrying until it works or gives a + * fatal error. */ +void serve_bind( struct server * serve ) +{ + int bind_result; + + char s_address[64]; + memset( s_address, 0, 64 ); + strcpy( s_address, "???" ); + inet_ntop( serve->bind_to.generic.sa_family, + sockaddr_address_data( &serve->bind_to.generic), + s_address, 64 ); + + do { + bind_result = bind( + serve->server_fd, + &serve->bind_to.generic, + sizeof(serve->bind_to)); + + if ( 0 == bind_result ) { + info( "Bound to %s port %d", + s_address, + ntohs(serve->bind_to.v4.sin_port)); + break; + } + else { + + warn( "Couldn't bind to %s port %d: %s", + s_address, + ntohs(serve->bind_to.v4.sin_port), + strerror( errno ) ); + + switch (errno){ + /* bind() can give us EACCES, + * EADDRINUSE, EADDRNOTAVAIL, EBADF, + * EINVAL or ENOTSOCK. + * + * Any of these other than EACCES, + * EADDRINUSE or EADDRNOTAVAIL signify + * that there's a logic error somewhere. + */ + case EACCES: + case EADDRINUSE: + case EADDRNOTAVAIL: + debug("retrying"); + sleep(1); + continue; + default: + fatal( "Giving up" ); + } + } + } while ( 1 ); +} + + + /** Prepares a listening socket for the NBD server, binding etc. */ void serve_open_server_socket(struct server* params) { @@ -205,21 +261,32 @@ void serve_open_server_socket(struct server* params) FATAL_IF_NEGATIVE(params->server_fd, "Couldn't create server socket"); + /* We need SO_REUSEADDR so that when we switch from listening to + * serving we don't have to change address if we don't want to. + * + * If this fails, it's not necessarily bad in principle, but at + * this point in the code we can't tell if it's going to be a + * problem. It's also indicative of something odd going on, so + * we barf. + */ FATAL_IF_NEGATIVE( setsockopt(params->server_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)), "Couldn't set SO_REUSEADDR" ); + /* TCP_NODELAY makes everything not be slow. If we can't set + * this, again, there's something odd going on which we don't + * understand. + */ FATAL_IF_NEGATIVE( setsockopt(params->server_fd, IPPROTO_TCP, TCP_NODELAY, &optval, sizeof(optval)), "Couldn't set TCP_NODELAY" ); - FATAL_IF_NEGATIVE( - bind(params->server_fd, ¶ms->bind_to.generic, - sizeof(params->bind_to)), - "Couldn't bind server to IP address" - ); + /* If we can't bind, presumably that's because someone else is + * squatting on our ip/port combo, or the ip isn't yet + * configured. Ideally we want to retry this. */ + serve_bind(params); FATAL_IF_NEGATIVE( listen(params->server_fd, params->tcp_backlog), diff --git a/src/util.c b/src/util.c index 96e1391..f6fd4b8 100644 --- a/src/util.c +++ b/src/util.c @@ -44,13 +44,24 @@ void exit_err( const char *msg ) void mylog(int line_level, const char* format, ...) { va_list argptr; + if (line_level < log_level) { return; } + /* Copy the format sideways so that we can append a "\n" to it + * and avoid a second fprintf. This stops log lines from getting + * interleaved. + */ + int format_len = strlen( format ); + char *format_n = xmalloc( format_len + 2 ); + memcpy( format_n, format, format_len ); + format_n[format_len] = '\n'; + va_start(argptr, format); - vfprintf(stderr, format, argptr); + vfprintf(stderr, format_n, argptr); va_end(argptr); - fprintf(stderr, "\n"); + + free( format_n ); } void* xrealloc(void* ptr, size_t size) diff --git a/tests/acceptance/environment.rb b/tests/acceptance/environment.rb index 98a2705..69f015b 100644 --- a/tests/acceptance/environment.rb +++ b/tests/acceptance/environment.rb @@ -5,7 +5,7 @@ require 'file_writer' class Environment attr_reader( :blocksize, :filename1, :filename2, :ip, - :port1, :port2, :nbd1, :nbd2, :file1, :file2 ) + :port1, :port2, :nbd1, :nbd2, :file1, :file2, :rebind_port1 ) def initialize @blocksize = 1024 @@ -14,9 +14,11 @@ class Environment @ip = "127.0.0.1" @available_ports = [*40000..41000] - listening_ports @port1 = @available_ports.shift + @rebind_port1 = @available_ports.shift @port2 = @available_ports.shift - @nbd1 = FlexNBD.new("../../build/flexnbd", @ip, @port1) - @nbd2 = FlexNBD.new("../../build/flexnbd", @ip, @port2) + @rebind_port2 = @available_ports.shift + @nbd1 = FlexNBD.new("../../build/flexnbd", @ip, @port1, @ip, @rebind_port1) + @nbd2 = FlexNBD.new("../../build/flexnbd", @ip, @port2, @ip, @rebind_port2) @fake_pid = nil end @@ -95,6 +97,7 @@ class Environment end + @nbd1.can_die(0) @nbd1.kill @nbd2.kill @@ -104,7 +107,7 @@ class Environment end - def run_fake( name, addr, port ) + def run_fake( name, addr, port, rebind_addr = addr, rebind_port = port ) fakedir = File.join( File.dirname( __FILE__ ), "fakes" ) fake = Dir[File.join( fakedir, name ) + "*"].sort.find { |fn| File.executable?( fn ) @@ -113,8 +116,11 @@ class Environment raise "no fake executable" unless fake raise "no addr" unless addr raise "no port" unless port + raise "no rebind_addr" unless rebind_addr + raise "no rebind_port" unless rebind_port + @fake_pid = fork do - exec fake + " " + addr.to_s + " " + port.to_s + " " + @nbd1.pid.to_s + exec [fake, addr, port, @nbd1.pid, rebind_addr, rebind_port].map{|x| x.to_s}.join(" ") end sleep(0.5) end diff --git a/tests/acceptance/fakes/dest/close_after_entrust.rb b/tests/acceptance/fakes/dest/close_after_entrust.rb index bf08bf1..5a5a6b7 100755 --- a/tests/acceptance/fakes/dest/close_after_entrust.rb +++ b/tests/acceptance/fakes/dest/close_after_entrust.rb @@ -24,5 +24,6 @@ client.close client2 = server.accept client2.receive_mirror + exit(0) diff --git a/tests/acceptance/fakes/source/close_after_entrust_reply.rb b/tests/acceptance/fakes/source/close_after_entrust_reply.rb index 988897b..0ec149d 100755 --- a/tests/acceptance/fakes/source/close_after_entrust_reply.rb +++ b/tests/acceptance/fakes/source/close_after_entrust_reply.rb @@ -9,7 +9,7 @@ require 'flexnbd/fake_source' include FlexNBD -addr, port, srv_pid = *ARGV +addr, port, srv_pid, rebind_addr, rebind_port = *ARGV client = FakeSource.new( addr, port, "Timed out connecting" ) client.read_hello @@ -25,8 +25,8 @@ sleep(0.25) client2 = FakeSource.new( addr, port, "Timed out reconnecting to mirror" ) client2.send_mirror -sleep(0.25) -client3 = FakeSource.new( addr, port, "Timed out reconnecting to read" ) +sleep(1) +client3 = FakeSource.new( rebind_addr, rebind_port, "Timed out reconnecting to read" ) client3.close exit(0) diff --git a/tests/acceptance/fakes/source/close_mid_read.rb b/tests/acceptance/fakes/source/close_mid_read.rb new file mode 100755 index 0000000..3b5fa20 --- /dev/null +++ b/tests/acceptance/fakes/source/close_mid_read.rb @@ -0,0 +1,17 @@ +#!/usr/bin/env ruby + +# Connect, but get the protocol wrong: don't read the hello, so we +# close and break the sendfile. + +require 'flexnbd/fake_source' +include FlexNBD + +addr, port, srv_pid, newaddr, newport = *ARGV + +client = FakeSource.new( addr, port, "Timed out connecting" ) +client.write_read_request( 0, 8 ) +client.read_raw( 4 ) +client.close + + +exit(0) diff --git a/tests/acceptance/fakes/source/successful_transfer.rb b/tests/acceptance/fakes/source/successful_transfer.rb new file mode 100755 index 0000000..4472aba --- /dev/null +++ b/tests/acceptance/fakes/source/successful_transfer.rb @@ -0,0 +1,29 @@ +#!/usr/bin/env ruby + +# Successfully send a migration, but squat on the IP and port which +# the destination wants to rebind to. The destination should retry +# every second, so we give it up then attempt to connect to the new +# server. + +require 'flexnbd/fake_source' +include FlexNBD + +addr, port, srv_pid, newaddr, newport = *ARGV + +squatter = TCPServer.open( newaddr, newport.to_i ) + +client = FakeSource.new( addr, port, "Timed out connecting" ) +client.send_mirror() + +sleep(1) + +squatter.close() + +sleep(1) + +client2 = FakeSource.new( newaddr, newport.to_i, "Timed out reconnecting" ) +client2.read_hello +client2.read( 0, 8 ) +client2.close + +exit( 0 ) diff --git a/tests/acceptance/flexnbd.rb b/tests/acceptance/flexnbd.rb index 7109f83..deb02c3 100644 --- a/tests/acceptance/flexnbd.rb +++ b/tests/acceptance/flexnbd.rb @@ -166,7 +166,7 @@ end # class ValgrindExecutor # Noddy test class to exercise FlexNBD from the outside for testing. # class FlexNBD - attr_reader :bin, :ctrl, :pid, :ip, :port + attr_reader :bin, :ctrl, :pid, :ip, :port, :rebind_ip, :rebind_port class << self def counter @@ -187,7 +187,7 @@ class FlexNBD end - def initialize(bin, ip, port) + def initialize(bin, ip, port, rebind_ip = ip, rebind_port = port) @bin = bin @debug = (ENV['DEBUG'] && `#{@bin} serve --help` =~ /--verbose/) ? "--verbose" : "" raise "#{bin} not executable" unless File.executable?(bin) @@ -195,6 +195,8 @@ class FlexNBD @ctrl = "/tmp/.flexnbd.ctrl.#{Time.now.to_i}.#{rand}" @ip = ip @port = port + @rebind_ip = rebind_ip + @rebind_port = rebind_port @kill = [] end @@ -224,6 +226,8 @@ class FlexNBD "--addr #{ip} "\ "--port #{port} "\ "--file #{file} "\ + "--rebind-addr #{rebind_ip} " \ + "--rebind-port #{rebind_port} " \ "--sock #{ctrl} "\ "#{@debug} "\ "#{acl.join(' ')}" diff --git a/tests/acceptance/flexnbd/fake_source.rb b/tests/acceptance/flexnbd/fake_source.rb index 9850ffd..eb231f9 100644 --- a/tests/acceptance/flexnbd/fake_source.rb +++ b/tests/acceptance/flexnbd/fake_source.rb @@ -54,12 +54,28 @@ module FlexNBD send_request( 2, handle ) end + def write_read_request( from, len, handle="myhandle" ) + send_request( 0, "myhandle", from, len ) + end + def write_data( data ) @sock.write( data ) end + # Handy utility + def read( from, len ) + timing_out( 2, "Timed out reading" ) do + send_request( 0, "myhandle", from, len ) + read_raw( len ) + end + end + + def read_raw( len ) + @sock.read( len ) + end + def send_mirror read_hello() write_write_request( 0, 8 ) diff --git a/tests/acceptance/test_dest_error_handling.rb b/tests/acceptance/test_dest_error_handling.rb index b9f830f..085cec8 100644 --- a/tests/acceptance/test_dest_error_handling.rb +++ b/tests/acceptance/test_dest_error_handling.rb @@ -34,6 +34,10 @@ class TestDestErrorHandling < Test::Unit::TestCase end + def test_partial_read_causes_error + run_fake( "source/close_mid_read" ) + end + def test_double_connect_during_hello run_fake( "source/connect_during_hello" ) end @@ -72,18 +76,32 @@ class TestDestErrorHandling < Test::Unit::TestCase # This fake runs a failed migration then a succeeding one, so we # expect the destination to take control. run_fake( "source/close_after_entrust_reply" ) + assert_control end + + def test_cant_rebind_retries + run_fake( "source/successful_transfer" ) + end + + private def run_fake( name ) - @env.run_fake( name, @env.ip, @env.port1 ) + @env.run_fake( name, @env.ip, @env.port1, @env.ip, @env.rebind_port1 ) assert @env.fake_reports_success, "#{name} failed." end + def status + stat, _ = @env.status1 + stat + end + def assert_no_control - status, stderr = @env.status1 assert !status['has_control'], "Thought it had control" end + def assert_control + assert status['has_control'], "Didn't think it had control" + end end # class TestDestErrorHandling diff --git a/tests/acceptance/test_source_error_handling.rb b/tests/acceptance/test_source_error_handling.rb index eaae454..e048f24 100644 --- a/tests/acceptance/test_source_error_handling.rb +++ b/tests/acceptance/test_source_error_handling.rb @@ -80,6 +80,7 @@ class TestSourceErrorHandling < Test::Unit::TestCase def test_post_entrust_disconnect_causes_retry + @env.nbd1.can_die(0) run_fake( "dest/close_after_entrust" ) end