diff --git a/debian/changelog b/debian/changelog index f115fbf..e79dead 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +flexnbd (0.4.0) UNRELEASED; urgency=medium + + * Explicitly close the server control socket, and wait for it to close, to + prevent deadlocks during the server clean-up process (#40 !58) + + -- Patrick J Cherry Fri, 07 Dec 2018 16:38:56 +0000 + flexnbd (0.4.0) stable; urgency=medium * Ensure proxy state is completely reset before upstream init is read, diff --git a/src/server/control.c b/src/server/control.c index 394854e..51e58e9 100644 --- a/src/server/control.c +++ b/src/server/control.c @@ -78,6 +78,14 @@ void control_destroy(struct control *control) free(control); } +void control_wait_for_close(struct control *control) +{ + NULLCHECK(control); + while (!fd_is_closed(control->control_fd)) { + usleep(10000); + } +} + struct control_client *control_client_create(struct flexnbd *flexnbd, int client_fd, struct mbox *state_mbox) diff --git a/src/server/control.h b/src/server/control.h index 017611d..551265f 100644 --- a/src/server/control.h +++ b/src/server/control.h @@ -47,6 +47,7 @@ struct control_client { struct control *control_create(struct flexnbd *, const char *control_socket_name); void control_signal_close(struct control *); +void control_wait_for_close(struct control *control); void control_destroy(struct control *); void *control_runner(void *); diff --git a/src/server/serve.c b/src/server/serve.c index e5cc588..c262189 100644 --- a/src/server/serve.c +++ b/src/server/serve.c @@ -812,8 +812,6 @@ void server_control_arrived(struct server *serve) } -void flexnbd_stop_control(struct flexnbd *flexnbd); - /** Closes sockets, frees memory and waits for all client threads to finish */ void serve_cleanup(struct server *params, int fatal __attribute__ ((unused))) @@ -822,8 +820,20 @@ void serve_cleanup(struct server *params, void *status; info("cleaning up"); + + /* Close the control socket, and wait for it to close before proceeding. + * If we do not wait, we risk a race condition with the tail supervisor + * sending a status command, and deadlocking the mirroring. */ + if (params->flexnbd && params->flexnbd->control) { + debug("closing control socket"); + control_signal_close(params->flexnbd->control); + debug("waiting for control socket to close"); + control_wait_for_close(params->flexnbd->control); + } + if (params->server_fd) { + debug("closing server_fd"); close(params->server_fd); } @@ -861,15 +871,6 @@ void serve_cleanup(struct server *params, server_unlock_acl(params); } - /* if( params->flexnbd ) { */ - /* if ( params->flexnbd->control ) { */ - /* flexnbd_stop_control( params->flexnbd ); */ - /* } */ - /* flexnbd_destroy( params->flexnbd ); */ - /* } */ - - /* server_destroy( params ); */ - debug("Cleanup done"); } diff --git a/tests/acceptance/test_write_during_migration.rb b/tests/acceptance/test_write_during_migration.rb old mode 100644 new mode 100755 index dd259c5..70ef9e6 --- a/tests/acceptance/test_write_during_migration.rb +++ b/tests/acceptance/test_write_during_migration.rb @@ -82,15 +82,14 @@ class TestWriteDuringMigration < Test::Unit::TestCase UNIXSocket.open(@source_sock) do |sock| sock.write(['mirror', '127.0.0.1', @dest_port.to_s, 'exit'].join("\x0A") + "\x0A\x0A") sock.flush - rsp = sock.readline + sock.readline end end def wait_for_quit Timeout.timeout(10) do - start_time = Time.now - dst_result = Process.waitpid2(@dst_proc) - src_result = Process.waitpid2(@src_proc) + Process.waitpid2(@dst_proc) + Process.waitpid2(@src_proc) end end @@ -100,13 +99,28 @@ class TestWriteDuringMigration < Test::Unit::TestCase loop do begin client.write(offsets[rand(offsets.size)] * 4096, @write_data) - rescue StandardError => err + rescue StandardError # We expect a broken write at some point, so ignore it break end end end + def bombard_with_status + loop do + begin + UNIXSocket.open(@source_sock) do |sock| + sock.write("status\x0A\x0A") + sock.flush + sock.readline + end + rescue StandardError + # If the socket disappears, that's OK. + break + end + end + end + def assert_both_sides_identical # puts `md5sum #{@source_file} #{@dest_file}` @@ -160,5 +174,25 @@ class TestWriteDuringMigration < Test::Unit::TestCase (src_writers_1 + src_writers_2).each(&:join) assert_both_sides_identical end - end end + end + end + + + def test_status_call_after_cleanup + Dir.mktmpdir do |tmpdir| + Dir.chdir(tmpdir) do + make_files + + launch_servers + + status_poker = Thread.new { bombard_with_status } + + start_mirror + + wait_for_quit + status_poker.join + assert_both_sides_identical + end + end + end end