Automated merge with ssh://dev.bytemark.co.uk//repos/flexnbd-c

This commit is contained in:
nick
2014-01-22 11:49:26 +00:00
7 changed files with 100 additions and 55 deletions

View File

@@ -25,7 +25,7 @@ COMMANDS
serve serve
~~~~~ ~~~~~
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE> $ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]* [--sock <SOCK>] [--default-deny] [-k] [global option]* [acl entry]*
Serve a file. If any ACL entries are given (which should be IP Serve a file. If any ACL entries are given (which should be IP
addresses), only those clients listed will be permitted to connect. addresses), only those clients listed will be permitted to connect.
@@ -55,6 +55,12 @@ Options
empty ACL will let no clients connect. If it is not given, an empty ACL will let no clients connect. If it is not given, an
empty ACL will let any client connect. empty ACL will let any client connect.
*--killswitch, -k*:
If set, we implement a 2-minute timeout on NBD requests and
responses. If a request takes longer than that to complete,
the client is disconnected. This is useful to keep broken
clients from breaking migrations, among other things.
listen listen
~~~~~~ ~~~~~~

View File

@@ -33,9 +33,13 @@ CCFLAGS = %w(
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers ) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
[ENV['CFLAGS']] [ENV['CFLAGS']]
LIBCHECK = File.exists?("/usr/lib/libcheck.a") ? LIBCHECK = if File.exists?("/usr/lib/libcheck.a")
"/usr/lib/libcheck.a" : "/usr/lib/libcheck.a"
elsif File.exists?("/usr/local/lib/libcheck.a")
"/usr/local/lib/libcheck.a" "/usr/local/lib/libcheck.a"
else
"-lcheck"
end
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n| TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
File.basename( n )[%r{check_(.+)\.c},1] } File.basename( n )[%r{check_(.+)\.c},1] }
@@ -114,7 +118,7 @@ end
def gcc_compile( target, source ) def gcc_compile( target, source )
FileUtils.mkdir_p File.dirname( target ) FileUtils.mkdir_p File.dirname( target )
sh "#{CC} -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} " sh "#{CC} -I/usr/include/libev -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
end end
def gcc_link(target, objects) def gcc_link(target, objects)

View File

@@ -15,6 +15,20 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <fcntl.h> #include <fcntl.h>
// When this signal is invoked, we call shutdown() on the client fd, which
// results in the thread being wound up
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
{
int fd = info->si_value.sival_int;
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
FATAL_IF(
-1 == shutdown( fd, SHUT_RDWR ),
"Failed to kill shutdown() the socket, killing the server"
);
}
struct client *client_create( struct server *serve, int socket ) struct client *client_create( struct server *serve, int socket )
{ {
NULLCHECK( serve ); NULLCHECK( serve );
@@ -25,6 +39,13 @@ struct client *client_create( struct server *serve, int socket )
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL .sigev_signo = CLIENT_KILLSWITCH_SIGNAL
}; };
/*
* Our killswitch closes this socket, forcing read() and write() calls
* blocked on it to return with an error. The thread then close()s the
* socket itself, avoiding races.
*/
evp.sigev_value.sival_int = socket;
c = xmalloc( sizeof( struct client ) ); c = xmalloc( sizeof( struct client ) );
c->stopped = 0; c->stopped = 0;
c->socket = socket; c->socket = socket;
@@ -199,36 +220,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
NULLCHECK( out_request ); NULLCHECK( out_request );
struct nbd_request_raw request_raw; struct nbd_request_raw request_raw;
fd_set fds;
struct timeval * ptv = NULL;
int fd_count;
/* We want a timeout if this is an inbound migration, but not otherwise.
* This is compile-time selectable, as it will break mirror max_bps
*/
#ifdef HAS_LISTEN_TIMEOUT
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
if ( !server_is_in_control( client->serve ) ) {
ptv = &tv;
}
#endif
FD_ZERO(&fds);
FD_SET(client->socket, &fds);
self_pipe_fd_set( client->stop_signal, &fds );
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, ptv);
if ( fd_count == 0 ) {
/* This "can't ever happen" */
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
else { error("Timed out waiting for I/O"); }
}
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
debug("Client received stop signal.");
return 0;
}
if (fd_read_request(client->socket, &request_raw) == -1) { if (fd_read_request(client->socket, &request_raw) == -1) {
*disconnected = 1; *disconnected = 1;
@@ -553,6 +544,44 @@ int client_serve_request(struct client* client)
struct nbd_request request = {0}; struct nbd_request request = {0};
int stop = 1; int stop = 1;
int disconnected = 0; int disconnected = 0;
fd_set fds;
int fd_count;
/* wait until there are some bytes on the fd before committing to reads
* FIXME: this whole scheme is broken because we're using blocking reads.
* read() can block directly after a select anyway, and it's possible that,
* without the killswitch, we'd hang forever. With the killswitch, we just
* hang for "a while". The Right Thing to do is to rewrite client.c to be
* non-blocking.
*/
FD_ZERO(&fds);
FD_SET(client->socket, &fds);
self_pipe_fd_set( client->stop_signal, &fds );
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL);
if ( fd_count == 0 ) {
/* This "can't ever happen" */
fatal( "No FDs selected, and no timeout!" );
}
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
debug("Client received stop signal.");
return 0;
}
/* We arm / disarm around the whole request cycle. The reason for this is
* that the remote peer could uncleanly die at any point; if we're stuck on
* a blocking read(), then that will hang for (almost) forever. This is bad
* in general, makes the server respond only to kill -9, and breaks
* outward mirroring in a most unpleasant way.
*
* The replication is simple: open a connection to the flexnbd server, write
* a single byte, and then wait.
*
*/
client_arm_killswitch( client );
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; } if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
if ( disconnected ) { return stop; } if ( disconnected ) { return stop; }
@@ -562,25 +591,12 @@ int client_serve_request(struct client* client)
{ {
if ( !server_is_closed( client->serve ) ) { if ( !server_is_closed( client->serve ) ) {
/* We arm / disarm around client_reply() to catch cases where the
* remote peer sends part of a write request data before dying,
* and cases where we send part of read reply data before they die.
*
* That last is theoretical right now, but could break us in the
* same way as a half-write (which causes us to sit in read forever)
*
* We only arm/disarm inside the server io lock because it's common
* during migrations for us to be hanging on that mutex for quite
* a while while the final pass happens - it's held for the entire
* time.
*/
client_arm_killswitch( client );
client_reply( client, request ); client_reply( client, request );
client_disarm_killswitch( client );
stop = 0; stop = 0;
} }
} }
client_disarm_killswitch( client );
return stop; return stop;
} }

View File

@@ -58,6 +58,7 @@ struct client {
}; };
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
void* client_serve(void* client_uncast); void* client_serve(void* client_uncast);
struct client * client_create( struct server * serve, int socket ); struct client * client_create( struct server * serve, int socket );

View File

@@ -101,12 +101,24 @@ struct flexnbd * flexnbd_create_serving(
max_nbd_clients, max_nbd_clients,
use_killswitch, use_killswitch,
1); 1);
flexnbd_create_shared( flexnbd, flexnbd_create_shared( flexnbd, s_ctrl_sock );
s_ctrl_sock );
// Beats installing one handler per client instance
if ( use_killswitch ) {
struct sigaction act = {
.sa_sigaction = client_killswitch_hit,
.sa_flags = SA_RESTART | SA_SIGINFO
};
FATAL_UNLESS(
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
"Installing client killswitch signal failed"
);
}
return flexnbd; return flexnbd;
} }
struct flexnbd * flexnbd_create_listening( struct flexnbd * flexnbd_create_listening(
char* s_ip_address, char* s_ip_address,
char* s_port, char* s_port,
@@ -127,6 +139,10 @@ struct flexnbd * flexnbd_create_listening(
s_acl_entries, s_acl_entries,
1, 0, 0); 1, 0, 0);
flexnbd_create_shared( flexnbd, s_ctrl_sock ); flexnbd_create_shared( flexnbd, s_ctrl_sock );
// listen can't use killswitch, as mirror may pause on sending things
// for a very long time.
return flexnbd; return flexnbd;
} }

View File

@@ -5,6 +5,7 @@
#include "mirror.h" #include "mirror.h"
#include "serve.h" #include "serve.h"
#include "proxy.h" #include "proxy.h"
#include "client.h"
#include "self_pipe.h" #include "self_pipe.h"
#include "mbox.h" #include "mbox.h"
#include "control.h" #include "control.h"

View File

@@ -71,11 +71,12 @@ START_TEST( test_fatal_kills_process )
sleep(10); sleep(10);
} }
else { else {
int kidstatus; int kidret, kidstatus, result;
int result; result = waitpid( pid, &kidret, 0 );
result = waitpid( pid, &kidstatus, 0 );
fail_if( result < 0, "Wait failed." ); fail_if( result < 0, "Wait failed." );
fail_unless( kidstatus == 6, "Kid was not aborted." ); kidstatus = WEXITSTATUS( kidret );
ck_assert_int_eq( kidstatus, SIGABRT );
// fail_unless( kidstatus == 6, "Kid was not aborted." );
} }
} }