Automated merge with ssh://dev.bytemark.co.uk//repos/flexnbd-c

This commit is contained in:
nick
2014-01-22 11:49:26 +00:00
7 changed files with 100 additions and 55 deletions

View File

@@ -25,7 +25,7 @@ COMMANDS
serve
~~~~~
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
[--sock <SOCK>] [--default-deny] [-k] [global option]* [acl entry]*
Serve a file. If any ACL entries are given (which should be IP
addresses), only those clients listed will be permitted to connect.
@@ -55,6 +55,12 @@ Options
empty ACL will let no clients connect. If it is not given, an
empty ACL will let any client connect.
*--killswitch, -k*:
If set, we implement a 2-minute timeout on NBD requests and
responses. If a request takes longer than that to complete,
the client is disconnected. This is useful to keep broken
clients from breaking migrations, among other things.
listen
~~~~~~

View File

@@ -33,9 +33,13 @@ CCFLAGS = %w(
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
[ENV['CFLAGS']]
LIBCHECK = File.exists?("/usr/lib/libcheck.a") ?
"/usr/lib/libcheck.a" :
LIBCHECK = if File.exists?("/usr/lib/libcheck.a")
"/usr/lib/libcheck.a"
elsif File.exists?("/usr/local/lib/libcheck.a")
"/usr/local/lib/libcheck.a"
else
"-lcheck"
end
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
File.basename( n )[%r{check_(.+)\.c},1] }
@@ -114,7 +118,7 @@ end
def gcc_compile( target, source )
FileUtils.mkdir_p File.dirname( target )
sh "#{CC} -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
sh "#{CC} -I/usr/include/libev -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
end
def gcc_link(target, objects)

View File

@@ -15,6 +15,20 @@
#include <sys/stat.h>
#include <fcntl.h>
// When this signal is invoked, we call shutdown() on the client fd, which
// results in the thread being wound up
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
{
int fd = info->si_value.sival_int;
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
FATAL_IF(
-1 == shutdown( fd, SHUT_RDWR ),
"Failed to kill shutdown() the socket, killing the server"
);
}
struct client *client_create( struct server *serve, int socket )
{
NULLCHECK( serve );
@@ -25,6 +39,13 @@ struct client *client_create( struct server *serve, int socket )
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
};
/*
* Our killswitch closes this socket, forcing read() and write() calls
* blocked on it to return with an error. The thread then close()s the
* socket itself, avoiding races.
*/
evp.sigev_value.sival_int = socket;
c = xmalloc( sizeof( struct client ) );
c->stopped = 0;
c->socket = socket;
@@ -199,36 +220,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
NULLCHECK( out_request );
struct nbd_request_raw request_raw;
fd_set fds;
struct timeval * ptv = NULL;
int fd_count;
/* We want a timeout if this is an inbound migration, but not otherwise.
* This is compile-time selectable, as it will break mirror max_bps
*/
#ifdef HAS_LISTEN_TIMEOUT
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
if ( !server_is_in_control( client->serve ) ) {
ptv = &tv;
}
#endif
FD_ZERO(&fds);
FD_SET(client->socket, &fds);
self_pipe_fd_set( client->stop_signal, &fds );
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, ptv);
if ( fd_count == 0 ) {
/* This "can't ever happen" */
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
else { error("Timed out waiting for I/O"); }
}
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
debug("Client received stop signal.");
return 0;
}
if (fd_read_request(client->socket, &request_raw) == -1) {
*disconnected = 1;
@@ -553,6 +544,44 @@ int client_serve_request(struct client* client)
struct nbd_request request = {0};
int stop = 1;
int disconnected = 0;
fd_set fds;
int fd_count;
/* wait until there are some bytes on the fd before committing to reads
* FIXME: this whole scheme is broken because we're using blocking reads.
* read() can block directly after a select anyway, and it's possible that,
* without the killswitch, we'd hang forever. With the killswitch, we just
* hang for "a while". The Right Thing to do is to rewrite client.c to be
* non-blocking.
*/
FD_ZERO(&fds);
FD_SET(client->socket, &fds);
self_pipe_fd_set( client->stop_signal, &fds );
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL);
if ( fd_count == 0 ) {
/* This "can't ever happen" */
fatal( "No FDs selected, and no timeout!" );
}
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
debug("Client received stop signal.");
return 0;
}
/* We arm / disarm around the whole request cycle. The reason for this is
* that the remote peer could uncleanly die at any point; if we're stuck on
* a blocking read(), then that will hang for (almost) forever. This is bad
* in general, makes the server respond only to kill -9, and breaks
* outward mirroring in a most unpleasant way.
*
* The replication is simple: open a connection to the flexnbd server, write
* a single byte, and then wait.
*
*/
client_arm_killswitch( client );
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
if ( disconnected ) { return stop; }
@@ -562,25 +591,12 @@ int client_serve_request(struct client* client)
{
if ( !server_is_closed( client->serve ) ) {
/* We arm / disarm around client_reply() to catch cases where the
* remote peer sends part of a write request data before dying,
* and cases where we send part of read reply data before they die.
*
* That last is theoretical right now, but could break us in the
* same way as a half-write (which causes us to sit in read forever)
*
* We only arm/disarm inside the server io lock because it's common
* during migrations for us to be hanging on that mutex for quite
* a while while the final pass happens - it's held for the entire
* time.
*/
client_arm_killswitch( client );
client_reply( client, request );
client_disarm_killswitch( client );
stop = 0;
}
}
client_disarm_killswitch( client );
return stop;
}

View File

@@ -58,6 +58,7 @@ struct client {
};
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
void* client_serve(void* client_uncast);
struct client * client_create( struct server * serve, int socket );

View File

@@ -101,11 +101,23 @@ struct flexnbd * flexnbd_create_serving(
max_nbd_clients,
use_killswitch,
1);
flexnbd_create_shared( flexnbd,
s_ctrl_sock );
return flexnbd;
flexnbd_create_shared( flexnbd, s_ctrl_sock );
// Beats installing one handler per client instance
if ( use_killswitch ) {
struct sigaction act = {
.sa_sigaction = client_killswitch_hit,
.sa_flags = SA_RESTART | SA_SIGINFO
};
FATAL_UNLESS(
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
"Installing client killswitch signal failed"
);
}
return flexnbd;
}
struct flexnbd * flexnbd_create_listening(
char* s_ip_address,
@@ -127,6 +139,10 @@ struct flexnbd * flexnbd_create_listening(
s_acl_entries,
1, 0, 0);
flexnbd_create_shared( flexnbd, s_ctrl_sock );
// listen can't use killswitch, as mirror may pause on sending things
// for a very long time.
return flexnbd;
}

View File

@@ -5,6 +5,7 @@
#include "mirror.h"
#include "serve.h"
#include "proxy.h"
#include "client.h"
#include "self_pipe.h"
#include "mbox.h"
#include "control.h"

View File

@@ -71,11 +71,12 @@ START_TEST( test_fatal_kills_process )
sleep(10);
}
else {
int kidstatus;
int result;
result = waitpid( pid, &kidstatus, 0 );
int kidret, kidstatus, result;
result = waitpid( pid, &kidret, 0 );
fail_if( result < 0, "Wait failed." );
fail_unless( kidstatus == 6, "Kid was not aborted." );
kidstatus = WEXITSTATUS( kidret );
ck_assert_int_eq( kidstatus, SIGABRT );
// fail_unless( kidstatus == 6, "Kid was not aborted." );
}
}