Automated merge with ssh://dev.bytemark.co.uk//repos/flexnbd-c
This commit is contained in:
@@ -25,7 +25,7 @@ COMMANDS
|
||||
serve
|
||||
~~~~~
|
||||
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
|
||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
||||
[--sock <SOCK>] [--default-deny] [-k] [global option]* [acl entry]*
|
||||
|
||||
Serve a file. If any ACL entries are given (which should be IP
|
||||
addresses), only those clients listed will be permitted to connect.
|
||||
@@ -55,6 +55,12 @@ Options
|
||||
empty ACL will let no clients connect. If it is not given, an
|
||||
empty ACL will let any client connect.
|
||||
|
||||
*--killswitch, -k*:
|
||||
If set, we implement a 2-minute timeout on NBD requests and
|
||||
responses. If a request takes longer than that to complete,
|
||||
the client is disconnected. This is useful to keep broken
|
||||
clients from breaking migrations, among other things.
|
||||
|
||||
listen
|
||||
~~~~~~
|
||||
|
||||
|
10
Rakefile
10
Rakefile
@@ -33,9 +33,13 @@ CCFLAGS = %w(
|
||||
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
|
||||
[ENV['CFLAGS']]
|
||||
|
||||
LIBCHECK = File.exists?("/usr/lib/libcheck.a") ?
|
||||
"/usr/lib/libcheck.a" :
|
||||
LIBCHECK = if File.exists?("/usr/lib/libcheck.a")
|
||||
"/usr/lib/libcheck.a"
|
||||
elsif File.exists?("/usr/local/lib/libcheck.a")
|
||||
"/usr/local/lib/libcheck.a"
|
||||
else
|
||||
"-lcheck"
|
||||
end
|
||||
|
||||
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
|
||||
File.basename( n )[%r{check_(.+)\.c},1] }
|
||||
@@ -114,7 +118,7 @@ end
|
||||
|
||||
def gcc_compile( target, source )
|
||||
FileUtils.mkdir_p File.dirname( target )
|
||||
sh "#{CC} -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
|
||||
sh "#{CC} -I/usr/include/libev -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
|
||||
end
|
||||
|
||||
def gcc_link(target, objects)
|
||||
|
104
src/client.c
104
src/client.c
@@ -15,6 +15,20 @@
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
|
||||
// When this signal is invoked, we call shutdown() on the client fd, which
|
||||
// results in the thread being wound up
|
||||
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
|
||||
{
|
||||
int fd = info->si_value.sival_int;
|
||||
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
|
||||
|
||||
FATAL_IF(
|
||||
-1 == shutdown( fd, SHUT_RDWR ),
|
||||
"Failed to kill shutdown() the socket, killing the server"
|
||||
);
|
||||
}
|
||||
|
||||
struct client *client_create( struct server *serve, int socket )
|
||||
{
|
||||
NULLCHECK( serve );
|
||||
@@ -25,6 +39,13 @@ struct client *client_create( struct server *serve, int socket )
|
||||
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
|
||||
};
|
||||
|
||||
/*
|
||||
* Our killswitch closes this socket, forcing read() and write() calls
|
||||
* blocked on it to return with an error. The thread then close()s the
|
||||
* socket itself, avoiding races.
|
||||
*/
|
||||
evp.sigev_value.sival_int = socket;
|
||||
|
||||
c = xmalloc( sizeof( struct client ) );
|
||||
c->stopped = 0;
|
||||
c->socket = socket;
|
||||
@@ -199,36 +220,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
||||
NULLCHECK( out_request );
|
||||
|
||||
struct nbd_request_raw request_raw;
|
||||
fd_set fds;
|
||||
struct timeval * ptv = NULL;
|
||||
int fd_count;
|
||||
|
||||
/* We want a timeout if this is an inbound migration, but not otherwise.
|
||||
* This is compile-time selectable, as it will break mirror max_bps
|
||||
*/
|
||||
#ifdef HAS_LISTEN_TIMEOUT
|
||||
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
|
||||
|
||||
if ( !server_is_in_control( client->serve ) ) {
|
||||
ptv = &tv;
|
||||
}
|
||||
#endif
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(client->socket, &fds);
|
||||
self_pipe_fd_set( client->stop_signal, &fds );
|
||||
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, ptv);
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
|
||||
else { error("Timed out waiting for I/O"); }
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (fd_read_request(client->socket, &request_raw) == -1) {
|
||||
*disconnected = 1;
|
||||
@@ -553,6 +544,44 @@ int client_serve_request(struct client* client)
|
||||
struct nbd_request request = {0};
|
||||
int stop = 1;
|
||||
int disconnected = 0;
|
||||
fd_set fds;
|
||||
int fd_count;
|
||||
|
||||
/* wait until there are some bytes on the fd before committing to reads
|
||||
* FIXME: this whole scheme is broken because we're using blocking reads.
|
||||
* read() can block directly after a select anyway, and it's possible that,
|
||||
* without the killswitch, we'd hang forever. With the killswitch, we just
|
||||
* hang for "a while". The Right Thing to do is to rewrite client.c to be
|
||||
* non-blocking.
|
||||
*/
|
||||
|
||||
FD_ZERO(&fds);
|
||||
FD_SET(client->socket, &fds);
|
||||
self_pipe_fd_set( client->stop_signal, &fds );
|
||||
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL);
|
||||
if ( fd_count == 0 ) {
|
||||
/* This "can't ever happen" */
|
||||
fatal( "No FDs selected, and no timeout!" );
|
||||
}
|
||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||
|
||||
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
||||
debug("Client received stop signal.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* We arm / disarm around the whole request cycle. The reason for this is
|
||||
* that the remote peer could uncleanly die at any point; if we're stuck on
|
||||
* a blocking read(), then that will hang for (almost) forever. This is bad
|
||||
* in general, makes the server respond only to kill -9, and breaks
|
||||
* outward mirroring in a most unpleasant way.
|
||||
*
|
||||
* The replication is simple: open a connection to the flexnbd server, write
|
||||
* a single byte, and then wait.
|
||||
*
|
||||
*/
|
||||
client_arm_killswitch( client );
|
||||
|
||||
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
|
||||
if ( disconnected ) { return stop; }
|
||||
@@ -562,25 +591,12 @@ int client_serve_request(struct client* client)
|
||||
|
||||
{
|
||||
if ( !server_is_closed( client->serve ) ) {
|
||||
/* We arm / disarm around client_reply() to catch cases where the
|
||||
* remote peer sends part of a write request data before dying,
|
||||
* and cases where we send part of read reply data before they die.
|
||||
*
|
||||
* That last is theoretical right now, but could break us in the
|
||||
* same way as a half-write (which causes us to sit in read forever)
|
||||
*
|
||||
* We only arm/disarm inside the server io lock because it's common
|
||||
* during migrations for us to be hanging on that mutex for quite
|
||||
* a while while the final pass happens - it's held for the entire
|
||||
* time.
|
||||
*/
|
||||
client_arm_killswitch( client );
|
||||
client_reply( client, request );
|
||||
client_disarm_killswitch( client );
|
||||
stop = 0;
|
||||
}
|
||||
}
|
||||
|
||||
client_disarm_killswitch( client );
|
||||
|
||||
return stop;
|
||||
}
|
||||
|
@@ -58,6 +58,7 @@ struct client {
|
||||
|
||||
};
|
||||
|
||||
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
|
||||
|
||||
void* client_serve(void* client_uncast);
|
||||
struct client * client_create( struct server * serve, int socket );
|
||||
|
@@ -101,11 +101,23 @@ struct flexnbd * flexnbd_create_serving(
|
||||
max_nbd_clients,
|
||||
use_killswitch,
|
||||
1);
|
||||
flexnbd_create_shared( flexnbd,
|
||||
s_ctrl_sock );
|
||||
return flexnbd;
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// Beats installing one handler per client instance
|
||||
if ( use_killswitch ) {
|
||||
struct sigaction act = {
|
||||
.sa_sigaction = client_killswitch_hit,
|
||||
.sa_flags = SA_RESTART | SA_SIGINFO
|
||||
};
|
||||
|
||||
FATAL_UNLESS(
|
||||
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
|
||||
"Installing client killswitch signal failed"
|
||||
);
|
||||
}
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
||||
struct flexnbd * flexnbd_create_listening(
|
||||
char* s_ip_address,
|
||||
@@ -127,6 +139,10 @@ struct flexnbd * flexnbd_create_listening(
|
||||
s_acl_entries,
|
||||
1, 0, 0);
|
||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||
|
||||
// listen can't use killswitch, as mirror may pause on sending things
|
||||
// for a very long time.
|
||||
|
||||
return flexnbd;
|
||||
}
|
||||
|
||||
|
@@ -5,6 +5,7 @@
|
||||
#include "mirror.h"
|
||||
#include "serve.h"
|
||||
#include "proxy.h"
|
||||
#include "client.h"
|
||||
#include "self_pipe.h"
|
||||
#include "mbox.h"
|
||||
#include "control.h"
|
||||
|
@@ -71,11 +71,12 @@ START_TEST( test_fatal_kills_process )
|
||||
sleep(10);
|
||||
}
|
||||
else {
|
||||
int kidstatus;
|
||||
int result;
|
||||
result = waitpid( pid, &kidstatus, 0 );
|
||||
int kidret, kidstatus, result;
|
||||
result = waitpid( pid, &kidret, 0 );
|
||||
fail_if( result < 0, "Wait failed." );
|
||||
fail_unless( kidstatus == 6, "Kid was not aborted." );
|
||||
kidstatus = WEXITSTATUS( kidret );
|
||||
ck_assert_int_eq( kidstatus, SIGABRT );
|
||||
// fail_unless( kidstatus == 6, "Kid was not aborted." );
|
||||
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user