Automated merge with ssh://dev.bytemark.co.uk//repos/flexnbd-c
This commit is contained in:
@@ -25,7 +25,7 @@ COMMANDS
|
|||||||
serve
|
serve
|
||||||
~~~~~
|
~~~~~
|
||||||
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
|
$ flexnbd serve --addr <ADDR> --port <PORT> --file <FILE>
|
||||||
[--sock <SOCK>] [--default-deny] [global option]* [acl entry]*
|
[--sock <SOCK>] [--default-deny] [-k] [global option]* [acl entry]*
|
||||||
|
|
||||||
Serve a file. If any ACL entries are given (which should be IP
|
Serve a file. If any ACL entries are given (which should be IP
|
||||||
addresses), only those clients listed will be permitted to connect.
|
addresses), only those clients listed will be permitted to connect.
|
||||||
@@ -55,6 +55,12 @@ Options
|
|||||||
empty ACL will let no clients connect. If it is not given, an
|
empty ACL will let no clients connect. If it is not given, an
|
||||||
empty ACL will let any client connect.
|
empty ACL will let any client connect.
|
||||||
|
|
||||||
|
*--killswitch, -k*:
|
||||||
|
If set, we implement a 2-minute timeout on NBD requests and
|
||||||
|
responses. If a request takes longer than that to complete,
|
||||||
|
the client is disconnected. This is useful to keep broken
|
||||||
|
clients from breaking migrations, among other things.
|
||||||
|
|
||||||
listen
|
listen
|
||||||
~~~~~~
|
~~~~~~
|
||||||
|
|
||||||
|
10
Rakefile
10
Rakefile
@@ -33,9 +33,13 @@ CCFLAGS = %w(
|
|||||||
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
|
) + # Added -Wno-missing-field-initializers to shut GCC up over {0} struct initialisers
|
||||||
[ENV['CFLAGS']]
|
[ENV['CFLAGS']]
|
||||||
|
|
||||||
LIBCHECK = File.exists?("/usr/lib/libcheck.a") ?
|
LIBCHECK = if File.exists?("/usr/lib/libcheck.a")
|
||||||
"/usr/lib/libcheck.a" :
|
"/usr/lib/libcheck.a"
|
||||||
|
elsif File.exists?("/usr/local/lib/libcheck.a")
|
||||||
"/usr/local/lib/libcheck.a"
|
"/usr/local/lib/libcheck.a"
|
||||||
|
else
|
||||||
|
"-lcheck"
|
||||||
|
end
|
||||||
|
|
||||||
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
|
TEST_MODULES = Dir["tests/unit/check_*.c"].map { |n|
|
||||||
File.basename( n )[%r{check_(.+)\.c},1] }
|
File.basename( n )[%r{check_(.+)\.c},1] }
|
||||||
@@ -114,7 +118,7 @@ end
|
|||||||
|
|
||||||
def gcc_compile( target, source )
|
def gcc_compile( target, source )
|
||||||
FileUtils.mkdir_p File.dirname( target )
|
FileUtils.mkdir_p File.dirname( target )
|
||||||
sh "#{CC} -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
|
sh "#{CC} -I/usr/include/libev -Isrc -c #{CCFLAGS.join(' ')} -o #{target} #{source} "
|
||||||
end
|
end
|
||||||
|
|
||||||
def gcc_link(target, objects)
|
def gcc_link(target, objects)
|
||||||
|
104
src/client.c
104
src/client.c
@@ -15,6 +15,20 @@
|
|||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
|
||||||
|
// When this signal is invoked, we call shutdown() on the client fd, which
|
||||||
|
// results in the thread being wound up
|
||||||
|
void client_killswitch_hit(int signal __attribute__ ((unused)), siginfo_t *info, void *ptr __attribute__ ((unused)))
|
||||||
|
{
|
||||||
|
int fd = info->si_value.sival_int;
|
||||||
|
warn( "Killswitch for fd %i activated, calling shutdown on socket", fd );
|
||||||
|
|
||||||
|
FATAL_IF(
|
||||||
|
-1 == shutdown( fd, SHUT_RDWR ),
|
||||||
|
"Failed to kill shutdown() the socket, killing the server"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
struct client *client_create( struct server *serve, int socket )
|
struct client *client_create( struct server *serve, int socket )
|
||||||
{
|
{
|
||||||
NULLCHECK( serve );
|
NULLCHECK( serve );
|
||||||
@@ -25,6 +39,13 @@ struct client *client_create( struct server *serve, int socket )
|
|||||||
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
|
.sigev_signo = CLIENT_KILLSWITCH_SIGNAL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Our killswitch closes this socket, forcing read() and write() calls
|
||||||
|
* blocked on it to return with an error. The thread then close()s the
|
||||||
|
* socket itself, avoiding races.
|
||||||
|
*/
|
||||||
|
evp.sigev_value.sival_int = socket;
|
||||||
|
|
||||||
c = xmalloc( sizeof( struct client ) );
|
c = xmalloc( sizeof( struct client ) );
|
||||||
c->stopped = 0;
|
c->stopped = 0;
|
||||||
c->socket = socket;
|
c->socket = socket;
|
||||||
@@ -199,36 +220,6 @@ int client_read_request( struct client * client , struct nbd_request *out_reques
|
|||||||
NULLCHECK( out_request );
|
NULLCHECK( out_request );
|
||||||
|
|
||||||
struct nbd_request_raw request_raw;
|
struct nbd_request_raw request_raw;
|
||||||
fd_set fds;
|
|
||||||
struct timeval * ptv = NULL;
|
|
||||||
int fd_count;
|
|
||||||
|
|
||||||
/* We want a timeout if this is an inbound migration, but not otherwise.
|
|
||||||
* This is compile-time selectable, as it will break mirror max_bps
|
|
||||||
*/
|
|
||||||
#ifdef HAS_LISTEN_TIMEOUT
|
|
||||||
struct timeval tv = {CLIENT_MAX_WAIT_SECS, 0};
|
|
||||||
|
|
||||||
if ( !server_is_in_control( client->serve ) ) {
|
|
||||||
ptv = &tv;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
FD_ZERO(&fds);
|
|
||||||
FD_SET(client->socket, &fds);
|
|
||||||
self_pipe_fd_set( client->stop_signal, &fds );
|
|
||||||
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, ptv);
|
|
||||||
if ( fd_count == 0 ) {
|
|
||||||
/* This "can't ever happen" */
|
|
||||||
if ( NULL == ptv ) { fatal( "No FDs selected, and no timeout!" ); }
|
|
||||||
else { error("Timed out waiting for I/O"); }
|
|
||||||
}
|
|
||||||
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
|
||||||
|
|
||||||
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
|
||||||
debug("Client received stop signal.");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fd_read_request(client->socket, &request_raw) == -1) {
|
if (fd_read_request(client->socket, &request_raw) == -1) {
|
||||||
*disconnected = 1;
|
*disconnected = 1;
|
||||||
@@ -553,6 +544,44 @@ int client_serve_request(struct client* client)
|
|||||||
struct nbd_request request = {0};
|
struct nbd_request request = {0};
|
||||||
int stop = 1;
|
int stop = 1;
|
||||||
int disconnected = 0;
|
int disconnected = 0;
|
||||||
|
fd_set fds;
|
||||||
|
int fd_count;
|
||||||
|
|
||||||
|
/* wait until there are some bytes on the fd before committing to reads
|
||||||
|
* FIXME: this whole scheme is broken because we're using blocking reads.
|
||||||
|
* read() can block directly after a select anyway, and it's possible that,
|
||||||
|
* without the killswitch, we'd hang forever. With the killswitch, we just
|
||||||
|
* hang for "a while". The Right Thing to do is to rewrite client.c to be
|
||||||
|
* non-blocking.
|
||||||
|
*/
|
||||||
|
|
||||||
|
FD_ZERO(&fds);
|
||||||
|
FD_SET(client->socket, &fds);
|
||||||
|
self_pipe_fd_set( client->stop_signal, &fds );
|
||||||
|
fd_count = sock_try_select(FD_SETSIZE, &fds, NULL, NULL, NULL);
|
||||||
|
if ( fd_count == 0 ) {
|
||||||
|
/* This "can't ever happen" */
|
||||||
|
fatal( "No FDs selected, and no timeout!" );
|
||||||
|
}
|
||||||
|
else if ( fd_count < 0 ) { fatal( "Select failed" ); }
|
||||||
|
|
||||||
|
if ( self_pipe_fd_isset( client->stop_signal, &fds ) ){
|
||||||
|
debug("Client received stop signal.");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* We arm / disarm around the whole request cycle. The reason for this is
|
||||||
|
* that the remote peer could uncleanly die at any point; if we're stuck on
|
||||||
|
* a blocking read(), then that will hang for (almost) forever. This is bad
|
||||||
|
* in general, makes the server respond only to kill -9, and breaks
|
||||||
|
* outward mirroring in a most unpleasant way.
|
||||||
|
*
|
||||||
|
* The replication is simple: open a connection to the flexnbd server, write
|
||||||
|
* a single byte, and then wait.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
client_arm_killswitch( client );
|
||||||
|
|
||||||
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
|
if ( !client_read_request( client, &request, &disconnected ) ) { return stop; }
|
||||||
if ( disconnected ) { return stop; }
|
if ( disconnected ) { return stop; }
|
||||||
@@ -562,25 +591,12 @@ int client_serve_request(struct client* client)
|
|||||||
|
|
||||||
{
|
{
|
||||||
if ( !server_is_closed( client->serve ) ) {
|
if ( !server_is_closed( client->serve ) ) {
|
||||||
/* We arm / disarm around client_reply() to catch cases where the
|
|
||||||
* remote peer sends part of a write request data before dying,
|
|
||||||
* and cases where we send part of read reply data before they die.
|
|
||||||
*
|
|
||||||
* That last is theoretical right now, but could break us in the
|
|
||||||
* same way as a half-write (which causes us to sit in read forever)
|
|
||||||
*
|
|
||||||
* We only arm/disarm inside the server io lock because it's common
|
|
||||||
* during migrations for us to be hanging on that mutex for quite
|
|
||||||
* a while while the final pass happens - it's held for the entire
|
|
||||||
* time.
|
|
||||||
*/
|
|
||||||
client_arm_killswitch( client );
|
|
||||||
client_reply( client, request );
|
client_reply( client, request );
|
||||||
client_disarm_killswitch( client );
|
|
||||||
stop = 0;
|
stop = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
client_disarm_killswitch( client );
|
||||||
|
|
||||||
return stop;
|
return stop;
|
||||||
}
|
}
|
||||||
|
@@ -58,6 +58,7 @@ struct client {
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void client_killswitch_hit(int signal, siginfo_t *info, void *ptr);
|
||||||
|
|
||||||
void* client_serve(void* client_uncast);
|
void* client_serve(void* client_uncast);
|
||||||
struct client * client_create( struct server * serve, int socket );
|
struct client * client_create( struct server * serve, int socket );
|
||||||
|
@@ -101,12 +101,24 @@ struct flexnbd * flexnbd_create_serving(
|
|||||||
max_nbd_clients,
|
max_nbd_clients,
|
||||||
use_killswitch,
|
use_killswitch,
|
||||||
1);
|
1);
|
||||||
flexnbd_create_shared( flexnbd,
|
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||||
s_ctrl_sock );
|
|
||||||
|
// Beats installing one handler per client instance
|
||||||
|
if ( use_killswitch ) {
|
||||||
|
struct sigaction act = {
|
||||||
|
.sa_sigaction = client_killswitch_hit,
|
||||||
|
.sa_flags = SA_RESTART | SA_SIGINFO
|
||||||
|
};
|
||||||
|
|
||||||
|
FATAL_UNLESS(
|
||||||
|
0 == sigaction( CLIENT_KILLSWITCH_SIGNAL, &act, NULL ),
|
||||||
|
"Installing client killswitch signal failed"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return flexnbd;
|
return flexnbd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct flexnbd * flexnbd_create_listening(
|
struct flexnbd * flexnbd_create_listening(
|
||||||
char* s_ip_address,
|
char* s_ip_address,
|
||||||
char* s_port,
|
char* s_port,
|
||||||
@@ -127,6 +139,10 @@ struct flexnbd * flexnbd_create_listening(
|
|||||||
s_acl_entries,
|
s_acl_entries,
|
||||||
1, 0, 0);
|
1, 0, 0);
|
||||||
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
flexnbd_create_shared( flexnbd, s_ctrl_sock );
|
||||||
|
|
||||||
|
// listen can't use killswitch, as mirror may pause on sending things
|
||||||
|
// for a very long time.
|
||||||
|
|
||||||
return flexnbd;
|
return flexnbd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -5,6 +5,7 @@
|
|||||||
#include "mirror.h"
|
#include "mirror.h"
|
||||||
#include "serve.h"
|
#include "serve.h"
|
||||||
#include "proxy.h"
|
#include "proxy.h"
|
||||||
|
#include "client.h"
|
||||||
#include "self_pipe.h"
|
#include "self_pipe.h"
|
||||||
#include "mbox.h"
|
#include "mbox.h"
|
||||||
#include "control.h"
|
#include "control.h"
|
||||||
|
@@ -71,11 +71,12 @@ START_TEST( test_fatal_kills_process )
|
|||||||
sleep(10);
|
sleep(10);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int kidstatus;
|
int kidret, kidstatus, result;
|
||||||
int result;
|
result = waitpid( pid, &kidret, 0 );
|
||||||
result = waitpid( pid, &kidstatus, 0 );
|
|
||||||
fail_if( result < 0, "Wait failed." );
|
fail_if( result < 0, "Wait failed." );
|
||||||
fail_unless( kidstatus == 6, "Kid was not aborted." );
|
kidstatus = WEXITSTATUS( kidret );
|
||||||
|
ck_assert_int_eq( kidstatus, SIGABRT );
|
||||||
|
// fail_unless( kidstatus == 6, "Kid was not aborted." );
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user