Files
flexnbd-c/src/mirror.c

572 lines
14 KiB
C
Raw Normal View History

2012-06-27 15:45:33 +01:00
/* FlexNBD server (C) Bytemark Hosting 2012
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mirror.h"
#include "serve.h"
#include "util.h"
#include "ioutil.h"
#include "parse.h"
#include "readwrite.h"
#include "bitset.h"
#include "self_pipe.h"
#include "status.h"
#include <stdlib.h>
#include <string.h>
#include <sys/un.h>
#include <unistd.h>
struct mirror * mirror_alloc(
2012-06-27 15:45:33 +01:00
union mysockaddr * connect_to,
union mysockaddr * connect_from,
int max_Bps,
int action_at_finish,
struct mbox * commit_signal)
2012-06-27 15:45:33 +01:00
{
struct mirror * mirror;
2012-06-27 15:45:33 +01:00
mirror = xmalloc(sizeof(struct mirror));
2012-06-27 15:45:33 +01:00
mirror->connect_to = connect_to;
mirror->connect_from = connect_from;
mirror->max_bytes_per_second = max_Bps;
mirror->action_at_finish = action_at_finish;
mirror->commit_signal = commit_signal;
mirror->commit_state = MS_UNKNOWN;
return mirror;
}
void mirror_set_state_f( struct mirror * mirror, enum mirror_state state )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( mirror );
mirror->commit_state = state;
}
#define mirror_set_state( mirror, state ) do{\
debug( "Mirror state => " #state );\
mirror_set_state_f( mirror, state );\
} while(0)
enum mirror_state mirror_get_state( struct mirror * mirror )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( mirror );
return mirror->commit_state;
}
void mirror_init( struct mirror * mirror, const char * filename )
2012-06-27 15:45:33 +01:00
{
int map_fd;
off64_t size;
NULLCHECK( mirror );
NULLCHECK( filename );
FATAL_IF_NEGATIVE(
open_and_mmap(
2012-09-20 13:37:48 +01:00
filename,
2012-06-27 15:45:33 +01:00
&map_fd,
2012-09-20 13:37:48 +01:00
&size,
2012-06-27 15:45:33 +01:00
(void**) &mirror->mapped
),
"Failed to open and mmap %s",
filename
);
2012-09-20 13:37:48 +01:00
2012-06-27 15:45:33 +01:00
mirror->dirty_map = bitset_alloc(size, 4096);
}
/* Call this before a mirror attempt. */
void mirror_reset( struct mirror * mirror )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( mirror );
NULLCHECK( mirror->dirty_map );
mirror_set_state( mirror, MS_INIT );
bitset_set(mirror->dirty_map);
}
struct mirror * mirror_create(
2012-06-27 15:45:33 +01:00
const char * filename,
union mysockaddr * connect_to,
union mysockaddr * connect_from,
int max_Bps,
int action_at_finish,
struct mbox * commit_signal)
2012-06-27 15:45:33 +01:00
{
/* FIXME: shouldn't map_fd get closed? */
struct mirror * mirror;
2012-06-27 15:45:33 +01:00
mirror = mirror_alloc( connect_to,
2012-06-27 15:45:33 +01:00
connect_from,
max_Bps,
action_at_finish,
commit_signal);
2012-09-20 13:37:48 +01:00
mirror_init( mirror, filename );
mirror_reset( mirror );
2012-06-27 15:45:33 +01:00
return mirror;
}
void mirror_destroy( struct mirror *mirror )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( mirror );
free(mirror->connect_to);
free(mirror->connect_from);
free(mirror->dirty_map);
free(mirror);
}
/** The mirror code will split NBD writes, making them this long as a maximum */
static const int mirror_longest_write = 8<<20;
/** If, during a mirror pass, we have sent this number of bytes or fewer, we
* go to freeze the I/O and finish it off. This is just a guess.
*/
static const unsigned int mirror_last_pass_after_bytes_written = 100<<20;
2012-09-20 13:37:48 +01:00
/** The largest number of full passes we'll do - the last one will always
2012-06-27 15:45:33 +01:00
* cause the I/O to freeze, however many bytes are left to copy.
*/
static const int mirror_maximum_passes = 7;
/* A single mirror pass over the disc, optionally locking IO around the
* transfer.
*/
int mirror_pass(struct server * serve, int should_lock, uint64_t *written)
{
uint64_t current = 0;
int success = 1;
struct bitset_mapping *map = serve->mirror->dirty_map;
*written = 0;
while (current < serve->size) {
int run = bitset_run_count(map, current, mirror_longest_write);
debug("mirror current=%ld, run=%d", current, run);
2012-09-20 13:37:48 +01:00
/* FIXME: we could avoid sending sparse areas of the
2012-06-27 15:45:33 +01:00
* disc here, and probably save a lot of bandwidth and
* time (if we know the destination starts off zeroed).
2012-09-20 13:37:48 +01:00
*/
2012-06-27 15:45:33 +01:00
if (bitset_is_set_at(map, current)) {
/* We've found a dirty area, send it */
debug("^^^ writing");
2012-09-20 13:37:48 +01:00
/* We need to stop the main thread from working
2012-06-27 15:45:33 +01:00
* because it might corrupt the dirty map. This
* is likely to slow things down but will be
* safe.
*/
if (should_lock) { server_lock_io( serve ); }
{
debug("in lock block");
2012-06-27 15:45:33 +01:00
/** FIXME: do something useful with bytes/second */
/** FIXME: error handling code here won't unlock */
2012-09-20 13:37:48 +01:00
socket_nbd_write( serve->mirror->client,
2012-06-27 15:45:33 +01:00
current,
run,
0,
serve->mirror->mapped + current,
MS_REQUEST_LIMIT_SECS);
2012-06-27 15:45:33 +01:00
/* now mark it clean */
bitset_clear_range(map, current, run);
debug("leaving lock block");
2012-06-27 15:45:33 +01:00
}
if (should_lock) { server_unlock_io( serve ); }
*written += run;
}
current += run;
if (serve->mirror->signal_abandon) {
debug("Abandon message received" );
success = 0;
break;
}
}
return success;
}
/* THIS FUNCTION MUST ONLY BE CALLED WITH THE SERVER'S IO LOCKED. */
void mirror_on_exit( struct server * serve )
{
/* If we're still here, we can shut the server down.
2012-06-27 15:45:33 +01:00
*
* It doesn't matter if we get new client connections before
* now, the IO lock will stop them from doing anything.
*/
debug("serve_signal_close");
serve_signal_close( serve );
/* We have to wait until the server is closed before unlocking
* IO. This is because the client threads check to see if the
* server is still open before reading or writing inside their
* own locks. If we don't wait for the close, there's no way to
* guarantee the server thread will win the race and we risk the
* clients seeing a "successful" write to a dead disc image.
*/
debug("serve_wait_for_close");
serve_wait_for_close( serve );
debug("Unlinking %s", serve->filename );
if ( ACTION_UNLINK == serve->mirror->action_at_finish ) {
server_unlink( serve );
}
debug("Sending disconnect");
socket_nbd_disconnect( serve->mirror->client );
2012-06-27 15:45:33 +01:00
info("Mirror sent.");
}
void mirror_cleanup( struct server * serve,
2012-06-27 15:45:33 +01:00
int fatal __attribute__((unused)))
{
NULLCHECK( serve );
struct mirror * mirror = serve->mirror;
2012-06-27 15:45:33 +01:00
NULLCHECK( mirror );
info( "Cleaning up mirror thread");
if( mirror->client && mirror->client > 0 ){
close( mirror->client );
}
mirror->client = -1;
if( server_io_locked( serve ) ){ server_unlock_io( serve ); }
2012-06-27 15:45:33 +01:00
}
int mirror_connect( struct mirror * mirror, off64_t local_size )
2012-06-27 15:45:33 +01:00
{
struct sockaddr * connect_from = NULL;
int connected = 0;
2012-06-27 15:45:33 +01:00
if ( mirror->connect_from ) {
connect_from = &mirror->connect_from->generic;
}
NULLCHECK( mirror->connect_to );
mirror->client = socket_connect(&mirror->connect_to->generic, connect_from);
if ( 0 < mirror->client ) {
fd_set fds;
struct timeval tv = { MS_HELLO_TIME_SECS, 0};
FD_ZERO( &fds );
FD_SET( mirror->client, &fds );
FATAL_UNLESS( 0 <= select( FD_SETSIZE, &fds, NULL, NULL, &tv ),
"Select failed." );
if( FD_ISSET( mirror->client, &fds ) ){
off64_t remote_size;
if ( socket_nbd_read_hello( mirror->client, &remote_size ) ) {
if( remote_size == local_size ){
connected = 1;
2012-06-27 15:45:33 +01:00
mirror_set_state( mirror, MS_GO );
}
else {
2012-09-20 13:37:48 +01:00
warn("Remote size (%d) doesn't match local (%d)",
2012-06-27 15:45:33 +01:00
remote_size, local_size );
mirror_set_state( mirror, MS_FAIL_SIZE_MISMATCH );
}
}
else {
warn( "Mirror attempt rejected." );
mirror_set_state( mirror, MS_FAIL_REJECTED );
}
}
else {
warn( "No NBD Hello received." );
mirror_set_state( mirror, MS_FAIL_NO_HELLO );
}
if ( !connected ) { close( mirror->client ); }
2012-06-27 15:45:33 +01:00
}
else {
warn( "Mirror failed to connect.");
mirror_set_state( mirror, MS_FAIL_CONNECT );
}
return connected;
2012-06-27 15:45:33 +01:00
}
int mirror_should_quit( struct mirror * mirror )
{
switch( mirror->action_at_finish ) {
case ACTION_EXIT:
case ACTION_UNLINK:
return 1;
default:
return 0;
}
}
2012-06-27 15:45:33 +01:00
void mirror_run( struct server *serve )
{
NULLCHECK( serve );
NULLCHECK( serve->mirror );
int pass;
uint64_t written;
info("Starting mirror" );
for (pass=0; pass < mirror_maximum_passes-1; pass++) {
debug("mirror start pass=%d", pass);
2012-09-20 13:37:48 +01:00
if ( !mirror_pass( serve, 1, &written ) ){
2012-06-27 15:45:33 +01:00
debug("Failed mirror pass state is %d", mirror_get_state( serve->mirror ) );
debug("pass failed, giving up");
return; }
2012-09-20 13:37:48 +01:00
2012-06-27 15:45:33 +01:00
/* if we've not written anything */
if (written < mirror_last_pass_after_bytes_written) { break; }
}
server_lock_io( serve );
{
if ( mirror_pass( serve, 0, &written ) &&
mirror_should_quit( serve->mirror ) ) {
2012-06-27 15:45:33 +01:00
debug("exit!");
mirror_on_exit( serve );
info("Server closed, quitting "
"after successful migration");
2012-09-20 13:37:48 +01:00
}
2012-06-27 15:45:33 +01:00
}
server_unlock_io( serve );
}
void mbox_post_mirror_state( struct mbox * mbox, enum mirror_state st )
{
NULLCHECK( mbox );
enum mirror_state * contents = xmalloc( sizeof( enum mirror_state ) );
*contents = st;
mbox_post( mbox, contents );
}
void mirror_signal_commit( struct mirror * mirror )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( mirror );
2012-09-20 13:37:48 +01:00
mbox_post_mirror_state( mirror->commit_signal,
mirror_get_state( mirror ) );
2012-06-27 15:45:33 +01:00
}
2012-09-20 13:37:48 +01:00
/** Thread launched to drive mirror process
* This is needed for two reasons: firstly, it decouples the mirroring
* from the control thread (although that's less valid with mboxes
* passing state back and forth) and to provide an error context so that
* retries can be cleanly handled without a bespoke error handling
* mechanism.
* */
2012-06-27 15:45:33 +01:00
void* mirror_runner(void* serve_params_uncast)
{
/* The supervisor thread relies on there not being any ERROR
* calls until after the mirror_signal_commit() call in this
* function.
* However, *after* that, we should call ERROR_* instead of
* FATAL_* wherever possible.
*/
struct server *serve = (struct server*) serve_params_uncast;
NULLCHECK( serve );
NULLCHECK( serve->mirror );
struct mirror * mirror = serve->mirror;
2012-06-27 15:45:33 +01:00
NULLCHECK( mirror->dirty_map );
error_set_handler( (cleanup_handler *) mirror_cleanup, serve );
2012-06-27 15:45:33 +01:00
info( "Connecting to mirror" );
2012-09-20 13:37:48 +01:00
2012-06-27 15:45:33 +01:00
time_t start_time = time(NULL);
int connected = mirror_connect( mirror, serve->size );
2012-06-27 15:45:33 +01:00
mirror_signal_commit( mirror );
if ( !connected ) { goto abandon_mirror; }
/* After this point, if we see a failure we need to disconnect
* and retry everything from mirror_set_state(_, MS_INIT), but
* *without* signaling the commit or abandoning the mirror.
* */
2012-09-20 13:37:48 +01:00
if ( (time(NULL) - start_time) > MS_CONNECT_TIME_SECS ){
2012-06-27 15:45:33 +01:00
/* If we get here, then we managed to connect but the
* control thread feeding status back to the user will
* have gone away, leaving the user without meaningful
* feedback. In this instance, they have to assume a
* failure, so we can't afford to let the mirror happen.
* We have to set the state to avoid a race.
*/
mirror_set_state( mirror, MS_FAIL_CONNECT );
warn( "Mirror connected, but too slowly" );
goto abandon_mirror;
}
mirror_run( serve );
mirror_set_state( mirror, MS_DONE );
abandon_mirror:
return NULL;
}
struct mirror_super * mirror_super_create(
const char * filename,
union mysockaddr * connect_to,
union mysockaddr * connect_from,
int max_Bps,
int action_at_finish,
struct mbox * state_mbox)
2012-06-27 15:45:33 +01:00
{
struct mirror_super * super = xmalloc( sizeof( struct mirror_super) );
2012-09-20 13:37:48 +01:00
super->mirror = mirror_create(
filename,
connect_to,
connect_from,
max_Bps,
action_at_finish,
mbox_create() ) ;
super->state_mbox = state_mbox;
2012-06-27 15:45:33 +01:00
return super;
}
/* Post the current state of the mirror into super->state_mbox.*/
2012-09-20 13:37:48 +01:00
void mirror_super_signal_committed(
struct mirror_super * super ,
enum mirror_state commit_state )
2012-06-27 15:45:33 +01:00
{
NULLCHECK( super );
NULLCHECK( super->state_mbox );
2012-09-20 13:37:48 +01:00
mbox_post_mirror_state(
super->state_mbox,
commit_state );
2012-06-27 15:45:33 +01:00
}
void mirror_super_destroy( struct mirror_super * super )
{
NULLCHECK( super );
mbox_destroy( super->mirror->commit_signal );
mirror_destroy( super->mirror );
2012-06-27 15:45:33 +01:00
free( super );
}
/* The mirror supervisor thread. Responsible for kicking off retries if
* the mirror thread fails.
* The mirror and mirror_super objects are never freed, and the
2012-06-27 15:45:33 +01:00
* mirror_super_runner thread is never joined.
*/
void * mirror_super_runner( void * serve_uncast )
{
struct server * serve = (struct server *) serve_uncast;
NULLCHECK( serve );
NULLCHECK( serve->mirror );
NULLCHECK( serve->mirror_super );
int first_pass = 1;
2012-06-27 15:45:33 +01:00
int should_retry = 0;
int success = 0;
struct mirror * mirror = serve->mirror;
2012-06-27 15:45:33 +01:00
struct mirror_super * super = serve->mirror_super;
do {
FATAL_IF( 0 != pthread_create(
2012-09-20 13:37:48 +01:00
&mirror->thread,
NULL,
mirror_runner,
2012-06-27 15:45:33 +01:00
serve),
"Failed to create mirror thread");
debug("Supervisor waiting for commit signal");
2012-09-20 13:37:48 +01:00
enum mirror_state * commit_state =
mbox_receive( mirror->commit_signal );
debug( "Supervisor got commit signal" );
if ( first_pass ) {
/* Only retry if the connection attempt was
* successful. Otherwise the user will see an
* error reported while we're still trying to
* retry behind the scenes.
*/
should_retry = *commit_state == MS_GO;
/* Only send this signal the first time */
mirror_super_signal_committed(
2012-09-20 13:37:48 +01:00
super,
*commit_state);
debug("Mirror supervisor committed");
2012-06-27 15:45:33 +01:00
}
/* We only care about the value of the commit signal on
* the first pass, so this is ok
*/
free( commit_state );
2012-06-27 15:45:33 +01:00
debug("Supervisor waiting for mirror thread" );
pthread_join( mirror->thread, NULL );
success = MS_DONE == mirror_get_state( mirror );
2012-09-20 13:37:48 +01:00
if( success ){
info( "Mirror supervisor success, exiting" ); }
2012-06-27 15:45:33 +01:00
else if ( mirror->signal_abandon ) {
info( "Mirror abandoned" );
should_retry = 0;
}
else if (should_retry){
info( "Mirror failed, retrying" );
2012-06-27 15:45:33 +01:00
}
else { info( "Mirror failed before commit, giving up" ); }
first_pass = 0;
2012-09-20 13:37:48 +01:00
if ( should_retry ) {
/* We don't want to hammer the destination too
* hard, so if this is a retry, insert a delay. */
sleep( MS_RETRY_DELAY_SECS );
/* We also have to reset the bitmap to be sure
* we transfer everything */
mirror_reset( mirror );
}
2012-09-20 13:37:48 +01:00
}
2012-06-27 15:45:33 +01:00
while ( should_retry && !success );
return NULL;
}