mirror: Don't start migrating until the allocation map is built

There is a fun race that can happen if we begin migrating while the
allocation map is still building. We call bitset_enable_stream()
when the migration begins, which causes the builder to start putting
events into the stream. This is bad all by itself, as it slows the
migration down for no reason, but the stream is a limited-size queue
and there are situations (migration fails and is restarted) where we
can end up with the queue full and nobody able to empty it, freezing
the whole thing.
This commit is contained in:
nick
2013-10-23 15:58:47 +01:00
parent 335261869d
commit 97a923afdf
3 changed files with 49 additions and 6 deletions

View File

@@ -70,6 +70,7 @@ struct mirror_ctrl {
/* libev stuff */
struct ev_loop *ev_loop;
ev_timer begin_watcher;
ev_io read_watcher;
ev_io write_watcher;
ev_timer timeout_watcher;
@@ -669,6 +670,7 @@ void mirror_abandon_cb( struct ev_loop *loop, ev_io *w, int revents )
return;
}
void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
{
struct mirror_ctrl* ctrl = (struct mirror_ctrl*) w->data;
@@ -693,6 +695,36 @@ void mirror_limit_cb( struct ev_loop *loop, ev_timer *w, int revents )
return;
}
/* Timer callback used to delay the start of the migration until the
 * allocation map builder thread has finished.
 *
 * Each time the timer fires we check the server's allocation_map_built and
 * allocation_map_not_built flags. If either is set, the builder is done
 * (the latter appears to be set when the builder gives up on the map), so
 * we start the migration: arm the write watcher, arm the timeout watcher
 * and enable the bitset stream. If neither is set we wait for another timer
 * period, because enabling the bitset stream while the builder is still
 * running does not go well for us.
 *
 * loop    - the libev loop the watchers are registered on
 * w       - the begin watcher; w->data must point at the struct mirror_ctrl
 * revents - libev event mask; nothing is done unless EV_TIMER is set
 */
void mirror_begin_cb( struct ev_loop *loop, ev_timer *w, int revents )
{
	struct mirror_ctrl* ctrl = (struct mirror_ctrl*) w->data;
	NULLCHECK( ctrl );

	if ( !(revents & EV_TIMER ) ) {
		warn( "Mirror begin callback executed but no timer event signalled" );
		return;
	}

	if ( ctrl->serve->allocation_map_built || ctrl->serve->allocation_map_not_built ) {
		debug( "allocation map builder is finished, beginning migration" );
		/* This is a repeating timer, so libev re-arms it after every
		 * callback. Stop it explicitly, otherwise this branch would run
		 * again on each period and re-start the migration watchers and
		 * re-enable the stream every time. */
		ev_timer_stop( loop, w );
		/* Start by writing xfer 0 to the listener */
		ev_io_start( loop, &ctrl->write_watcher );
		/* We want to timeout during the first write as well as subsequent ones */
		ev_timer_again( loop, &ctrl->timeout_watcher );
		/* We're now interested in events */
		bitset_enable_stream( ctrl->serve->allocation_map );
	} else {
		/* not done yet, so wait for another timer period */
		ev_timer_again( loop, w );
	}

	return;
}
void mirror_run( struct server *serve )
{
NULLCHECK( serve );
@@ -723,6 +755,10 @@ void mirror_run( struct server *serve )
ctrl.ev_loop = EV_DEFAULT;
/* gcc warns on -O2. clang is fine. Seems to be the fault of ev.h */
ev_init( &ctrl.begin_watcher, mirror_begin_cb );
ctrl.begin_watcher.repeat = 1.0; // We check bps every second. seems sane.
ctrl.begin_watcher.data = (void*) &ctrl;
ev_io_init( &ctrl.read_watcher, mirror_read_cb, m->client, EV_READ );
ctrl.read_watcher.data = (void*) &ctrl;
@@ -746,19 +782,23 @@ void mirror_run( struct server *serve )
"Couldn't find first transfer for mirror!"
);
if ( serve->allocation_map_built ) {
/* Start by writing xfer 0 to the listener */
ev_io_start( ctrl.ev_loop, &ctrl.write_watcher );
/* We want to timeout during the first write as well as subsequent ones */
ev_timer_again( ctrl.ev_loop, &ctrl.timeout_watcher );
bitset_enable_stream( serve->allocation_map );
} else {
debug( "Waiting for allocation map to be built" );
ev_timer_again( ctrl.ev_loop, &ctrl.begin_watcher );
}
/* Everything up to here is blocking. We switch to non-blocking so we
* can handle rate-limiting and weird error conditions better. TODO: We
* should expand the event loop upwards so we can do the same there too */
sock_set_nonblock( m->client, 1 );
bitset_enable_stream( serve->allocation_map );
info( "Entering event loop" );
ev_run( ctrl.ev_loop, 0 );
info( "Exited event loop" );

View File

@@ -686,6 +686,7 @@ void* build_allocation_map_thread(void* serve_uncast)
* the future, we'll need to wait for the allocation map to finish or
* fail before we can complete the migration.
*/
serve->allocation_map_not_built = 1;
warn( "Didn't build allocation map for %s", serve->filename );
}

View File

@@ -76,8 +76,10 @@ struct server {
struct bitset * allocation_map;
/* when starting up, this thread builds the allocation_map */
pthread_t allocation_map_builder_thread;
/* when the thread has finished, it sets this to 1 */
volatile sig_atomic_t allocation_map_built;
volatile sig_atomic_t allocation_map_not_built;
int max_nbd_clients;
struct client_tbl_entry *nbd_client;