Don't retry a mirror which failed on the first attempt

If the first mirror attempt failed and we were able to report an error to
the user, it makes no sense to attempt a retry.  We don't have a way to
abort a mirror attempt yet, so if the user got a setting wrong and the
mirror is failing for that reason, retrying behind the scenes would leave
them with no recourse other than restarting the server.
This commit is contained in:
Alex Young
2012-07-15 20:07:17 +01:00
parent a10adf007c
commit b20fbc6a66
3 changed files with 39 additions and 19 deletions

View File

@@ -533,6 +533,7 @@ void * mirror_super_runner( void * serve_uncast )
NULLCHECK( serve->mirror ); NULLCHECK( serve->mirror );
NULLCHECK( serve->mirror_super ); NULLCHECK( serve->mirror_super );
int first_pass = 1;
int should_retry = 0; int should_retry = 0;
int success = 0; int success = 0;
@@ -540,16 +541,6 @@ void * mirror_super_runner( void * serve_uncast )
struct mirror_super * super = serve->mirror_super; struct mirror_super * super = serve->mirror_super;
do { do {
if ( should_retry ) {
/* We don't want to hammer the destination too
* hard, so if this is a retry, insert a delay. */
sleep( MS_RETRY_DELAY_SECS );
/* We also have to reset the bitmap to be sure
* we transfer everything */
mirror_reset( mirror );
}
FATAL_IF( 0 != pthread_create( FATAL_IF( 0 != pthread_create(
&mirror->thread, &mirror->thread,
NULL, NULL,
@@ -562,14 +553,22 @@ void * mirror_super_runner( void * serve_uncast )
mbox_receive( mirror->commit_signal ); mbox_receive( mirror->commit_signal );
debug( "Supervisor got commit signal" ); debug( "Supervisor got commit signal" );
if ( 0 == should_retry ) { if ( first_pass ) {
should_retry = 1; /* Only retry if the connection attempt was
* successful. Otherwise the user will see an
* error reported while we're still trying to
* retry behind the scenes.
*/
should_retry = *commit_state == MS_GO;
/* Only send this signal the first time */ /* Only send this signal the first time */
mirror_super_signal_committed( mirror_super_signal_committed(
super, super,
*commit_state); *commit_state);
debug("Mirror supervisor committed"); debug("Mirror supervisor committed");
} }
/* We only care about the value of the commit signal on
* the first pass, so this is ok
*/
free( commit_state ); free( commit_state );
debug("Supervisor waiting for mirror thread" ); debug("Supervisor waiting for mirror thread" );
@@ -577,8 +576,8 @@ void * mirror_super_runner( void * serve_uncast )
success = MS_DONE == mirror_get_state( mirror ); success = MS_DONE == mirror_get_state( mirror );
if( success ){
if( success ){ info( "Mirror supervisor success, exiting" ); } info( "Mirror supervisor success, exiting" ); }
else if ( mirror->signal_abandon ) { else if ( mirror->signal_abandon ) {
info( "Mirror abandoned" ); info( "Mirror abandoned" );
should_retry = 0; should_retry = 0;
@@ -587,6 +586,19 @@ void * mirror_super_runner( void * serve_uncast )
info( "Mirror failed, retrying" ); info( "Mirror failed, retrying" );
} }
else { info( "Mirror failed before commit, giving up" ); } else { info( "Mirror failed before commit, giving up" ); }
first_pass = 0;
if ( should_retry ) {
/* We don't want to hammer the destination too
* hard, so if this is a retry, insert a delay. */
sleep( MS_RETRY_DELAY_SECS );
/* We also have to reset the bitmap to be sure
* we transfer everything */
mirror_reset( mirror );
}
} }
while ( should_retry && !success ); while ( should_retry && !success );

View File

@@ -6,15 +6,24 @@
require 'flexnbd/fake_dest' require 'flexnbd/fake_dest'
include FlexNBD include FlexNBD
Thread.abort_on_exception = true
addr, port = *ARGV addr, port = *ARGV
server = FakeDest.new( addr, port ) server = FakeDest.new( addr, port )
client = server.accept client = server.accept
t = Thread.new do t = Thread.new do
# The sender *should not reconnect.* Since this is a first-pass
# mirror attempt, the user will have been told that the mirror failed,
# so it makes no sense to continue. This means we have to invert the
# sense of the exception.
begin
client2 = server.accept( "Timed out waiting for a reconnection", client2 = server.accept( "Timed out waiting for a reconnection",
FlexNBD::MS_RETRY_DELAY_SECS + 1 ) FlexNBD::MS_RETRY_DELAY_SECS + 1 )
client2.close client2.close
fail "Unexpected reconnection."
rescue Timeout::Error
end
end end
client.write_hello( :size => :wrong ) client.write_hello( :size => :wrong )

View File

@@ -144,8 +144,7 @@ module FlexNBD
client_sock = @sock.accept client_sock = @sock.accept
end end
rescue Timeout::Error rescue Timeout::Error
$stderr.puts err_msg raise Timeout::Error.new(err_msg)
exit 1
end end
client_sock client_sock