diff options
author | stanleyk <stanleyk@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2013-03-08 00:15:02 +0000 |
---|---|---|
committer | stanleyk <stanleyk@ae88bc3d-4319-0410-8dbf-d08b4c9d3795> | 2013-03-08 00:15:02 +0000 |
commit | 14546537a9bde40c222e8947fd152b0c904287f6 (patch) | |
tree | 24b7d8dadf73e3d9a7f3954fdba113591c3e20d9 | |
parent | 9b634370befcaec15842749f437309b1b4815ad7 (diff) | |
download | ATCD-14546537a9bde40c222e8947fd152b0c904287f6.tar.gz |
Revise startup functionality for FT_Naming_Service
6 files changed, 632 insertions, 162 deletions
diff --git a/TAO/ChangeLog b/TAO/ChangeLog index 7c8139f2a07..3a64dbdae3e 100644 --- a/TAO/ChangeLog +++ b/TAO/ChangeLog @@ -1,3 +1,26 @@ +Fri Mar 8 00:06:30 UTC 2013 Kevin Stanley <stanleyk@ociweb.com> + + * orbsvcs/ImplRepo_Service/Shared_Backing_Store.cpp: + + Print correct error message. Should report replica not running. + + * orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.h: + * orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.cpp: + + Refactored init to have a separate function for replication pairing. + In replication pairing, add logic to allow the backup to start without + a running primary if the primary/backup have successfully started + previously. It will check to see if the primary ior file was written + and if it cannot connect to the primary it will assume the primary + will be started in the future and initiate the pairing then. + + * orbsvcs/tests/FT_Naming/FaultTolerant/run_backup_restart_test.pl: + * orbsvcs/tests/FT_Naming/FaultTolerant/run_test.pl: + + Add a test for starting the primary, then the backup naming service. + Then shutdown both the primary and backup and restart the backup + naming service and try and communicate with it from the client. 
+ Thu Mar 7 17:03:16 UTC 2013 Phil Mesnier <mesnier_p@ociweb.com> * MPC/config/dynamic_tp.mpb: diff --git a/TAO/orbsvcs/ImplRepo_Service/Shared_Backing_Store.cpp b/TAO/orbsvcs/ImplRepo_Service/Shared_Backing_Store.cpp index 15fcc09d31a..354852586ce 100644 --- a/TAO/orbsvcs/ImplRepo_Service/Shared_Backing_Store.cpp +++ b/TAO/orbsvcs/ImplRepo_Service/Shared_Backing_Store.cpp @@ -650,7 +650,7 @@ Shared_Backing_Store::connect_replicas (Replica_ptr this_replica) if (this->imr_type_ == Options::BACKUP_IMR) { ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT("Error: No primary ImR replica file found <%s>\n"), + ACE_TEXT("Error: No primary ImR replica is running <%s>\n"), replica_ior.c_str()), -1); } diff --git a/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.cpp b/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.cpp index ac11469f585..d6ce860a4da 100644 --- a/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.cpp +++ b/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.cpp @@ -121,22 +121,40 @@ TAO_FT_Naming_Server::init_with_orb (int argc, result = init_replication_manager_with_orb (argc, argv, orb); if (result != 0) return result; + + // Setup the pairing with peer + result = init_replication_pairing (); + + // If we successfully paired, we are a backup and + // we have a peer_root_context, then export the combined IORs + if ((result == 0) && + (this->server_role_ == TAO_FT_Naming_Server::BACKUP) && + (!CORBA::is_nil (peer_root_context_.in ()))) + { // If we successfully initialized the replication manager and we are + // a backup server, then we should export the multi-profile + // references to files. No need to write out the IOR if we + + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("Writing combined IOR.\n"))); + + result = export_ft_naming_references (); + } + else if (result == 1) + { // Primary was started in the past, but it is not currently + // accessible for pairing. 
Primary will initiate pairing when + // it is restarted. + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("Unable to pair with primary\n"))); + result = 0; // This is a normal situation on backup restart. + } } catch (const CORBA::Exception& ex) - { - ex._tao_print_exception ( - ACE_TEXT ("TAO_FT_Naming_Server::init_with_orb")); - return -1; - } - - // If we successfully initialized the replication manager and we are - // a backup server, then we should export the multi-profile - // references to files. - if (this->server_role_ == TAO_FT_Naming_Server::BACKUP) - { - // The backup should write out the combined IOR for the primary - // and backup naming service and naming manager. - result = export_ft_naming_references (); + { // No exceptions are expected. + ex._tao_print_exception ( + ACE_TEXT ("TAO_FT_Naming_Server::init_with_orb")); + return -1; } return result; @@ -394,67 +412,82 @@ TAO_FT_Naming_Server::init_replication_manager_with_orb (int argc, this->replication_manager_->initialize ( this->orb_.in (), this->replication_manager_poa_.in ()); + } + catch (const CORBA::Exception& ex) + { + ex._tao_print_exception ( + ACE_TEXT ("TAO_FT_Naming_Server::init_replication_manager_with_orb.\n")); + return -1; + } + + // Success + return 0; +} - ACE_CString primary_file_name ( +int +TAO_FT_Naming_Server::init_replication_pairing (void) +{ + + ACE_CString primary_file_name ( ACE_TEXT_ALWAYS_CHAR (this->persistence_file_name_)); - primary_file_name += "/"; - primary_file_name += - ACE_TEXT_ALWAYS_CHAR (TAO_FT_Naming_Server::primary_replica_ior_filename); + primary_file_name += "/"; + primary_file_name += + ACE_TEXT_ALWAYS_CHAR (TAO_FT_Naming_Server::primary_replica_ior_filename); - ACE_CString backup_file_name ( + ACE_CString backup_file_name ( ACE_TEXT_ALWAYS_CHAR (this->persistence_file_name_)); - backup_file_name += "/"; - backup_file_name += - ACE_TEXT_ALWAYS_CHAR (TAO_FT_Naming_Server::backup_replica_ior_filename); + backup_file_name += "/"; + backup_file_name += + 
ACE_TEXT_ALWAYS_CHAR (TAO_FT_Naming_Server::backup_replica_ior_filename); - if (this->server_role_ == PRIMARY) - { // We are the primary - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server ") - ACE_TEXT ("is a primary\n"))); - - // Write out this replicas IOR for the backup to use to bootstrap - CORBA::String_var replication_ior = naming_service_ior (); - this->write_ior_to_file ( - this->replication_manager_ior_.in (), - primary_file_name.c_str ()); - - // Check if there is already a backup IOR file. If so, then the backup - // may be up and running so we should register with it. - CORBA::Object_var backup_ior; - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server reading ") - ACE_TEXT ("backup ior file\n"))); - - if ((ACE_OS::access (primary_file_name.c_str (), - R_OK) == 0) && - this->read_reference_from_file (backup_file_name.c_str (), - backup_ior.out ()) == 0) - {// Success in reading backup IOR file - // Store the backup reference as our peer - FT_Naming::ReplicationManager_var peer_ref = + if (this->server_role_ == PRIMARY) + { // We are the primary + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("is a primary\n"))); + + // Write out this replicas IOR for the backup to use to bootstrap + CORBA::String_var replication_ior = naming_service_ior (); + this->write_ior_to_file ( + this->replication_manager_ior_.in (), + primary_file_name.c_str ()); + + // Check if there is already a backup IOR file. If so, then the backup + // may be up and running so we should register with it. 
+ CORBA::Object_var backup_ior; + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server reading ") + ACE_TEXT ("backup ior file\n"))); + + if ((ACE_OS::access (primary_file_name.c_str (), + R_OK) == 0) && + this->read_reference_from_file (backup_file_name.c_str (), + backup_ior.out ()) == 0) + {// Success in reading backup IOR file + // Store the backup reference as our peer + FT_Naming::ReplicationManager_var peer_ref = FT_Naming::ReplicationManager::_narrow (backup_ior.in ()); - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server ") - ACE_TEXT ("narrowing IOR\n"))); - if (CORBA::is_nil (peer_ref.in ())) - ACE_ERROR_RETURN ( - (LM_ERROR, - ACE_TEXT ("(%P|%t) ERROR: IOR in file %s is not ") - ACE_TEXT ("a FT_Naming::ReplicationManager\n"), - primary_file_name.c_str ()), - -1); - - try { + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("narrowing IOR\n"))); + if (CORBA::is_nil (peer_ref.in ())) + ACE_ERROR_RETURN ( + (LM_ERROR, + ACE_TEXT ("(%P|%t) ERROR: IOR in file %s is not ") + ACE_TEXT ("a FT_Naming::ReplicationManager\n"), + primary_file_name.c_str ()), + -1); + + try { if (TAO_debug_level > 3) ACE_DEBUG ( - (LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server registering ") - ACE_TEXT ("with backup.\n"))); + (LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server registering ") + ACE_TEXT ("with backup.\n"))); // Register with the backup CosNaming::NamingContext_var root = this->my_root_context (); @@ -480,116 +513,105 @@ TAO_FT_Naming_Server::init_replication_manager_with_orb (int argc, ACE_TEXT ("(%P|%t) - FT_Naming_Server:Backup peer ") ACE_TEXT ("replica not started yet.\n"))); } - } - catch (const CORBA::Exception& ex) - { - // Its Ok that we were unable to contact the backup peer. - // It has apparently not started yet. - // It will register with the primary when it starts up. 
- ex._tao_print_exception ( - ACE_TEXT ("Backup peer replica not started yet.\n")); - } } - else - { - // Could not get the backup replica from the IOR file, which is OK. - // The backup will register with us in the future. - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server no Replica ") - ACE_TEXT ("IOR file. Waiting for registration.\n"))); + catch (const CORBA::Exception& ex) + { + // Its Ok that we were unable to contact the backup peer. + // It has apparently not started yet. + // It will register with the primary when it starts up. + ex._tao_print_exception ( + ACE_TEXT ("Backup peer replica not started yet.\n")); + } + } + else + { + // Could not get the backup replica from the IOR file, which is OK. + // The backup will register with us in the future. + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server no Replica ") + ACE_TEXT ("IOR file. Waiting for registration.\n"))); } - } - else if (this->server_role_ == TAO_FT_Naming_Server::BACKUP) - { // We are the backup - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server:Is a Backup\n"))); + } + else if (this->server_role_ == TAO_FT_Naming_Server::BACKUP) + { // We are the backup + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server:Is a Backup\n"))); - if (TAO_debug_level > 3) + if (TAO_debug_level > 3) ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("(%P|%t) - FT_Naming_Server writing ") ACE_TEXT ("replica ior\n"))); - // Write out the backup ior for use by the primary if it must be restarted. - this->write_ior_to_file ( - replication_manager_ior_.in (), - backup_file_name.c_str ()); + // Write out the backup ior for use by the primary if it must be restarted. + this->write_ior_to_file ( + replication_manager_ior_.in (), + backup_file_name.c_str ()); - // Get the ior file for the primary from the - // persistence directory. If not there, fail. 
- CORBA::Object_var primary_ref = CORBA::Object::_nil (); + CORBA::Object_var primary_ref = CORBA::Object::_nil (); - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server ") - ACE_TEXT ("reading primary ior file\n"))); - // Check for the primary IOR. We must have it to bootstrap the redundant - // naming pair. - if ((ACE_OS::access (primary_file_name.c_str (), R_OK) == 0) && - (this->read_reference_from_file (primary_file_name.c_str (), - primary_ref.out ()) == 0)) - { - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server ") - ACE_TEXT ("toring the primary reference ior\n"))); - // Store the primary reference as our peer - FT_Naming::ReplicationManager_var peer_ref = - FT_Naming::ReplicationManager::_narrow (primary_ref.in ()); - - if (CORBA::is_nil (peer_ref.in ())) - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ("(%P|%t) ERROR: IOR in file %s ") - ACE_TEXT ("is not a FT_Naming::ReplicationManager\n"), - primary_file_name.c_str ()), - -1); + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("reading primary ior file\n"))); + // Check for the primary IOR. We must have it to bootstrap the redundant + // naming pair. + if ((ACE_OS::access (primary_file_name.c_str (), R_OK) == 0) && + (this->read_reference_from_file (primary_file_name.c_str (), + primary_ref.out ()) == 0)) + { // There is a primary IOR file, so we must be restarting. 
+ if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("storing the primary reference ior\n"))); + // Store the primary reference as our peer + FT_Naming::ReplicationManager_var peer_ref = + FT_Naming::ReplicationManager::_narrow (primary_ref.in ()); - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server ") - ACE_TEXT ("backup registering with primary.\n"))); - // Register with the primary - CosNaming::NamingContext_var root = this->my_root_context (); - FT_Naming::NamingManager_var nm = this->my_naming_manager (); - int registration_result = - this->replication_manager_->register_with_peer_replica (peer_ref.in (), - root.in (), - nm.in ()); - if (registration_result == -1) - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ("(%P|%t) ERROR: Backup unable to ") - ACE_TEXT ("register with the primary\n")), - -1); - } - else - { + if (CORBA::is_nil (peer_ref.in ())) ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ("(%P|%t) ERROR: No primary IOR ") - ACE_TEXT ("available. Have you started the ") - ACE_TEXT ("primary? 
Exiting.\n")), + ACE_TEXT ("(%P|%t) ERROR: IOR in file %s ") + ACE_TEXT ("is not a FT_Naming::ReplicationManager\n"), + primary_file_name.c_str ()), -1); - } - } - else - {// We are neither a primary or replica, but running in standalone mode - if (TAO_debug_level > 3) - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ("(%P|%t) - FT_Naming_Server:Is Standalone\n"))); - } - } - catch (const CORBA::Exception& ex) - { - ex._tao_print_exception ( - ACE_TEXT ("TAO_FT_Naming_Server::init_replication_manager_with_orb.\n")); - return -1; + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server ") + ACE_TEXT ("backup registering with primary.\n"))); + // Register with the primary + CosNaming::NamingContext_var root = this->my_root_context (); + FT_Naming::NamingManager_var nm = this->my_naming_manager (); + int registration_result = + this->replication_manager_->register_with_peer_replica (peer_ref.in (), + root.in (), + nm.in ()); + if (registration_result == -1) + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ("(%P|%t) Backup unable to ") + ACE_TEXT ("register with the primary at this time.\n")), + 1); + } + else + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ("(%P|%t) ERROR: No primary IOR ") + ACE_TEXT ("available. Have you started the ") + ACE_TEXT ("primary? 
Exiting.\n")), + -1); + } } + else + {// We are neither a primary or replica, but running in standalone mode + if (TAO_debug_level > 3) + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ("(%P|%t) - FT_Naming_Server:Is Standalone\n"))); + + } - // Success return 0; } - int TAO_FT_Naming_Server::parse_args (int argc, ACE_TCHAR *argv[]) @@ -795,7 +817,6 @@ TAO_FT_Naming_Server::parse_args (int argc, ACE_TEXT ("IOR (using '-c' option) from the backup") ACE_TEXT (" server.\n\n")), -1); - } return 0; } diff --git a/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.h b/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.h index b1b6f978024..f4216678e74 100644 --- a/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.h +++ b/TAO/orbsvcs/orbsvcs/Naming/FaultTolerant/FT_Naming_Server.h @@ -73,6 +73,12 @@ public: ACE_TCHAR *argv [], CORBA::ORB_ptr orb); + /// Setup pairing with peer replica. + /// Returns 0 on successful pairing with peer. + /// Returns 1 if peer IOR file is present, but peer is not responding + /// Returns -1 if pairing is not possible. + int init_replication_pairing (void); + /// Overridden parse operation. 
Only allows options supported by the FT_Naming_Server /// and adds options for the object group manager virtual int parse_args (int argc, diff --git a/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_backup_restart_test.pl b/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_backup_restart_test.pl new file mode 100755 index 00000000000..ece8e711428 --- /dev/null +++ b/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_backup_restart_test.pl @@ -0,0 +1,419 @@ +eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}' + & eval 'exec perl -S $0 $argv:q' + if 0; + +# $Id$ +# -*- perl -*- + +use lib "$ENV{ACE_ROOT}/bin"; +use PerlACE::TestTarget; +use Cwd; + +#$ENV{ACE_TEST_VERBOSE} = "1"; + +my $startdir = getcwd(); +my $debug_level = '0'; +my $redirection_enabled = 0; + +foreach $i (@ARGV) { + if ($i eq '-debug') { + $debug_level = '4'; + } + if ($i eq '-verbose') { + $redirection_enabled = 0; + } +} + +my $server = PerlACE::TestTarget::create_target (1) || die "Create target 1 failed\n"; +my $client = PerlACE::TestTarget::create_target (2) || die "Create target 2 failed\n"; + + +# Variables for command-line arguments to client and server +# executables. +my $hostname = $server->HostName (); + + +my $ns_orb_port1 = 10001; +my $ns_orb_port2 = 10002; + +my $ns_endpoint1 = "iiop://$hostname:$ns_orb_port1"; +my $ns_endpoint2 = "iiop://$hostname:$ns_orb_port2"; + + + +# References to both naming services +my $default_init_ref = "-ORBDefaultInitRef corbaloc:iiop:$hostname:$ns_orb_port1,iiop:$hostname:$ns_orb_port2"; + +# References to primary naming service only +my $primary_default_init_ref = "-ORBDefaultInitRef corbaloc:iiop:$hostname:$ns_orb_port1"; + +# References to backup naming service only +my $backup_default_init_ref = "-ORBDefaultInitRef corbaloc:iiop:$hostname:$ns_orb_port2"; + +## Allow the user to determine where the persistence file will be located +## just in case the current directory is not suitable for locking. 
+## We can't change the name of the persistence file because that is not +## sufficient to work around locking problems for Tru64 when the current +## directory is NFS mounted from a system that does not properly support +## locking. +foreach my $possible ($ENV{TMPDIR}, $ENV{TEMP}, $ENV{TMP}) { + if (defined $possible && -d $possible) { + if (chdir($possible)) { + last; + } + } +} + +my $status = 0; +my $POSITIVE_TEST_RESULT = 0; +my $NEGATIVE_TEST_RESULT = 1; + +my $NSGROUP = $client->CreateProcess ("$ENV{ACE_ROOT}/bin/tao_nsgroup"); +my $NSLIST = $client->CreateProcess ("$ENV{ACE_ROOT}/bin/tao_nslist"); +my $NSADD = $client->CreateProcess ("$ENV{ACE_ROOT}/bin/tao_nsadd"); +my $NSDEL = $client->CreateProcess ("$ENV{ACE_ROOT}/bin/tao_nsdel"); + +sub cat_file($) +{ + my $file_name = shift; + if (-s $file_name ) # size of file is greater than zero + { + open TESTFILE, $file_name or die "Couldn't open file: $!"; + my @teststring = <TESTFILE>; # read in all of the file + print STDERR "\n@teststring\n"; + close TESTFILE; + } +} + +sub redirect_output() +{ + open (OLDOUT, ">&", \*STDOUT) or die "Can't dup STDOUT: $!"; + open (OLDERR, ">&", \*STDERR) or die "Can't dup STDERR: $!"; + open STDERR, '>', $client_stderr_file; + open STDOUT, '>', $client_stdout_file; +} + +sub restore_output() +{ + open (STDERR, ">&OLDERR") or die "Can't dup OLDERR: $!"; + open (STDOUT, ">&OLDOUT") or die "Can't dup OLDOUT: $!"; +} + +sub run_nsgroup ($$) +{ + my $args = shift; + my $expected_test_result = shift; + + my $arglist = "$args"; + + $NSGROUP->Arguments ($arglist); + + if ($redirection_enabled) { + redirect_output(); + } + + my $nsgroup_status = $NSGROUP->SpawnWaitKill ($client->ProcessStartWaitInterval()); + + if ($redirection_enabled) { + restore_output(); + } + + if ($nsgroup_status != $expected_test_result) { + my $time = localtime; + print STDERR "ERROR: nsgroup returned $nsgroup_status at $time\n"; + if ($redirection_enabled) { + cat_file($client_stderr_file); + 
cat_file($client_stdout_file); + } + $status = 1; + } +} + +sub run_nslist($$) +{ + my $args = shift; + my $expected_test_result = shift; + + $NSLIST->Arguments ($args); + + if ($redirection_enabled) { + redirect_output(); + } + + #tao_nslist --ns file://ns.ior + my $nslist_status = $NSLIST->SpawnWaitKill ($client->ProcessStartWaitInterval()); + + if ($redirection_enabled) { + restore_output(); + } + + if ($nslist_status != $expected_test_result) { + my $time = localtime; + print STDERR "ERROR: nslist returned $nslist_status at $time\n"; + if ($redirection_enabled) { + cat_file($client_stderr_file); + cat_file($client_stdout_file); + } + $status = 1; + } +} + +sub run_nsadd($$) +{ + my $args = shift; + my $expected_test_result = shift; + + $NSADD->Arguments ($args); + + if ($redirection_enabled) { + redirect_output(); + } + + #tao_nsadd --ns file://ns.ior --name iso --ctx + my $nsadd_status = $NSADD->SpawnWaitKill ($client->ProcessStartWaitInterval()); + + if ($redirection_enabled) { + restore_output(); + } + + if ($nsadd_status != $expected_test_result) { + my $time = localtime; + print STDERR "ERROR: nsadd returned $nsadd_status at $time\n"; + if ($redirection_enabled) { + cat_file($client_stderr_file); + cat_file($client_stdout_file); + } + $status = 1; + } +} + +sub run_nsdel($$) +{ + my $args = shift; + my $expected_test_result = shift; + + $NSDEL->Arguments ($args); + + if ($redirection_enabled) { + redirect_output(); + } + + #tao_nsdel --ns file://ns.ior --name iso --destroy + my $nsdel_status = $NSDEL->SpawnWaitKill ($client->ProcessStartWaitInterval()); + + if ($redirection_enabled) { + restore_output(); + } + + if ($nsdel_status != $expected_test_result) { + my $time = localtime; + print STDERR "ERROR: nsdel returned $nsdel_status at $time\n"; + if ($redirection_enabled) { + cat_file($client_stderr_file); + cat_file($client_stdout_file); + } + $status = 1; + } +} + +sub clean_persistence_dir($$) +{ + my $target = shift; + my $directory_name = shift; + 
chdir $directory_name; + opendir(THISDIR, "."); + @allfiles = grep(!/^\.\.?$/, readdir(THISDIR)); + closedir(THISDIR); + foreach $tmp (@allfiles){ + $target->DeleteFile ($tmp); + } + chdir ".."; +} + +# Make sure that the directory to use to hold the naming contexts exists +# and is cleaned out +sub init_naming_context_directory($$) +{ + my $target = shift; + my $directory_name = shift; + + if ( ! -d $directory_name ) { + mkdir ($directory_name, 0777); + } else { + clean_persistence_dir ($target, $directory_name); + } +} + +my $name_dir = "NameService"; +my $group_dir = "GroupService"; +my $primary_iorfile = "$name_dir/ns_replica_primary.ior"; +my $nm_iorfile = "nm.ior"; +my $ns_iorfile = "ns.ior"; +my $stderr_file = "test.err"; +my $stdout_file = "test.out"; + +################################################################################ +# setup END block to cleanup after exit call +################################################################################ +END +{ + print STDERR "INFO: cleanup up after tests\n"; + $server->DeleteFile($primary_iorfile); + $server->DeleteFile($nm_iorfile); + $server->DeleteFile($ns_iorfile); + $client->DeleteFile ($stdout_file); + $client->DeleteFile ($stderr_file); + + if ( -d $name_dir ) { + print STDERR "INFO: removing <$name_dir>\n"; + clean_persistence_dir ($server, $name_dir); + rmdir ($name_dir); + } + + if ( -d $group_dir ) { + print STDERR "INFO: removing <$group_dir>\n"; + clean_persistence_dir ($server, $group_dir); + rmdir ($group_dir); + } + +} + +################################################################################ +# Validate that a client can seamlessly connect to the alternate server of a +# server naming server pair after the other server has been terminated. 
+################################################################################ +sub backup_restart_test() +{ + my $previous_status = $status; + $status = 0; + + # The file that is written by the primary when ready to start backup + my $server_primary_iorfile = $server->LocalFile ($primary_iorfile); + my $server_nm_iorfile = $server->LocalFile ($nm_iorfile); + my $server_ns_iorfile = $server->LocalFile ($ns_iorfile); + my $client_stdout_file = $client->LocalFile ($stdout_file); + my $client_stderr_file = $client->LocalFile ($stderr_file); + + print_msg("Backup Restart Test"); + init_naming_context_directory ($server, $name_dir ); + init_naming_context_directory ($server, $group_dir ); + + # Run two Naming Servers + my $ns1_args = "--primary ". + "-ORBDebugLevel $debug_level ". + "-ORBListenEndPoints $ns_endpoint1 ". + "-m 0 ". + "-r $name_dir ". + "-v $group_dir"; + + my $ns2_args = "--backup ". + "-ORBDebugLevel $debug_level ". + "-ORBListenEndPoints $ns_endpoint2 ". + "-c $server_ns_iorfile ". + "-g $server_nm_iorfile ". + "-m 0 ". + "-r $name_dir ". + "-v $group_dir"; + + my $tao_ft_naming = "$ENV{TAO_ROOT}/orbsvcs/FT_Naming_Service/tao_ft_naming"; + + my $client_args = "--failover " . + "-ORBDebugLevel $debug_level " . + "-p file://$server_ns_iorfile " . + "-r file://$server_nm_iorfile " . + "-b 4 " . 
+ "-d 4 " ; + + my $client_prog = "$startdir/client"; + + $NS1 = $server->CreateProcess ($tao_ft_naming, $ns1_args); + $NS2 = $server->CreateProcess ($tao_ft_naming, $ns2_args); + + $CL = $client->CreateProcess ($client_prog, $client_args); + + $server->DeleteFile ($primary_iorfile); + $server->DeleteFile ($ns_iorfile); + $server->DeleteFile ($nm_iorfile); + + print_msg("INFO: Starting the primary"); + $NS1->Spawn (); + if ($server->WaitForFileTimed ($primary_iorfile, + $server->ProcessStartWaitInterval()) == -1) { + print STDERR "ERROR: cannot find file <$server_primary_iorfile>\n"; + $NS1->Kill (); $NS1->TimedWait (1); + exit 1; + } + + print_msg("INFO: Starting the backup"); + $NS2->Spawn (); + if ($server->WaitForFileTimed ($ns_iorfile, + $server->ProcessStartWaitInterval()) == -1) { + print STDERR "ERROR: cannot find file <$server_ns_iorfile>\n"; + $NS2->Kill (); $NS2->TimedWait (1); + $NS1->Kill (); $NS1->TimedWait (1); + exit 1; + } + + print_msg("INFO: Killing the backup"); + $server_status = $NS2->TerminateWaitKill ($server->ProcessStopWaitInterval()); + if ($server_status != 0) { + print STDERR "ERROR: server 2 returned $server_status\n"; + $status = 1; + } + + print_msg("INFO: Killing the primary"); + $server_status = $NS1->TerminateWaitKill ($server->ProcessStopWaitInterval()); + if ($server_status != 0) { + print STDERR "ERROR: server 1 returned $server_status\n"; + $status = 1; + } + + print_msg("INFO: restart backup server"); + $NS2->Spawn (); + + sleep (5); + + print_msg("INFO: Starting the client"); + $client_status = $CL->SpawnWaitKill ($client->ProcessStartWaitInterval()); + + if ($client_status != 0) { + print STDERR "ERROR: client returned $client_status\n"; + $status = 1; + } + + $server_status = $NS2->TerminateWaitKill ($server->ProcessStopWaitInterval()); + if ($server_status != 0) { + print STDERR "ERROR: server 2 returned $server_status\n"; + $status = 1; + } + + if ( $status == 0 ) { + $status = $previous_status; + } + + return $status; 
+} + +sub print_msg($) +{ + my $msg = shift; + my $bar = "==============================================================================="; + print STDERR "\n\n$bar\n$msg\n$bar\n"; +} + +sub show_result($$) +{ + my $test_result = shift; + my $test_name = shift; + + if ( 0 == $test_result ) { + print_msg("$test_name: SUCCESS"); + } else { + print_msg("$test_name: ERROR"); + } +} + +my $result = backup_restart_test (); +show_result($result, "Backup Restart Test"); + +exit $result; diff --git a/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_test.pl b/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_test.pl index 1f2a652df81..46143cf9615 100755 --- a/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_test.pl +++ b/TAO/orbsvcs/tests/FT_Naming/FaultTolerant/run_test.pl @@ -12,7 +12,8 @@ use Cwd; my @list=("run_failover_test.pl", "run_persistence_test.pl", - "run_equivalence_test.pl"); + "run_equivalence_test.pl", + "run_backup_restart_test.pl"); my $status = 0; |