From f54813a6a22398e86199df62fb287169f00b6b67 Mon Sep 17 00:00:00 2001 From: Steve Totten Date: Thu, 16 Oct 2003 23:38:04 +0000 Subject: ChangeLogTag: Thu Oct 16 18:16:07 2003 Steve Totten --- TAO/ChangeLog | 14 +- .../FT_FaultEventDescriptor.cpp | 62 +- .../FT_FaultEventDescriptor.h | 8 +- .../FT_ReplicationManagerFaultAnalyzer.cpp | 856 ++++++++++++++------- .../FT_ReplicationManagerFaultAnalyzer.h | 63 +- 5 files changed, 704 insertions(+), 299 deletions(-) diff --git a/TAO/ChangeLog b/TAO/ChangeLog index fb2dd653207..ce5b4702e72 100644 --- a/TAO/ChangeLog +++ b/TAO/ChangeLog @@ -1,3 +1,16 @@ +Thu Oct 16 18:16:07 2003 Steve Totten + + * orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.cpp: + * orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h: + * orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.cpp: + * orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.h: + Extended the fault analysis logic to determine if the number + of replicas has fallen below the minimum for the object + group and to add new members (via the registered factories + for the object group's type). + + These changes were made in the oci_haft branch. + Tue Oct 14 18:35:07 2003 Dale Wilson * orbsvcs/tests/FT_App/FT_ReplicaFactory_i.cpp: @@ -20,7 +33,6 @@ Tue Oct 14 18:35:07 2003 Dale Wilson * orbsvcs/tests/FT_App/run_test_registry.pl: new unit test using ReplicationManager as FactoryRegistry - Tue Oct 14 18:30:45 2003 Steve Totten * orbsvcs/FT_ReplicationManager/FT_ReplicationManager.cpp: diff --git a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.cpp b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.cpp index cd998c80a97..ae54c8daca7 100755 --- a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.cpp +++ b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.cpp @@ -21,13 +21,71 @@ ACE_RCSID (FT_FaultEventDescriptor, FT_FaultEventDescriptor, "$Id$") -/// Default constructor. +// Default constructor. 
TAO::FT_FaultEventDescriptor::FT_FaultEventDescriptor () : all_at_location_failed (0) , all_of_type_at_location_failed (0) , object_at_location_failed (0) , object_is_primary (0) + , type_id (CORBA::string_dup ("")) , object_group_id (PortableGroup::ObjectGroupId (0)) { - type_id = CORBA::string_dup (""); } + +// Debugging support. +void TAO::FT_FaultEventDescriptor::dump () +{ + // Get the location as a string. + ACE_CString loc_as_string; + for (CORBA::ULong li = 0; li < this->location->length(); ++li) + { + if (li > 0) loc_as_string += "/"; + // Assume only the "id" field of the CosNaming::Name is used. + loc_as_string += CORBA::string_dup (this->location[li].id); + } + + if (this->all_at_location_failed == 1) + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_FaultEventDescriptor::dump: " + "All objects at location <%s> failed.\n"), + loc_as_string.c_str() + )); + } + + if (this->all_of_type_at_location_failed == 1) + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_FaultEventDescriptor::dump: " + "All objects of type <%s> at location <%s> failed.\n"), + this->type_id.in(), + loc_as_string.c_str() + )); + } + + if (this->object_at_location_failed == 1) + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_FaultEventDescriptor::dump: " + "Replica of type <%s> with ObjectGroupId <%Q> " + "at location <%s> failed.\n"), + this->type_id.in(), + this->object_group_id, + loc_as_string.c_str() + )); + } + + if (this->object_is_primary == 1) + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_FaultEventDescriptor::dump: " + "Primary replica of ObjectGroupId <%Q> failed.\n"), + this->object_group_id + )); + } +} + diff --git a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h index a012de9e42f..694bcd96de5 100755 --- a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h +++ b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h @@ -34,7 +34,10 @@ namespace TAO /// Default constructor. 
FT_FaultEventDescriptor (); - //////// + /// Debugging support. + void dump (); + + ///////////////// /// Data members. /// Flags indicating the "extent" of the fault. @@ -58,6 +61,9 @@ namespace TAO FT::MinimumNumberReplicasValue minimum_number_replicas; FT::InitialNumberReplicasValue initial_number_replicas; + /// The object group's factories. + FT::FactoryInfos_var factories; + }; } // namespace TAO diff --git a/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.cpp b/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.cpp index f6370fdb3e2..752e4df60b2 100755 --- a/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.cpp +++ b/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.cpp @@ -17,8 +17,10 @@ #include "orbsvcs/FT_ReplicationManager/FT_ReplicationManager.h" #include "orbsvcs/FT_ReplicationManager/FT_FaultEventDescriptor.h" #include "orbsvcs/PortableGroup/PG_Property_Utils.h" +#include "orbsvcs/PortableGroup/PG_Operators.h" #include "orbsvcs/FaultTolerance/FT_IOGR_Property.h" #include +#include #define INTEGRATED_WITH_REPLICATION_MANAGER 1 @@ -83,23 +85,8 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event ( // Extract the location. if (result == 0) { - FT::Location* temp_loc; - if ((filterable[1].value >>= temp_loc) == 0) - { - ACE_ERROR ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not extract Location value from fault event.\n") - )); - result = -1; - } - else - { - // Make a deep copy of the Location. - FT::Location* copy_loc = 0; - ACE_NEW_RETURN (copy_loc, FT::Location (*temp_loc), -1); - fault_event_desc.location = copy_loc; - } + result = this->get_location ( + filterable[1].value, fault_event_desc.location.out()); } // CORBA 3.0.2, section 23.4.5.1 states: @@ -111,225 +98,39 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event ( // the given location with the given type have failed. 
// - Both are set if the member with the given ObjectGroupId at the // given location has failed. - if (result == 0) + + if ((result == 0) && (item_count == 2)) { // All objects at location failed. - if (item_count == 2) - { - fault_event_desc.all_at_location_failed = 1; - } + fault_event_desc.all_at_location_failed = 1; + } + if ((result == 0) && (item_count == 3)) + { // All objects of type at location failed. - if (item_count == 3) - { - fault_event_desc.all_of_type_at_location_failed = 1; - fault_event_desc.type_id = this->extract_type_id (filterable[2].value); - } + fault_event_desc.all_of_type_at_location_failed = 1; + result = this->get_type_id ( + filterable[2].value, fault_event_desc.type_id.out()); + } + if ((result == 0) && (item_count == 4)) + { // An object (replica) at a location failed. - if (item_count == 4) + fault_event_desc.object_at_location_failed = 1; + result = this->get_type_id ( + filterable[2].value, fault_event_desc.type_id.out()); + if (result == 0) { - fault_event_desc.object_at_location_failed = 1; - fault_event_desc.type_id = this->extract_type_id (filterable[2].value); - fault_event_desc.object_group_id = - this->extract_object_group_id (filterable[3].value); + result = this->get_object_group_id ( + filterable[3].value, fault_event_desc.object_group_id); } } - //TODO: Finish the implementation of analyze_fault_event() to: - // - if the ReplicationStyle of the object group is FT::SEMI_ACTIVE, - // and if the failed replica was the primary replica, set a new - // primary replica - // - if the number of replicas falls below MinimumNumberReplicas - // property and MembershipStyle of the object group is FT::MEMB_INF_CTRL, - // create and add a new member. - #if (INTEGRATED_WITH_REPLICATION_MANAGER == 1) - // If a specific object at a location failed, we need to determine - // if it was the primary replica. - if ((result == 0) && - (fault_event_desc.object_at_location_failed == 1)) + // A specific object at a location failed. 
+ if ((result == 0) && (fault_event_desc.object_at_location_failed == 1)) { - FT::ObjectGroup_var the_object_group = FT::ObjectGroup::_nil(); - ACE_TRY_NEW_ENV - { - //@@ Q: How do we get the ObjectGroup from the ObjectGroupId? - //@@ A: Use TAO-specific extension to PortableGroup::ObjectGroupManager - // interface (get_object_group_ref_from_id()). - the_object_group = - this->replication_manager_->get_object_group_ref_from_id ( - fault_event_desc.object_group_id - ACE_ENV_ARG_PARAMETER); - ACE_TRY_CHECK; - } - ACE_CATCH (PortableGroup::ObjectGroupNotFound, ex) - { - ACE_PRINT_EXCEPTION ( - ex, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: ") - ); - result = -1; - } - ACE_ENDTRY; - - if (CORBA::is_nil (the_object_group.in())) - { - ACE_ERROR ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"), - fault_event_desc.object_group_id - )); - result = -1; - } - - //@@ Q: How do we get the properties of the ObjectGroup to - // figure out the ReplicationStyle, MinimumNumberReplicas, etc.? - //@@ A: FT::PropertyManager::get_properties(). - - // Get the properties associated with this ObjectGroup. - FT::Properties_var properties; - if (result == 0) - { - properties = this->replication_manager_->get_properties ( - the_object_group.in() - ACE_ENV_ARG_PARAMETER); - ACE_CHECK_RETURN (-1); - - // Get the MembershipStyle property. 
- FT::MembershipStyleValue membership_style; - result = this->get_membership_style (properties.in(), membership_style); - if (result != 0) - { - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not extract MembershipStyle from properties on " - "ObjectGroup with id <%Q>.\n"), - fault_event_desc.object_group_id), - -1); - } - else - { - fault_event_desc.membership_style = membership_style; -#if (TAO_DEBUG_LEVEL_NEEDED == 1) - if (TAO_debug_level > 6) -#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type: " - "MembershipStyleValue = <%d>"), - fault_event_desc.membership_style - )); - } - } - - // Get the ReplicationStyle property. - FT::ReplicationStyleValue replication_style; - result = this->get_replication_style (properties.in(), replication_style); - if (result != 0) - { - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not extract ReplicationStyle from properties on " - "ObjectGroup with id <%Q>.\n"), - fault_event_desc.object_group_id), - -1); - } - else - { - fault_event_desc.replication_style = replication_style; -#if (TAO_DEBUG_LEVEL_NEEDED == 1) - if (TAO_debug_level > 6) -#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type: " - "ReplicationStyleValue = <%d>"), - fault_event_desc.replication_style - )); - } - } - - // Get the MinimumNumberReplicas property. 
- FT::MinimumNumberReplicasValue minimum_number_replicas; - result = this->get_minimum_number_replicas ( - properties.in(), minimum_number_replicas); - if (result != 0) - { - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not extract MinimumNumberReplicas from properties on " - "ObjectGroup with id <%Q>.\n"), - fault_event_desc.object_group_id), - -1); - } - else - { - fault_event_desc.minimum_number_replicas = minimum_number_replicas; -#if (TAO_DEBUG_LEVEL_NEEDED == 1) - if (TAO_debug_level > 6) -#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type: " - "MinimumNumberReplicas = <%d>"), - fault_event_desc.minimum_number_replicas - )); - } - } - - // Get the InitialNumberReplicas property. - FT::InitialNumberReplicasValue initial_number_replicas; - result = this->get_initial_number_replicas ( - properties.in(), initial_number_replicas); - if (result != 0) - { - ACE_ERROR_RETURN ((LM_ERROR, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event: " - "Could not extract InitialNumberReplicas from properties on " - "ObjectGroup with id <%Q>.\n"), - fault_event_desc.object_group_id), - -1); - } - else - { - fault_event_desc.initial_number_replicas = initial_number_replicas; -#if (TAO_DEBUG_LEVEL_NEEDED == 1) - if (TAO_debug_level > 6) -#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::validate_event_type: " - "InitialNumberReplicas = <%d>"), - fault_event_desc.initial_number_replicas - )); - } - } - } - - // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or - // SEMI_ACTIVE, we can see if it was the primary replica that - // failed. 
- if ((result == 0) && - (fault_event_desc.replication_style == FT::COLD_PASSIVE || - fault_event_desc.replication_style == FT::WARM_PASSIVE || - fault_event_desc.replication_style == FT::SEMI_ACTIVE)) - { - result = this->is_primary_member ( - the_object_group.in(), - fault_event_desc.location, - fault_event_desc.object_is_primary); - } - + result = this->single_replica_failure (fault_event_desc); } #endif /* (INTEGRATED_WITH_REPLICATION_MANAGER == 1) */ @@ -338,90 +139,68 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::analyze_fault_event ( if (TAO_debug_level > 6) #endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT("Getting the location as a string.\n") - )); - - // Get the location as a string. - ACE_CString loc_as_string; - FT::Location_var & loc = fault_event_desc.location; - - for (CORBA::ULong li = 0; li < loc->length(); ++li) - { - if (li > 0) loc_as_string += "/"; - // Assume only the "id" field of the CosNaming::Name is used. - loc_as_string += CORBA::string_dup (loc[li].id); - } - - if (fault_event_desc.all_at_location_failed == 1) - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "All objects at location <%s> failed.\n"), - loc_as_string.c_str() - )); - } - - if (fault_event_desc.all_of_type_at_location_failed == 1) - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "All objects of type <%s> at location <%s> failed.\n"), - fault_event_desc.type_id.in(), - loc_as_string.c_str() - )); - } - - if (fault_event_desc.object_at_location_failed == 1) - { - ACE_DEBUG ((LM_DEBUG, - ACE_TEXT ( - "Replica of type <%s> with ObjectGroupId <%Q> " - "at location <%s> failed.\n"), - fault_event_desc.type_id.in(), - fault_event_desc.object_group_id, - loc_as_string.c_str() - )); - } + fault_event_desc.dump (); } return result; } // Extract a string type_id from CORBA::Any. -// Caller owns the returned string. -char* -TAO::FT_ReplicationManagerFaultAnalyzer::extract_type_id (const CORBA::Any& val) +// Caller owns the string returned via type_id.
+int TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id ( + const CORBA::Any& val, FT::TypeId_out type_id) { const char* type_id_value; if ((val >>= type_id_value) == 0) { - ACE_ERROR ((LM_ERROR, + ACE_ERROR_RETURN ((LM_ERROR, ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::extract_type_id: " - "Could not extract TypeId value from any.\n") - )); - return 0; + "TAO::FT_ReplicationManagerFaultAnalyzer::get_type_id: " + "Could not extract TypeId value from any.\n")), + -1); } - return CORBA::string_dup (type_id_value); + std::cout << std::endl << std::endl << std::endl + << type_id_value + << std::endl << std::endl << std::endl; + + // Make a deep copy of the TypeId string. + type_id = CORBA::string_dup (type_id_value); + return 0; } // Extract the ObjectGroupId from CORBA::Any. -FT::ObjectGroupId -TAO::FT_ReplicationManagerFaultAnalyzer::extract_object_group_id (const CORBA::Any& val) +int TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id ( + const CORBA::Any& val, FT::ObjectGroupId& id) { - FT::ObjectGroupId id = (FT::ObjectGroupId)0; - if ((val >>= id) == 0) + FT::ObjectGroupId temp_id = (FT::ObjectGroupId)0; + if ((val >>= temp_id) == 0) { - ACE_ERROR ((LM_ERROR, + ACE_ERROR_RETURN ((LM_ERROR, ACE_TEXT ( - "TAO::FT_ReplicationManagerFaultAnalyzer::extract_object_group_id: " - "Could not extract ObjectGroupId value from any.\n") - )); + "TAO::FT_ReplicationManagerFaultAnalyzer::get_object_group_id: " + "Could not extract ObjectGroupId value from any.\n")), + -1); } + id = temp_id; + return 0; +} - return id; +int TAO::FT_ReplicationManagerFaultAnalyzer::get_location ( + const CORBA::Any& val, FT::Location_out location) +{ + const FT::Location* temp_loc; + if ((val >>= temp_loc) == 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::get_location: " + "Could not extract Location value from fault event.\n")), + -1); + } + // Make a deep copy of the Location. 
+ ACE_NEW_RETURN (location, FT::Location (*temp_loc), -1); + return 0; } // @@ -562,9 +341,50 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::get_initial_number_replicas ( return result; } +int TAO::FT_ReplicationManagerFaultAnalyzer::get_factories ( + const FT::Properties & properties, + FT::FactoryInfos_out factories) +{ + FT::Name prop_name (1); + prop_name.length (1); + prop_name[0].id = CORBA::string_dup (FT::FT_FACTORIES); + int result = 0; + + FT::FactoryInfos_var temp_factories; + FT::Value value; + if (TAO_PG::get_property_value (prop_name, properties, value) == 1) + { + if ((value >>= temp_factories) == 0) + { + ACE_ERROR ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: " + "Could not extract Factories from properties.\n") + )); + result = -1; + } + else + { + // Make a deep copy of the Factories. + ACE_NEW_RETURN (factories, FT::FactoryInfos (temp_factories.in()), -1); + result = 0; + } + } + else + { + ACE_ERROR ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::get_factories: " + "Could not find Factories property.\n") + )); + result = -1; + } + return result; +} + int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member ( - const FT::ObjectGroup_ptr iogr, - const FT::Location_var & location, + FT::ObjectGroup_ptr iogr, + const FT::Location & location, int & object_is_primary) { @@ -615,7 +435,7 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member ( // Get the object reference of the failed member. CORBA::Object_var failed_obj = this->replication_manager_->get_member_ref ( - iogr, location.in() ACE_ENV_ARG_PARAMETER); + iogr, location ACE_ENV_ARG_PARAMETER); ACE_TRY_CHECK; if (CORBA::is_nil (failed_obj.in())) { @@ -669,3 +489,459 @@ int TAO::FT_ReplicationManagerFaultAnalyzer::is_primary_member ( return result; } + +// Handle a single replica failure. 
+int TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure ( + TAO::FT_FaultEventDescriptor & fault_event_desc) +{ + int result = 0; + FT::ObjectGroup_var the_object_group = FT::ObjectGroup::_nil(); + FT::Properties_var properties; + + ACE_TRY_NEW_ENV + { + // Get the object group reference based on the ObjectGroupId. + the_object_group = + this->replication_manager_->get_object_group_ref_from_id ( + fault_event_desc.object_group_id + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + + // This should not happen, but let us be safe. + if (CORBA::is_nil (the_object_group.in())) + { + ACE_ERROR ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not get ObjectGroup reference from ObjectGroupId: <%Q>.\n"), + fault_event_desc.object_group_id + )); + ACE_THROW (PortableGroup::ObjectGroupNotFound ()); + } + + // Get the properties associated with this ObjectGroup. + properties = this->replication_manager_->get_properties ( + the_object_group.in() + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + } + ACE_CATCHANY + { + ACE_PRINT_EXCEPTION ( + ACE_ANY_EXCEPTION, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: ") + ); + result = -1; + } + ACE_ENDTRY; + ACE_CHECK_RETURN (-1); + + if (result == 0) + { + // Get the MembershipStyle property. 
+ FT::MembershipStyleValue membership_style; + result = this->get_membership_style (properties.in(), membership_style); + if (result != 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not extract MembershipStyle from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + else + { + fault_event_desc.membership_style = membership_style; +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "MembershipStyleValue = <%d>"), + fault_event_desc.membership_style + )); + } + } + + // Get the ReplicationStyle property. + FT::ReplicationStyleValue replication_style; + result = this->get_replication_style (properties.in(), replication_style); + if (result != 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not extract ReplicationStyle from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + else + { + fault_event_desc.replication_style = replication_style; +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "ReplicationStyleValue = <%d>"), + fault_event_desc.replication_style + )); + } + } + + // Get the MinimumNumberReplicas property. 
+ FT::MinimumNumberReplicasValue minimum_number_replicas; + result = this->get_minimum_number_replicas ( + properties.in(), minimum_number_replicas); + if (result != 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not extract MinimumNumberReplicas from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + else + { + fault_event_desc.minimum_number_replicas = minimum_number_replicas; +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "MinimumNumberReplicas = <%d>"), + fault_event_desc.minimum_number_replicas + )); + } + } + + // Get the InitialNumberReplicas property. + FT::InitialNumberReplicasValue initial_number_replicas; + result = this->get_initial_number_replicas ( + properties.in(), initial_number_replicas); + if (result != 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not extract InitialNumberReplicas from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + else + { + fault_event_desc.initial_number_replicas = initial_number_replicas; +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "InitialNumberReplicas = <%d>"), + fault_event_desc.initial_number_replicas + )); + } + } + + // Get the Factories property. 
+ result = this->get_factories ( + properties.in(), + fault_event_desc.factories.out()); + if (result != 0) + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Could not extract Factories from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + else + { +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Got Factories from properties on " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id + )); + } + } + + } + + // If the ReplicationStyle is COLD_PASSIVE, WARM_PASSIVE, or + // SEMI_ACTIVE, we can see if it was the primary replica that + // failed. + if ((result == 0) && + (fault_event_desc.replication_style == FT::COLD_PASSIVE || + fault_event_desc.replication_style == FT::WARM_PASSIVE || + fault_event_desc.replication_style == FT::SEMI_ACTIVE)) + { +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Checking to see if failed replica was the primary for " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id + )); + } + result = this->is_primary_member ( + the_object_group.in(), + fault_event_desc.location.in(), + fault_event_desc.object_is_primary); + } + + // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure + // controlled) and the primary has faulted, establish a new primary. + // We get back a new object group. 
+ FT::ObjectGroup_var new_object_group; + if ((result == 0) && + (fault_event_desc.membership_style == FT::MEMB_INF_CTRL) && + (fault_event_desc.object_is_primary == 1)) + { +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Setting new primary for " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id + )); + } + result = this->set_new_primary ( + the_object_group.in(), + fault_event_desc, + new_object_group.out()); + the_object_group = new_object_group; + } + + // If the MembershipStyle is FT::MEMB_INF_CTRL (infrastructure + // controlled) and the number of remaining replicas is less than + // the MinimumNumberReplicas property, add new members. + // We get back a new object group. + if ((result == 0) && + (fault_event_desc.membership_style == FT::MEMB_INF_CTRL)) + { +#if (TAO_DEBUG_LEVEL_NEEDED == 1) + if (TAO_debug_level > 6) +#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */ + { + ACE_DEBUG ((LM_DEBUG, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::single_replica_failure: " + "Potentially adding new members to " + "ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id + )); + } + result = this->add_members ( + the_object_group.in(), + fault_event_desc, + new_object_group.out()); + the_object_group = new_object_group; + } + + return result; +} + +// Choose a new primary member for the ObjectGroup. +// Sets new_iogr and returns 0 on success. +// Returns -1 on failure. +int TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary ( + FT::ObjectGroup_ptr iogr, + TAO::FT_FaultEventDescriptor & fault_event_desc, + FT::ObjectGroup_out new_iogr) +{ + int result = 0; + new_iogr = FT::ObjectGroup::_nil (); + + ACE_TRY_NEW_ENV + { + // Remove the old primary member from the object group.
+ FT::ObjectGroup_var temp_iogr = + this->replication_manager_->remove_member ( + iogr, + fault_event_desc.location.in() + ACE_ENV_ARG_DECL); + ACE_TRY_CHECK; + + // Get the locations of the remaining members of the object group. + FT::Locations_var locations = + this->replication_manager_->locations_of_members ( + temp_iogr.in() + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + + // Choose the first location as our new primary location. + if (locations->length() >= 1) + { + new_iogr = this->replication_manager_->set_primary_member ( + temp_iogr.in(), + (*locations)[0] + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + } + else + { + ACE_ERROR_RETURN ((LM_ERROR, + ACE_TEXT ( + "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: " + "No locations remaining in ObjectGroup with id <%Q>.\n"), + fault_event_desc.object_group_id), + -1); + } + } + ACE_CATCHANY + { + ACE_PRINT_EXCEPTION ( + ACE_ANY_EXCEPTION, + "TAO::FT_ReplicationManagerFaultAnalyzer::set_new_primary: "); + result = -1; + } + ACE_ENDTRY; + + return result; +} + +// While the number of replicas in the object group is less than +// the MinimumNumberReplicas property, add new members. +// Sets new_iogr and returns 0 on success. +// Returns -1 on failure. +int TAO::FT_ReplicationManagerFaultAnalyzer::add_members ( + FT::ObjectGroup_ptr iogr, + TAO::FT_FaultEventDescriptor & fault_event_desc, + FT::ObjectGroup_out new_iogr) +{ + int result = 0; + new_iogr = FT::ObjectGroup::_nil (); + + ACE_TRY_NEW_ENV + { + // Get current number of members in object group + // (same as number of locations). + FT::Locations_var locations = + this->replication_manager_->locations_of_members ( + iogr + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + CORBA::ULong num_members = locations->length(); + + // If it is less than the MinimumNumberReplicas property, add + // new members. + if (num_members < fault_event_desc.minimum_number_replicas) + { + //@@ To create a member, we need to know the ObjectGroup, + // Location, TypeId, and Criteria.
+ + // Get the factory registry from the Replication Manager. + PortableGroup::Criteria fake_criteria; + PortableGroup::FactoryRegistry_var factory_registry = + this->replication_manager_->get_factory_registry ( + fake_criteria ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + + // Get the list of factories for the type of the failed replica. + PortableGroup::FactoryInfos_var factories_by_type = + factory_registry->list_factories_by_type ( + fault_event_desc.type_id.in() ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + + // + // Build a set of locations of factories for this type that we + // can use to create new members (i.e., at locations where + // members do not currently exist). + // + FT_Location_Set valid_locations; + + // For each factory that can be used for this type... + for (CORBA::ULong f=0; flength(); ++f) + { + // ...insert its location into valid_locations set. + valid_locations.insert (factories_by_type[f].the_location); + } + + // Now remove any locations where members already exist. + for (CORBA::ULong m=0; mreplication_manager_->create_member ( + iogr, + good_location.in(), + fault_event_desc.type_id.in(), + fake_criteria + ACE_ENV_ARG_PARAMETER); + ACE_TRY_CHECK; + + // Stop adding members when we reach the value of the + // MinimumNumberReplicas property. + if (num_members++ >= fault_event_desc.minimum_number_replicas) + break; + } + + } + } + ACE_CATCHANY + { + ACE_PRINT_EXCEPTION ( + ACE_ANY_EXCEPTION, + "TAO::FT_ReplicationManagerFaultAnalyzer::add_members: "); + result = -1; + } + ACE_ENDTRY; + + return result; +} + +// Template instantiations. 
+#if defined (ACE_HAS_EXPLICIT_TEMPLATE_INSTANTIATION) + +template class ACE_Unbounded_Set; +template class ACE_Unbounded_Set_Iterator; + +#elif defined (ACE_HAS_TEMPLATE_INSTANTIATION_PRAGMA) + +#pragma instantiate ACE_Unbounded_Set +#pragma instantiate ACE_Unbounded_Set_Iterator + +#endif /* ACE_HAS_EXPLICIT_TEMPLATE_INSTANTIATION */ + diff --git a/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.h b/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.h index 48d8bc4ef33..cc7aabb95aa 100755 --- a/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.h +++ b/TAO/orbsvcs/FT_ReplicationManager/FT_ReplicationManagerFaultAnalyzer.h @@ -25,12 +25,19 @@ #include "orbsvcs/FT_ReplicationManager/FT_DefaultFaultAnalyzer.h" #include "orbsvcs/FT_CORBAC.h" +// Forward declarations. +template class ACE_Unbounded_Set; +template class ACE_Unbounded_Set_Iterator; + namespace TAO { /////////////////////// // Forward declarations class FT_ReplicationManager; + struct FT_FaultEventDescriptor; + + typedef ACE_Unbounded_Set FT_Location_Set; /** * Replication Manager's fault analyzer. @@ -90,33 +97,79 @@ namespace TAO protected: /// Helper functions for fault analysis. - char* extract_type_id (const CORBA::Any& val); - FT::ObjectGroupId extract_object_group_id (const CORBA::Any& val); + // Extract the type id from a CORBA any. + int get_type_id (const CORBA::Any& val, FT::TypeId_out type_id); + + // Extract the ObjectGroupId from a CORBA any. + int get_object_group_id (const CORBA::Any& val, FT::ObjectGroupId& id); + + // Extract the FT::Location from a CORBA any. + int get_location (const CORBA::Any& val, FT::Location_out location); + + // Get the MembershipStyle property. int get_membership_style ( const FT::Properties & properties, FT::MembershipStyleValue & membership_style); + // Get the ReplicationStyle property. 
int get_replication_style ( const FT::Properties & properties, FT::ReplicationStyleValue & replication_style); + // Get the MinimumNumberReplicas property. int get_minimum_number_replicas ( const FT::Properties & properties, FT::MinimumNumberReplicasValue & minimum_number_replicas); + // Get the InitialNumberReplicas property. int get_initial_number_replicas ( const FT::Properties & properties, FT::InitialNumberReplicasValue & initial_number_replicas); - // Is the replica at location the primary member of iogr? - // Sets is_primary and returns 0 on success. + // Get the Factories property. + int get_factories ( + const FT::Properties & properties, + FT::FactoryInfos_out factories); + + // Handle a single replica failure. + int single_replica_failure ( + TAO::FT_FaultEventDescriptor & fault_event_desc); + + // Handle a location failure. + int location_failure ( + TAO::FT_FaultEventDescriptor & fault_event_desc); + + // Handle a type at location failure. + int type_failure ( + TAO::FT_FaultEventDescriptor & fault_event_desc); + + // Is the replica at location the primary member of its ObjectGroup? + // Sets object_is_primary and returns 0 on success. // Returns -1 on failure. int is_primary_member ( const FT::ObjectGroup_ptr iogr, - const FT::Location_var & location, + const FT::Location & location, int & object_is_primary); + // Choose a new primary member for the ObjectGroup. + // Sets new_iogr and returns 0 on success. + // Returns -1 on failure. + int set_new_primary ( + FT::ObjectGroup_ptr iogr, + TAO::FT_FaultEventDescriptor & fault_event_desc, + FT::ObjectGroup_out new_iogr); + + // While the number of replicas in the object group is less than + // the MinimumNumberReplicas property, add new members. + // Sets new_iogr and returns 0 on success. + // Returns -1 on failure. + int add_members ( + FT::ObjectGroup_ptr iogr, + TAO::FT_FaultEventDescriptor & fault_event_desc, + FT::ObjectGroup_out new_iogr); + + /////////////// // Data Members private: -- cgit v1.2.1