summaryrefslogtreecommitdiff
path: root/TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp')
-rwxr-xr-xTAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp407
1 files changed, 374 insertions, 33 deletions
diff --git a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp
index 40f54dadb21..3662e73ea01 100755
--- a/TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp
+++ b/TAO/orbsvcs/FT_ReplicationManager/FT_FaultConsumer.cpp
@@ -18,6 +18,7 @@
#include "orbsvcs/FT_ReplicationManager/FT_FaultConsumer.h"
#include "orbsvcs/FT_ReplicationManagerC.h"
+#include <tao/debug.h>
ACE_RCSID (FT_FaultConsumer,
FT_FaultConsumer,
@@ -76,11 +77,10 @@ int TAO::FT_FaultConsumer::init (
//@@ For now, let's try just activating it in the POA.
// Activate this consumer in the POA.
- PortableServer::ObjectId_var oid =
- this->poa_->activate_object (this ACE_ENV_ARG_PARAMETER);
+ this->object_id_ = this->poa_->activate_object (this ACE_ENV_ARG_PARAMETER);
ACE_CHECK_RETURN (-1);
CORBA::Object_var obj =
- this->poa_->id_to_reference (oid.in() ACE_ENV_ARG_PARAMETER);
+ this->poa_->id_to_reference (this->object_id_.in() ACE_ENV_ARG_PARAMETER);
ACE_CHECK_RETURN (-1);
// Narrow it to CosNotifyComm::StructuredPushConsumer.
@@ -114,10 +114,7 @@ int TAO::FT_FaultConsumer::fini (ACE_ENV_SINGLE_ARG_DECL)
// should the application do that?
//
// Deactivate ourselves from the POA.
- PortableServer::ObjectId_var oid =
- this->poa_->servant_to_id (this ACE_ENV_ARG_PARAMETER);
- ACE_CHECK_RETURN (-1);
- this->poa_->deactivate_object (oid.in() ACE_ENV_ARG_PARAMETER);
+ this->poa_->deactivate_object (this->object_id_.in() ACE_ENV_ARG_PARAMETER);
ACE_CHECK_RETURN (-1);
this->consumer_ref_ = CosNotifyComm::StructuredPushConsumer::_nil ();
@@ -142,46 +139,123 @@ size_t TAO::FT_FaultConsumer::notifications () const
////////////////
// CORBA methods
+// Receive and process an incoming fault event from the Fault Notifier.
+// First, we validate the event to make sure it is something we can
+// handle. Then, we analyze it.
+// If it is not an event we can handle, we simply log the error and
+// drop the event.
void TAO::FT_FaultConsumer::push_structured_event (
const CosNotification::StructuredEvent &event
ACE_ENV_ARG_DECL_WITH_DEFAULTS
)
ACE_THROW_SPEC ((CORBA::SystemException, CosEventComm::Disconnected))
{
+ // Debugging support.
this->notifications_ += 1;
- ACE_DEBUG ((LM_DEBUG,
- "TAO::FT_FaultConsumer: Received Fault notification(%d):\n"
- "TAO::FT_FaultConsumer: Header EventType domain: %s\n"
- "TAO::FT_FaultConsumer: Header EventType type: %s\n"
- "TAO::FT_FaultConsumer: Header EventName: %s\n",
- ACE_static_cast (unsigned int, this->notifications_),
- ACE_static_cast (const char *, event.header.fixed_header.event_type.domain_name),
- ACE_static_cast (const char *, event.header.fixed_header.event_type.type_name),
- ACE_static_cast (const char *, event.header.fixed_header.event_name)
+#if (TAO_DEBUG_LEVEL_NEEDED == 1)
+ if (TAO_debug_level > 1)
+#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::push_structured_event: "
+ "Received Fault notification(%d):\n"),
+ ACE_static_cast (unsigned int, this->notifications_)
));
+ }
- const CosNotification::FilterableEventBody & filterable =
- event.filterable_data;
+ int result = 0;
- size_t propertyCount = filterable.length ();
- for (size_t nProp = 0; nProp < propertyCount; ++nProp)
+ // Make sure it is an event type we can handle.
+ if (result == 0)
{
- const CosNotification::Property & property = filterable[nProp];
- ACE_DEBUG ((LM_DEBUG,
- "TAO::FT_FaultConsumer: Property Name: %s\n",
- ACE_static_cast (const char *, property.name)
+ result = this->validate_event_type (event);
+ ACE_CHECK;
+ if (result != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::push_structured_event: "
+ "Received invalid fault event type.\n")
));
+ }
}
- //TODO: Finish the implementation of push_structured_event() to:
- // - analyze the fault
- // - if it was a primary fault, set a new primary replica
- // - if the number of replicas falls below MinimumNumberReplicas
- // property and membership style is Infrastructure-Controlled,
- // create and add a new member.
+ // Analyze the event.
+ if (result == 0)
+ {
+ TAO::FT_FaultEventDescriptor fault_event_desc;
+ result = this->analyze_fault_event (
+ event,
+ fault_event_desc
+ ACE_ENV_ARG_PARAMETER);
+ ACE_CHECK_RETURN (-1);
+ if (result != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::push_structured_event: "
+ "Could not analyze fault event.\n")
+ ));
+ }
+ // Debugging support.
+#if (TAO_DEBUG_LEVEL_NEEDED == 1)
+ if (TAO_debug_level > 6)
+#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT("Getting the location as a string.\n")
+ ));
+
+ // Get the location as a string.
+ ACE_CString loc_as_string;
+ FT::Location_var & loc = fault_event_desc.location;
+
+ for (CORBA::ULong li = 0; li < loc->length(); ++li)
+ {
+ if (li > 0) loc_as_string += "/";
+ // Assume only the "id" field of the CosNaming::Name is used.
+ loc_as_string += CORBA::string_dup (loc[li].id);
+ }
+
+ if (fault_event_desc.all_at_location_failed == 1)
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT (
+ "All objects at location <%s> failed.\n"),
+ loc_as_string.c_str()
+ ));
+ }
+
+ if (fault_event_desc.all_of_type_at_location_failed == 1)
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT (
+ "All objects of type <%s> at location <%s> failed.\n"),
+ fault_event_desc.type_id.in(),
+ loc_as_string.c_str()
+ ));
+ }
+
+ if (fault_event_desc.object_at_location_failed == 1)
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT (
+ "Replica of type <%s> with ObjectGroupId <%Q> "
+ "at location <%s> failed.\n"),
+ fault_event_desc.type_id.in(),
+ fault_event_desc.object_group_id,
+ loc_as_string.c_str()
+ ));
+ }
+ }
+ }
+
+ return;
}
+
void TAO::FT_FaultConsumer::offer_change (
const CosNotification::EventTypeSeq & added,
const CosNotification::EventTypeSeq & removed
@@ -190,8 +264,8 @@ void TAO::FT_FaultConsumer::offer_change (
ACE_THROW_SPEC ((CORBA::SystemException, CosNotifyComm::InvalidEventType))
{
ACE_DEBUG ((LM_DEBUG,
- "TAO::FT_FaultConsumer::offer_change() call ignored.\n"
- ));
+ ACE_TEXT("TAO::FT_FaultConsumer::offer_change() call ignored.\n")
+ ));
}
void TAO::FT_FaultConsumer::disconnect_structured_push_consumer (
@@ -201,7 +275,274 @@ void TAO::FT_FaultConsumer::disconnect_structured_push_consumer (
{
//TODO: For now, we are just ignoring the disconnect callback.
ACE_DEBUG ((LM_DEBUG,
- "TAO::FT_FaultConsumer::disconnect_structured_push_consumer() call ignored.\n"
+ ACE_TEXT("TAO::FT_FaultConsumer::disconnect_structured_push_consumer() "
+ "call ignored.\n")
+ ));
+}
+
+//////////////////
+// Private methods
+
+// Validate the event to make sure it is one we can handle.
+// If it is not an event we can handle, this function logs the error
+// and returns -1.
+int TAO::FT_FaultConsumer::validate_event_type (
+ const CosNotification::StructuredEvent & event)
+{
+ int result = 0;
+
+ // CORBA 3.0.2, section 23.4.5.1 states:
+ //
+ // The fault management specification defines one event type:
+ // ObjectCrashFault. As the name suggests, this event is
+ // generated by a Fault Detector when it detects that an object
+ // has crashed.
+
+ // So, the event header's event_type.domain_name must be "FT_CORBA"
+ // and the event header's event_type.type_name must be "ObjectCrashFault".
+ CORBA::String_var domain_name = CORBA::string_dup (
+ event.header.fixed_header.event_type.domain_name);
+ CORBA::String_var type_name = CORBA::string_dup (
+ event.header.fixed_header.event_type.type_name);
+ CORBA::String_var event_name = CORBA::string_dup (
+ event.header.fixed_header.event_name);
+
+ if (result == 0)
+ {
+ //TODO: Get rid of magic strings (though they come from the spec).
+ if (ACE_OS::strcmp (domain_name.in(), "FT_CORBA") != 0 ||
+ ACE_OS::strcmp (type_name.in(), "ObjectCrashFault") != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::validate_event_type: "
+ "Received invalid event type.\n"
+ "EventType domain: <%s>\n"
+ "EventType type: <%s>\n"
+ "EventName: <%s>\n"),
+ domain_name.in(),
+ type_name.in(),
+ event_name.in()
+ ));
+ result = -1;
+ }
+ }
+
+ // CORBA 3.0.2, section 23.4.5.1 also states:
+ //
+ // The filterable_data part of the event body contains the
+ // identity of the crashed object as four name-value pairs: the
+ // fault tolerance domain identifier, the member’s location
+ // identifier, the repository identifier and the object group
+ // identifier. The Fault Notifier filters events based on the
+ // domain_name, the type_name, and the four identifiers. All
+ // other fields of the structured event may be set to null.
+ //
+ // The Fault Detector always sets the following fault event
+ // fields: domain_name, type_name, FTDomainId, and Location.
+ //
+ // So, at least "FTDomainId" and "Location" must be present:
+ if (result == 0)
+ {
+ if (event.filterable_data.length () >= 2)
+ {
+ // Check for FTDomainId.
+ if (ACE_OS::strcmp (
+ event.filterable_data[0].name.in(), "FTDomainId") != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::validate_event_type: "
+ "Received invalid structured event.\n"
+ "filterable_data[0] must be \"FTDomainId\", not \"%s\"\n"),
+ event.filterable_data[0].name.in()
+ ));
+ result = -1;
+ }
+ else if (ACE_OS::strcmp (
+ event.filterable_data[1].name.in(), "Location") != 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::validate_event_type: "
+ "Received invalid structured event.\n"
+ "filterable_data[1] must be \"Location\", not \"%s\"\n"),
+ event.filterable_data[1].name.in()
+ ));
+ result = -1;
+ }
+ }
+ else
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::validate_event_type: "
+ "Received invalid structured event.\n"
+ "There must be at least two name/value pairs in "
+ "the filterable_data field, for \"FTDomainId\" and \"Location\".\n")
+ ));
+ result = -1;
+ }
+ }
+
+ return result;
+}
+
+/// Analyze a fault event.
+int TAO::FT_FaultConsumer::analyze_fault_event (
+ const CosNotification::StructuredEvent & event,
+ TAO::FT_FaultEventDescriptor & fault_event_desc
+ ACE_ENV_ARG_DECL_WITH_DEFAULTS
+ )
+ ACE_THROW_SPEC ((CORBA::SystemException, CosEventComm::Disconnected))
+{
+ int result = 0;
+
+ const CosNotification::FilterableEventBody & filterable =
+ event.filterable_data;
+ CORBA::ULong item_count = filterable.length ();
+#if (TAO_DEBUG_LEVEL_NEEDED == 1)
+ if (TAO_debug_level > 6)
+#endif /* (TAO_DEBUG_LEVEL_NEEDED == 1) */
+ {
+ for (CORBA::ULong n_prop = 0; n_prop < item_count; ++n_prop)
+ {
+ ACE_DEBUG ((LM_DEBUG,
+ ACE_TEXT("TAO::FT_FaultConsumer::analyze_fault_event: "
+ "Property Name: <%s>\n"),
+ filterable[n_prop].name.in()
+ ));
+ }
+ }
+
+ //
+ // Populate the TAO::FT_FaultEventDescriptor structure from the
+ // properties in the event.
+ //
+
+ // Extract the location.
+ if (result == 0)
+ {
+ FT::Location* temp_loc;
+ if ((filterable[1].value >>= temp_loc) == 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::analyze_fault_event: "
+ "Could not extract Location value from fault event.\n")
+ ));
+ result = -1;
+ }
+ else
+ {
+ // Make a deep copy of the Location.
+ FT::Location* copy_loc = 0;
+ ACE_NEW_RETURN (copy_loc, FT::Location (*temp_loc), -1);
+ fault_event_desc.location = copy_loc;
+ }
+ }
+
+ // CORBA 3.0.2, section 23.4.5.1 states:
+ //
+ // The fault detector may or may not set the TypeId and
+ // ObjectGroupId fields with the following interpretations:
+ // - Neither is set if all objects at the given location have failed.
+ // - TypeId is set and ObjectGroupId is not set if all objects at
+ // the given location with the given type have failed.
+ // - Both are set if the member with the given ObjectGroupId at the
+ // given location has failed.
+ if (result == 0)
+ {
+ // All objects at location failed.
+ if (item_count == 2)
+ {
+ fault_event_desc.all_at_location_failed = 1;
+ }
+
+ // All objects of type at location failed.
+ if (item_count == 3)
+ {
+ fault_event_desc.all_of_type_at_location_failed = 1;
+ fault_event_desc.type_id = this->extract_type_id (filterable[2].value);
+ }
+
+ // An object (replica) at a location failed.
+ if (item_count == 4)
+ {
+ fault_event_desc.object_at_location_failed = 1;
+ fault_event_desc.type_id = this->extract_type_id (filterable[2].value);
+ fault_event_desc.object_group_id =
+ this->extract_object_group_id (filterable[3].value);
+ }
+ }
+
+ //TODO: Finish the implementation of analyze_fault_event() to:
+ // - if the ReplicationStyle of the object group is FT::SEMI_ACTIVE,
+ // and if the failed replica was the primary replica, set a new
+ // primary replica
+ // - if the number of replicas falls below MinimumNumberReplicas
+ // property and MembershipStyle of the object group is FT::MEMB_INF_CTRL,
+ // create and add a new member.
+
+ //Q: How do we get the ObjectGroup from the ObjectGroupId?
+ //A: ??
+
+ //Q: Then, how do we get the properties of the ObjectGroup to
+ // figure out the ReplicationStyle, current primary,
+ // MinimumNumberReplicas, etc.?
+ //A: FT::PropertyManager::get_properties().
+
+ // Assuming we have the ObjectGroup reference...
+ FT::ObjectGroup_var the_object_group;
+
+ // Get the properties associated with this ObjectGroup.
+#if 0
+ if (result == 0)
+ {
+ FT::Properties_var properties =
+ this->replication_manager_->get_properties (
+ the_object_group.in()
+ ACE_ENV_ARG_PARAMETER);
+ ACE_CHECK_RETURN (-1);
+ }
+#endif
+
+ return result;
+}
+
+// Extract a string type_id from CORBA::Any.
+// Caller owns the returned string.
+char*
+TAO::FT_FaultConsumer::extract_type_id (const CORBA::Any& val)
+{
+ const char* type_id_value;
+ if ((val >>= type_id_value) == 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::extract_type_id: "
+ "Could not extract TypeId value from any.\n")
));
+ return 0;
+ }
+
+ return CORBA::string_dup (type_id_value);
+}
+
+// Extract the ObjectGroupId from CORBA::Any.
+FT::ObjectGroupId
+TAO::FT_FaultConsumer::extract_object_group_id (const CORBA::Any& val)
+{
+ FT::ObjectGroupId id = (FT::ObjectGroupId)0;
+ if ((val >>= id) == 0)
+ {
+ ACE_ERROR ((LM_ERROR,
+ ACE_TEXT (
+ "TAO::FT_FaultConsumer::extract_object_group_id: "
+ "Could not extract ObjectGroupId value from any.\n")
+ ));
+ }
+
+ return id;
}