summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: William R. Otte <wotte@dre.vanderbilt.edu> 2011-10-13 22:25:28 +0000
committer: William R. Otte <wotte@dre.vanderbilt.edu> 2011-10-13 22:25:28 +0000
commit: 381c3fa38c950cb48a9989f9a0521517e0fa46c9 (patch)
tree: 99059674857ab1a593f22a45fc940003f983005f
parent: ce54c89f04bab323c2ff59804f2a7185d0a4aa80 (diff)
download: ATCD-381c3fa38c950cb48a9989f9a0521517e0fa46c9.tar.gz
Thu Oct 13 22:24:56 UTC 2011 William R. Otte <wotte@dre.vanderbilt.edu>
* dance/Plan_Launcher/Plan_Launcher.cpp:
* tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h:
* tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp:

  Bugfixes.
-rw-r--r-- DAnCE/ChangeLog 8
-rw-r--r-- DAnCE/dance/Plan_Launcher/Plan_Launcher.cpp 54
-rw-r--r-- DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp 81
-rw-r--r-- DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h 2
4 files changed, 105 insertions, 40 deletions
diff --git a/DAnCE/ChangeLog b/DAnCE/ChangeLog
index e3c09703805..2c8e9de0ed2 100644
--- a/DAnCE/ChangeLog
+++ b/DAnCE/ChangeLog
@@ -1,3 +1,11 @@
+Thu Oct 13 22:24:56 UTC 2011 William R. Otte <wotte@dre.vanderbilt.edu>
+
+ * dance/Plan_Launcher/Plan_Launcher.cpp:
+ * tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h:
+ * tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp:
+
+ Bugfixes.
+
Thu Oct 13 22:19:49 UTC 2011 William R. Otte <wotte@dre.vanderbilt.edu>
* tools/Config_Handlers/XMLSchema/id_map.hpp:
diff --git a/DAnCE/dance/Plan_Launcher/Plan_Launcher.cpp b/DAnCE/dance/Plan_Launcher/Plan_Launcher.cpp
index b5dd70625ea..533ad371dad 100644
--- a/DAnCE/dance/Plan_Launcher/Plan_Launcher.cpp
+++ b/DAnCE/dance/Plan_Launcher/Plan_Launcher.cpp
@@ -11,6 +11,7 @@
#include "dance/Logger/Log_Macros.h"
#include "dance/Logger/Logger_Service.h"
#include "tools/Convert_Plan/Convert_Plan_Impl.h"
+#include "tools/Config_Handlers/XML_File_Intf.h"
#include "tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h"
#include "EM_Launcher.h"
@@ -80,9 +81,12 @@ usage(const ACE_TCHAR*)
ACE_TEXT ("NodeManager IOR for NM based deployment.\n")
ACE_TEXT ("\t--lm-ior <LocalityManager IOR>\t")
ACE_TEXT ("LocalityManager IOR for LM based deployment.\n")
- ACE_TEXT ("\t--manager-timeout <seconds>\t")
+ ACE_TEXT ("\t--manager-timeout <seconds>\t\n")
+ ACE_TEXT ("Number of seconds to wait for a valid Execution Manager reference.\n")
ACE_TEXT ("\t--domain-timeout <seconds>\t")
- ACE_TEXT ("Number of seconds to wait for a valid domain validation.\n")
+ ACE_TEXT ("Number of seconds to wait for a domain validation.\n")
+ ACE_TEXT ("\t--domain-file <cdd>\t")
+ ACE_TEXT ("The domain file that the plan launcher should use for domain validation\n")
/*
ACE_TEXT ("\nName Service Options\n")
ACE_TEXT ("\t--domain-nc [NC]\t\t)
@@ -157,6 +161,7 @@ parse_args(int argc, ACE_TCHAR *argv[], Options &options)
get_opt.long_option(ACE_TEXT("help"), 'h', ACE_Get_Opt::NO_ARG);
get_opt.long_option(ACE_TEXT("manager-timeout"), ACE_Get_Opt::ARG_REQUIRED);
get_opt.long_option(ACE_TEXT("domain-timeout"), ACE_Get_Opt::ARG_REQUIRED);
+ get_opt.long_option(ACE_TEXT("domain-file"), ACE_Get_Opt::ARG_REQUIRED);
int c;
ACE_CString s;
@@ -297,6 +302,17 @@ parse_args(int argc, ACE_TCHAR *argv[], Options &options)
options.lm_ior_ = get_opt.opt_arg ();
break;
}
+ if (ACE_OS::strcmp (get_opt.long_option (),
+ ACE_TEXT ("domain-file")) == 0)
+ {
+ DANCE_DEBUG (DANCE_LOG_MAJOR_DEBUG_INFO,
+ (LM_DEBUG, DLINFO
+ ACE_TEXT ("Plan_Launcher::parse_args - ")
+ ACE_TEXT ("Got domain file: %C\n"),
+ get_opt.opt_arg ()));
+ options.domain_file_ = get_opt.opt_arg ();
+ break;
+ }
if ((ACE_OS::strcmp (get_opt.long_option (),
ACE_TEXT ("manager-timeout")) == 0) ||
(ACE_OS::strcmp (get_opt.long_option (),
@@ -305,7 +321,7 @@ parse_args(int argc, ACE_TCHAR *argv[], Options &options)
DANCE_DEBUG (DANCE_LOG_MAJOR_DEBUG_INFO,
(LM_DEBUG, DLINFO
ACE_TEXT ("Plan_Launcher::parse_args - ")
- ACE_TEXT ("Got Manager Timeout value: %C"),
+ ACE_TEXT ("Got Manager Timeout value: %C\n"),
get_opt.opt_arg ()));
options.em_timeout_ = ACE_OS::atoi (get_opt.opt_arg ());
break;
@@ -872,6 +888,7 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[])
pl_base.reset (lm_pl);
}
+
Deployment::DeploymentPlan_var dp;
DANCE_DEBUG (DANCE_LOG_MAJOR_EVENT,
@@ -910,20 +927,41 @@ ACE_TMAIN (int argc, ACE_TCHAR *argv[])
if (options.domain_file_)
{
+ ::DAnCE::Config_Handlers::XML_File_Intf file (options.domain_file_);
+ file.add_search_path (ACE_TEXT ("DANCE_ROOT"), ACE_TEXT ("/docs/schema/"));
+ ::Deployment::Domain *plan = file.release_domain ();
+
+ if (!plan)
+ {
+ DANCE_ERROR (DANCE_LOG_ERROR,
+ (LM_ERROR, DLINFO
+ ACE_TEXT("PlanLauncher - Error - ")
+ ACE_TEXT("Error: Processing file <%C>\n"), options.domain_file_));
+ return false;
+ }
+
DAnCE_Domain_Validator_i validator (orb);
- validator.load_domain_from_file (options.domain_file_);
+ validator.load_domain (*plan);
ACE_Time_Value remaining_time (timeout - ACE_OS::gettimeofday ());
ACE_UINT64 usec;
- remaining_time.to_usec (usec);
+ timeout.to_usec (usec);
::DAnCE::Time_Value tv;
- tv.useconds (usec);
+ tv.seconds (options.em_timeout_);
CORBA::StringSeq_var late_nodes;
- validator.block_for_domain (tv, late_nodes.out ());
-
+ if (!validator.block_for_domain (tv, late_nodes.out ()))
+ {
+ DANCE_ERROR (DANCE_LOG_EMERGENCY,
+ (LM_ERROR, DLINFO
+ ACE_TEXT ("PlanLauncher - Error: ")
+ ACE_TEXT ("Not all nodes in domain ready\n")));
+ return 1;
+ }
}
+
+
switch (options.mode_)
{
case Options::LAUNCH:
diff --git a/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp b/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp
index 3eb9d53e889..240b738a3a5 100644
--- a/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp
+++ b/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.cpp
@@ -27,20 +27,22 @@ DAnCE_Domain_Validator_i::load_domain_from_file (const char * filename)
{
DANCE_ERROR (DANCE_LOG_ERROR,
(LM_ERROR, DLINFO
- ACE_TEXT("Node_Locator::process_cdd - ")
+ ACE_TEXT("DAnCE_Domain_Validator_i::load_domain_from_file - ")
ACE_TEXT("Error: Provided with nil filename\n")));
return false;
}
::DAnCE::Config_Handlers::XML_File_Intf file (filename);
+
file.add_search_path (ACE_TEXT ("DANCE_ROOT"), ACE_TEXT ("/docs/schema/"));
+
::Deployment::Domain *plan = file.release_domain ();
if (!plan)
{
DANCE_ERROR (DANCE_LOG_ERROR,
(LM_ERROR, DLINFO
- ACE_TEXT("Node_Locator::process_cdd - ")
+ ACE_TEXT("DAnCE_Domain_Validator_i::load_domain_from_file - ")
ACE_TEXT("Error: Processing file <%C>\n"), filename));
return false;
}
@@ -52,9 +54,8 @@ DAnCE_Domain_Validator_i::load_domain_from_file (const char * filename)
void
DAnCE_Domain_Validator_i::load_domain (const ::Deployment::Domain & domain)
{
- throw CORBA::NO_IMPLEMENT ();
- // this->domain_ = domain;
- // return this->create_node_table ();
+ this->domain_ = &domain;
+ this->create_node_table ();
}
bool
@@ -135,7 +136,7 @@ DAnCE_Domain_Validator_i::validate_node (const char * node_name)
{
DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
(LM_DEBUG, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node")
+ ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node - ")
ACE_TEXT ("Nil object refernece from string_to_object for node %C\n"),
node_name));
return false;
@@ -147,7 +148,7 @@ DAnCE_Domain_Validator_i::validate_node (const char * node_name)
{
DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
(LM_DEBUG, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node")
+ ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node - ")
ACE_TEXT ("Resolved object reference not valid for node %C\n"),
node_name));
return false;
@@ -157,7 +158,7 @@ DAnCE_Domain_Validator_i::validate_node (const char * node_name)
{
DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
(LM_DEBUG, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node")
+ ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node - ")
ACE_TEXT ("Caught CORBA Exception whilst resolving node %C: %C\n"),
node_name,
ex._info ().c_str ()));
@@ -167,7 +168,7 @@ DAnCE_Domain_Validator_i::validate_node (const char * node_name)
{
DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
(LM_DEBUG, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node")
+ ACE_TEXT ("DAnCE_Domain_Validator_i::validate_node - ")
ACE_TEXT ("Caught C++ exception whilst resolving node %C\n"),
node_name));
return false;
@@ -330,6 +331,11 @@ DAnCE_Domain_Validator_i::block_for_domain (const ::DAnCE::Time_Value & max_bloc
NODE_LIST untried_list;
NODE_LIST retry_list;
+ DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
+ (LM_DEBUG, DLINFO
+ ACE_TEXT ("DAnCE_Domain_Validator_i::block_for_domain - ")
+ ACE_TEXT ("Starting domain check\n")));
+
for (CORBA::ULong i = 0; i < this->domain_->node.length (); ++i)
{
untried_list.push_back (this->domain_->node[i].name.in ());
@@ -339,7 +345,7 @@ DAnCE_Domain_Validator_i::block_for_domain (const ::DAnCE::Time_Value & max_bloc
i != untried_list.end ();
++i)
{
- if (ACE_OS::gettimeofday () < timeout)
+ if (ACE_OS::gettimeofday () > timeout)
{
DANCE_ERROR (DANCE_LOG_ERROR,
(LM_ERROR, DLINFO
@@ -372,33 +378,46 @@ DAnCE_Domain_Validator_i::block_for_domain (const ::DAnCE::Time_Value & max_bloc
}
}
- for (NODE_LIST::iterator i = retry_list.begin ();
- i != retry_list.end ();
- ++i)
+ bool first = true;
+ // @@ TODO: We'll probably want a more intelligent quantum.
+ ACE_Time_Value retry (0, 1000000 / 4);
+
+ while ((ACE_OS::gettimeofday () < timeout) && retry_list.size ())
{
- if (ACE_OS::gettimeofday () < timeout)
+ if (!first)
{
- DANCE_ERROR (DANCE_LOG_ERROR,
- (LM_ERROR, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::block_for_domain - ")
- ACE_TEXT ("Timeout occurred while performing follow up validation\n")));
+ ACE_OS::sleep (retry);
+ }
+ else first = false;
- this->build_late_list (retry_list, late_nodes);
+ for (NODE_LIST::iterator i = retry_list.begin ();
+ i != retry_list.end ();
+ ++i)
+ {
+ if (ACE_OS::gettimeofday () > timeout)
+ {
+ DANCE_ERROR (DANCE_LOG_ERROR,
+ (LM_ERROR, DLINFO
+ ACE_TEXT ("DAnCE_Domain_Validator_i::block_for_domain - ")
+ ACE_TEXT ("Timeout occurred while performing follow up validation\n")));
- return false;
- }
+ this->build_late_list (retry_list, late_nodes);
- bool result = this->validate_node (i->c_str ());
+ return false;
+ }
- if (result)
- {
- DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
- (LM_DEBUG, DLINFO
- ACE_TEXT ("DAnCE_Domain_Validator_i::block_for_domain - ")
- ACE_TEXT ("Node %C is alive\n"),
- i->c_str ()));
- retry_list.erase (i);
- --i; // need to reposition the iterator so the next ++ will put it in the correct place
+ bool result = this->validate_node (i->c_str ());
+
+ if (result)
+ {
+ DANCE_DEBUG (DANCE_LOG_EVENT_TRACE,
+ (LM_DEBUG, DLINFO
+ ACE_TEXT ("DAnCE_Domain_Validator_i::block_for_domain - ")
+ ACE_TEXT ("Node %C is alive\n"),
+ i->c_str ()));
+ retry_list.erase (i);
+ --i; // need to reposition the iterator so the next ++ will put it in the correct place
+ }
}
}
diff --git a/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h b/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h
index 948f4db30d0..76ec8c95021 100644
--- a/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h
+++ b/DAnCE/tools/Domain_Validator/CORBA/CORBA_Domain_Validator_impl.h
@@ -77,7 +77,7 @@ public:
::CORBA::ORB_var orb_;
- ::Deployment::Domain_var domain_;
+ const ::Deployment::Domain *domain_;
// Maps node names to strings
typedef std::map < std::string, std::string > NODE_MAP;