summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--server-tools/instance-manager/commands.cc290
-rw-r--r--server-tools/instance-manager/commands.h104
-rw-r--r--server-tools/instance-manager/guardian.cc543
-rw-r--r--server-tools/instance-manager/guardian.h125
-rw-r--r--server-tools/instance-manager/instance.cc630
-rw-r--r--server-tools/instance-manager/instance.h171
-rw-r--r--server-tools/instance-manager/instance_map.cc217
-rw-r--r--server-tools/instance-manager/instance_map.h61
-rw-r--r--server-tools/instance-manager/instance_options.h1
-rw-r--r--server-tools/instance-manager/manager.cc154
-rw-r--r--server-tools/instance-manager/manager.h8
-rw-r--r--server-tools/instance-manager/user_map.cc20
12 files changed, 1368 insertions, 956 deletions
diff --git a/server-tools/instance-manager/commands.cc b/server-tools/instance-manager/commands.cc
index 15738f8ebb3..6cc1c8d3047 100644
--- a/server-tools/instance-manager/commands.cc
+++ b/server-tools/instance-manager/commands.cc
@@ -29,6 +29,7 @@
#include "guardian.h"
#include "instance_map.h"
#include "log.h"
+#include "manager.h"
#include "messages.h"
#include "mysqld_error.h"
#include "mysql_manager_error.h"
@@ -36,8 +37,11 @@
#include "priv.h"
#include "protocol.h"
+/**************************************************************************
+ {{{ Static functions.
+**************************************************************************/
-/*
+/**
modify_defaults_to_im_error -- a map of error codes of
mysys::modify_defaults_file() into Instance Manager error codes.
*/
@@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES,
ER_ACCESS_OPTION_FILE };
-/*
- Add a string to a buffer.
+/**
+ Parse version number from the version string.
SYNOPSIS
- put_to_buff()
- buff buffer to add the string
- str string to add
- position offset in the buff to add a string
+ parse_version_number()
+ version_str
+ version
+ version_size
DESCRIPTION
+ TODO
- Function to add a string to the buffer. It is different from
- store_to_protocol_packet, which is used in the protocol.cc.
- The last one also stores the length of the string in a special way.
- This is required for MySQL client/server protocol support only.
+ TODO: Move this function to Instance_options and parse version number
+ only once.
- RETURN
- 0 - ok
- 1 - error occured
+ NOTE: This function is used only in SHOW INSTANCE STATUS statement at the
+ moment.
*/
-static inline int put_to_buff(Buffer *buff, const char *str, uint *position)
-{
- uint len= strlen(str);
- if (buff->append(*position, str, len))
- return 1;
-
- *position+= len;
- return 0;
-}
-
-
static int parse_version_number(const char *version_str, char *version,
uint version_size)
{
@@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version,
return 0;
}
+/**************************************************************************
+ }}}
+**************************************************************************/
/**************************************************************************
Implementation of Instance_name.
@@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name)
Implementation of Show_instances.
**************************************************************************/
-/*
+/**
Implementation of SHOW INSTANCES statement.
Possible error codes:
@@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net)
Instance *instance;
Instance_map::Iterator iterator(instance_map);
- instance_map->guardian->lock();
instance_map->lock();
while ((instance= iterator.next()))
@@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net)
Buffer send_buf; /* buffer for packets */
uint pos= 0;
+ instance->lock();
+
const char *instance_name= instance->options.instance_name.str;
- const char *state_name= instance_map->get_instance_state_name(instance);
+ const char *state_name= instance->get_state_name();
if (store_to_protocol_packet(&send_buf, instance_name, &pos) ||
store_to_protocol_packet(&send_buf, state_name, &pos) ||
my_net_write(net, send_buf.buffer, pos))
{
err_status= TRUE;
- break;
}
+
+ instance->unlock();
+
+ if (err_status)
+ break;
}
instance_map->unlock();
- instance_map->guardian->unlock();
return err_status ? ER_OUT_OF_RESOURCES : 0;
}
@@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net)
Implementation of Flush_instances.
**************************************************************************/
-/*
+/**
Implementation of FLUSH INSTANCES statement.
Possible error codes:
@@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net)
int Flush_instances::execute(st_net *net, ulong connection_id)
{
- instance_map->guardian->lock();
- instance_map->lock();
-
- if (instance_map->is_there_active_instance())
- {
- instance_map->unlock();
- instance_map->guardian->unlock();
- return ER_THERE_IS_ACTIVE_INSTACE;
- }
-
- if (instance_map->flush_instances())
- {
- instance_map->unlock();
- instance_map->guardian->unlock();
+ if (Manager::flush_instances())
return ER_OUT_OF_RESOURCES;
- }
-
- instance_map->unlock();
- instance_map->guardian->unlock();
return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0;
}
/**************************************************************************
- Implementation of Abstract_instance_cmd.
+ Implementation of Instance_cmd.
**************************************************************************/
-Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg)
- :instance_name(instance_name_arg)
+Instance_cmd::Instance_cmd(const LEX_STRING *instance_name_arg):
+ instance_name(instance_name_arg)
{
/*
MT-NOTE: we can not make a search for Instance object here,
@@ -251,26 +232,39 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg
}
+/**************************************************************************
+ Implementation of Abstract_instance_cmd.
+**************************************************************************/
+
+Abstract_instance_cmd::Abstract_instance_cmd(
+ const LEX_STRING *instance_name_arg)
+ :Instance_cmd(instance_name_arg)
+{
+}
+
+
int Abstract_instance_cmd::execute(st_net *net, ulong connection_id)
{
int err_code;
+ Instance *instance;
instance_map->lock();
- {
- Instance *instance= instance_map->find(get_instance_name());
-
- if (!instance)
- {
- instance_map->unlock();
- return ER_BAD_INSTANCE_NAME;
- }
+ instance= instance_map->find(get_instance_name());
- err_code= execute_impl(net, instance);
+ if (!instance)
+ {
+ instance_map->unlock();
+ return ER_BAD_INSTANCE_NAME;
}
+ instance->lock();
instance_map->unlock();
+ err_code= execute_impl(net, instance);
+
+ instance->unlock();
+
if (!err_code)
err_code= send_ok_response(net, connection_id);
@@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg)
}
-/*
+/**
Implementation of SHOW INSTANCE STATUS statement.
Possible error codes:
@@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance)
char version_num_buf[MAX_VERSION_LENGTH];
uint pos= 0;
- const char *state_name;
+ const char *state_name= instance->get_state_name();
const char *version_tag= "unknown";
const char *version_num= "unknown";
- const char *mysqld_compatible_status;
-
- instance_map->guardian->lock();
- state_name= instance_map->get_instance_state_name(instance);
- mysqld_compatible_status= instance->is_mysqld_compatible() ? "yes" : "no";
- instance_map->guardian->unlock();
+ const char *mysqld_compatible_status=
+ instance->is_mysqld_compatible() ? "yes" : "no";
if (instance->options.mysqld_version)
{
-
if (parse_version_number(instance->options.mysqld_version, version_num_buf,
sizeof(version_num_buf)))
return ER_OUT_OF_RESOURCES;
@@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options(
}
-/*
+/**
Implementation of SHOW INSTANCE OPTIONS statement.
Possible error codes:
@@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg)
}
-/*
+/**
Implementation of START INSTANCE statement.
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
- ER_OUT_OF_RESOURCES Not enough resources to complete the operation
+ ER_INSTANCE_MISCONFIGURED The instance configuration is invalid
+ ER_INSTANCE_ALREADY_STARTED The instance is already started
+ ER_CANNOT_START_INSTANCE The instance could not have been started
+
+ TODO: as soon as this method operates only with Instance, we probably
+ should introduce a new method (execute_stop_instance()) in Instance and
+ just call it from here.
*/
int Start_instance::execute_impl(st_net * /* net */, Instance *instance)
{
- int err_code;
+ if (!instance->is_configured())
+ return ER_INSTANCE_MISCONFIGURED;
- if ((err_code= instance->start()))
- return err_code;
+ if (instance->is_active())
+ return ER_INSTANCE_ALREADY_STARTED;
+
+ if (instance->start_mysqld())
+ return ER_CANNOT_START_INSTANCE;
- if (!(instance->options.nonguarded))
- instance_map->guardian->guard(instance);
+ instance->reset_stat();
+ instance->set_state(Instance::NOT_STARTED);
return 0;
}
@@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg)
}
-/*
+/**
Implementation of STOP INSTANCE statement.
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
ER_OUT_OF_RESOURCES Not enough resources to complete the operation
+
+ TODO: as soon as this method operates only with Instance, we probably
+ should introduce a new method (execute_stop_instance()) in Instance and
+ just call it from here.
*/
int Stop_instance::execute_impl(st_net * /* net */, Instance *instance)
{
- int err_code;
+ if (!instance->is_active())
+ return ER_INSTANCE_IS_NOT_STARTED;
- if (!(instance->options.nonguarded))
- instance_map->guardian->stop_guard(instance);
+ instance->set_state(Instance::STOPPED);
- if ((err_code= instance->stop()))
- return err_code;
-
- return 0;
+ return instance->stop_mysqld() ? ER_STOP_INSTANCE : 0;
}
@@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id)
**************************************************************************/
Create_instance::Create_instance(const LEX_STRING *instance_name_arg)
- :instance_name(instance_name_arg)
+ :Instance_cmd(instance_name_arg)
{
}
-/*
+/**
This operation initializes Create_instance object.
SYNOPSIS
@@ -604,7 +604,7 @@ bool Create_instance::init(const char **text)
}
-/*
+/**
This operation parses CREATE INSTANCE options.
SYNOPSIS
@@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text)
}
-/*
+/**
Implementation of CREATE INSTANCE statement.
Possible error codes:
@@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text)
int Create_instance::execute(st_net *net, ulong connection_id)
{
int err_code;
+ Instance *instance;
/* Check that the name is valid and there is no instance with such name. */
@@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id)
return err_code;
}
+ instance= instance_map->find(get_instance_name());
+ DBUG_ASSERT(instance);
+
if ((err_code= create_instance_in_file(get_instance_name(), &options)))
{
- Instance *instance= instance_map->find(get_instance_name());
-
- if (instance)
- instance_map->remove_instance(instance); /* instance is deleted here. */
+ instance_map->remove_instance(instance); /* instance is deleted here. */
instance_map->unlock();
return err_code;
}
+ /*
+ CREATE INSTANCE must not lead to start instance, even if it guarded.
+
+ TODO: The problem however is that if Instance Manager restarts after
+ creating instance, the instance will be restarted (see also BUG#19718).
+ */
+
+ instance->set_state(Instance::STOPPED);
+
/* That's all. */
instance_map->unlock();
@@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id)
**************************************************************************/
Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
- :Abstract_instance_cmd(instance_name_arg)
+ :Instance_cmd(instance_name_arg)
{
}
-/*
+/**
Implementation of DROP INSTANCE statement.
Possible error codes:
@@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg)
ER_OUT_OF_RESOURCES Not enough resources to complete the operation
*/
-int Drop_instance::execute_impl(st_net * /* net */, Instance *instance)
+int Drop_instance::execute(st_net *net, ulong connection_id)
{
int err_code;
+ Instance *instance;
+
+ /* Lock Guardian, then Instance_map. */
+
+ instance_map->lock();
+
+ /* Find an instance. */
+
+ instance= instance_map->find(get_instance_name());
+
+ if (!instance)
+ {
+ instance_map->unlock();
+ return ER_BAD_INSTANCE_NAME;
+ }
+
+ instance->lock();
/* Check that the instance is offline. */
- if (instance_map->guardian->is_active(instance))
+ if (instance->is_active())
+ {
+ instance->unlock();
+ instance_map->unlock();
+
return ER_DROP_ACTIVE_INSTANCE;
+ }
+
+ /* Try to remove instance from the file. */
err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL,
get_instance_name()->str, MY_REMOVE_SECTION);
@@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance)
(const char *) get_instance_name()->str,
(const char *) Options::Main::config_file,
(int) err_code);
- }
- if (err_code)
+ instance->unlock();
+ instance_map->unlock();
+
return modify_defaults_to_im_error[err_code];
+ }
- /* Remove instance from the instance map hash and Guardian's list. */
+ /* Unlock the instance before destroy. */
- if (!instance->options.nonguarded)
- instance_map->guardian->stop_guard(instance);
+ instance->unlock();
- if ((err_code= instance->stop()))
- return err_code;
+ /*
+ Remove instance from the instance map
+ (the instance will be also destroyed here).
+ */
instance_map->remove_instance(instance);
- return 0;
-}
+ /* Unlock the instance map. */
+ instance_map->unlock();
+
+ /* That's all: send ok. */
-int Drop_instance::send_ok_response(st_net *net, ulong connection_id)
-{
if (net_send_ok(net, connection_id, "Instance dropped"))
return ER_OUT_OF_RESOURCES;
@@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg,
}
-/*
+/**
Implementation of SHOW INSTANCE LOG statement.
Possible error codes:
@@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files
}
-/*
+/**
Implementation of SHOW INSTANCE LOG FILES statement.
Possible error codes:
@@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance)
Implementation of Abstract_option_cmd.
**************************************************************************/
-/*
+/**
Instance_options_list -- a data class representing a list of options for
some instance.
*/
@@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text)
}
-/*
+/**
Correct the option file. The "skip" option is used to remove the found
option.
@@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option,
}
-/*
- Implementation of SET statement.
+/**
+ Lock Instance Map and call execute_impl().
Possible error codes:
ER_BAD_INSTANCE_NAME The instance with the given name does not exist
@@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name)
}
+/**
+ Skeleton implementation of option-management command.
+
+ MT-NOTE: Instance Map is locked before calling this operation.
+*/
int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
{
int err_code= 0;
@@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i);
+ bool instance_is_active;
+
lst->instance= instance_map->find(lst->get_instance_name());
if (!lst->instance)
return ER_BAD_INSTANCE_NAME;
- if (instance_map->guardian->is_active(lst->instance))
+ lst->instance->lock();
+ instance_is_active= lst->instance->is_active();
+ lst->instance->unlock();
+
+ if (instance_is_active)
return ER_INSTANCE_IS_ACTIVE;
}
@@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Instance_options_list *lst=
(Instance_options_list *) hash_element(&instance_options_map, i);
+ lst->instance->lock();
+
for (int j= 0; j < lst->options.get_size(); ++j)
{
Named_value option= lst->options.get_element(j);
@@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
break;
}
+ lst->instance->unlock();
+
if (err_code)
break;
}
@@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id)
Implementation of Set_option.
**************************************************************************/
-/*
+/**
This operation parses SET options.
SYNOPSIS
@@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option)
Implementation of Unset_option.
**************************************************************************/
-/*
+/**
This operation parses UNSET options.
SYNOPSIS
@@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text)
}
-/*
+/**
Implementation of UNSET statement.
Possible error codes:
diff --git a/server-tools/instance-manager/commands.h b/server-tools/instance-manager/commands.h
index 8768aaab121..9b5d27b0982 100644
--- a/server-tools/instance-manager/commands.h
+++ b/server-tools/instance-manager/commands.h
@@ -30,7 +30,7 @@
#endif
-/*
+/**
Print all instances of this instance manager.
Grammar: SHOW INSTANCES
*/
@@ -50,7 +50,7 @@ private:
};
-/*
+/**
Reread configuration file and refresh internal cache.
Grammar: FLUSH INSTANCES
*/
@@ -66,11 +66,50 @@ public:
};
-/*
+/**
+ Base class for Instance-specific commands
+ (commands that operate on one instance).
+
+ Instance_cmd extends Command class by:
+ - an attribute for storing instance name;
+ - code to initialize instance name in constructor;
+ - an accessor to get instance name.
+*/
+
+class Instance_cmd : public Command
+{
+public:
+ Instance_cmd(const LEX_STRING *instance_name_arg);
+
+protected:
+ inline const LEX_STRING *get_instance_name() const
+ {
+ return instance_name.get_str();
+ }
+
+private:
+ Instance_name instance_name;
+};
+
+
+/**
Abstract class for Instance-specific commands.
+
+ Abstract_instance_cmd extends Instance_cmd by providing a common
+ framework for writing command-implementations. Basically, the class
+ implements Command::execute() pure virtual function in the following
+ way:
+ - Lock Instance_map;
+ - Get an instance by name. Return an error, if there is no such
+ instance;
+ - Lock the instance;
+ - Unlock Instance_map;
+ - Call execute_impl(), which should be implemented in derived class;
+ - Unlock the instance;
+ - Send response to the client and return error status.
*/
-class Abstract_instance_cmd: public Command
+class Abstract_instance_cmd: public Instance_cmd
{
public:
Abstract_instance_cmd(const LEX_STRING *instance_name_arg);
@@ -79,29 +118,24 @@ public:
virtual int execute(st_net *net, ulong connection_id);
protected:
- /* MT-NOTE: this operation is called under acquired Instance_map's lock. */
+ /**
+ This operation is intended to contain command-specific implementation.
+
+ MT-NOTE: this operation is called under acquired Instance's lock.
+ */
virtual int execute_impl(st_net *net, Instance *instance) = 0;
- /*
+ /**
This operation is invoked on successful return of execute_impl() and is
intended to send closing data.
- MT-NOTE: this operation is called under released Instance_map's lock.
+ MT-NOTE: this operation is called under released Instance's lock.
*/
virtual int send_ok_response(st_net *net, ulong connection_id) = 0;
-
-protected:
- inline const LEX_STRING *get_instance_name() const
- {
- return instance_name.get_str();
- }
-
-private:
- Instance_name instance_name;
};
-/*
+/**
Print status of an instance.
Grammar: SHOW INSTANCE STATUS <instance_name>
*/
@@ -121,7 +155,7 @@ private:
};
-/*
+/**
Print options of chosen instance.
Grammar: SHOW INSTANCE OPTIONS <instance_name>
*/
@@ -141,7 +175,7 @@ private:
};
-/*
+/**
Start an instance.
Grammar: START INSTANCE <instance_name>
*/
@@ -157,7 +191,7 @@ protected:
};
-/*
+/**
Stop an instance.
Grammar: STOP INSTANCE <instance_name>
*/
@@ -173,12 +207,12 @@ protected:
};
-/*
+/**
Create an instance.
Grammar: CREATE INSTANCE <instance_name> [<options>]
*/
-class Create_instance: public Command
+class Create_instance: public Instance_cmd
{
public:
Create_instance(const LEX_STRING *instance_name_arg);
@@ -189,22 +223,15 @@ public:
protected:
virtual int execute(st_net *net, ulong connection_id);
- inline const LEX_STRING *get_instance_name() const
- {
- return instance_name.get_str();
- }
-
private:
bool parse_args(const char **text);
private:
- Instance_name instance_name;
-
Named_value_arr options;
};
-/*
+/**
Drop an instance.
Grammar: DROP INSTANCE <instance_name>
@@ -213,18 +240,17 @@ private:
is removed from the instance map.
*/
-class Drop_instance: public Abstract_instance_cmd
+class Drop_instance: public Instance_cmd
{
public:
Drop_instance(const LEX_STRING *instance_name_arg);
protected:
- virtual int execute_impl(st_net *net, Instance *instance);
- virtual int send_ok_response(st_net *net, ulong connection_id);
+ virtual int execute(st_net *net, ulong connection_id);
};
-/*
+/**
Print requested part of the log.
Grammar:
SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end]
@@ -252,7 +278,7 @@ private:
};
-/*
+/**
Shows the list of the log files, used by an instance.
Grammar: SHOW <instance_name> LOG FILES
*/
@@ -272,7 +298,7 @@ private:
};
-/*
+/**
Abstract class for option-management commands.
*/
@@ -312,7 +338,7 @@ private:
};
-/*
+/**
Set an option for the instance.
Grammar: SET instance_name.option[=option_value][, ...]
*/
@@ -329,7 +355,7 @@ protected:
};
-/*
+/**
Remove option of the instance.
Grammar: UNSET instance_name.option[, ...]
*/
@@ -346,7 +372,7 @@ protected:
};
-/*
+/**
Syntax error command.
This command is issued if parser reported a syntax error. We need it to
diff --git a/server-tools/instance-manager/guardian.cc b/server-tools/instance-manager/guardian.cc
index e601ce0111c..1b451cd9933 100644
--- a/server-tools/instance-manager/guardian.cc
+++ b/server-tools/instance-manager/guardian.cc
@@ -28,101 +28,126 @@
#include "instance_map.h"
#include "log.h"
#include "mysql_manager_error.h"
+#include "options.h"
-const char *
-Guardian::get_instance_state_name(enum_instance_state state)
-{
- switch (state) {
- case NOT_STARTED:
- return "offline";
-
- case STARTING:
- return "starting";
-
- case STARTED:
- return "online";
- case JUST_CRASHED:
- return "failed";
+/*************************************************************************
+ {{{ Constructor & destructor.
+*************************************************************************/
- case CRASHED:
- return "crashed";
-
- case CRASHED_AND_ABANDONED:
- return "abandoned";
-
- case STOPPING:
- return "stopping";
- }
+/**
+ Guardian constructor.
- return NULL; /* just to ignore compiler warning. */
-}
+ SYNOPSIS
+ Guardian()
+ thread_registry_arg
+ instance_map_arg
-/* {{{ Constructor & destructor. */
+ DESCRIPTION
+ Nominal contructor intended for assigning references and initialize
+ trivial objects. Real initialization is made by init() method.
+*/
Guardian::Guardian(Thread_registry *thread_registry_arg,
- Instance_map *instance_map_arg,
- uint monitoring_interval_arg)
- :stopped(FALSE),
- monitoring_interval(monitoring_interval_arg),
+ Instance_map *instance_map_arg)
+ :shutdown_requested(FALSE),
+ stopped(FALSE),
thread_registry(thread_registry_arg),
- instance_map(instance_map_arg),
- shutdown_requested(FALSE)
+ instance_map(instance_map_arg)
{
pthread_mutex_init(&LOCK_guardian, 0);
pthread_cond_init(&COND_guardian, 0);
- init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
}
Guardian::~Guardian()
{
- /* delay guardian destruction to the moment when no one needs it */
- pthread_mutex_lock(&LOCK_guardian);
- free_root(&alloc, MYF(0));
- pthread_mutex_unlock(&LOCK_guardian);
+ /*
+ NOTE: it's necessary to synchronize here, because Guiardian thread can be
+ still alive an hold the mutex (because it is detached and we have no
+ control over it).
+ */
+
+ lock();
+ unlock();
+
pthread_mutex_destroy(&LOCK_guardian);
pthread_cond_destroy(&COND_guardian);
}
-/* }}} */
+/*************************************************************************
+ }}}
+*************************************************************************/
+/**
+ Send request to stop Guardian.
+
+ SYNOPSIS
+ request_shutdown()
+*/
+
void Guardian::request_shutdown()
{
- pthread_mutex_lock(&LOCK_guardian);
- /* STOP Instances or just clean up Guardian repository */
stop_instances();
+
+ lock();
shutdown_requested= TRUE;
- pthread_mutex_unlock(&LOCK_guardian);
+ unlock();
+
+ ping();
}
-void Guardian::process_instance(Instance *instance,
- GUARD_NODE *current_node,
- LIST **guarded_instances,
- LIST *node)
+/**
+ Process an instance.
+
+ SYNOPSIS
+ process_instance()
+ instance a pointer to the instance for processing
+
+ MT-NOTE:
+ - the given instance must be locked before calling this operation;
+ - Guardian must be locked before calling this operation.
+*/
+
+void Guardian::process_instance(Instance *instance)
{
- uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY;
- /* The amount of times, Guardian attempts to restart an instance */
int restart_retry= 100;
time_t current_time= time(NULL);
- if (current_node->state == STOPPING)
+ if (instance->get_state() == Instance::STOPPING)
{
- waitchild= instance->options.get_shutdown_delay();
+ /* This brach is executed during shutdown. */
- /* this returns TRUE if and only if an instance was stopped for sure */
+ /* This returns TRUE if and only if an instance was stopped for sure. */
if (instance->is_crashed())
- *guarded_instances= list_delete(*guarded_instances, node);
- else if ( (uint) (current_time - current_node->last_checked) > waitchild)
{
+ log_info("Guardian: '%s' stopped.",
+ (const char *) instance->get_name()->str);
+
+ instance->set_state(Instance::STOPPED);
+ }
+ else if ((uint) (current_time - instance->last_checked) >=
+ instance->options.get_shutdown_delay())
+ {
+ log_info("Guardian: '%s' hasn't stopped within %d secs.",
+ (const char *) instance->get_name()->str,
+ (int) instance->options.get_shutdown_delay());
+
instance->kill_mysqld(SIGKILL);
- /*
- Later we do node= node->next. This is ok, as we are only removing
- the node from the list. The pointer to the next one is still valid.
- */
- *guarded_instances= list_delete(*guarded_instances, node);
+
+ log_info("Guardian: pretend that '%s' is killed.",
+ (const char *) instance->get_name()->str);
+
+ instance->set_state(Instance::STOPPED);
+ }
+ else
+ {
+ log_info("Guardian: waiting for '%s' to stop (%d secs left).",
+ (const char *) instance->get_name()->str,
+ (int) (instance->options.get_shutdown_delay() -
+ current_time + instance->last_checked));
}
return;
@@ -133,83 +158,90 @@ void Guardian::process_instance(Instance *instance,
/* The instance can be contacted on it's port */
/* If STARTING also check that pidfile has been created */
- if (current_node->state == STARTING &&
- current_node->instance->options.load_pid() == 0)
+ if (instance->get_state() == Instance::STARTING &&
+ instance->options.load_pid() == 0)
{
/* Pid file not created yet, don't go to STARTED state yet */
}
- else if (current_node->state != STARTED)
+ else if (instance->get_state() != Instance::STARTED)
{
/* clear status fields */
log_info("Guardian: '%s' is running, set state to STARTED.",
(const char *) instance->options.instance_name.str);
- current_node->restart_counter= 0;
- current_node->crash_moment= 0;
- current_node->state= STARTED;
+ instance->reset_stat();
+ instance->set_state(Instance::STARTED);
}
}
else
{
- switch (current_node->state) {
- case NOT_STARTED:
+ switch (instance->get_state()) {
+ case Instance::NOT_STARTED:
log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str);
- /* NOTE, set state to STARTING _before_ start() is called */
- current_node->state= STARTING;
- instance->start();
- current_node->last_checked= current_time;
- break;
- case STARTED: /* fallthrough */
- case STARTING: /* let the instance start or crash */
- if (instance->is_crashed())
- {
- current_node->crash_moment= current_time;
- current_node->last_checked= current_time;
- current_node->state= JUST_CRASHED;
- /* fallthrough -- restart an instance immediately */
- }
- else
- break;
- case JUST_CRASHED:
- if (current_time - current_node->crash_moment <= 2)
+ /* NOTE: set state to STARTING _before_ start() is called. */
+ instance->set_state(Instance::STARTING);
+ instance->last_checked= current_time;
+
+ instance->start_mysqld();
+
+ return;
+
+ case Instance::STARTED: /* fallthrough */
+ case Instance::STARTING: /* let the instance start or crash */
+ if (!instance->is_crashed())
+ return;
+
+ instance->crash_moment= current_time;
+ instance->last_checked= current_time;
+ instance->set_state(Instance::JUST_CRASHED);
+ /* fallthrough -- restart an instance immediately */
+
+ case Instance::JUST_CRASHED:
+ if (current_time - instance->crash_moment <= 2)
{
if (instance->is_crashed())
{
- instance->start();
+ instance->start_mysqld();
log_info("Guardian: starting '%s'...",
(const char *) instance->options.instance_name.str);
}
}
else
- current_node->state= CRASHED;
- break;
- case CRASHED: /* just regular restarts */
- if (current_time - current_node->last_checked >
- monitoring_interval)
+ instance->set_state(Instance::CRASHED);
+
+ return;
+
+ case Instance::CRASHED: /* just regular restarts */
+ if (current_time - instance->last_checked <=
+ Options::Main::monitoring_interval)
+ return;
+
+ if (instance->restart_counter < restart_retry)
{
- if ((current_node->restart_counter < restart_retry))
- {
- if (instance->is_crashed())
- {
- instance->start();
- current_node->last_checked= current_time;
- current_node->restart_counter++;
- log_info("Guardian: restarting '%s'...",
- (const char *) instance->options.instance_name.str);
- }
- }
- else
+ if (instance->is_crashed())
{
- log_info("Guardian: can not start '%s'. "
- "Abandoning attempts to (re)start it",
+ instance->start_mysqld();
+ instance->last_checked= current_time;
+
+ log_info("Guardian: restarting '%s'...",
(const char *) instance->options.instance_name.str);
- current_node->state= CRASHED_AND_ABANDONED;
}
}
- break;
- case CRASHED_AND_ABANDONED:
- break; /* do nothing */
+ else
+ {
+ log_info("Guardian: can not start '%s'. "
+ "Abandoning attempts to (re)start it",
+ (const char *) instance->options.instance_name.str);
+
+ instance->set_state(Instance::CRASHED_AND_ABANDONED);
+ }
+
+ return;
+
+ case Instance::CRASHED_AND_ABANDONED:
+ return; /* do nothing */
+
default:
DBUG_ASSERT(0);
}
@@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance,
}
-/*
+/**
Main function of Guardian thread.
SYNOPSIS
run()
DESCRIPTION
- Check for all guarded instances and restart them if needed. If everything
- is fine go and sleep for some time.
+ Check for all guarded instances and restart them if needed.
*/
void Guardian::run()
{
- Instance *instance;
- LIST *node;
struct timespec timeout;
log_info("Guardian: started.");
thread_registry->register_thread(&thread_info);
- pthread_mutex_lock(&LOCK_guardian);
+ /* Loop, until all instances were shut down at the end. */
- /* loop, until all instances were shut down at the end */
- while (!(shutdown_requested && (guarded_instances == NULL)))
+ while (true)
{
- node= guarded_instances;
+ Instance_map::Iterator instances_it(instance_map);
+ Instance *instance;
+ bool all_instances_stopped= TRUE;
+
+ instance_map->lock();
- while (node != NULL)
+ while ((instance= instances_it.next()))
{
- GUARD_NODE *current_node= (GUARD_NODE *) node->data;
- instance= ((GUARD_NODE *) node->data)->instance;
- process_instance(instance, current_node, &guarded_instances, node);
+ instance->lock();
- node= node->next;
+ if (!instance->is_guarded() ||
+ instance->get_state() == Instance::STOPPED)
+ {
+ instance->unlock();
+ continue;
+ }
+
+ process_instance(instance);
+
+ if (instance->get_state() != Instance::STOPPED)
+ all_instances_stopped= FALSE;
+
+ instance->unlock();
}
- timeout.tv_sec= time(NULL) + monitoring_interval;
+
+ instance_map->unlock();
+
+ lock();
+
+ if (shutdown_requested && all_instances_stopped)
+ {
+ log_info("Guardian: all guarded mysqlds stopped.");
+
+ stopped= TRUE;
+ unlock();
+ break;
+ }
+
+ timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval;
timeout.tv_nsec= 0;
- /* check the loop predicate before sleeping */
- if (!(shutdown_requested && (!(guarded_instances))))
- thread_registry->cond_timedwait(&thread_info, &COND_guardian,
- &LOCK_guardian, &timeout);
+ thread_registry->cond_timedwait(&thread_info, &COND_guardian,
+ &LOCK_guardian, &timeout);
+ unlock();
}
log_info("Guardian: stopped.");
- stopped= TRUE;
- pthread_mutex_unlock(&LOCK_guardian);
- /* now, when the Guardian is stopped we can stop the IM */
+ /* Now, when the Guardian is stopped we can stop the IM. */
+
thread_registry->unregister_thread(&thread_info);
thread_registry->request_shutdown();
@@ -274,129 +328,65 @@ void Guardian::run()
}
-int Guardian::is_stopped()
+/**
+ Return the value of stopped flag.
+*/
+
+bool Guardian::is_stopped()
{
int var;
- pthread_mutex_lock(&LOCK_guardian);
+
+ lock();
var= stopped;
- pthread_mutex_unlock(&LOCK_guardian);
+ unlock();
+
return var;
}
-/*
- Initialize the list of guarded instances: loop through the Instance_map and
- add all of the instances, which don't have 'nonguarded' option specified.
-
- SYNOPSIS
- Guardian::init()
+/**
+ Wake up Guardian thread.
- NOTE: The operation should be invoked with the following locks acquired:
- - Guardian;
- - Instance_map;
-
- RETURN
- 0 - ok
- 1 - error occurred
+ MT-NOTE: though usually the mutex associated with condition variable should
+ be acquired before signalling the variable, here this is not needed.
+ Signalling under locked mutex is used to avoid lost signals. In the current
+ logic however locking mutex does not guarantee that the signal will not be
+ lost.
*/
-int Guardian::init()
+void Guardian::ping()
{
- Instance *instance;
- Instance_map::Iterator iterator(instance_map);
-
- /* clear the list of guarded instances */
- free_root(&alloc, MYF(0));
- init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0);
- guarded_instances= NULL;
-
- while ((instance= iterator.next()))
- {
- if (instance->options.nonguarded)
- continue;
-
- if (guard(instance, TRUE)) /* do not lock guardian */
- return 1;
- }
-
- return 0;
+ pthread_cond_signal(&COND_guardian);
}
-/*
- Add instance to the Guardian list
+/**
+ Prepare list of instances.
SYNOPSIS
- guard()
- instance the instance to be guarded
- nolock whether we prefer do not lock Guardian here,
- but use external locking instead
-
- DESCRIPTION
-
- The instance is added to the guarded instances list. Usually guard() is
- called after we start an instance.
+ init()
- RETURN
- 0 - ok
- 1 - error occurred
+ MT-NOTE: Instance Map must be locked before calling the operation.
*/
-int Guardian::guard(Instance *instance, bool nolock)
+void Guardian::init()
{
- LIST *node;
- GUARD_NODE *content;
-
- node= (LIST *) alloc_root(&alloc, sizeof(LIST));
- content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE));
-
- if ((!(node)) || (!(content)))
- return 1;
- /* we store the pointers to instances from the instance_map's MEM_ROOT */
- content->instance= instance;
- content->restart_counter= 0;
- content->crash_moment= 0;
- content->state= NOT_STARTED;
- node->data= (void*) content;
-
- if (nolock)
- guarded_instances= list_add(guarded_instances, node);
- else
- {
- pthread_mutex_lock(&LOCK_guardian);
- guarded_instances= list_add(guarded_instances, node);
- pthread_mutex_unlock(&LOCK_guardian);
- }
-
- return 0;
-}
-
-
-/*
- TODO: perhaps it would make sense to create a pool of the LIST nodeents
- and give them upon request. Now we are loosing a bit of memory when
- guarded instance was stopped and then restarted (since we cannot free just
- a piece of the MEM_ROOT).
-*/
-
-int Guardian::stop_guard(Instance *instance)
-{
- LIST *node;
-
- pthread_mutex_lock(&LOCK_guardian);
-
- node= find_instance_node(instance);
+ Instance *instance;
+ Instance_map::Iterator iterator(instance_map);
- if (node != NULL)
- guarded_instances= list_delete(guarded_instances, node);
+ while ((instance= iterator.next()))
+ {
+ instance->lock();
- pthread_mutex_unlock(&LOCK_guardian);
+ instance->reset_stat();
+ instance->set_state(Instance::NOT_STARTED);
- /* if there is nothing to delete it is also fine */
- return 0;
+ instance->unlock();
+ }
}
-/*
+
+/**
An internal method which is called at shutdown to unregister instances and
attempt to stop them if requested.
@@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance)
accordingly.
NOTE
- Guardian object should be locked by the calling function.
+ Guardian object should be locked by the caller.
- RETURN
- 0 - ok
- 1 - error occurred
*/
-int Guardian::stop_instances()
+void Guardian::stop_instances()
{
- LIST *node;
- node= guarded_instances;
- while (node != NULL)
+ Instance_map::Iterator instances_it(instance_map);
+ Instance *instance;
+
+ instance_map->lock();
+
+ while ((instance= instances_it.next()))
{
- GUARD_NODE *current_node= (GUARD_NODE *) node->data;
+ instance->lock();
+
+ if (!instance->is_guarded() ||
+ instance->get_state() == Instance::STOPPED)
+ {
+ instance->unlock();
+ continue;
+ }
+
/*
If instance is running or was running (and now probably hanging),
request stop.
*/
- if (current_node->instance->is_mysqld_running() ||
- (current_node->state == STARTED))
+
+ if (instance->is_mysqld_running() ||
+ instance->get_state() == Instance::STARTED)
{
- current_node->state= STOPPING;
- current_node->last_checked= time(NULL);
+ instance->set_state(Instance::STOPPING);
+ instance->last_checked= time(NULL);
}
else
- /* otherwise remove it from the list */
- guarded_instances= list_delete(guarded_instances, node);
- /* But try to kill it anyway. Just in case */
- current_node->instance->kill_mysqld(SIGTERM);
- node= node->next;
+ {
+ /* Otherwise mark it as STOPPED. */
+ instance->set_state(Instance::STOPPED);
+ }
+
+ /* Request mysqld to stop. */
+
+ instance->kill_mysqld(SIGTERM);
+
+ instance->unlock();
}
- return 0;
+
+ instance_map->unlock();
}
+/**
+ Lock Guardian.
+*/
+
void Guardian::lock()
{
pthread_mutex_lock(&LOCK_guardian);
}
+/**
+ Unlock Guardian.
+*/
+
void Guardian::unlock()
{
pthread_mutex_unlock(&LOCK_guardian);
}
-
-
-LIST *Guardian::find_instance_node(Instance *instance)
-{
- LIST *node= guarded_instances;
-
- while (node != NULL)
- {
- /*
- We compare only pointers, as we always use pointers from the
- instance_map's MEM_ROOT.
- */
- if (((GUARD_NODE *) node->data)->instance == instance)
- return node;
-
- node= node->next;
- }
-
- return NULL;
-}
-
-
-bool Guardian::is_active(Instance *instance)
-{
- bool guarded;
-
- lock();
-
- guarded= find_instance_node(instance) != NULL;
-
- /* is_running() can take a long time, so let's unlock mutex first. */
- unlock();
-
- if (guarded)
- return true;
-
- return instance->is_mysqld_running();
-}
diff --git a/server-tools/instance-manager/guardian.h b/server-tools/instance-manager/guardian.h
index 0eee1dc631d..2c7987f4565 100644
--- a/server-tools/instance-manager/guardian.h
+++ b/server-tools/instance-manager/guardian.h
@@ -17,10 +17,12 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include "thread_registry.h"
+#include <my_global.h>
#include <my_sys.h>
#include <my_list.h>
+#include "thread_registry.h"
+
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface
#endif
@@ -28,7 +30,6 @@
class Instance;
class Instance_map;
class Thread_registry;
-struct GUARD_NODE;
/**
The guardian thread is responsible for monitoring and restarting of guarded
@@ -38,97 +39,73 @@ struct GUARD_NODE;
class Guardian: public Thread
{
public:
- /* states of an instance */
- enum enum_instance_state { NOT_STARTED= 1, STARTING, STARTED, JUST_CRASHED,
- CRASHED, CRASHED_AND_ABANDONED, STOPPING };
-
- /*
- The Guardian list node structure. Guardian utilizes it to store
- guarded instances plus some additional info.
- */
+ Guardian(Thread_registry *thread_registry_arg,
+ Instance_map *instance_map_arg);
+ ~Guardian();
- struct GUARD_NODE
- {
- Instance *instance;
- /* state of an instance (i.e. STARTED, CRASHED, etc.) */
- enum_instance_state state;
- /* the amount of attemts to restart instance (cleaned up at success) */
- int restart_counter;
- /* triggered at a crash */
- time_t crash_moment;
- /* General time field. Used to provide timeouts (at shutdown and restart) */
- time_t last_checked;
- };
-
- /* Return client state name. */
- static const char *get_instance_state_name(enum_instance_state state);
+ void init();
- Guardian(Thread_registry *thread_registry_arg,
- Instance_map *instance_map_arg,
- uint monitoring_interval_arg);
- virtual ~Guardian();
- /* Initialize or refresh the list of guarded instances */
- int init();
- /* Request guardian shutdown. Stop instances if needed */
+public:
void request_shutdown();
- /* Start instance protection */
- int guard(Instance *instance, bool nolock= FALSE);
- /* Stop instance protection */
- int stop_guard(Instance *instance);
- /* Returns TRUE if guardian thread is stopped */
- int is_stopped();
+
+ bool is_stopped();
+
void lock();
void unlock();
- /*
- Return an internal list node for the given instance if the instance is
- managed by Guardian. Otherwise, return NULL.
+ void ping();
- MT-NOTE: must be called under acquired lock.
- */
- LIST *find_instance_node(Instance *instance);
+protected:
+ virtual void run();
+
+private:
+ void stop_instances();
- /* The operation is used to check if the instance is active or not. */
- bool is_active(Instance *instance);
+ void process_instance(Instance *instance);
+private:
/*
- Return state of the given instance list node. The pointer must specify
- a valid list node.
+ LOCK_guardian protectes the members in this section:
+ - shutdown_requested;
+ - stopped;
+
+ Also, it is used for COND_guardian.
*/
- inline enum_instance_state get_instance_state(LIST *instance_node);
-protected:
- /* Main funtion of the thread */
- virtual void run();
+ pthread_mutex_t LOCK_guardian;
-public:
+ /*
+ Guardian's main loop waits on this condition. So, it should be signalled
+ each time, when instance state has been changed and we want Guardian to
+ wake up.
+
+ TODO: Change this to having data-scoped conditions, i.e. conditions,
+ which indicate that some data has been changed.
+ */
pthread_cond_t COND_guardian;
-private:
- /* Prepares Guardian shutdown. Stops instances is needed */
- int stop_instances();
- /* check instance state and act accordingly */
- void process_instance(Instance *instance, GUARD_NODE *current_node,
- LIST **guarded_instances, LIST *elem);
+ /*
+ This variable is set to TRUE, when Manager thread is shutting down.
+ The flag is used by Guardian thread to understand that it's time to
+ finish.
+ */
+ bool shutdown_requested;
+
+ /*
+ This flag is set to TRUE on shutdown by Guardian thread, when all guarded
+ mysqlds are stopped.
- int stopped;
+ The flag is used in the Manager thread to wait for Guardian to stop all
+ mysqlds.
+ */
+ bool stopped;
-private:
- pthread_mutex_t LOCK_guardian;
Thread_info thread_info;
- int monitoring_interval;
Thread_registry *thread_registry;
Instance_map *instance_map;
- LIST *guarded_instances;
- MEM_ROOT alloc;
- /* this variable is set to TRUE when we want to stop Guardian thread */
- bool shutdown_requested;
-};
-
-inline Guardian::enum_instance_state
-Guardian::get_instance_state(LIST *instance_node)
-{
- return ((GUARD_NODE *) instance_node->data)->state;
-}
+private:
+ Guardian(const Guardian &);
+ Guardian&operator =(const Guardian &);
+};
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */
diff --git a/server-tools/instance-manager/instance.cc b/server-tools/instance-manager/instance.cc
index 6b4289c5b29..b852a8323e5 100644
--- a/server-tools/instance-manager/instance.cc
+++ b/server-tools/instance-manager/instance.cc
@@ -36,7 +36,9 @@
#include "thread_registry.h"
#include "instance_map.h"
-/* {{{ Platform-specific functions. */
+/*************************************************************************
+ {{{ Platform-specific functions.
+*************************************************************************/
#ifndef __WIN__
typedef pid_t My_process_info;
@@ -45,34 +47,6 @@ typedef PROCESS_INFORMATION My_process_info;
#endif
/*
- Proxy thread is a simple way to avoid all pitfalls of the threads
- implementation in the OS (e.g. LinuxThreads). With such a thread we
- don't have to process SIGCHLD, which is a tricky business if we want
- to do it in a portable way.
-*/
-
-class Instance_monitor: public Thread
-{
-public:
- Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
-protected:
- virtual void run();
- void start_and_monitor_instance(Instance_options *old_instance_options,
- Instance_map *instance_map,
- Thread_registry *thread_registry);
-private:
- Instance *instance;
-};
-
-void Instance_monitor::run()
-{
- start_and_monitor_instance(&instance->options,
- Manager::get_instance_map(),
- Manager::get_thread_registry());
- delete this;
-}
-
-/*
Wait for an instance
SYNOPSIS
@@ -285,113 +259,149 @@ int kill(pid_t pid, int signum)
}
#endif
-/* }}} */
+/*************************************************************************
+ }}}
+*************************************************************************/
+
-/* {{{ Static constants. */
+/*************************************************************************
+ {{{ Static constants.
+*************************************************************************/
const LEX_STRING
Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") };
-/* }}} */
+/*************************************************************************
+ }}}
+*************************************************************************/
-/*
- Fork child, exec an instance and monitor it.
+/*************************************************************************
+ {{{ Instance Monitor thread.
+*************************************************************************/
- SYNOPSIS
- start_and_monitor_instance()
- old_instance_options Pointer to the options of the instance to be
- launched. This info is likely to become obsolete
- when function returns from wait_process()
- instance_map Pointer to the instance_map. We use it to protect
- the instance from deletion, while we are working
- with it.
+/**
+ Proxy thread is a simple way to avoid all pitfalls of the threads
+ implementation in the OS (e.g. LinuxThreads). With such a thread we
+ don't have to process SIGCHLD, which is a tricky business if we want
+ to do it in a portable way.
- DESCRIPTION
- Fork a child, then exec and monitor it. When the child is dead,
- find appropriate instance (for this purpose we save its name),
- set appropriate flags and wake all threads waiting for instance
- to stop.
-
- NOTE
- A separate thread for starting/monitoring instance is a simple way
- to avoid all pitfalls of the threads implementation in the OS (e.g.
- LinuxThreads). For one, with such a thread we don't have to process
- SIGCHLD, which is a tricky business if we want to do it in a
- portable way.
+ Instance Monitor Thread forks a child process, execs mysqld and waits for
+ the child to die.
- RETURN
- Function returns no value
+ Instance Monitor assumes that the monitoring instance will not be dropped.
+ This is guaranteed by having flag monitoring_thread_active and
+ Instance::is_active() operation.
*/
-void
-Instance_monitor::
-start_and_monitor_instance(Instance_options *old_instance_options,
- Instance_map *instance_map,
- Thread_registry *thread_registry)
+class Instance_monitor: public Thread
{
- Instance_name instance_name(&old_instance_options->instance_name);
- Instance *current_instance;
- My_process_info process_info;
- Thread_info thread_info;
+public:
+ Instance_monitor(Instance *instance_arg) :instance(instance_arg) {}
+protected:
+ virtual void run();
+ void start_and_monitor_instance();
+private:
+ Instance *instance;
+};
+
+
+void Instance_monitor::run()
+{
+ start_and_monitor_instance();
+ delete this;
+}
+
+
+void Instance_monitor::start_and_monitor_instance()
+{
+ Thread_registry *thread_registry= Manager::get_thread_registry();
+ Guardian *guardian= Manager::get_guardian();
+
+ My_process_info mysqld_process_info;
+ Thread_info monitor_thread_info;
log_info("Instance '%s': Monitor: started.",
(const char *) instance->get_name()->str);
- if (!old_instance_options->nonguarded)
- {
- /*
- Register thread in Thread_registry to wait for it to stop on shutdown
- only if instance is guarded. If instance is guarded, the thread will not
- finish, because nonguarded instances are not stopped on shutdown.
- */
- thread_registry->register_thread(&thread_info, FALSE);
- }
-
/*
- Lock instance map to guarantee that no instances are deleted during
- strmake() and execv() calls.
+ For guarded instance register the thread in Thread_registry to wait for
+ the thread to stop on shutdown (nonguarded instances are not stopped on
+ shutdown, so the thread will no finish).
*/
- instance_map->lock();
- /*
- Save the instance name in the case if Instance object we
- are using is destroyed. (E.g. by "FLUSH INSTANCES")
- */
+ if (instance->is_guarded())
+ {
+ thread_registry->register_thread(&monitor_thread_info, FALSE);
+ }
+
+ /* Starting mysqld. */
log_info("Instance '%s': Monitor: starting mysqld...",
(const char *) instance->get_name()->str);
- if (start_process(old_instance_options, &process_info))
+ if (start_process(&instance->options, &mysqld_process_info))
{
- instance_map->unlock();
- return; /* error is logged */
+ instance->lock();
+ instance->monitoring_thread_active= FALSE;
+ instance->unlock();
+
+ return;
}
- /* allow users to delete instances */
- instance_map->unlock();
+ /* Waiting for mysqld to die. */
log_info("Instance '%s': Monitor: waiting for mysqld to stop...",
(const char *) instance->get_name()->str);
- wait_process(&process_info); /* Don't check for return value. */
+ wait_process(&mysqld_process_info); /* Don't check for return value. */
- instance_map->lock();
+ log_info("Instance '%s': Monitor: mysqld stopped.",
+ (const char *) instance->get_name()->str);
- current_instance= instance_map->find(instance_name.get_str());
+ /* Update instance status. */
- if (current_instance)
- current_instance->set_crash_flag_n_wake_all();
+ instance->lock();
- instance_map->unlock();
+ if (instance->is_guarded())
+ thread_registry->unregister_thread(&monitor_thread_info);
- if (!old_instance_options->nonguarded)
- thread_registry->unregister_thread(&thread_info);
+ instance->crashed= TRUE;
+ instance->monitoring_thread_active= FALSE;
log_info("Instance '%s': Monitor: finished.",
(const char *) instance->get_name()->str);
+
+ instance->unlock();
+
+ /* Wake up guardian. */
+
+ guardian->ping();
}
+/**************************************************************************
+ }}}
+**************************************************************************/
+
+
+/**************************************************************************
+ {{{ Static operations.
+**************************************************************************/
+
+/**
+ The operation is intended to check whether string is a well-formed
+ instance name or not.
+
+ SYNOPSIS
+ is_name_valid()
+ name string to check
+
+ RETURN
+ TRUE string is a valid instance name
+ FALSE string is not a valid instance name
+
+ TODO: Move to Instance_name class: Instance_name::is_valid().
+*/
bool Instance::is_name_valid(const LEX_STRING *name)
{
@@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name)
}
+/**
+ The operation is intended to check if the given instance name is
+ mysqld-compatible or not.
+
+ SYNOPSIS
+ is_mysqld_compatible_name()
+ name name to check
+
+ RETURN
+ TRUE name is mysqld-compatible
+ FALSE otherwise
+
+ TODO: Move to Instance_name class: Instance_name::is_mysqld_compatible().
+*/
+
bool Instance::is_mysqld_compatible_name(const LEX_STRING *name)
{
return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0;
}
+/**
+ Return client state name. Must not be used outside the class.
+ Use Instance::get_state_name() instead.
+*/
+
+const char * Instance::get_instance_state_name(enum_instance_state state)
+{
+ switch (state) {
+ case STOPPED:
+ return "offline";
+
+ case NOT_STARTED:
+ return "not started";
+
+ case STARTING:
+ return "starting";
+
+ case STARTED:
+ return "online";
+
+ case JUST_CRASHED:
+ return "failed";
+
+ case CRASHED:
+ return "crashed";
-/* {{{ Constructor & destructor */
+ case CRASHED_AND_ABANDONED:
+ return "abandoned";
+
+ case STOPPING:
+ return "stopping";
+ }
+
+ return NULL; /* just to ignore compiler warning. */
+}
+
+/**************************************************************************
+ }}}
+**************************************************************************/
+
+
+/**************************************************************************
+ {{{ Initialization & deinitialization.
+**************************************************************************/
Instance::Instance()
- :crashed(FALSE),
- configured(FALSE)
+ :monitoring_thread_active(FALSE),
+ crashed(FALSE),
+ configured(FALSE),
+ /* mysqld_compatible is initialized in init() */
+ state(NOT_STARTED),
+ restart_counter(0),
+ crash_moment(0),
+ last_checked(0)
{
pthread_mutex_init(&LOCK_instance, 0);
- pthread_cond_init(&COND_instance_stopped, 0);
}
@@ -427,13 +499,11 @@ Instance::~Instance()
{
log_info("Instance '%s': destroying...", (const char *) get_name()->str);
- pthread_cond_destroy(&COND_instance_stopped);
pthread_mutex_destroy(&LOCK_instance);
}
-/* }}} */
-/*
+/**
Initialize instance options.
SYNOPSIS
@@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg)
}
-/*
+/**
Complete instance options initialization.
SYNOPSIS
@@ -474,7 +544,47 @@ bool Instance::complete_initialization()
*/
}
-/*
+/**************************************************************************
+ }}}
+**************************************************************************/
+
+
+/**************************************************************************
+ {{{ Instance: public interface implementation.
+**************************************************************************/
+
+/**
+ Determine if there is some activity with the instance.
+
+ SYNOPSIS
+ is_active()
+
+ DESCRIPTION
+ An instance is active if one of the following conditions is true:
+ - Instance-monitoring thread is running;
+ - Instance is guarded and its state is other than STOPPED;
+ - Corresponding mysqld-server accepts connections.
+
+ MT-NOTE: instance must be locked before calling the operation.
+
+ RETURN
+ TRUE - instance is active
+ FALSE - otherwise.
+*/
+
+bool Instance::is_active()
+{
+ if (monitoring_thread_active)
+ return TRUE;
+
+ if (is_guarded() && get_state() != STOPPED)
+ return TRUE;
+
+ return is_mysqld_running();
+}
+
+
+/**
Determine if mysqld is accepting connections.
SYNOPSIS
@@ -484,7 +594,7 @@ bool Instance::complete_initialization()
Try to connect to mysqld with fake login/password to check whether it is
accepting connections or not.
- MT-NOTE: this operation must be called under acquired LOCK_instance.
+ MT-NOTE: instance must be locked before calling the operation.
RETURN
TRUE - mysqld is alive and accept connections
@@ -508,8 +618,6 @@ bool Instance::is_mysqld_running()
if (!port && !options.mysqld_socket)
port= SERVER_DEFAULT_PORT;
- pthread_mutex_lock(&LOCK_instance);
-
mysql_init(&mysql);
/* try to connect to a server with a fake username/password pair */
if (mysql_real_connect(&mysql, LOCAL_HOST, username,
@@ -523,7 +631,6 @@ bool Instance::is_mysqld_running()
*/
log_error("Instance '%s': was able to log into mysqld.",
(const char *) get_name()->str);
- pthread_mutex_unlock(&LOCK_instance);
return_val= TRUE; /* server is alive */
}
else
@@ -531,145 +638,145 @@ bool Instance::is_mysqld_running()
sizeof(access_denied_message) - 1));
mysql_close(&mysql);
- pthread_mutex_unlock(&LOCK_instance);
return return_val;
}
-/*
- The method starts an instance.
+
+/**
+ Start mysqld.
SYNOPSIS
- start()
+ start_mysqld()
+
+ DESCRIPTION
+ Reset flags and start Instance Monitor thread, which will start mysqld.
+
+ MT-NOTE: instance must be locked before calling the operation.
RETURN
- 0 ok
- ER_CANNOT_START_INSTANCE Cannot start instance
- ER_INSTANCE_ALREADY_STARTED The instance on the specified port/socket
- is already started
+ FALSE - ok
+ TRUE - could not start instance
*/
-int Instance::start()
+bool Instance::start_mysqld()
{
- /* clear crash flag */
- pthread_mutex_lock(&LOCK_instance);
- crashed= FALSE;
- pthread_mutex_unlock(&LOCK_instance);
+ Instance_monitor *instance_monitor;
+ /*
+ Prepare instance to start Instance Monitor thread.
- if (configured && !is_mysqld_running())
- {
- Instance_monitor *instance_monitor;
- remove_pid();
+ NOTE: It's important to set these actions here in order to avoid
+ race conditions -- these actions must be done under acquired lock on
+ Instance.
+ */
- instance_monitor= new Instance_monitor(this);
+ crashed= FALSE;
+ monitoring_thread_active= TRUE;
- if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED))
- {
- delete instance_monitor;
- log_error("Instance::start(): failed to create the monitoring thread"
- " to start an instance");
- return ER_CANNOT_START_INSTANCE;
- }
- /* The monitoring thread will delete itself when it's finished. */
+ remove_pid();
- return 0;
- }
+ /* Create and start the Instance Monitor thread. */
- /* The instance is started already or misconfigured. */
- return configured ? ER_INSTANCE_ALREADY_STARTED : ER_INSTANCE_MISCONFIGURED;
-}
+ instance_monitor= new Instance_monitor(this);
-/*
- The method sets the crash flag and wakes all waiters on
- COND_instance_stopped and COND_guardian
+ if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED))
+ {
+ delete instance_monitor;
+ monitoring_thread_active= FALSE;
- SYNOPSIS
- set_crash_flag_n_wake_all()
+ log_error("Instance '%s': can not create instance monitor thread.",
+ (const char *) get_name()->str);
- DESCRIPTION
- The method is called when an instance is crashed or terminated.
- In the former case it might indicate that guardian probably should
- restart it.
+ return TRUE;
+ }
- RETURN
- Function returns no value
-*/
+ ++restart_counter;
-void Instance::set_crash_flag_n_wake_all()
-{
- /* set instance state to crashed */
- pthread_mutex_lock(&LOCK_instance);
- crashed= TRUE;
- pthread_mutex_unlock(&LOCK_instance);
+ /* The Instance Monitor thread will delete itself when it's finished. */
- /*
- Wake connection threads waiting for an instance to stop. This
- is needed if a user issued command to stop an instance via
- mysql connection. This is not the case if Guardian stop the thread.
- */
- pthread_cond_signal(&COND_instance_stopped);
- /* wake guardian */
- pthread_cond_signal(&Manager::get_guardian()->COND_guardian);
+ return FALSE;
}
-/*
- Stop an instance.
+/**
+ Stop mysqld.
SYNOPSIS
- stop()
+ stop_mysqld()
- RETURN:
- 0 ok
- ER_INSTANCE_IS_NOT_STARTED Looks like the instance it is not started
- ER_STOP_INSTANCE mysql_shutdown reported an error
-*/
+ DESCRIPTION
+ Try to stop mysqld gracefully. Otherwise kill it with SIGKILL.
-int Instance::stop()
-{
- struct timespec timeout;
- uint waitchild= (uint) DEFAULT_SHUTDOWN_DELAY;
+ MT-NOTE: instance must be locked before calling the operation.
- if (is_mysqld_running())
- {
- waitchild= options.get_shutdown_delay();
+ RETURN
+ FALSE - ok
+ TRUE - could not stop the instance
+*/
- kill_mysqld(SIGTERM);
- /* sleep on condition to wait for SIGCHLD */
+bool Instance::stop_mysqld()
+{
+ log_info("Instance '%s': stopping mysqld...",
+ (const char *) get_name()->str);
- timeout.tv_sec= time(NULL) + waitchild;
- timeout.tv_nsec= 0;
- if (pthread_mutex_lock(&LOCK_instance))
- return ER_STOP_INSTANCE;
+ kill_mysqld(SIGTERM);
- while (options.load_pid() != 0) /* while server isn't stopped */
- {
- int status;
+ if (!wait_for_stop())
+ {
+ log_info("Instance '%s': mysqld stopped gracefully.",
+ (const char *) get_name()->str);
+ return FALSE;
+ }
- status= pthread_cond_timedwait(&COND_instance_stopped,
- &LOCK_instance,
- &timeout);
- if (status == ETIMEDOUT || status == ETIME)
- break;
- }
+ log_info("Instance '%s': mysqld failed to stop gracefully within %d seconds.",
+ (const char *) get_name()->str,
+ (int) options.get_shutdown_delay());
- pthread_mutex_unlock(&LOCK_instance);
+ log_info("Instance'%s': killing mysqld...",
+ (const char *) get_name()->str);
- kill_mysqld(SIGKILL);
+ kill_mysqld(SIGKILL);
- return 0;
+ if (!wait_for_stop())
+ {
+ log_info("Instance '%s': mysqld has been killed.",
+ (const char *) get_name()->str);
+ return FALSE;
}
- return ER_INSTANCE_IS_NOT_STARTED;
+ log_info("Instance '%s': can not kill mysqld within %d seconds.",
+ (const char *) get_name()->str,
+ (int) options.get_shutdown_delay());
+
+ return TRUE;
}
-/*
+/**
Send signal to mysqld.
SYNOPSIS
kill_mysqld()
+
+ DESCRIPTION
+ Load pid from the pid file and send the given signal to that process.
+ If the signal is SIGKILL, remove the pid file after sending the signal.
+
+ MT-NOTE: instance must be locked before calling the operation.
+
+ TODO
+ This too low-level and OS-specific operation for public interface.
+ Also, it has some implicit behaviour for SIGKILL signal. Probably, we
+ should have the following public operations instead:
+ - start_mysqld() -- as is;
+ - stop_mysqld -- request mysqld to shutdown gracefully (send SIGTERM);
+ don't wait for complete shutdown;
+ - wait_for_stop() (or join_mysqld()) -- wait for mysqld to stop within
+ time interval;
+ - kill_mysqld() -- request to terminate mysqld; don't wait for
+ completion.
+ These operations should also be used in Guardian to manage instances.
*/
void Instance::kill_mysqld(int signum)
@@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum)
}
}
-/*
- Return crashed flag.
- SYNOPSIS
- is_crashed()
-
- RETURN
- TRUE - mysqld crashed
- FALSE - mysqld hasn't crashed yet
+/**
+ Lock instance.
*/
-bool Instance::is_crashed()
+void Instance::lock()
{
- bool val;
pthread_mutex_lock(&LOCK_instance);
- val= crashed;
+}
+
+
+/**
+ Unlock instance.
+*/
+
+void Instance::unlock()
+{
pthread_mutex_unlock(&LOCK_instance);
- return val;
}
-/*
+
+/**
+ Return instance state name.
+
+ SYNOPSIS
+ get_state_name()
+
+ DESCRIPTION
+ The operation returns user-friendly state name. The operation can be
+ used both for guarded and non-guarded instances.
+
+ MT-NOTE: instance must be locked before calling the operation.
+
+ TODO: Replace with the static get_state_name(state_code) function.
+*/
+
+const char *Instance::get_state_name()
+{
+ if (!is_configured())
+ return "misconfigured";
+
+ if (is_guarded())
+ {
+ /* The instance is managed by Guardian: we can report precise state. */
+
+ return get_instance_state_name(get_state());
+ }
+
+ /* The instance is not managed by Guardian: we can report status only. */
+
+ return is_active() ? "online" : "offline";
+}
+
+
+/**
+ Reset statistics.
+
+ SYNOPSIS
+ reset_stat()
+
+ DESCRIPTION
+ The operation resets statistics used for guarding the instance.
+
+ MT-NOTE: instance must be locked before calling the operation.
+
+ TODO: Make private.
+*/
+
+void Instance::reset_stat()
+{
+ restart_counter= 0;
+ crash_moment= 0;
+ last_checked= 0;
+}
+
+/**************************************************************************
+ }}}
+**************************************************************************/
+
+
+/**************************************************************************
+ {{{ Instance: implementation of private operations.
+**************************************************************************/
+
+/**
Remove pid file.
*/
@@ -744,3 +915,36 @@ void Instance::remove_pid()
(const char *) options.instance_name.str);
}
}
+
+
+/**
+ Wait for mysqld to stop within shutdown interval.
+*/
+
+bool Instance::wait_for_stop()
+{
+ int start_time= time(NULL);
+ int finish_time= start_time + options.get_shutdown_delay();
+
+ log_info("Instance '%s': waiting for mysqld to stop "
+ "(timeout: %d seconds)...",
+ (const char *) get_name()->str,
+ (int) options.get_shutdown_delay());
+
+ while (true)
+ {
+ if (options.load_pid() == 0 && !is_mysqld_running())
+ return FALSE;
+
+ if (time(NULL) >= finish_time)
+ return TRUE;
+
+ /* Sleep for 0.3 sec and check again. */
+
+ my_sleep(300000);
+ }
+}
+
+/**************************************************************************
+ }}}
+**************************************************************************/
diff --git a/server-tools/instance-manager/instance.h b/server-tools/instance-manager/instance.h
index 412d01acc46..5bdd8d61d2f 100644
--- a/server-tools/instance-manager/instance.h
+++ b/server-tools/instance-manager/instance.h
@@ -30,7 +30,7 @@ class Instance_map;
class Thread_registry;
-/*
+/**
Instance_name -- the class represents instance name -- a string of length
less than MAX_INSTANCE_NAME_SIZE.
@@ -68,72 +68,127 @@ private:
class Instance
{
public:
- /*
- The following two constants defines name of the default mysqld-instance
- ("mysqld").
+ /* States of an instance. */
+ enum enum_instance_state
+ {
+ STOPPED,
+ NOT_STARTED,
+ STARTING,
+ STARTED,
+ JUST_CRASHED,
+ CRASHED,
+ CRASHED_AND_ABANDONED,
+ STOPPING
+ };
+
+public:
+ /**
+ The constant defines name of the default mysqld-instance ("mysqld").
*/
static const LEX_STRING DFLT_INSTANCE_NAME;
public:
- /*
- The operation is intended to check whether string is a well-formed
- instance name or not.
- */
static bool is_name_valid(const LEX_STRING *name);
-
- /*
- The operation is intended to check if the given instance name is
- mysqld-compatible or not.
- */
static bool is_mysqld_compatible_name(const LEX_STRING *name);
public:
Instance();
-
~Instance();
+
bool init(const LEX_STRING *name_arg);
bool complete_initialization();
+public:
+ bool is_active();
+
bool is_mysqld_running();
- int start();
- int stop();
- /* send a signal to the instance */
+
+ bool start_mysqld();
+ bool stop_mysqld();
void kill_mysqld(int signo);
- bool is_crashed();
- void set_crash_flag_n_wake_all();
- /*
+ void lock();
+ void unlock();
+
+ const char *get_state_name();
+
+ void reset_stat();
+
+public:
+ /**
The operation is intended to check if the instance is mysqld-compatible
or not.
*/
inline bool is_mysqld_compatible() const;
- /*
+ /**
The operation is intended to check if the instance is configured properly
or not. Misconfigured instances are not managed.
*/
inline bool is_configured() const;
+ /**
+ The operation returns TRUE if the instance is guarded and FALSE otherwise.
+ */
+ inline bool is_guarded() const;
+
+ /**
+ The operation returns name of the instance.
+ */
inline const LEX_STRING *get_name() const;
+ /**
+ The operation returns the current state of the instance.
+
+ NOTE: At the moment should be used only for guarded instances.
+ */
+ inline enum_instance_state get_state() const;
+
+ /**
+ The operation changes the state of the instance.
+
+ NOTE: At the moment should be used only for guarded instances.
+ TODO: Make private.
+ */
+ inline void set_state(enum_instance_state new_state);
+
+ /**
+ The operation returns crashed flag.
+ */
+ inline bool is_crashed();
+
public:
- enum { DEFAULT_SHUTDOWN_DELAY= 35 };
+ /**
+ This attributes contains instance options.
+
+ TODO: Make private.
+ */
Instance_options options;
private:
- /* This attributes is a flag, specifies if the instance has been crashed. */
+ /**
+ monitoring_thread_active is TRUE if there is a thread that monitors the
+ corresponding mysqld-process.
+ */
+ bool monitoring_thread_active;
+
+ /**
+ crashed is TRUE when corresponding mysqld-process has been died after
+ start.
+ */
bool crashed;
- /*
- This attribute specifies if the instance is configured properly or not.
+ /**
+ configured is TRUE when the instance is configured and FALSE otherwise.
Misconfigured instances are not managed.
*/
bool configured;
/*
- This attribute specifies whether the instance is mysqld-compatible or not.
- Mysqld-compatible instances can contain only mysqld-specific options.
- At the moment an instance is mysqld-compatible if its name is "mysqld".
+ mysqld_compatible specifies whether the instance is mysqld-compatible
+ or not. Mysqld-compatible instances can contain only mysqld-specific
+ options. At the moment an instance is mysqld-compatible if its name is
+ "mysqld".
The idea is that [mysqld] section should contain only mysqld-specific
options (no Instance Manager-specific options) to be readable by mysqld
@@ -142,18 +197,36 @@ private:
bool mysqld_compatible;
/*
- Mutex protecting the instance. Currently we use it to avoid the
- double start of the instance. This happens when the instance is starting
- and we issue the start command once more.
+ Mutex protecting the instance.
*/
pthread_mutex_t LOCK_instance;
- /*
- This condition variable is used to wake threads waiting for instance to
- stop in Instance::stop()
- */
- pthread_cond_t COND_instance_stopped;
- void remove_pid();
+private:
+ /* Guarded-instance attributes. */
+
+ /* state of an instance (i.e. STARTED, CRASHED, etc.) */
+ enum_instance_state state;
+
+public:
+ /* the amount of attemts to restart instance (cleaned up at success) */
+ int restart_counter;
+
+ /* triggered at a crash */
+ time_t crash_moment;
+
+ /* General time field. Used to provide timeouts (at shutdown and restart) */
+ time_t last_checked;
+
+private:
+ static const char *get_instance_state_name(enum_instance_state state);
+
+private:
+ void remove_pid();
+
+ bool wait_for_stop();
+
+private:
+ friend class Instance_monitor;
};
@@ -169,9 +242,33 @@ inline bool Instance::is_configured() const
}
+inline bool Instance::is_guarded() const
+{
+ return !options.nonguarded;
+}
+
+
inline const LEX_STRING *Instance::get_name() const
{
return &options.instance_name;
}
+
+inline Instance::enum_instance_state Instance::get_state() const
+{
+ return state;
+}
+
+
+inline void Instance::set_state(enum_instance_state new_state)
+{
+ state= new_state;
+}
+
+
+inline bool Instance::is_crashed()
+{
+ return crashed;
+}
+
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */
diff --git a/server-tools/instance-manager/instance_map.cc b/server-tools/instance-manager/instance_map.cc
index a356e308e44..a9108eae763 100644
--- a/server-tools/instance-manager/instance_map.cc
+++ b/server-tools/instance-manager/instance_map.cc
@@ -25,26 +25,18 @@
#include <mysql_com.h>
#include "buffer.h"
-#include "guardian.h"
#include "instance.h"
#include "log.h"
-#include "manager.h"
#include "mysqld_error.h"
#include "mysql_manager_error.h"
#include "options.h"
#include "priv.h"
-/*
- Note: As we are going to suppost different types of connections,
- we shouldn't have connection-specific functions. To avoid it we could
- put such functions to the Command-derived class instead.
- The command could be easily constructed for a specific connection if
- we would provide a special factory for each connection.
-*/
-
C_MODE_START
-/* Procedure needed for HASH initialization */
+/**
+ HASH-routines: get key of instance for storing in hash.
+*/
static byte* get_instance_key(const byte* u, uint* len,
my_bool __attribute__((unused)) t)
@@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len,
return (byte *) instance->options.instance_name.str;
}
+/**
+ HASH-routines: cleanup handler.
+*/
+
static void delete_instance(void *u)
{
Instance *instance= (Instance *) u;
delete instance;
}
-/*
- The option handler to pass to the process_default_option_files finction.
+/**
+ The option handler to pass to the process_default_option_files function.
SYNOPSIS
process_option()
@@ -96,8 +92,8 @@ static int process_option(void *ctx, const char *group, const char *option)
C_MODE_END
-/*
- Parse option string.
+/**
+ Parse option string.
SYNOPSIS
parse_option()
@@ -137,7 +133,7 @@ static void parse_option(const char *option_str,
}
-/*
+/**
Process one option from the configuration file.
SYNOPSIS
@@ -151,6 +147,10 @@ static void parse_option(const char *option_str,
process_option(). The caller ensures proper locking
of the instance map object.
*/
+ /*
+ Process a given option and assign it to appropricate instance. This is
+ required for the option handler, passed to my_search_option_files().
+ */
int Instance_map::process_one_option(const LEX_STRING *group,
const char *option)
@@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group,
}
+/**
+ Instance_map constructor.
+*/
+
Instance_map::Instance_map()
{
pthread_mutex_init(&LOCK_instance_map, 0);
}
+/**
+ Initialize Instance_map internals.
+*/
+
bool Instance_map::init()
{
return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
get_instance_key, delete_instance, 0);
}
+
+/**
+ Reset Instance_map data.
+*/
+
+bool Instance_map::reset()
+{
+ hash_free(&hash);
+ return init();
+}
+
+
+/**
+ Instance_map destructor.
+*/
+
Instance_map::~Instance_map()
{
- pthread_mutex_lock(&LOCK_instance_map);
+ lock();
+
+ /*
+ NOTE: it's necessary to synchronize on each instance before removal,
+ because Instance-monitoring thread can be still alive an hold the mutex
+ (because it is detached and we have no control over it).
+ */
+
+ while (true)
+ {
+ Iterator it(this);
+ Instance *instance= it.next();
+
+ if (!instance)
+ break;
+
+ instance->lock();
+ instance->unlock();
+
+ remove_instance(instance);
+ }
+
hash_free(&hash);
- pthread_mutex_unlock(&LOCK_instance_map);
+ unlock();
+
pthread_mutex_destroy(&LOCK_instance_map);
}
+/**
+ Lock Instance_map.
+*/
+
void Instance_map::lock()
{
pthread_mutex_lock(&LOCK_instance_map);
}
+/**
+ Unlock Instance_map.
+*/
+
void Instance_map::unlock()
{
pthread_mutex_unlock(&LOCK_instance_map);
}
-/*
- Re-read instance configuration file.
-
- SYNOPSIS
- Instance_map::flush_instances()
- DESCRIPTION
- This function will:
- - clear the current list of instances. This removes both
- running and stopped instances.
- - load a new instance configuration from the file.
- - pass on the new map to the guardian thread: it will start
- all instances that are marked `guarded' and not yet started.
- Note, as the check whether an instance is started is currently
- very simple (returns TRUE if there is a MySQL server running
- at the given port), this function has some peculiar
- side-effects:
- * if the port number of a running instance was changed, the
- old instance is forgotten, even if it was running. The new
- instance will be started at the new port.
- * if the configuration was changed in a way that two
- instances swapped their port numbers, the guardian thread
- will not notice that and simply report that both instances
- are configured successfully and running.
- In order to avoid such side effects one should never call
- FLUSH INSTANCES without prior stop of all running instances.
-
- NOTE: The operation should be invoked with the following locks acquired:
- - Guardian;
- - Instance_map;
+/**
+ Check if there is an active instance or not.
*/
-int Instance_map::flush_instances()
-{
- int rc;
-
- /*
- Guardian thread relies on the instance map repository for guarding
- instances. This is why refreshing instance map, we need (1) to stop
- guardian (2) reload the instance map (3) reinitialize the guardian
- with new instances.
- */
- hash_free(&hash);
- hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0,
- get_instance_key, delete_instance, 0);
-
- rc= load();
- /* don't init guardian if we failed to load instances */
- if (!rc)
- guardian->init(); // TODO: check error status.
- return rc;
-}
-
-
bool Instance_map::is_there_active_instance()
{
Instance *instance;
@@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance()
while ((instance= iterator.next()))
{
- if (guardian->find_instance_node(instance) != NULL ||
- instance->is_mysqld_running())
- {
+ bool active_instance_found;
+
+ instance->lock();
+ active_instance_found= instance->is_active();
+ instance->unlock();
+
+ if (active_instance_found)
return TRUE;
- }
}
return FALSE;
}
+/**
+ Add an instance into the internal hash.
+
+ MT-NOTE: Instance Map must be locked before calling the operation.
+*/
+
int Instance_map::add_instance(Instance *instance)
{
return my_hash_insert(&hash, (byte *) instance);
}
+/**
+ Remove instance from the internal hash.
+
+ MT-NOTE: Instance Map must be locked before calling the operation.
+*/
+
int Instance_map::remove_instance(Instance *instance)
{
return hash_delete(&hash, (byte *) instance);
}
+/**
+ Create a new instance and register it in the internal hash.
+
+ MT-NOTE: Instance Map must be locked before calling the operation.
+*/
+
int Instance_map::create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options)
{
@@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name,
}
+/**
+ Return a pointer to the instance or NULL, if there is no such instance.
+
+ MT-NOTE: Instance Map must be locked before calling the operation.
+*/
+
Instance * Instance_map::find(const LEX_STRING *name)
{
return (Instance *) hash_search(&hash, (byte *) name->str, name->length);
}
+/**
+ Init instances command line arguments after all options have been loaded.
+*/
+
bool Instance_map::complete_initialization()
{
bool mysqld_found;
@@ -455,7 +491,10 @@ bool Instance_map::complete_initialization()
}
-/* load options from config files and create appropriate instance structures */
+/**
+ Load options from config files and create appropriate instance
+ structures.
+*/
int Instance_map::load()
{
@@ -505,8 +544,9 @@ int Instance_map::load()
}
-/*--- Implementaton of the Instance map iterator class ---*/
-
+/*************************************************************************
+ {{{ Instance_map::Iterator implementation.
+*************************************************************************/
void Instance_map::Iterator::go_to_first()
{
@@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next()
return NULL;
}
-
-const char *Instance_map::get_instance_state_name(Instance *instance)
-{
- LIST *instance_node;
-
- if (!instance->is_configured())
- return "misconfigured";
-
- if ((instance_node= guardian->find_instance_node(instance)) != NULL)
- {
- /* The instance is managed by Guardian: we can report precise state. */
-
- return Guardian::get_instance_state_name(
- guardian->get_instance_state(instance_node));
- }
-
- /* The instance is not managed by Guardian: we can report status only. */
-
- return instance->is_mysqld_running() ? "online" : "offline";
-}
+/*************************************************************************
+ }}}
+*************************************************************************/
-/*
+/**
Create a new configuration section for mysqld-instance in the config file.
SYNOPSIS
diff --git a/server-tools/instance-manager/instance_map.h b/server-tools/instance-manager/instance_map.h
index 69d225c89f7..cdbac7635c2 100644
--- a/server-tools/instance-manager/instance_map.h
+++ b/server-tools/instance-manager/instance_map.h
@@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name,
const Named_value_arr *options);
-/*
+/**
Instance_map - stores all existing instances
*/
class Instance_map
{
public:
- /* Instance_map iterator */
+ /**
+ Instance_map iterator
+ */
+
class Iterator
{
private:
@@ -58,79 +61,43 @@ public:
void go_to_first();
Instance *next();
};
- friend class Iterator;
+
public:
- /*
- Return a pointer to the instance or NULL, if there is no such instance.
- MT-NOTE: must be called under acquired lock.
- */
Instance *find(const LEX_STRING *name);
- /* Clear the configuration cache and reload the configuration file. */
- int flush_instances();
-
- /* The operation is used to check if there is an active instance or not. */
bool is_there_active_instance();
void lock();
void unlock();
bool init();
+ bool reset();
- /*
- Process a given option and assign it to appropricate instance. This is
- required for the option handler, passed to my_search_option_files().
- */
- int process_one_option(const LEX_STRING *group, const char *option);
+ int load();
- /*
- Add an instance into the internal hash.
+ int process_one_option(const LEX_STRING *group, const char *option);
- MT-NOTE: the operation must be called under acquired lock.
- */
int add_instance(Instance *instance);
- /*
- Remove instance from the internal hash.
-
- MT-NOTE: the operation must be called under acquired lock.
- */
int remove_instance(Instance *instance);
- /*
- Create a new instance and register it in the internal hash.
-
- MT-NOTE: the operation must be called under acquired lock.
- */
int create_instance(const LEX_STRING *instance_name,
const Named_value_arr *options);
+public:
Instance_map();
~Instance_map();
- /*
- Retrieve client state name of the given instance.
-
- MT-NOTE: the options must be called under acquired locks of the following
- objects:
- - Instance_map;
- - Guardian;
- */
- const char *get_instance_state_name(Instance *instance);
-
-public:
- const char *mysqld_path;
- Guardian *guardian;
-
private:
- /* loads options from config files */
- int load();
- /* inits instances argv's after all options have been loaded */
bool complete_initialization();
+
private:
enum { START_HASH_SIZE = 16 };
pthread_mutex_t LOCK_instance_map;
HASH hash;
+
+private:
+ friend class Iterator;
};
#endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */
diff --git a/server-tools/instance-manager/instance_options.h b/server-tools/instance-manager/instance_options.h
index 7c1e1a8dcf3..38de839d762 100644
--- a/server-tools/instance-manager/instance_options.h
+++ b/server-tools/instance-manager/instance_options.h
@@ -46,7 +46,6 @@ public:
Instance_options();
~Instance_options();
- /* fills in argv */
bool complete_initialization();
bool set_option(Named_value *option);
diff --git a/server-tools/instance-manager/manager.cc b/server-tools/instance-manager/manager.cc
index a002902bd56..da36982f510 100644
--- a/server-tools/instance-manager/manager.cc
+++ b/server-tools/instance-manager/manager.cc
@@ -37,6 +37,9 @@
#include "user_map.h"
+/**********************************************************************
+ {{{ Platform-specific implementation.
+**********************************************************************/
#ifndef __WIN__
void set_signals(sigset_t *mask)
@@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig)
#endif
+/**********************************************************************
+ }}}
+**********************************************************************/
+
/**********************************************************************
- Implementation of checking the actual thread model.
+ {{{ Implementation of checking the actual thread model.
***********************************************************************/
namespace { /* no-indent */
@@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads)
}
+/**********************************************************************
+ }}}
+***********************************************************************/
+
/**********************************************************************
Manager implementation
@@ -152,25 +163,37 @@ bool Manager::linux_threads;
#endif // __WIN__
+/**
+ Request shutdown of guardian and threads registered in Thread_registry.
+
+ SYNOPSIS
+ stop_all_threads()
+*/
+
void Manager::stop_all_threads()
{
/*
- Let guardian thread know that it should break it's processing cycle,
+ Let Guardian thread know that it should break it's processing cycle,
once it wakes up.
*/
p_guardian->request_shutdown();
- /* wake guardian */
- pthread_cond_signal(&p_guardian->COND_guardian);
- /* stop all threads */
+
+ /* Stop all threads. */
p_thread_registry->deliver_shutdown();
}
-/*
- manager - entry point to the main instance manager process: start
- listener thread, write pid file and enter into signal handling.
- See also comments in mysqlmanager.cc to picture general Instance Manager
- architecture.
+/**
+ Main manager function.
+
+ SYNOPSIS
+ main()
+
+ DESCRIPTION
+ This is an entry point to the main instance manager process:
+ start listener thread, write pid file and enter into signal handling.
+ See also comments in mysqlmanager.cc to picture general Instance Manager
+ architecture.
TODO: how about returning error status.
*/
@@ -194,22 +217,33 @@ int Manager::main()
(const char *) (linux_threads ? "LINUX threads" : "POSIX threads"));
#endif // __WIN__
- Thread_registry thread_registry;
/*
- All objects created in the manager() function live as long as
- thread_registry lives, and thread_registry is alive until there are
- working threads.
+ All objects created in the Manager object live as long as thread_registry
+ lives, and thread_registry is alive until there are working threads.
+
+ There are two main purposes of the Thread Registry:
+ 1. Interrupt blocking I/O and signal condition variables in case of
+ shutdown;
+ 2. Wait for detached threads before shutting down the main thread.
+
+ NOTE:
+ 1. Handling shutdown can be done in more elegant manner by introducing
+ Event (or Condition) object with support of logical operations.
+ 2. Using Thread Registry to wait for detached threads is definitely not
+ the best way, because when Thread Registry unregisters an thread, the
+ thread is still alive. Accurate way to wait for threads to stop is
+ not using detached threads and join all threads before shutdown.
*/
+ Thread_registry thread_registry;
User_map user_map;
Instance_map instance_map;
- Guardian guardian(&thread_registry, &instance_map,
- Options::Main::monitoring_interval);
+ Guardian guardian(&thread_registry, &instance_map);
Listener listener(&thread_registry, &user_map);
p_instance_map= &instance_map;
- p_guardian= instance_map.guardian= &guardian;
+ p_guardian= &guardian;
p_thread_registry= &thread_registry;
p_user_map= &user_map;
@@ -249,7 +283,7 @@ int Manager::main()
}
}
- /* write Instance Manager pid file */
+ /* Write Instance Manager pid file. */
log_info("IM pid file: '%s'; PID: %d.",
(const char *) Options::Main::pid_file_name,
@@ -290,6 +324,7 @@ int Manager::main()
permitted to process instances. And before flush_instances() has
completed, there are no instances to guard.
*/
+
if (guardian.start(Thread::DETACHED))
{
log_error("Can not start Guardian thread.");
@@ -298,21 +333,11 @@ int Manager::main()
/* Load instances. */
+ if (Manager::flush_instances())
{
- instance_map.guardian->lock();
- instance_map.lock();
-
- int flush_instances_status= instance_map.flush_instances();
-
- instance_map.unlock();
- instance_map.guardian->unlock();
-
- if (flush_instances_status)
- {
- log_error("Can not init instances repository.");
- stop_all_threads();
- goto err;
- }
+ log_error("Can not init instances repository.");
+ stop_all_threads();
+ goto err;
}
/* Initialize the Listener. */
@@ -328,7 +353,8 @@ int Manager::main()
After the list of guarded instances have been initialized,
Guardian should start them.
*/
- pthread_cond_signal(&guardian.COND_guardian);
+
+ guardian.ping();
/* Main loop. */
@@ -381,7 +407,6 @@ int Manager::main()
if (!guardian.is_stopped())
{
guardian.request_shutdown();
- pthread_cond_signal(&guardian.COND_guardian);
}
else
{
@@ -406,3 +431,64 @@ err:
#endif
return rc;
}
+
+
+/**
+ Re-read instance configuration file.
+
+ SYNOPSIS
+ flush_instances()
+
+ DESCRIPTION
+ This function will:
+ - clear the current list of instances. This removes both
+ running and stopped instances.
+ - load a new instance configuration from the file.
+ - pass on the new map to the guardian thread: it will start
+ all instances that are marked `guarded' and not yet started.
+
+ Note, as the check whether an instance is started is currently
+ very simple (returns TRUE if there is a MySQL server running
+ at the given port), this function has some peculiar
+ side-effects:
+ * if the port number of a running instance was changed, the
+ old instance is forgotten, even if it was running. The new
+ instance will be started at the new port.
+ * if the configuration was changed in a way that two
+ instances swapped their port numbers, the guardian thread
+ will not notice that and simply report that both instances
+ are configured successfully and running.
+
+ In order to avoid such side effects one should never call
+ FLUSH INSTANCES without prior stop of all running instances.
+*/
+
+bool Manager::flush_instances()
+{
+ p_instance_map->lock();
+
+ if (p_instance_map->is_there_active_instance())
+ {
+ p_instance_map->unlock();
+ return TRUE;
+ }
+
+ if (p_instance_map->reset())
+ {
+ p_instance_map->unlock();
+ return TRUE;
+ }
+
+ if (p_instance_map->load())
+ {
+ p_instance_map->unlock();
+ return TRUE; /* Don't init guardian if we failed to load instances. */
+ }
+
+ get_guardian()->init(); /* TODO: check error status. */
+ get_guardian()->ping();
+
+ p_instance_map->unlock();
+
+ return FALSE;
+}
diff --git a/server-tools/instance-manager/manager.h b/server-tools/instance-manager/manager.h
index a77809cca6d..9a00b9320ce 100644
--- a/server-tools/instance-manager/manager.h
+++ b/server-tools/instance-manager/manager.h
@@ -19,6 +19,7 @@
#if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE)
#pragma interface
#endif
+
#include <my_global.h>
class Guardian;
@@ -30,8 +31,12 @@ class Manager
{
public:
static int main();
+
+ static bool flush_instances();
+
+public:
/**
- These methods return a non-zero value only for the duration
+ These methods return a non-NULL value only for the duration
of main().
*/
static Instance_map *get_instance_map() { return p_instance_map; }
@@ -39,6 +44,7 @@ public:
static Thread_registry *get_thread_registry() { return p_thread_registry; }
static User_map *get_user_map() { return p_user_map; }
+public:
#ifndef __WIN__
static bool is_linux_threads() { return linux_threads; }
#endif // __WIN__
diff --git a/server-tools/instance-manager/user_map.cc b/server-tools/instance-manager/user_map.cc
index f3a6e3cd76c..7f34195c0b1 100644
--- a/server-tools/instance-manager/user_map.cc
+++ b/server-tools/instance-manager/user_map.cc
@@ -42,7 +42,7 @@ int User::init(const char *line)
if (name_end == 0 || name_end[1] != ':')
{
log_error("Invalid format (unmatched quote) of user line (%s).",
- (const char *) line);
+ (const char *) line);
return 1;
}
password= name_end + 2;
@@ -54,7 +54,7 @@ int User::init(const char *line)
if (name_end == 0)
{
log_error("Invalid format (no delimiter) of user line (%s).",
- (const char *) line);
+ (const char *) line);
return 1;
}
password= name_end + 1;
@@ -64,10 +64,10 @@ int User::init(const char *line)
if (user_length > USERNAME_LENGTH)
{
log_error("User name is too long (%d). Max length: %d. "
- "User line: '%s'.",
- (int) user_length,
- (int) USERNAME_LENGTH,
- (const char *) line);
+ "User line: '%s'.",
+ (int) user_length,
+ (int) USERNAME_LENGTH,
+ (const char *) line);
return 1;
}
@@ -75,10 +75,10 @@ int User::init(const char *line)
if (password_length > SCRAMBLED_PASSWORD_CHAR_LENGTH)
{
log_error("Password is too long (%d). Max length: %d."
- "User line: '%s'.",
- (int) password_length,
- (int) SCRAMBLED_PASSWORD_CHAR_LENGTH,
- line);
+ "User line: '%s'.",
+ (int) password_length,
+ (int) SCRAMBLED_PASSWORD_CHAR_LENGTH,
+ (const char *) line);
return 1;
}