diff options
author | unknown <anozdrin/alik@booka.> | 2006-11-30 12:23:55 +0300 |
---|---|---|
committer | unknown <anozdrin/alik@booka.> | 2006-11-30 12:23:55 +0300 |
commit | 23776f53ff8058f3bc2eb01f61f95ff718adb5ef (patch) | |
tree | 3c5d1b038e5990a85468d111f0c218fd6ee5723e /server-tools | |
parent | 6949b04246beb07f03f9e5b16e45a54b12d453a8 (diff) | |
download | mariadb-git-23776f53ff8058f3bc2eb01f61f95ff718adb5ef.tar.gz |
Fix for the following bugs:
- BUG#22306: STOP INSTANCE can not be applied for instances in Crashed,
Failed and Abandoned;
- BUG#23476: DROP INSTANCE does not work
- BUG#23215: STOP INSTANCE takes too much time
BUG#22306:
The problem was that STOP INSTANCE checked that mysqld was up and running
and reported an error if it was not. Now, STOP INSTANCE reports an error
only if the instance has not been started (mysqld itself can be down).
BUG#23476:
The problem was that DROP INSTANCE tried to stop an inactive instance. The fix
is trivial.
BUG#23215:
The problem was that locks were not acquired properly, so the
instance-monitoring thread could not acquire the mutex held by the
query-processing thread.
The fix is to simplify the locking scheme by moving instance-related
information out of the Guardian class and into the Instance class. This removes
the need to store a separate list of instance information in Guardian and to
keep it synchronized with the original list in Instance_map.
server-tools/instance-manager/commands.cc:
1. Introduce Instance_cmd class -- base class for the commands
that deal with the one instance;
2. Remove Instance_map argument from command constructors;
3. Ensure that Instance Map and Instance are locked in the proper order;
4. Polishing.
server-tools/instance-manager/commands.h:
1. Introduce Instance_cmd class -- base class for the commands
that deal with the one instance;
2. Remove Instance_map argument from command constructors;
3. Polishing.
server-tools/instance-manager/guardian.cc:
1. Move "extended" instance information to the Instance class.
This removes the need to store an instance-related container and data in
the Guardian class, which significantly simplifies the locking scheme.
2. Polishing.
server-tools/instance-manager/guardian.h:
1. Move "extended" instance information to the Instance class.
This removes the need to store an instance-related container and data in
the Guardian class, which significantly simplifies the locking scheme.
2. Polishing.
server-tools/instance-manager/instance.cc:
1. Move "extended" instance information to the Instance-class.
2. Introduce new state STOPPED to mark that guarded instance
is stopped and should not be restarted by Guardian.
3. Polishing.
server-tools/instance-manager/instance.h:
1. Move "extended" instance information to the Instance-class.
2. Introduce new state STOPPED to mark that guarded instance
is stopped and should not be restarted by Guardian.
3. Polishing.
server-tools/instance-manager/instance_map.cc:
1. Move flush_instances() from Instance_map to Manager.
2. Polishing.
server-tools/instance-manager/instance_map.h:
1. Move flush_instances() from Instance_map to Manager.
2. Polishing.
server-tools/instance-manager/instance_options.h:
Polishing.
server-tools/instance-manager/manager.cc:
1. Move flush_instances() from Instance_map to Manager.
2. Polishing.
server-tools/instance-manager/manager.h:
1. Move flush_instances() from Instance_map to Manager.
2. Polishing.
server-tools/instance-manager/user_map.cc:
Polishing.
Diffstat (limited to 'server-tools')
-rw-r--r-- | server-tools/instance-manager/commands.cc | 290 | ||||
-rw-r--r-- | server-tools/instance-manager/commands.h | 104 | ||||
-rw-r--r-- | server-tools/instance-manager/guardian.cc | 543 | ||||
-rw-r--r-- | server-tools/instance-manager/guardian.h | 125 | ||||
-rw-r--r-- | server-tools/instance-manager/instance.cc | 630 | ||||
-rw-r--r-- | server-tools/instance-manager/instance.h | 171 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_map.cc | 217 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_map.h | 61 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_options.h | 1 | ||||
-rw-r--r-- | server-tools/instance-manager/manager.cc | 154 | ||||
-rw-r--r-- | server-tools/instance-manager/manager.h | 8 | ||||
-rw-r--r-- | server-tools/instance-manager/user_map.cc | 20 |
12 files changed, 1368 insertions, 956 deletions
diff --git a/server-tools/instance-manager/commands.cc b/server-tools/instance-manager/commands.cc index 15738f8ebb3..6cc1c8d3047 100644 --- a/server-tools/instance-manager/commands.cc +++ b/server-tools/instance-manager/commands.cc @@ -29,6 +29,7 @@ #include "guardian.h" #include "instance_map.h" #include "log.h" +#include "manager.h" #include "messages.h" #include "mysqld_error.h" #include "mysql_manager_error.h" @@ -36,8 +37,11 @@ #include "priv.h" #include "protocol.h" +/************************************************************************** + {{{ Static functions. +**************************************************************************/ -/* +/** modify_defaults_to_im_error -- a map of error codes of mysys::modify_defaults_file() into Instance Manager error codes. */ @@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES, ER_ACCESS_OPTION_FILE }; -/* - Add a string to a buffer. +/** + Parse version number from the version string. SYNOPSIS - put_to_buff() - buff buffer to add the string - str string to add - position offset in the buff to add a string + parse_version_number() + version_str + version + version_size DESCRIPTION + TODO - Function to add a string to the buffer. It is different from - store_to_protocol_packet, which is used in the protocol.cc. - The last one also stores the length of the string in a special way. - This is required for MySQL client/server protocol support only. + TODO: Move this function to Instance_options and parse version number + only once. - RETURN - 0 - ok - 1 - error occured + NOTE: This function is used only in SHOW INSTANCE STATUS statement at the + moment. 
*/ -static inline int put_to_buff(Buffer *buff, const char *str, uint *position) -{ - uint len= strlen(str); - if (buff->append(*position, str, len)) - return 1; - - *position+= len; - return 0; -} - - static int parse_version_number(const char *version_str, char *version, uint version_size) { @@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version, return 0; } +/************************************************************************** + }}} +**************************************************************************/ /************************************************************************** Implementation of Instance_name. @@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name) Implementation of Show_instances. **************************************************************************/ -/* +/** Implementation of SHOW INSTANCES statement. Possible error codes: @@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net) Instance *instance; Instance_map::Iterator iterator(instance_map); - instance_map->guardian->lock(); instance_map->lock(); while ((instance= iterator.next())) @@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net) Buffer send_buf; /* buffer for packets */ uint pos= 0; + instance->lock(); + const char *instance_name= instance->options.instance_name.str; - const char *state_name= instance_map->get_instance_state_name(instance); + const char *state_name= instance->get_state_name(); if (store_to_protocol_packet(&send_buf, instance_name, &pos) || store_to_protocol_packet(&send_buf, state_name, &pos) || my_net_write(net, send_buf.buffer, pos)) { err_status= TRUE; - break; } + + instance->unlock(); + + if (err_status) + break; } instance_map->unlock(); - instance_map->guardian->unlock(); return err_status ? ER_OUT_OF_RESOURCES : 0; } @@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net) Implementation of Flush_instances. 
**************************************************************************/ -/* +/** Implementation of FLUSH INSTANCES statement. Possible error codes: @@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net) int Flush_instances::execute(st_net *net, ulong connection_id) { - instance_map->guardian->lock(); - instance_map->lock(); - - if (instance_map->is_there_active_instance()) - { - instance_map->unlock(); - instance_map->guardian->unlock(); - return ER_THERE_IS_ACTIVE_INSTACE; - } - - if (instance_map->flush_instances()) - { - instance_map->unlock(); - instance_map->guardian->unlock(); + if (Manager::flush_instances()) return ER_OUT_OF_RESOURCES; - } - - instance_map->unlock(); - instance_map->guardian->unlock(); return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0; } /************************************************************************** - Implementation of Abstract_instance_cmd. + Implementation of Instance_cmd. **************************************************************************/ -Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg) - :instance_name(instance_name_arg) +Instance_cmd::Instance_cmd(const LEX_STRING *instance_name_arg): + instance_name(instance_name_arg) { /* MT-NOTE: we can not make a search for Instance object here, @@ -251,26 +232,39 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg } +/************************************************************************** + Implementation of Abstract_instance_cmd. 
+**************************************************************************/ + +Abstract_instance_cmd::Abstract_instance_cmd( + const LEX_STRING *instance_name_arg) + :Instance_cmd(instance_name_arg) +{ +} + + int Abstract_instance_cmd::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; instance_map->lock(); - { - Instance *instance= instance_map->find(get_instance_name()); - - if (!instance) - { - instance_map->unlock(); - return ER_BAD_INSTANCE_NAME; - } + instance= instance_map->find(get_instance_name()); - err_code= execute_impl(net, instance); + if (!instance) + { + instance_map->unlock(); + return ER_BAD_INSTANCE_NAME; } + instance->lock(); instance_map->unlock(); + err_code= execute_impl(net, instance); + + instance->unlock(); + if (!err_code) err_code= send_ok_response(net, connection_id); @@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of SHOW INSTANCE STATUS statement. Possible error codes: @@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance) char version_num_buf[MAX_VERSION_LENGTH]; uint pos= 0; - const char *state_name; + const char *state_name= instance->get_state_name(); const char *version_tag= "unknown"; const char *version_num= "unknown"; - const char *mysqld_compatible_status; - - instance_map->guardian->lock(); - state_name= instance_map->get_instance_state_name(instance); - mysqld_compatible_status= instance->is_mysqld_compatible() ? "yes" : "no"; - instance_map->guardian->unlock(); + const char *mysqld_compatible_status= + instance->is_mysqld_compatible() ? "yes" : "no"; if (instance->options.mysqld_version) { - if (parse_version_number(instance->options.mysqld_version, version_num_buf, sizeof(version_num_buf))) return ER_OUT_OF_RESOURCES; @@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options( } -/* +/** Implementation of SHOW INSTANCE OPTIONS statement. 
Possible error codes: @@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of START INSTANCE statement. Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist - ER_OUT_OF_RESOURCES Not enough resources to complete the operation + ER_INSTANCE_MISCONFIGURED The instance configuration is invalid + ER_INSTANCE_ALREADY_STARTED The instance is already started + ER_CANNOT_START_INSTANCE The instance could not have been started + + TODO: as soon as this method operates only with Instance, we probably + should introduce a new method (execute_stop_instance()) in Instance and + just call it from here. */ int Start_instance::execute_impl(st_net * /* net */, Instance *instance) { - int err_code; + if (!instance->is_configured()) + return ER_INSTANCE_MISCONFIGURED; - if ((err_code= instance->start())) - return err_code; + if (instance->is_active()) + return ER_INSTANCE_ALREADY_STARTED; + + if (instance->start_mysqld()) + return ER_CANNOT_START_INSTANCE; - if (!(instance->options.nonguarded)) - instance_map->guardian->guard(instance); + instance->reset_stat(); + instance->set_state(Instance::NOT_STARTED); return 0; } @@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of STOP INSTANCE statement. Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist ER_OUT_OF_RESOURCES Not enough resources to complete the operation + + TODO: as soon as this method operates only with Instance, we probably + should introduce a new method (execute_stop_instance()) in Instance and + just call it from here. 
*/ int Stop_instance::execute_impl(st_net * /* net */, Instance *instance) { - int err_code; + if (!instance->is_active()) + return ER_INSTANCE_IS_NOT_STARTED; - if (!(instance->options.nonguarded)) - instance_map->guardian->stop_guard(instance); + instance->set_state(Instance::STOPPED); - if ((err_code= instance->stop())) - return err_code; - - return 0; + return instance->stop_mysqld() ? ER_STOP_INSTANCE : 0; } @@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id) **************************************************************************/ Create_instance::Create_instance(const LEX_STRING *instance_name_arg) - :instance_name(instance_name_arg) + :Instance_cmd(instance_name_arg) { } -/* +/** This operation initializes Create_instance object. SYNOPSIS @@ -604,7 +604,7 @@ bool Create_instance::init(const char **text) } -/* +/** This operation parses CREATE INSTANCE options. SYNOPSIS @@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text) } -/* +/** Implementation of CREATE INSTANCE statement. Possible error codes: @@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text) int Create_instance::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; /* Check that the name is valid and there is no instance with such name. */ @@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id) return err_code; } + instance= instance_map->find(get_instance_name()); + DBUG_ASSERT(instance); + if ((err_code= create_instance_in_file(get_instance_name(), &options))) { - Instance *instance= instance_map->find(get_instance_name()); - - if (instance) - instance_map->remove_instance(instance); /* instance is deleted here. */ + instance_map->remove_instance(instance); /* instance is deleted here. */ instance_map->unlock(); return err_code; } + /* + CREATE INSTANCE must not lead to start instance, even if it guarded. 
+ + TODO: The problem however is that if Instance Manager restarts after + creating instance, the instance will be restarted (see also BUG#19718). + */ + + instance->set_state(Instance::STOPPED); + /* That's all. */ instance_map->unlock(); @@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id) **************************************************************************/ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) - :Abstract_instance_cmd(instance_name_arg) + :Instance_cmd(instance_name_arg) { } -/* +/** Implementation of DROP INSTANCE statement. Possible error codes: @@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) ER_OUT_OF_RESOURCES Not enough resources to complete the operation */ -int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) +int Drop_instance::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; + + /* Lock Guardian, then Instance_map. */ + + instance_map->lock(); + + /* Find an instance. */ + + instance= instance_map->find(get_instance_name()); + + if (!instance) + { + instance_map->unlock(); + return ER_BAD_INSTANCE_NAME; + } + + instance->lock(); /* Check that the instance is offline. */ - if (instance_map->guardian->is_active(instance)) + if (instance->is_active()) + { + instance->unlock(); + instance_map->unlock(); + return ER_DROP_ACTIVE_INSTANCE; + } + + /* Try to remove instance from the file. */ err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL, get_instance_name()->str, MY_REMOVE_SECTION); @@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) (const char *) get_instance_name()->str, (const char *) Options::Main::config_file, (int) err_code); - } - if (err_code) + instance->unlock(); + instance_map->unlock(); + return modify_defaults_to_im_error[err_code]; + } - /* Remove instance from the instance map hash and Guardian's list. 
*/ + /* Unlock the instance before destroy. */ - if (!instance->options.nonguarded) - instance_map->guardian->stop_guard(instance); + instance->unlock(); - if ((err_code= instance->stop())) - return err_code; + /* + Remove instance from the instance map + (the instance will be also destroyed here). + */ instance_map->remove_instance(instance); - return 0; -} + /* Unlock the instance map. */ + instance_map->unlock(); + + /* That's all: send ok. */ -int Drop_instance::send_ok_response(st_net *net, ulong connection_id) -{ if (net_send_ok(net, connection_id, "Instance dropped")) return ER_OUT_OF_RESOURCES; @@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg, } -/* +/** Implementation of SHOW INSTANCE LOG statement. Possible error codes: @@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files } -/* +/** Implementation of SHOW INSTANCE LOG FILES statement. Possible error codes: @@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance) Implementation of Abstract_option_cmd. **************************************************************************/ -/* +/** Instance_options_list -- a data class representing a list of options for some instance. */ @@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text) } -/* +/** Correct the option file. The "skip" option is used to remove the found option. @@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option, } -/* - Implementation of SET statement. +/** + Lock Instance Map and call execute_impl(). Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist @@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name) } +/** + Skeleton implementation of option-management command. + + MT-NOTE: Instance Map is locked before calling this operation. 
+*/ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) { int err_code= 0; @@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Instance_options_list *lst= (Instance_options_list *) hash_element(&instance_options_map, i); + bool instance_is_active; + lst->instance= instance_map->find(lst->get_instance_name()); if (!lst->instance) return ER_BAD_INSTANCE_NAME; - if (instance_map->guardian->is_active(lst->instance)) + lst->instance->lock(); + instance_is_active= lst->instance->is_active(); + lst->instance->unlock(); + + if (instance_is_active) return ER_INSTANCE_IS_ACTIVE; } @@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Instance_options_list *lst= (Instance_options_list *) hash_element(&instance_options_map, i); + lst->instance->lock(); + for (int j= 0; j < lst->options.get_size(); ++j) { Named_value option= lst->options.get_element(j); @@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) break; } + lst->instance->unlock(); + if (err_code) break; } @@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Implementation of Set_option. **************************************************************************/ -/* +/** This operation parses SET options. SYNOPSIS @@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option) Implementation of Unset_option. **************************************************************************/ -/* +/** This operation parses UNSET options. SYNOPSIS @@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text) } -/* +/** Implementation of UNSET statement. 
Possible error codes: diff --git a/server-tools/instance-manager/commands.h b/server-tools/instance-manager/commands.h index 8768aaab121..9b5d27b0982 100644 --- a/server-tools/instance-manager/commands.h +++ b/server-tools/instance-manager/commands.h @@ -30,7 +30,7 @@ #endif -/* +/** Print all instances of this instance manager. Grammar: SHOW INSTANCES */ @@ -50,7 +50,7 @@ private: }; -/* +/** Reread configuration file and refresh internal cache. Grammar: FLUSH INSTANCES */ @@ -66,11 +66,50 @@ public: }; -/* +/** + Base class for Instance-specific commands + (commands that operate on one instance). + + Instance_cmd extends Command class by: + - an attribute for storing instance name; + - code to initialize instance name in constructor; + - an accessor to get instance name. +*/ + +class Instance_cmd : public Command +{ +public: + Instance_cmd(const LEX_STRING *instance_name_arg); + +protected: + inline const LEX_STRING *get_instance_name() const + { + return instance_name.get_str(); + } + +private: + Instance_name instance_name; +}; + + +/** Abstract class for Instance-specific commands. + + Abstract_instance_cmd extends Instance_cmd by providing a common + framework for writing command-implementations. Basically, the class + implements Command::execute() pure virtual function in the following + way: + - Lock Instance_map; + - Get an instance by name. Return an error, if there is no such + instance; + - Lock the instance; + - Unlock Instance_map; + - Call execute_impl(), which should be implemented in derived class; + - Unlock the instance; + - Send response to the client and return error status. */ -class Abstract_instance_cmd: public Command +class Abstract_instance_cmd: public Instance_cmd { public: Abstract_instance_cmd(const LEX_STRING *instance_name_arg); @@ -79,29 +118,24 @@ public: virtual int execute(st_net *net, ulong connection_id); protected: - /* MT-NOTE: this operation is called under acquired Instance_map's lock. 
*/ + /** + This operation is intended to contain command-specific implementation. + + MT-NOTE: this operation is called under acquired Instance's lock. + */ virtual int execute_impl(st_net *net, Instance *instance) = 0; - /* + /** This operation is invoked on successful return of execute_impl() and is intended to send closing data. - MT-NOTE: this operation is called under released Instance_map's lock. + MT-NOTE: this operation is called under released Instance's lock. */ virtual int send_ok_response(st_net *net, ulong connection_id) = 0; - -protected: - inline const LEX_STRING *get_instance_name() const - { - return instance_name.get_str(); - } - -private: - Instance_name instance_name; }; -/* +/** Print status of an instance. Grammar: SHOW INSTANCE STATUS <instance_name> */ @@ -121,7 +155,7 @@ private: }; -/* +/** Print options of chosen instance. Grammar: SHOW INSTANCE OPTIONS <instance_name> */ @@ -141,7 +175,7 @@ private: }; -/* +/** Start an instance. Grammar: START INSTANCE <instance_name> */ @@ -157,7 +191,7 @@ protected: }; -/* +/** Stop an instance. Grammar: STOP INSTANCE <instance_name> */ @@ -173,12 +207,12 @@ protected: }; -/* +/** Create an instance. Grammar: CREATE INSTANCE <instance_name> [<options>] */ -class Create_instance: public Command +class Create_instance: public Instance_cmd { public: Create_instance(const LEX_STRING *instance_name_arg); @@ -189,22 +223,15 @@ public: protected: virtual int execute(st_net *net, ulong connection_id); - inline const LEX_STRING *get_instance_name() const - { - return instance_name.get_str(); - } - private: bool parse_args(const char **text); private: - Instance_name instance_name; - Named_value_arr options; }; -/* +/** Drop an instance. Grammar: DROP INSTANCE <instance_name> @@ -213,18 +240,17 @@ private: is removed from the instance map. 
*/ -class Drop_instance: public Abstract_instance_cmd +class Drop_instance: public Instance_cmd { public: Drop_instance(const LEX_STRING *instance_name_arg); protected: - virtual int execute_impl(st_net *net, Instance *instance); - virtual int send_ok_response(st_net *net, ulong connection_id); + virtual int execute(st_net *net, ulong connection_id); }; -/* +/** Print requested part of the log. Grammar: SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end] @@ -252,7 +278,7 @@ private: }; -/* +/** Shows the list of the log files, used by an instance. Grammar: SHOW <instance_name> LOG FILES */ @@ -272,7 +298,7 @@ private: }; -/* +/** Abstract class for option-management commands. */ @@ -312,7 +338,7 @@ private: }; -/* +/** Set an option for the instance. Grammar: SET instance_name.option[=option_value][, ...] */ @@ -329,7 +355,7 @@ protected: }; -/* +/** Remove option of the instance. Grammar: UNSET instance_name.option[, ...] */ @@ -346,7 +372,7 @@ protected: }; -/* +/** Syntax error command. This command is issued if parser reported a syntax error. We need it to diff --git a/server-tools/instance-manager/guardian.cc b/server-tools/instance-manager/guardian.cc index e601ce0111c..1b451cd9933 100644 --- a/server-tools/instance-manager/guardian.cc +++ b/server-tools/instance-manager/guardian.cc @@ -28,101 +28,126 @@ #include "instance_map.h" #include "log.h" #include "mysql_manager_error.h" +#include "options.h" -const char * -Guardian::get_instance_state_name(enum_instance_state state) -{ - switch (state) { - case NOT_STARTED: - return "offline"; - - case STARTING: - return "starting"; - - case STARTED: - return "online"; - case JUST_CRASHED: - return "failed"; +/************************************************************************* + {{{ Constructor & destructor. 
+*************************************************************************/ - case CRASHED: - return "crashed"; - - case CRASHED_AND_ABANDONED: - return "abandoned"; - - case STOPPING: - return "stopping"; - } +/** + Guardian constructor. - return NULL; /* just to ignore compiler warning. */ -} + SYNOPSIS + Guardian() + thread_registry_arg + instance_map_arg -/* {{{ Constructor & destructor. */ + DESCRIPTION + Nominal contructor intended for assigning references and initialize + trivial objects. Real initialization is made by init() method. +*/ Guardian::Guardian(Thread_registry *thread_registry_arg, - Instance_map *instance_map_arg, - uint monitoring_interval_arg) - :stopped(FALSE), - monitoring_interval(monitoring_interval_arg), + Instance_map *instance_map_arg) + :shutdown_requested(FALSE), + stopped(FALSE), thread_registry(thread_registry_arg), - instance_map(instance_map_arg), - shutdown_requested(FALSE) + instance_map(instance_map_arg) { pthread_mutex_init(&LOCK_guardian, 0); pthread_cond_init(&COND_guardian, 0); - init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0); } Guardian::~Guardian() { - /* delay guardian destruction to the moment when no one needs it */ - pthread_mutex_lock(&LOCK_guardian); - free_root(&alloc, MYF(0)); - pthread_mutex_unlock(&LOCK_guardian); + /* + NOTE: it's necessary to synchronize here, because Guiardian thread can be + still alive an hold the mutex (because it is detached and we have no + control over it). + */ + + lock(); + unlock(); + pthread_mutex_destroy(&LOCK_guardian); pthread_cond_destroy(&COND_guardian); } -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ +/** + Send request to stop Guardian. 
+ + SYNOPSIS + request_shutdown() +*/ + void Guardian::request_shutdown() { - pthread_mutex_lock(&LOCK_guardian); - /* STOP Instances or just clean up Guardian repository */ stop_instances(); + + lock(); shutdown_requested= TRUE; - pthread_mutex_unlock(&LOCK_guardian); + unlock(); + + ping(); } -void Guardian::process_instance(Instance *instance, - GUARD_NODE *current_node, - LIST **guarded_instances, - LIST *node) +/** + Process an instance. + + SYNOPSIS + process_instance() + instance a pointer to the instance for processing + + MT-NOTE: + - the given instance must be locked before calling this operation; + - Guardian must be locked before calling this operation. +*/ + +void Guardian::process_instance(Instance *instance) { - uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY; - /* The amount of times, Guardian attempts to restart an instance */ int restart_retry= 100; time_t current_time= time(NULL); - if (current_node->state == STOPPING) + if (instance->get_state() == Instance::STOPPING) { - waitchild= instance->options.get_shutdown_delay(); + /* This brach is executed during shutdown. */ - /* this returns TRUE if and only if an instance was stopped for sure */ + /* This returns TRUE if and only if an instance was stopped for sure. */ if (instance->is_crashed()) - *guarded_instances= list_delete(*guarded_instances, node); - else if ( (uint) (current_time - current_node->last_checked) > waitchild) { + log_info("Guardian: '%s' stopped.", + (const char *) instance->get_name()->str); + + instance->set_state(Instance::STOPPED); + } + else if ((uint) (current_time - instance->last_checked) >= + instance->options.get_shutdown_delay()) + { + log_info("Guardian: '%s' hasn't stopped within %d secs.", + (const char *) instance->get_name()->str, + (int) instance->options.get_shutdown_delay()); + instance->kill_mysqld(SIGKILL); - /* - Later we do node= node->next. This is ok, as we are only removing - the node from the list. The pointer to the next one is still valid. 
- */ - *guarded_instances= list_delete(*guarded_instances, node); + + log_info("Guardian: pretend that '%s' is killed.", + (const char *) instance->get_name()->str); + + instance->set_state(Instance::STOPPED); + } + else + { + log_info("Guardian: waiting for '%s' to stop (%d secs left).", + (const char *) instance->get_name()->str, + (int) (instance->options.get_shutdown_delay() - + current_time + instance->last_checked)); } return; @@ -133,83 +158,90 @@ void Guardian::process_instance(Instance *instance, /* The instance can be contacted on it's port */ /* If STARTING also check that pidfile has been created */ - if (current_node->state == STARTING && - current_node->instance->options.load_pid() == 0) + if (instance->get_state() == Instance::STARTING && + instance->options.load_pid() == 0) { /* Pid file not created yet, don't go to STARTED state yet */ } - else if (current_node->state != STARTED) + else if (instance->get_state() != Instance::STARTED) { /* clear status fields */ log_info("Guardian: '%s' is running, set state to STARTED.", (const char *) instance->options.instance_name.str); - current_node->restart_counter= 0; - current_node->crash_moment= 0; - current_node->state= STARTED; + instance->reset_stat(); + instance->set_state(Instance::STARTED); } } else { - switch (current_node->state) { - case NOT_STARTED: + switch (instance->get_state()) { + case Instance::NOT_STARTED: log_info("Guardian: starting '%s'...", (const char *) instance->options.instance_name.str); - /* NOTE, set state to STARTING _before_ start() is called */ - current_node->state= STARTING; - instance->start(); - current_node->last_checked= current_time; - break; - case STARTED: /* fallthrough */ - case STARTING: /* let the instance start or crash */ - if (instance->is_crashed()) - { - current_node->crash_moment= current_time; - current_node->last_checked= current_time; - current_node->state= JUST_CRASHED; - /* fallthrough -- restart an instance immediately */ - } - else - break; - case 
JUST_CRASHED: - if (current_time - current_node->crash_moment <= 2) + /* NOTE: set state to STARTING _before_ start() is called. */ + instance->set_state(Instance::STARTING); + instance->last_checked= current_time; + + instance->start_mysqld(); + + return; + + case Instance::STARTED: /* fallthrough */ + case Instance::STARTING: /* let the instance start or crash */ + if (!instance->is_crashed()) + return; + + instance->crash_moment= current_time; + instance->last_checked= current_time; + instance->set_state(Instance::JUST_CRASHED); + /* fallthrough -- restart an instance immediately */ + + case Instance::JUST_CRASHED: + if (current_time - instance->crash_moment <= 2) { if (instance->is_crashed()) { - instance->start(); + instance->start_mysqld(); log_info("Guardian: starting '%s'...", (const char *) instance->options.instance_name.str); } } else - current_node->state= CRASHED; - break; - case CRASHED: /* just regular restarts */ - if (current_time - current_node->last_checked > - monitoring_interval) + instance->set_state(Instance::CRASHED); + + return; + + case Instance::CRASHED: /* just regular restarts */ + if (current_time - instance->last_checked <= + Options::Main::monitoring_interval) + return; + + if (instance->restart_counter < restart_retry) { - if ((current_node->restart_counter < restart_retry)) - { - if (instance->is_crashed()) - { - instance->start(); - current_node->last_checked= current_time; - current_node->restart_counter++; - log_info("Guardian: restarting '%s'...", - (const char *) instance->options.instance_name.str); - } - } - else + if (instance->is_crashed()) { - log_info("Guardian: can not start '%s'. 
" - "Abandoning attempts to (re)start it", + instance->start_mysqld(); + instance->last_checked= current_time; + + log_info("Guardian: restarting '%s'...", (const char *) instance->options.instance_name.str); - current_node->state= CRASHED_AND_ABANDONED; } } - break; - case CRASHED_AND_ABANDONED: - break; /* do nothing */ + else + { + log_info("Guardian: can not start '%s'. " + "Abandoning attempts to (re)start it", + (const char *) instance->options.instance_name.str); + + instance->set_state(Instance::CRASHED_AND_ABANDONED); + } + + return; + + case Instance::CRASHED_AND_ABANDONED: + return; /* do nothing */ + default: DBUG_ASSERT(0); } @@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance, } -/* +/** Main function of Guardian thread. SYNOPSIS run() DESCRIPTION - Check for all guarded instances and restart them if needed. If everything - is fine go and sleep for some time. + Check for all guarded instances and restart them if needed. */ void Guardian::run() { - Instance *instance; - LIST *node; struct timespec timeout; log_info("Guardian: started."); thread_registry->register_thread(&thread_info); - pthread_mutex_lock(&LOCK_guardian); + /* Loop, until all instances were shut down at the end. 
*/ - /* loop, until all instances were shut down at the end */ - while (!(shutdown_requested && (guarded_instances == NULL))) + while (true) { - node= guarded_instances; + Instance_map::Iterator instances_it(instance_map); + Instance *instance; + bool all_instances_stopped= TRUE; + + instance_map->lock(); - while (node != NULL) + while ((instance= instances_it.next())) { - GUARD_NODE *current_node= (GUARD_NODE *) node->data; - instance= ((GUARD_NODE *) node->data)->instance; - process_instance(instance, current_node, &guarded_instances, node); + instance->lock(); - node= node->next; + if (!instance->is_guarded() || + instance->get_state() == Instance::STOPPED) + { + instance->unlock(); + continue; + } + + process_instance(instance); + + if (instance->get_state() != Instance::STOPPED) + all_instances_stopped= FALSE; + + instance->unlock(); } - timeout.tv_sec= time(NULL) + monitoring_interval; + + instance_map->unlock(); + + lock(); + + if (shutdown_requested && all_instances_stopped) + { + log_info("Guardian: all guarded mysqlds stopped."); + + stopped= TRUE; + unlock(); + break; + } + + timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval; timeout.tv_nsec= 0; - /* check the loop predicate before sleeping */ - if (!(shutdown_requested && (!(guarded_instances)))) - thread_registry->cond_timedwait(&thread_info, &COND_guardian, - &LOCK_guardian, &timeout); + thread_registry->cond_timedwait(&thread_info, &COND_guardian, + &LOCK_guardian, &timeout); + unlock(); } log_info("Guardian: stopped."); - stopped= TRUE; - pthread_mutex_unlock(&LOCK_guardian); - /* now, when the Guardian is stopped we can stop the IM */ + /* Now, when the Guardian is stopped we can stop the IM. */ + thread_registry->unregister_thread(&thread_info); thread_registry->request_shutdown(); @@ -274,129 +328,65 @@ void Guardian::run() } -int Guardian::is_stopped() +/** + Return the value of stopped flag. 
+*/ + +bool Guardian::is_stopped() { int var; - pthread_mutex_lock(&LOCK_guardian); + + lock(); var= stopped; - pthread_mutex_unlock(&LOCK_guardian); + unlock(); + return var; } -/* - Initialize the list of guarded instances: loop through the Instance_map and - add all of the instances, which don't have 'nonguarded' option specified. - - SYNOPSIS - Guardian::init() +/** + Wake up Guardian thread. - NOTE: The operation should be invoked with the following locks acquired: - - Guardian; - - Instance_map; - - RETURN - 0 - ok - 1 - error occurred + MT-NOTE: though usually the mutex associated with condition variable should + be acquired before signalling the variable, here this is not needed. + Signalling under locked mutex is used to avoid lost signals. In the current + logic however locking mutex does not guarantee that the signal will not be + lost. */ -int Guardian::init() +void Guardian::ping() { - Instance *instance; - Instance_map::Iterator iterator(instance_map); - - /* clear the list of guarded instances */ - free_root(&alloc, MYF(0)); - init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0); - guarded_instances= NULL; - - while ((instance= iterator.next())) - { - if (instance->options.nonguarded) - continue; - - if (guard(instance, TRUE)) /* do not lock guardian */ - return 1; - } - - return 0; + pthread_cond_signal(&COND_guardian); } -/* - Add instance to the Guardian list +/** + Prepare list of instances. SYNOPSIS - guard() - instance the instance to be guarded - nolock whether we prefer do not lock Guardian here, - but use external locking instead - - DESCRIPTION - - The instance is added to the guarded instances list. Usually guard() is - called after we start an instance. + init() - RETURN - 0 - ok - 1 - error occurred + MT-NOTE: Instance Map must be locked before calling the operation. 
*/ -int Guardian::guard(Instance *instance, bool nolock) +void Guardian::init() { - LIST *node; - GUARD_NODE *content; - - node= (LIST *) alloc_root(&alloc, sizeof(LIST)); - content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE)); - - if ((!(node)) || (!(content))) - return 1; - /* we store the pointers to instances from the instance_map's MEM_ROOT */ - content->instance= instance; - content->restart_counter= 0; - content->crash_moment= 0; - content->state= NOT_STARTED; - node->data= (void*) content; - - if (nolock) - guarded_instances= list_add(guarded_instances, node); - else - { - pthread_mutex_lock(&LOCK_guardian); - guarded_instances= list_add(guarded_instances, node); - pthread_mutex_unlock(&LOCK_guardian); - } - - return 0; -} - - -/* - TODO: perhaps it would make sense to create a pool of the LIST nodeents - and give them upon request. Now we are loosing a bit of memory when - guarded instance was stopped and then restarted (since we cannot free just - a piece of the MEM_ROOT). -*/ - -int Guardian::stop_guard(Instance *instance) -{ - LIST *node; - - pthread_mutex_lock(&LOCK_guardian); - - node= find_instance_node(instance); + Instance *instance; + Instance_map::Iterator iterator(instance_map); - if (node != NULL) - guarded_instances= list_delete(guarded_instances, node); + while ((instance= iterator.next())) + { + instance->lock(); - pthread_mutex_unlock(&LOCK_guardian); + instance->reset_stat(); + instance->set_state(Instance::NOT_STARTED); - /* if there is nothing to delete it is also fine */ - return 0; + instance->unlock(); + } } -/* + +/** An internal method which is called at shutdown to unregister instances and attempt to stop them if requested. @@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance) accordingly. NOTE - Guardian object should be locked by the calling function. + Guardian object should be locked by the caller. 
- RETURN - 0 - ok - 1 - error occurred */ -int Guardian::stop_instances() +void Guardian::stop_instances() { - LIST *node; - node= guarded_instances; - while (node != NULL) + Instance_map::Iterator instances_it(instance_map); + Instance *instance; + + instance_map->lock(); + + while ((instance= instances_it.next())) { - GUARD_NODE *current_node= (GUARD_NODE *) node->data; + instance->lock(); + + if (!instance->is_guarded() || + instance->get_state() == Instance::STOPPED) + { + instance->unlock(); + continue; + } + /* If instance is running or was running (and now probably hanging), request stop. */ - if (current_node->instance->is_mysqld_running() || - (current_node->state == STARTED)) + + if (instance->is_mysqld_running() || + instance->get_state() == Instance::STARTED) { - current_node->state= STOPPING; - current_node->last_checked= time(NULL); + instance->set_state(Instance::STOPPING); + instance->last_checked= time(NULL); } else - /* otherwise remove it from the list */ - guarded_instances= list_delete(guarded_instances, node); - /* But try to kill it anyway. Just in case */ - current_node->instance->kill_mysqld(SIGTERM); - node= node->next; + { + /* Otherwise mark it as STOPPED. */ + instance->set_state(Instance::STOPPED); + } + + /* Request mysqld to stop. */ + + instance->kill_mysqld(SIGTERM); + + instance->unlock(); } - return 0; + + instance_map->unlock(); } +/** + Lock Guardian. +*/ + void Guardian::lock() { pthread_mutex_lock(&LOCK_guardian); } +/** + Unlock Guardian. +*/ + void Guardian::unlock() { pthread_mutex_unlock(&LOCK_guardian); } - - -LIST *Guardian::find_instance_node(Instance *instance) -{ - LIST *node= guarded_instances; - - while (node != NULL) - { - /* - We compare only pointers, as we always use pointers from the - instance_map's MEM_ROOT. 
- */ - if (((GUARD_NODE *) node->data)->instance == instance) - return node; - - node= node->next; - } - - return NULL; -} - - -bool Guardian::is_active(Instance *instance) -{ - bool guarded; - - lock(); - - guarded= find_instance_node(instance) != NULL; - - /* is_running() can take a long time, so let's unlock mutex first. */ - unlock(); - - if (guarded) - return true; - - return instance->is_mysqld_running(); -} diff --git a/server-tools/instance-manager/guardian.h b/server-tools/instance-manager/guardian.h index 0eee1dc631d..2c7987f4565 100644 --- a/server-tools/instance-manager/guardian.h +++ b/server-tools/instance-manager/guardian.h @@ -17,10 +17,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "thread_registry.h" +#include <my_global.h> #include <my_sys.h> #include <my_list.h> +#include "thread_registry.h" + #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #pragma interface #endif @@ -28,7 +30,6 @@ class Instance; class Instance_map; class Thread_registry; -struct GUARD_NODE; /** The guardian thread is responsible for monitoring and restarting of guarded @@ -38,97 +39,73 @@ struct GUARD_NODE; class Guardian: public Thread { public: - /* states of an instance */ - enum enum_instance_state { NOT_STARTED= 1, STARTING, STARTED, JUST_CRASHED, - CRASHED, CRASHED_AND_ABANDONED, STOPPING }; - - /* - The Guardian list node structure. Guardian utilizes it to store - guarded instances plus some additional info. - */ + Guardian(Thread_registry *thread_registry_arg, + Instance_map *instance_map_arg); + ~Guardian(); - struct GUARD_NODE - { - Instance *instance; - /* state of an instance (i.e. STARTED, CRASHED, etc.) */ - enum_instance_state state; - /* the amount of attemts to restart instance (cleaned up at success) */ - int restart_counter; - /* triggered at a crash */ - time_t crash_moment; - /* General time field. 
Used to provide timeouts (at shutdown and restart) */ - time_t last_checked; - }; - - /* Return client state name. */ - static const char *get_instance_state_name(enum_instance_state state); + void init(); - Guardian(Thread_registry *thread_registry_arg, - Instance_map *instance_map_arg, - uint monitoring_interval_arg); - virtual ~Guardian(); - /* Initialize or refresh the list of guarded instances */ - int init(); - /* Request guardian shutdown. Stop instances if needed */ +public: void request_shutdown(); - /* Start instance protection */ - int guard(Instance *instance, bool nolock= FALSE); - /* Stop instance protection */ - int stop_guard(Instance *instance); - /* Returns TRUE if guardian thread is stopped */ - int is_stopped(); + + bool is_stopped(); + void lock(); void unlock(); - /* - Return an internal list node for the given instance if the instance is - managed by Guardian. Otherwise, return NULL. + void ping(); - MT-NOTE: must be called under acquired lock. - */ - LIST *find_instance_node(Instance *instance); +protected: + virtual void run(); + +private: + void stop_instances(); - /* The operation is used to check if the instance is active or not. */ - bool is_active(Instance *instance); + void process_instance(Instance *instance); +private: /* - Return state of the given instance list node. The pointer must specify - a valid list node. + LOCK_guardian protects the members in this section: + - shutdown_requested; + - stopped; + + Also, it is used for COND_guardian. */ - inline enum_instance_state get_instance_state(LIST *instance_node); -protected: - /* Main funtion of the thread */ - virtual void run(); + pthread_mutex_t LOCK_guardian; -public: + /* + Guardian's main loop waits on this condition. So, it should be signalled + each time, when instance state has been changed and we want Guardian to + wake up. + + TODO: Change this to having data-scoped conditions, i.e. conditions, + which indicate that some data has been changed. 
+ */ pthread_cond_t COND_guardian; -private: - /* Prepares Guardian shutdown. Stops instances is needed */ - int stop_instances(); - /* check instance state and act accordingly */ - void process_instance(Instance *instance, GUARD_NODE *current_node, - LIST **guarded_instances, LIST *elem); + /* + This variable is set to TRUE, when Manager thread is shutting down. + The flag is used by Guardian thread to understand that it's time to + finish. + */ + bool shutdown_requested; + + /* + This flag is set to TRUE on shutdown by Guardian thread, when all guarded + mysqlds are stopped. - int stopped; + The flag is used in the Manager thread to wait for Guardian to stop all + mysqlds. + */ + bool stopped; -private: - pthread_mutex_t LOCK_guardian; Thread_info thread_info; - int monitoring_interval; Thread_registry *thread_registry; Instance_map *instance_map; - LIST *guarded_instances; - MEM_ROOT alloc; - /* this variable is set to TRUE when we want to stop Guardian thread */ - bool shutdown_requested; -}; - -inline Guardian::enum_instance_state -Guardian::get_instance_state(LIST *instance_node) -{ - return ((GUARD_NODE *) instance_node->data)->state; -} +private: + Guardian(const Guardian &); + Guardian&operator =(const Guardian &); +}; #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */ diff --git a/server-tools/instance-manager/instance.cc b/server-tools/instance-manager/instance.cc index 6b4289c5b29..b852a8323e5 100644 --- a/server-tools/instance-manager/instance.cc +++ b/server-tools/instance-manager/instance.cc @@ -36,7 +36,9 @@ #include "thread_registry.h" #include "instance_map.h" -/* {{{ Platform-specific functions. */ +/************************************************************************* + {{{ Platform-specific functions. 
+*************************************************************************/ #ifndef __WIN__ typedef pid_t My_process_info; @@ -45,34 +47,6 @@ typedef PROCESS_INFORMATION My_process_info; #endif /* - Proxy thread is a simple way to avoid all pitfalls of the threads - implementation in the OS (e.g. LinuxThreads). With such a thread we - don't have to process SIGCHLD, which is a tricky business if we want - to do it in a portable way. -*/ - -class Instance_monitor: public Thread -{ -public: - Instance_monitor(Instance *instance_arg) :instance(instance_arg) {} -protected: - virtual void run(); - void start_and_monitor_instance(Instance_options *old_instance_options, - Instance_map *instance_map, - Thread_registry *thread_registry); -private: - Instance *instance; -}; - -void Instance_monitor::run() -{ - start_and_monitor_instance(&instance->options, - Manager::get_instance_map(), - Manager::get_thread_registry()); - delete this; -} - -/* Wait for an instance SYNOPSIS @@ -285,113 +259,149 @@ int kill(pid_t pid, int signum) } #endif -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ + -/* {{{ Static constants. */ +/************************************************************************* + {{{ Static constants. +*************************************************************************/ const LEX_STRING Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") }; -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ -/* - Fork child, exec an instance and monitor it. +/************************************************************************* + {{{ Instance Monitor thread. 
+*************************************************************************/ - SYNOPSIS - start_and_monitor_instance() - old_instance_options Pointer to the options of the instance to be - launched. This info is likely to become obsolete - when function returns from wait_process() - instance_map Pointer to the instance_map. We use it to protect - the instance from deletion, while we are working - with it. +/** + Proxy thread is a simple way to avoid all pitfalls of the threads + implementation in the OS (e.g. LinuxThreads). With such a thread we + don't have to process SIGCHLD, which is a tricky business if we want + to do it in a portable way. - DESCRIPTION - Fork a child, then exec and monitor it. When the child is dead, - find appropriate instance (for this purpose we save its name), - set appropriate flags and wake all threads waiting for instance - to stop. - - NOTE - A separate thread for starting/monitoring instance is a simple way - to avoid all pitfalls of the threads implementation in the OS (e.g. - LinuxThreads). For one, with such a thread we don't have to process - SIGCHLD, which is a tricky business if we want to do it in a - portable way. + Instance Monitor Thread forks a child process, execs mysqld and waits for + the child to die. - RETURN - Function returns no value + Instance Monitor assumes that the monitoring instance will not be dropped. + This is guaranteed by having flag monitoring_thread_active and + Instance::is_active() operation. 
*/ -void -Instance_monitor:: -start_and_monitor_instance(Instance_options *old_instance_options, - Instance_map *instance_map, - Thread_registry *thread_registry) +class Instance_monitor: public Thread { - Instance_name instance_name(&old_instance_options->instance_name); - Instance *current_instance; - My_process_info process_info; - Thread_info thread_info; +public: + Instance_monitor(Instance *instance_arg) :instance(instance_arg) {} +protected: + virtual void run(); + void start_and_monitor_instance(); +private: + Instance *instance; +}; + + +void Instance_monitor::run() +{ + start_and_monitor_instance(); + delete this; +} + + +void Instance_monitor::start_and_monitor_instance() +{ + Thread_registry *thread_registry= Manager::get_thread_registry(); + Guardian *guardian= Manager::get_guardian(); + + My_process_info mysqld_process_info; + Thread_info monitor_thread_info; log_info("Instance '%s': Monitor: started.", (const char *) instance->get_name()->str); - if (!old_instance_options->nonguarded) - { - /* - Register thread in Thread_registry to wait for it to stop on shutdown - only if instance is guarded. If instance is guarded, the thread will not - finish, because nonguarded instances are not stopped on shutdown. - */ - thread_registry->register_thread(&thread_info, FALSE); - } - /* - Lock instance map to guarantee that no instances are deleted during - strmake() and execv() calls. + For guarded instance register the thread in Thread_registry to wait for + the thread to stop on shutdown (nonguarded instances are not stopped on + shutdown, so the thread will not finish). */ - instance_map->lock(); - /* - Save the instance name in the case if Instance object we - are using is destroyed. (E.g. by "FLUSH INSTANCES") - */ + if (instance->is_guarded()) + { + thread_registry->register_thread(&monitor_thread_info, FALSE); + } + + /* Starting mysqld. 
*/ log_info("Instance '%s': Monitor: starting mysqld...", (const char *) instance->get_name()->str); - if (start_process(old_instance_options, &process_info)) + if (start_process(&instance->options, &mysqld_process_info)) { - instance_map->unlock(); - return; /* error is logged */ + instance->lock(); + instance->monitoring_thread_active= FALSE; + instance->unlock(); + + return; } - /* allow users to delete instances */ - instance_map->unlock(); + /* Waiting for mysqld to die. */ log_info("Instance '%s': Monitor: waiting for mysqld to stop...", (const char *) instance->get_name()->str); - wait_process(&process_info); /* Don't check for return value. */ + wait_process(&mysqld_process_info); /* Don't check for return value. */ - instance_map->lock(); + log_info("Instance '%s': Monitor: mysqld stopped.", + (const char *) instance->get_name()->str); - current_instance= instance_map->find(instance_name.get_str()); + /* Update instance status. */ - if (current_instance) - current_instance->set_crash_flag_n_wake_all(); + instance->lock(); - instance_map->unlock(); + if (instance->is_guarded()) + thread_registry->unregister_thread(&monitor_thread_info); - if (!old_instance_options->nonguarded) - thread_registry->unregister_thread(&thread_info); + instance->crashed= TRUE; + instance->monitoring_thread_active= FALSE; log_info("Instance '%s': Monitor: finished.", (const char *) instance->get_name()->str); + + instance->unlock(); + + /* Wake up guardian. */ + + guardian->ping(); } +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Static operations. +**************************************************************************/ + +/** + The operation is intended to check whether string is a well-formed + instance name or not. 
+ + SYNOPSIS + is_name_valid() + name string to check + + RETURN + TRUE string is a valid instance name + FALSE string is not a valid instance name + + TODO: Move to Instance_name class: Instance_name::is_valid(). +*/ bool Instance::is_name_valid(const LEX_STRING *name) { @@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name) } +/** + The operation is intended to check if the given instance name is + mysqld-compatible or not. + + SYNOPSIS + is_mysqld_compatible_name() + name name to check + + RETURN + TRUE name is mysqld-compatible + FALSE otherwise + + TODO: Move to Instance_name class: Instance_name::is_mysqld_compatible(). +*/ + bool Instance::is_mysqld_compatible_name(const LEX_STRING *name) { return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0; } +/** + Return client state name. Must not be used outside the class. + Use Instance::get_state_name() instead. +*/ + +const char * Instance::get_instance_state_name(enum_instance_state state) +{ + switch (state) { + case STOPPED: + return "offline"; + + case NOT_STARTED: + return "not started"; + + case STARTING: + return "starting"; + + case STARTED: + return "online"; + + case JUST_CRASHED: + return "failed"; + + case CRASHED: + return "crashed"; -/* {{{ Constructor & destructor */ + case CRASHED_AND_ABANDONED: + return "abandoned"; + + case STOPPING: + return "stopping"; + } + + return NULL; /* just to ignore compiler warning. */ +} + +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Initialization & deinitialization. 
+**************************************************************************/ Instance::Instance() - :crashed(FALSE), - configured(FALSE) + :monitoring_thread_active(FALSE), + crashed(FALSE), + configured(FALSE), + /* mysqld_compatible is initialized in init() */ + state(NOT_STARTED), + restart_counter(0), + crash_moment(0), + last_checked(0) { pthread_mutex_init(&LOCK_instance, 0); - pthread_cond_init(&COND_instance_stopped, 0); } @@ -427,13 +499,11 @@ Instance::~Instance() { log_info("Instance '%s': destroying...", (const char *) get_name()->str); - pthread_cond_destroy(&COND_instance_stopped); pthread_mutex_destroy(&LOCK_instance); } -/* }}} */ -/* +/** Initialize instance options. SYNOPSIS @@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg) } -/* +/** Complete instance options initialization. SYNOPSIS @@ -474,7 +544,47 @@ bool Instance::complete_initialization() */ } -/* +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Instance: public interface implementation. +**************************************************************************/ + +/** + Determine if there is some activity with the instance. + + SYNOPSIS + is_active() + + DESCRIPTION + An instance is active if one of the following conditions is true: + - Instance-monitoring thread is running; + - Instance is guarded and its state is other than STOPPED; + - Corresponding mysqld-server accepts connections. + + MT-NOTE: instance must be locked before calling the operation. + + RETURN + TRUE - instance is active + FALSE - otherwise. +*/ + +bool Instance::is_active() +{ + if (monitoring_thread_active) + return TRUE; + + if (is_guarded() && get_state() != STOPPED) + return TRUE; + + return is_mysqld_running(); +} + + +/** Determine if mysqld is accepting connections. 
SYNOPSIS @@ -484,7 +594,7 @@ bool Instance::complete_initialization() Try to connect to mysqld with fake login/password to check whether it is accepting connections or not. - MT-NOTE: this operation must be called under acquired LOCK_instance. + MT-NOTE: instance must be locked before calling the operation. RETURN TRUE - mysqld is alive and accept connections @@ -508,8 +618,6 @@ bool Instance::is_mysqld_running() if (!port && !options.mysqld_socket) port= SERVER_DEFAULT_PORT; - pthread_mutex_lock(&LOCK_instance); - mysql_init(&mysql); /* try to connect to a server with a fake username/password pair */ if (mysql_real_connect(&mysql, LOCAL_HOST, username, @@ -523,7 +631,6 @@ bool Instance::is_mysqld_running() */ log_error("Instance '%s': was able to log into mysqld.", (const char *) get_name()->str); - pthread_mutex_unlock(&LOCK_instance); return_val= TRUE; /* server is alive */ } else @@ -531,145 +638,145 @@ bool Instance::is_mysqld_running() sizeof(access_denied_message) - 1)); mysql_close(&mysql); - pthread_mutex_unlock(&LOCK_instance); return return_val; } -/* - The method starts an instance. + +/** + Start mysqld. SYNOPSIS - start() + start_mysqld() + + DESCRIPTION + Reset flags and start Instance Monitor thread, which will start mysqld. + + MT-NOTE: instance must be locked before calling the operation. RETURN - 0 ok - ER_CANNOT_START_INSTANCE Cannot start instance - ER_INSTANCE_ALREADY_STARTED The instance on the specified port/socket - is already started + FALSE - ok + TRUE - could not start instance */ -int Instance::start() +bool Instance::start_mysqld() { - /* clear crash flag */ - pthread_mutex_lock(&LOCK_instance); - crashed= FALSE; - pthread_mutex_unlock(&LOCK_instance); + Instance_monitor *instance_monitor; + /* + Prepare instance to start Instance Monitor thread. 
- if (configured && !is_mysqld_running()) - { - Instance_monitor *instance_monitor; - remove_pid(); + NOTE: It's important to set these actions here in order to avoid + race conditions -- these actions must be done under acquired lock on + Instance. + */ - instance_monitor= new Instance_monitor(this); + crashed= FALSE; + monitoring_thread_active= TRUE; - if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED)) - { - delete instance_monitor; - log_error("Instance::start(): failed to create the monitoring thread" - " to start an instance"); - return ER_CANNOT_START_INSTANCE; - } - /* The monitoring thread will delete itself when it's finished. */ + remove_pid(); - return 0; - } + /* Create and start the Instance Monitor thread. */ - /* The instance is started already or misconfigured. */ - return configured ? ER_INSTANCE_ALREADY_STARTED : ER_INSTANCE_MISCONFIGURED; -} + instance_monitor= new Instance_monitor(this); -/* - The method sets the crash flag and wakes all waiters on - COND_instance_stopped and COND_guardian + if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED)) + { + delete instance_monitor; + monitoring_thread_active= FALSE; - SYNOPSIS - set_crash_flag_n_wake_all() + log_error("Instance '%s': can not create instance monitor thread.", + (const char *) get_name()->str); - DESCRIPTION - The method is called when an instance is crashed or terminated. - In the former case it might indicate that guardian probably should - restart it. + return TRUE; + } - RETURN - Function returns no value -*/ + ++restart_counter; -void Instance::set_crash_flag_n_wake_all() -{ - /* set instance state to crashed */ - pthread_mutex_lock(&LOCK_instance); - crashed= TRUE; - pthread_mutex_unlock(&LOCK_instance); + /* The Instance Monitor thread will delete itself when it's finished. */ - /* - Wake connection threads waiting for an instance to stop. This - is needed if a user issued command to stop an instance via - mysql connection. 
This is not the case if Guardian stop the thread. - */ - pthread_cond_signal(&COND_instance_stopped); - /* wake guardian */ - pthread_cond_signal(&Manager::get_guardian()->COND_guardian); + return FALSE; } -/* - Stop an instance. +/** + Stop mysqld. SYNOPSIS - stop() + stop_mysqld() - RETURN: - 0 ok - ER_INSTANCE_IS_NOT_STARTED Looks like the instance it is not started - ER_STOP_INSTANCE mysql_shutdown reported an error -*/ + DESCRIPTION + Try to stop mysqld gracefully. Otherwise kill it with SIGKILL. -int Instance::stop() -{ - struct timespec timeout; - uint waitchild= (uint) DEFAULT_SHUTDOWN_DELAY; + MT-NOTE: instance must be locked before calling the operation. - if (is_mysqld_running()) - { - waitchild= options.get_shutdown_delay(); + RETURN + FALSE - ok + TRUE - could not stop the instance +*/ - kill_mysqld(SIGTERM); - /* sleep on condition to wait for SIGCHLD */ +bool Instance::stop_mysqld() +{ + log_info("Instance '%s': stopping mysqld...", + (const char *) get_name()->str); - timeout.tv_sec= time(NULL) + waitchild; - timeout.tv_nsec= 0; - if (pthread_mutex_lock(&LOCK_instance)) - return ER_STOP_INSTANCE; + kill_mysqld(SIGTERM); - while (options.load_pid() != 0) /* while server isn't stopped */ - { - int status; + if (!wait_for_stop()) + { + log_info("Instance '%s': mysqld stopped gracefully.", + (const char *) get_name()->str); + return FALSE; + } - status= pthread_cond_timedwait(&COND_instance_stopped, - &LOCK_instance, - &timeout); - if (status == ETIMEDOUT || status == ETIME) - break; - } + log_info("Instance '%s': mysqld failed to stop gracefully within %d seconds.", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); - pthread_mutex_unlock(&LOCK_instance); + log_info("Instance'%s': killing mysqld...", + (const char *) get_name()->str); - kill_mysqld(SIGKILL); + kill_mysqld(SIGKILL); - return 0; + if (!wait_for_stop()) + { + log_info("Instance '%s': mysqld has been killed.", + (const char *) get_name()->str); + return FALSE; } - 
return ER_INSTANCE_IS_NOT_STARTED; + log_info("Instance '%s': can not kill mysqld within %d seconds.", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); + + return TRUE; } -/* +/** Send signal to mysqld. SYNOPSIS kill_mysqld() + + DESCRIPTION + Load pid from the pid file and send the given signal to that process. + If the signal is SIGKILL, remove the pid file after sending the signal. + + MT-NOTE: instance must be locked before calling the operation. + + TODO + This is too low-level and OS-specific an operation for a public interface. + Also, it has some implicit behaviour for SIGKILL signal. Probably, we + should have the following public operations instead: + - start_mysqld() -- as is; + - stop_mysqld() -- request mysqld to shutdown gracefully (send SIGTERM); + don't wait for complete shutdown; + - wait_for_stop() (or join_mysqld()) -- wait for mysqld to stop within + time interval; + - kill_mysqld() -- request to terminate mysqld; don't wait for + completion. + These operations should also be used in Guardian to manage instances. */ void Instance::kill_mysqld(int signum) @@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum) } } -/* - Return crashed flag. - SYNOPSIS - is_crashed() - - RETURN - TRUE - mysqld crashed - FALSE - mysqld hasn't crashed yet +/** + Lock instance. */ -bool Instance::is_crashed() +void Instance::lock() { - bool val; pthread_mutex_lock(&LOCK_instance); - val= crashed; +} + + +/** + Unlock instance. +*/ + +void Instance::unlock() +{ pthread_mutex_unlock(&LOCK_instance); - return val; } -/* + +/** + Return instance state name. + + SYNOPSIS + get_state_name() + + DESCRIPTION + The operation returns a user-friendly state name. The operation can be + used both for guarded and non-guarded instances. + + MT-NOTE: instance must be locked before calling the operation. + + TODO: Replace with the static get_state_name(state_code) function. 
+*/ + +const char *Instance::get_state_name() +{ + if (!is_configured()) + return "misconfigured"; + + if (is_guarded()) + { + /* The instance is managed by Guardian: we can report precise state. */ + + return get_instance_state_name(get_state()); + } + + /* The instance is not managed by Guardian: we can report status only. */ + + return is_active() ? "online" : "offline"; +} + + +/** + Reset statistics. + + SYNOPSIS + reset_stat() + + DESCRIPTION + The operation resets statistics used for guarding the instance. + + MT-NOTE: instance must be locked before calling the operation. + + TODO: Make private. +*/ + +void Instance::reset_stat() +{ + restart_counter= 0; + crash_moment= 0; + last_checked= 0; +} + +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Instance: implementation of private operations. +**************************************************************************/ + +/** Remove pid file. */ @@ -744,3 +915,36 @@ void Instance::remove_pid() (const char *) options.instance_name.str); } } + + +/** + Wait for mysqld to stop within shutdown interval. +*/ + +bool Instance::wait_for_stop() +{ + int start_time= time(NULL); + int finish_time= start_time + options.get_shutdown_delay(); + + log_info("Instance '%s': waiting for mysqld to stop " + "(timeout: %d seconds)...", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); + + while (true) + { + if (options.load_pid() == 0 && !is_mysqld_running()) + return FALSE; + + if (time(NULL) >= finish_time) + return TRUE; + + /* Sleep for 0.3 sec and check again. 
*/ + + my_sleep(300000); + } +} + +/************************************************************************** + }}} +**************************************************************************/ diff --git a/server-tools/instance-manager/instance.h b/server-tools/instance-manager/instance.h index 412d01acc46..5bdd8d61d2f 100644 --- a/server-tools/instance-manager/instance.h +++ b/server-tools/instance-manager/instance.h @@ -30,7 +30,7 @@ class Instance_map; class Thread_registry; -/* +/** Instance_name -- the class represents instance name -- a string of length less than MAX_INSTANCE_NAME_SIZE. @@ -68,72 +68,127 @@ private: class Instance { public: - /* - The following two constants defines name of the default mysqld-instance - ("mysqld"). + /* States of an instance. */ + enum enum_instance_state + { + STOPPED, + NOT_STARTED, + STARTING, + STARTED, + JUST_CRASHED, + CRASHED, + CRASHED_AND_ABANDONED, + STOPPING + }; + +public: + /** + The constant defines name of the default mysqld-instance ("mysqld"). */ static const LEX_STRING DFLT_INSTANCE_NAME; public: - /* - The operation is intended to check whether string is a well-formed - instance name or not. - */ static bool is_name_valid(const LEX_STRING *name); - - /* - The operation is intended to check if the given instance name is - mysqld-compatible or not. - */ static bool is_mysqld_compatible_name(const LEX_STRING *name); public: Instance(); - ~Instance(); + bool init(const LEX_STRING *name_arg); bool complete_initialization(); +public: + bool is_active(); + bool is_mysqld_running(); - int start(); - int stop(); - /* send a signal to the instance */ + + bool start_mysqld(); + bool stop_mysqld(); void kill_mysqld(int signo); - bool is_crashed(); - void set_crash_flag_n_wake_all(); - /* + void lock(); + void unlock(); + + const char *get_state_name(); + + void reset_stat(); + +public: + /** The operation is intended to check if the instance is mysqld-compatible or not. 
*/ inline bool is_mysqld_compatible() const; - /* + /** The operation is intended to check if the instance is configured properly or not. Misconfigured instances are not managed. */ inline bool is_configured() const; + /** + The operation returns TRUE if the instance is guarded and FALSE otherwise. + */ + inline bool is_guarded() const; + + /** + The operation returns name of the instance. + */ inline const LEX_STRING *get_name() const; + /** + The operation returns the current state of the instance. + + NOTE: At the moment should be used only for guarded instances. + */ + inline enum_instance_state get_state() const; + + /** + The operation changes the state of the instance. + + NOTE: At the moment should be used only for guarded instances. + TODO: Make private. + */ + inline void set_state(enum_instance_state new_state); + + /** + The operation returns crashed flag. + */ + inline bool is_crashed(); + public: - enum { DEFAULT_SHUTDOWN_DELAY= 35 }; + /** + This attribute contains instance options. + + TODO: Make private. + */ Instance_options options; private: - /* This attributes is a flag, specifies if the instance has been crashed. */ + /** + monitoring_thread_active is TRUE if there is a thread that monitors the + corresponding mysqld-process. + */ + bool monitoring_thread_active; + + /** + crashed is TRUE when the corresponding mysqld-process has died after + start. + */ bool crashed; - /* - This attribute specifies if the instance is configured properly or not. + /** + configured is TRUE when the instance is configured and FALSE otherwise. Misconfigured instances are not managed. */ bool configured; /* - This attribute specifies whether the instance is mysqld-compatible or not. - Mysqld-compatible instances can contain only mysqld-specific options. - At the moment an instance is mysqld-compatible if its name is "mysqld". + mysqld_compatible specifies whether the instance is mysqld-compatible + or not. 
Mysqld-compatible instances can contain only mysqld-specific + options. At the moment an instance is mysqld-compatible if its name is + "mysqld". The idea is that [mysqld] section should contain only mysqld-specific options (no Instance Manager-specific options) to be readable by mysqld @@ -142,18 +197,36 @@ private: bool mysqld_compatible; /* - Mutex protecting the instance. Currently we use it to avoid the - double start of the instance. This happens when the instance is starting - and we issue the start command once more. + Mutex protecting the instance. */ pthread_mutex_t LOCK_instance; - /* - This condition variable is used to wake threads waiting for instance to - stop in Instance::stop() - */ - pthread_cond_t COND_instance_stopped; - void remove_pid(); +private: + /* Guarded-instance attributes. */ + + /* state of an instance (e.g. STARTED, CRASHED, etc.) */ + enum_instance_state state; + +public: + /* the number of attempts to restart instance (cleaned up at success) */ + int restart_counter; + + /* triggered at a crash */ + time_t crash_moment; + + /* General time field. 
Used to provide timeouts (at shutdown and restart) */ + time_t last_checked; + +private: + static const char *get_instance_state_name(enum_instance_state state); + +private: + void remove_pid(); + + bool wait_for_stop(); + +private: + friend class Instance_monitor; }; @@ -169,9 +242,33 @@ inline bool Instance::is_configured() const } +inline bool Instance::is_guarded() const +{ + return !options.nonguarded; +} + + inline const LEX_STRING *Instance::get_name() const { return &options.instance_name; } + +inline Instance::enum_instance_state Instance::get_state() const +{ + return state; +} + + +inline void Instance::set_state(enum_instance_state new_state) +{ + state= new_state; +} + + +inline bool Instance::is_crashed() +{ + return crashed; +} + #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */ diff --git a/server-tools/instance-manager/instance_map.cc b/server-tools/instance-manager/instance_map.cc index a356e308e44..a9108eae763 100644 --- a/server-tools/instance-manager/instance_map.cc +++ b/server-tools/instance-manager/instance_map.cc @@ -25,26 +25,18 @@ #include <mysql_com.h> #include "buffer.h" -#include "guardian.h" #include "instance.h" #include "log.h" -#include "manager.h" #include "mysqld_error.h" #include "mysql_manager_error.h" #include "options.h" #include "priv.h" -/* - Note: As we are going to suppost different types of connections, - we shouldn't have connection-specific functions. To avoid it we could - put such functions to the Command-derived class instead. - The command could be easily constructed for a specific connection if - we would provide a special factory for each connection. -*/ - C_MODE_START -/* Procedure needed for HASH initialization */ +/** + HASH-routines: get key of instance for storing in hash. 
+*/ static byte* get_instance_key(const byte* u, uint* len, my_bool __attribute__((unused)) t) @@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len, return (byte *) instance->options.instance_name.str; } +/** + HASH-routines: cleanup handler. +*/ + static void delete_instance(void *u) { Instance *instance= (Instance *) u; delete instance; } -/* - The option handler to pass to the process_default_option_files finction. +/** + The option handler to pass to the process_default_option_files function. SYNOPSIS process_option() @@ -96,8 +92,8 @@ static int process_option(void *ctx, const char *group, const char *option) C_MODE_END -/* - Parse option string. +/** + Parse option string. SYNOPSIS parse_option() @@ -137,7 +133,7 @@ static void parse_option(const char *option_str, } -/* +/** Process one option from the configuration file. SYNOPSIS @@ -151,6 +147,10 @@ static void parse_option(const char *option_str, process_option(). The caller ensures proper locking of the instance map object. */ + /* + Process a given option and assign it to appropricate instance. This is + required for the option handler, passed to my_search_option_files(). + */ int Instance_map::process_one_option(const LEX_STRING *group, const char *option) @@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group, } +/** + Instance_map constructor. +*/ + Instance_map::Instance_map() { pthread_mutex_init(&LOCK_instance_map, 0); } +/** + Initialize Instance_map internals. +*/ + bool Instance_map::init() { return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0, get_instance_key, delete_instance, 0); } + +/** + Reset Instance_map data. +*/ + +bool Instance_map::reset() +{ + hash_free(&hash); + return init(); +} + + +/** + Instance_map destructor. 
+*/ + Instance_map::~Instance_map() { - pthread_mutex_lock(&LOCK_instance_map); + lock(); + + /* + NOTE: it's necessary to synchronize on each instance before removal, + because Instance-monitoring thread can still be alive and hold the mutex + (because it is detached and we have no control over it). + */ + + while (true) + { + Iterator it(this); + Instance *instance= it.next(); + + if (!instance) + break; + + instance->lock(); + instance->unlock(); + + remove_instance(instance); + } + hash_free(&hash); - pthread_mutex_unlock(&LOCK_instance_map); + unlock(); + pthread_mutex_destroy(&LOCK_instance_map); } +/** + Lock Instance_map. +*/ + void Instance_map::lock() { pthread_mutex_lock(&LOCK_instance_map); } +/** + Unlock Instance_map. +*/ + void Instance_map::unlock() { pthread_mutex_unlock(&LOCK_instance_map); } -/* - Re-read instance configuration file. - - SYNOPSIS - Instance_map::flush_instances() - DESCRIPTION - This function will: - - clear the current list of instances. This removes both - running and stopped instances. - - load a new instance configuration from the file. - - pass on the new map to the guardian thread: it will start - all instances that are marked `guarded' and not yet started. - Note, as the check whether an instance is started is currently - very simple (returns TRUE if there is a MySQL server running - at the given port), this function has some peculiar - side-effects: - * if the port number of a running instance was changed, the - old instance is forgotten, even if it was running. The new - instance will be started at the new port. - * if the configuration was changed in a way that two - instances swapped their port numbers, the guardian thread - will not notice that and simply report that both instances - are configured successfully and running. - In order to avoid such side effects one should never call - FLUSH INSTANCES without prior stop of all running instances. 
- - NOTE: The operation should be invoked with the following locks acquired: - - Guardian; - - Instance_map; +/** + Check if there is an active instance or not. */ -int Instance_map::flush_instances() -{ - int rc; - - /* - Guardian thread relies on the instance map repository for guarding - instances. This is why refreshing instance map, we need (1) to stop - guardian (2) reload the instance map (3) reinitialize the guardian - with new instances. - */ - hash_free(&hash); - hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0, - get_instance_key, delete_instance, 0); - - rc= load(); - /* don't init guardian if we failed to load instances */ - if (!rc) - guardian->init(); // TODO: check error status. - return rc; -} - - bool Instance_map::is_there_active_instance() { Instance *instance; @@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance() while ((instance= iterator.next())) { - if (guardian->find_instance_node(instance) != NULL || - instance->is_mysqld_running()) - { + bool active_instance_found; + + instance->lock(); + active_instance_found= instance->is_active(); + instance->unlock(); + + if (active_instance_found) return TRUE; - } } return FALSE; } +/** + Add an instance into the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + int Instance_map::add_instance(Instance *instance) { return my_hash_insert(&hash, (byte *) instance); } +/** + Remove instance from the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + int Instance_map::remove_instance(Instance *instance) { return hash_delete(&hash, (byte *) instance); } +/** + Create a new instance and register it in the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. 
+*/ + int Instance_map::create_instance(const LEX_STRING *instance_name, const Named_value_arr *options) { @@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name, } +/** + Return a pointer to the instance or NULL, if there is no such instance. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + Instance * Instance_map::find(const LEX_STRING *name) { return (Instance *) hash_search(&hash, (byte *) name->str, name->length); } +/** + Init instances command line arguments after all options have been loaded. +*/ + bool Instance_map::complete_initialization() { bool mysqld_found; @@ -455,7 +491,10 @@ bool Instance_map::complete_initialization() } -/* load options from config files and create appropriate instance structures */ +/** + Load options from config files and create appropriate instance + structures. +*/ int Instance_map::load() { @@ -505,8 +544,9 @@ int Instance_map::load() } -/*--- Implementaton of the Instance map iterator class ---*/ - +/************************************************************************* + {{{ Instance_map::Iterator implementation. +*************************************************************************/ void Instance_map::Iterator::go_to_first() { @@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next() return NULL; } - -const char *Instance_map::get_instance_state_name(Instance *instance) -{ - LIST *instance_node; - - if (!instance->is_configured()) - return "misconfigured"; - - if ((instance_node= guardian->find_instance_node(instance)) != NULL) - { - /* The instance is managed by Guardian: we can report precise state. */ - - return Guardian::get_instance_state_name( - guardian->get_instance_state(instance_node)); - } - - /* The instance is not managed by Guardian: we can report status only. */ - - return instance->is_mysqld_running() ? 
"online" : "offline"; -} +/************************************************************************* + }}} +*************************************************************************/ -/* +/** Create a new configuration section for mysqld-instance in the config file. SYNOPSIS diff --git a/server-tools/instance-manager/instance_map.h b/server-tools/instance-manager/instance_map.h index 69d225c89f7..cdbac7635c2 100644 --- a/server-tools/instance-manager/instance_map.h +++ b/server-tools/instance-manager/instance_map.h @@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name, const Named_value_arr *options); -/* +/** Instance_map - stores all existing instances */ class Instance_map { public: - /* Instance_map iterator */ + /** + Instance_map iterator + */ + class Iterator { private: @@ -58,79 +61,43 @@ public: void go_to_first(); Instance *next(); }; - friend class Iterator; + public: - /* - Return a pointer to the instance or NULL, if there is no such instance. - MT-NOTE: must be called under acquired lock. - */ Instance *find(const LEX_STRING *name); - /* Clear the configuration cache and reload the configuration file. */ - int flush_instances(); - - /* The operation is used to check if there is an active instance or not. */ bool is_there_active_instance(); void lock(); void unlock(); bool init(); + bool reset(); - /* - Process a given option and assign it to appropricate instance. This is - required for the option handler, passed to my_search_option_files(). - */ - int process_one_option(const LEX_STRING *group, const char *option); + int load(); - /* - Add an instance into the internal hash. + int process_one_option(const LEX_STRING *group, const char *option); - MT-NOTE: the operation must be called under acquired lock. - */ int add_instance(Instance *instance); - /* - Remove instance from the internal hash. - - MT-NOTE: the operation must be called under acquired lock. 
- */ int remove_instance(Instance *instance); - /* - Create a new instance and register it in the internal hash. - - MT-NOTE: the operation must be called under acquired lock. - */ int create_instance(const LEX_STRING *instance_name, const Named_value_arr *options); +public: Instance_map(); ~Instance_map(); - /* - Retrieve client state name of the given instance. - - MT-NOTE: the options must be called under acquired locks of the following - objects: - - Instance_map; - - Guardian; - */ - const char *get_instance_state_name(Instance *instance); - -public: - const char *mysqld_path; - Guardian *guardian; - private: - /* loads options from config files */ - int load(); - /* inits instances argv's after all options have been loaded */ bool complete_initialization(); + private: enum { START_HASH_SIZE = 16 }; pthread_mutex_t LOCK_instance_map; HASH hash; + +private: + friend class Iterator; }; #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */ diff --git a/server-tools/instance-manager/instance_options.h b/server-tools/instance-manager/instance_options.h index 7c1e1a8dcf3..38de839d762 100644 --- a/server-tools/instance-manager/instance_options.h +++ b/server-tools/instance-manager/instance_options.h @@ -46,7 +46,6 @@ public: Instance_options(); ~Instance_options(); - /* fills in argv */ bool complete_initialization(); bool set_option(Named_value *option); diff --git a/server-tools/instance-manager/manager.cc b/server-tools/instance-manager/manager.cc index a002902bd56..da36982f510 100644 --- a/server-tools/instance-manager/manager.cc +++ b/server-tools/instance-manager/manager.cc @@ -37,6 +37,9 @@ #include "user_map.h" +/********************************************************************** + {{{ Platform-specific implementation. 
+**********************************************************************/ #ifndef __WIN__ void set_signals(sigset_t *mask) @@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig) #endif +/********************************************************************** + }}} +**********************************************************************/ + /********************************************************************** - Implementation of checking the actual thread model. + {{{ Implementation of checking the actual thread model. ***********************************************************************/ namespace { /* no-indent */ @@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads) } +/********************************************************************** + }}} +***********************************************************************/ + /********************************************************************** Manager implementation @@ -152,25 +163,37 @@ bool Manager::linux_threads; #endif // __WIN__ +/** + Request shutdown of guardian and threads registered in Thread_registry. + + SYNOPSIS + stop_all_threads() +*/ + void Manager::stop_all_threads() { /* - Let guardian thread know that it should break it's processing cycle, + Let Guardian thread know that it should break it's processing cycle, once it wakes up. */ p_guardian->request_shutdown(); - /* wake guardian */ - pthread_cond_signal(&p_guardian->COND_guardian); - /* stop all threads */ + + /* Stop all threads. */ p_thread_registry->deliver_shutdown(); } -/* - manager - entry point to the main instance manager process: start - listener thread, write pid file and enter into signal handling. - See also comments in mysqlmanager.cc to picture general Instance Manager - architecture. +/** + Main manager function. + + SYNOPSIS + main() + + DESCRIPTION + This is an entry point to the main instance manager process: + start listener thread, write pid file and enter into signal handling. 
+ See also comments in mysqlmanager.cc to picture general Instance Manager + architecture. TODO: how about returning error status. */ @@ -194,22 +217,33 @@ int Manager::main() (const char *) (linux_threads ? "LINUX threads" : "POSIX threads")); #endif // __WIN__ - Thread_registry thread_registry; /* - All objects created in the manager() function live as long as - thread_registry lives, and thread_registry is alive until there are - working threads. + All objects created in the Manager object live as long as thread_registry + lives, and thread_registry is alive until there are working threads. + + There are two main purposes of the Thread Registry: + 1. Interrupt blocking I/O and signal condition variables in case of + shutdown; + 2. Wait for detached threads before shutting down the main thread. + + NOTE: + 1. Handling shutdown can be done in a more elegant manner by introducing + Event (or Condition) object with support of logical operations. + 2. Using Thread Registry to wait for detached threads is definitely not + the best way, because when Thread Registry unregisters a thread, the + thread is still alive. An accurate way to wait for threads to stop is + to avoid detached threads and join all threads before shutdown. */ + Thread_registry thread_registry; User_map user_map; Instance_map instance_map; - Guardian guardian(&thread_registry, &instance_map, - Options::Main::monitoring_interval); + Guardian guardian(&thread_registry, &instance_map); Listener listener(&thread_registry, &user_map); p_instance_map= &instance_map; - p_guardian= instance_map.guardian= &guardian; + p_guardian= &guardian; p_thread_registry= &thread_registry; p_user_map= &user_map; @@ -249,7 +283,7 @@ int Manager::main() } } - /* write Instance Manager pid file */ + /* Write Instance Manager pid file. */ log_info("IM pid file: '%s'; PID: %d.", (const char *) Options::Main::pid_file_name, @@ -290,6 +324,7 @@ int Manager::main() permitted to process instances. 
And before flush_instances() has completed, there are no instances to guard. */ + if (guardian.start(Thread::DETACHED)) { log_error("Can not start Guardian thread."); @@ -298,21 +333,11 @@ int Manager::main() /* Load instances. */ + if (Manager::flush_instances()) { - instance_map.guardian->lock(); - instance_map.lock(); - - int flush_instances_status= instance_map.flush_instances(); - - instance_map.unlock(); - instance_map.guardian->unlock(); - - if (flush_instances_status) - { - log_error("Can not init instances repository."); - stop_all_threads(); - goto err; - } + log_error("Can not init instances repository."); + stop_all_threads(); + goto err; } /* Initialize the Listener. */ @@ -328,7 +353,8 @@ int Manager::main() After the list of guarded instances have been initialized, Guardian should start them. */ - pthread_cond_signal(&guardian.COND_guardian); + + guardian.ping(); /* Main loop. */ @@ -381,7 +407,6 @@ int Manager::main() if (!guardian.is_stopped()) { guardian.request_shutdown(); - pthread_cond_signal(&guardian.COND_guardian); } else { @@ -406,3 +431,64 @@ err: #endif return rc; } + + +/** + Re-read instance configuration file. + + SYNOPSIS + flush_instances() + + DESCRIPTION + This function will: + - clear the current list of instances. This removes both + running and stopped instances. + - load a new instance configuration from the file. + - pass on the new map to the guardian thread: it will start + all instances that are marked `guarded' and not yet started. + + Note, as the check whether an instance is started is currently + very simple (returns TRUE if there is a MySQL server running + at the given port), this function has some peculiar + side-effects: + * if the port number of a running instance was changed, the + old instance is forgotten, even if it was running. The new + instance will be started at the new port. 
+ * if the configuration was changed in a way that two + instances swapped their port numbers, the guardian thread + will not notice that and simply report that both instances + are configured successfully and running. + + In order to avoid such side effects one should never call + FLUSH INSTANCES without prior stop of all running instances. +*/ + +bool Manager::flush_instances() +{ + p_instance_map->lock(); + + if (p_instance_map->is_there_active_instance()) + { + p_instance_map->unlock(); + return TRUE; + } + + if (p_instance_map->reset()) + { + p_instance_map->unlock(); + return TRUE; + } + + if (p_instance_map->load()) + { + p_instance_map->unlock(); + return TRUE; /* Don't init guardian if we failed to load instances. */ + } + + get_guardian()->init(); /* TODO: check error status. */ + get_guardian()->ping(); + + p_instance_map->unlock(); + + return FALSE; +} diff --git a/server-tools/instance-manager/manager.h b/server-tools/instance-manager/manager.h index a77809cca6d..9a00b9320ce 100644 --- a/server-tools/instance-manager/manager.h +++ b/server-tools/instance-manager/manager.h @@ -19,6 +19,7 @@ #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #pragma interface #endif + #include <my_global.h> class Guardian; @@ -30,8 +31,12 @@ class Manager { public: static int main(); + + static bool flush_instances(); + +public: /** - These methods return a non-zero value only for the duration + These methods return a non-NULL value only for the duration of main(). 
*/ static Instance_map *get_instance_map() { return p_instance_map; } @@ -39,6 +44,7 @@ public: static Thread_registry *get_thread_registry() { return p_thread_registry; } static User_map *get_user_map() { return p_user_map; } +public: #ifndef __WIN__ static bool is_linux_threads() { return linux_threads; } #endif // __WIN__ diff --git a/server-tools/instance-manager/user_map.cc b/server-tools/instance-manager/user_map.cc index f3a6e3cd76c..7f34195c0b1 100644 --- a/server-tools/instance-manager/user_map.cc +++ b/server-tools/instance-manager/user_map.cc @@ -42,7 +42,7 @@ int User::init(const char *line) if (name_end == 0 || name_end[1] != ':') { log_error("Invalid format (unmatched quote) of user line (%s).", - (const char *) line); + (const char *) line); return 1; } password= name_end + 2; @@ -54,7 +54,7 @@ int User::init(const char *line) if (name_end == 0) { log_error("Invalid format (no delimiter) of user line (%s).", - (const char *) line); + (const char *) line); return 1; } password= name_end + 1; @@ -64,10 +64,10 @@ int User::init(const char *line) if (user_length > USERNAME_LENGTH) { log_error("User name is too long (%d). Max length: %d. " - "User line: '%s'.", - (int) user_length, - (int) USERNAME_LENGTH, - (const char *) line); + "User line: '%s'.", + (int) user_length, + (int) USERNAME_LENGTH, + (const char *) line); return 1; } @@ -75,10 +75,10 @@ int User::init(const char *line) if (password_length > SCRAMBLED_PASSWORD_CHAR_LENGTH) { log_error("Password is too long (%d). Max length: %d." - "User line: '%s'.", - (int) password_length, - (int) SCRAMBLED_PASSWORD_CHAR_LENGTH, - line); + "User line: '%s'.", + (int) password_length, + (int) SCRAMBLED_PASSWORD_CHAR_LENGTH, + (const char *) line); return 1; } |