diff options
-rw-r--r-- | server-tools/instance-manager/commands.cc | 290 | ||||
-rw-r--r-- | server-tools/instance-manager/commands.h | 104 | ||||
-rw-r--r-- | server-tools/instance-manager/guardian.cc | 543 | ||||
-rw-r--r-- | server-tools/instance-manager/guardian.h | 125 | ||||
-rw-r--r-- | server-tools/instance-manager/instance.cc | 630 | ||||
-rw-r--r-- | server-tools/instance-manager/instance.h | 171 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_map.cc | 217 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_map.h | 61 | ||||
-rw-r--r-- | server-tools/instance-manager/instance_options.h | 1 | ||||
-rw-r--r-- | server-tools/instance-manager/manager.cc | 154 | ||||
-rw-r--r-- | server-tools/instance-manager/manager.h | 8 | ||||
-rw-r--r-- | server-tools/instance-manager/user_map.cc | 20 |
12 files changed, 1368 insertions, 956 deletions
diff --git a/server-tools/instance-manager/commands.cc b/server-tools/instance-manager/commands.cc index 15738f8ebb3..6cc1c8d3047 100644 --- a/server-tools/instance-manager/commands.cc +++ b/server-tools/instance-manager/commands.cc @@ -29,6 +29,7 @@ #include "guardian.h" #include "instance_map.h" #include "log.h" +#include "manager.h" #include "messages.h" #include "mysqld_error.h" #include "mysql_manager_error.h" @@ -36,8 +37,11 @@ #include "priv.h" #include "protocol.h" +/************************************************************************** + {{{ Static functions. +**************************************************************************/ -/* +/** modify_defaults_to_im_error -- a map of error codes of mysys::modify_defaults_file() into Instance Manager error codes. */ @@ -46,38 +50,25 @@ static const int modify_defaults_to_im_error[]= { 0, ER_OUT_OF_RESOURCES, ER_ACCESS_OPTION_FILE }; -/* - Add a string to a buffer. +/** + Parse version number from the version string. SYNOPSIS - put_to_buff() - buff buffer to add the string - str string to add - position offset in the buff to add a string + parse_version_number() + version_str + version + version_size DESCRIPTION + TODO - Function to add a string to the buffer. It is different from - store_to_protocol_packet, which is used in the protocol.cc. - The last one also stores the length of the string in a special way. - This is required for MySQL client/server protocol support only. + TODO: Move this function to Instance_options and parse version number + only once. - RETURN - 0 - ok - 1 - error occured + NOTE: This function is used only in SHOW INSTANCE STATUS statement at the + moment. */ -static inline int put_to_buff(Buffer *buff, const char *str, uint *position) -{ - uint len= strlen(str); - if (buff->append(*position, str, len)) - return 1; - - *position+= len; - return 0; -} - - static int parse_version_number(const char *version_str, char *version, uint version_size) { @@ -102,6 +93,9 @@ static int parse_version_number(const char *version_str, char *version, return 0; } +/************************************************************************** + }}} +**************************************************************************/ /************************************************************************** Implementation of Instance_name. @@ -122,7 +116,7 @@ Instance_name::Instance_name(const LEX_STRING *name) Implementation of Show_instances. **************************************************************************/ -/* +/** Implementation of SHOW INSTANCES statement. Possible error codes: @@ -172,7 +166,6 @@ int Show_instances::write_data(st_net *net) Instance *instance; Instance_map::Iterator iterator(instance_map); - instance_map->guardian->lock(); instance_map->lock(); while ((instance= iterator.next())) @@ -180,20 +173,25 @@ int Show_instances::write_data(st_net *net) Buffer send_buf; /* buffer for packets */ uint pos= 0; + instance->lock(); + const char *instance_name= instance->options.instance_name.str; - const char *state_name= instance_map->get_instance_state_name(instance); + const char *state_name= instance->get_state_name(); if (store_to_protocol_packet(&send_buf, instance_name, &pos) || store_to_protocol_packet(&send_buf, state_name, &pos) || my_net_write(net, send_buf.buffer, pos)) { err_status= TRUE; - break; } + + instance->unlock(); + + if (err_status) + break; } instance_map->unlock(); - instance_map->guardian->unlock(); return err_status ? ER_OUT_OF_RESOURCES : 0; } @@ -203,7 +201,7 @@ int Show_instances::write_data(st_net *net) Implementation of Flush_instances. **************************************************************************/ -/* +/** Implementation of FLUSH INSTANCES statement. Possible error codes: @@ -213,36 +211,19 @@ int Show_instances::write_data(st_net *net) int Flush_instances::execute(st_net *net, ulong connection_id) { - instance_map->guardian->lock(); - instance_map->lock(); - - if (instance_map->is_there_active_instance()) - { - instance_map->unlock(); - instance_map->guardian->unlock(); - return ER_THERE_IS_ACTIVE_INSTACE; - } - - if (instance_map->flush_instances()) - { - instance_map->unlock(); - instance_map->guardian->unlock(); + if (Manager::flush_instances()) return ER_OUT_OF_RESOURCES; - } - - instance_map->unlock(); - instance_map->guardian->unlock(); return net_send_ok(net, connection_id, NULL) ? ER_OUT_OF_RESOURCES : 0; } /************************************************************************** - Implementation of Abstract_instance_cmd. + Implementation of Instance_cmd. **************************************************************************/ -Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg) - :instance_name(instance_name_arg) +Instance_cmd::Instance_cmd(const LEX_STRING *instance_name_arg): + instance_name(instance_name_arg) { /* MT-NOTE: we can not make a search for Instance object here, @@ -251,26 +232,39 @@ Abstract_instance_cmd::Abstract_instance_cmd(const LEX_STRING *instance_name_arg } +/************************************************************************** + Implementation of Abstract_instance_cmd. +**************************************************************************/ + +Abstract_instance_cmd::Abstract_instance_cmd( + const LEX_STRING *instance_name_arg) + :Instance_cmd(instance_name_arg) +{ +} + + int Abstract_instance_cmd::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; instance_map->lock(); - { - Instance *instance= instance_map->find(get_instance_name()); - - if (!instance) - { - instance_map->unlock(); - return ER_BAD_INSTANCE_NAME; - } + instance= instance_map->find(get_instance_name()); - err_code= execute_impl(net, instance); + if (!instance) + { + instance_map->unlock(); + return ER_BAD_INSTANCE_NAME; } + instance->lock(); instance_map->unlock(); + err_code= execute_impl(net, instance); + + instance->unlock(); + if (!err_code) err_code= send_ok_response(net, connection_id); @@ -288,7 +282,7 @@ Show_instance_status::Show_instance_status(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of SHOW INSTANCE STATUS statement. Possible error codes: @@ -363,19 +357,14 @@ int Show_instance_status::write_data(st_net *net, Instance *instance) char version_num_buf[MAX_VERSION_LENGTH]; uint pos= 0; - const char *state_name; + const char *state_name= instance->get_state_name(); const char *version_tag= "unknown"; const char *version_num= "unknown"; - const char *mysqld_compatible_status; - - instance_map->guardian->lock(); - state_name= instance_map->get_instance_state_name(instance); - mysqld_compatible_status= instance->is_mysqld_compatible() ? "yes" : "no"; - instance_map->guardian->unlock(); + const char *mysqld_compatible_status= + instance->is_mysqld_compatible() ? "yes" : "no"; if (instance->options.mysqld_version) { - if (parse_version_number(instance->options.mysqld_version, version_num_buf, sizeof(version_num_buf))) return ER_OUT_OF_RESOURCES; @@ -409,7 +398,7 @@ Show_instance_options::Show_instance_options( } -/* +/** Implementation of SHOW INSTANCE OPTIONS statement. Possible error codes: @@ -505,23 +494,33 @@ Start_instance::Start_instance(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of START INSTANCE statement. Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist - ER_OUT_OF_RESOURCES Not enough resources to complete the operation + ER_INSTANCE_MISCONFIGURED The instance configuration is invalid + ER_INSTANCE_ALREADY_STARTED The instance is already started + ER_CANNOT_START_INSTANCE The instance could not have been started + + TODO: as soon as this method operates only with Instance, we probably + should introduce a new method (execute_stop_instance()) in Instance and + just call it from here. */ int Start_instance::execute_impl(st_net * /* net */, Instance *instance) { - int err_code; + if (!instance->is_configured()) + return ER_INSTANCE_MISCONFIGURED; - if ((err_code= instance->start())) - return err_code; + if (instance->is_active()) + return ER_INSTANCE_ALREADY_STARTED; + + if (instance->start_mysqld()) + return ER_CANNOT_START_INSTANCE; - if (!(instance->options.nonguarded)) - instance_map->guardian->guard(instance); + instance->reset_stat(); + instance->set_state(Instance::NOT_STARTED); return 0; } @@ -546,25 +545,26 @@ Stop_instance::Stop_instance(const LEX_STRING *instance_name_arg) } -/* +/** Implementation of STOP INSTANCE statement. Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist ER_OUT_OF_RESOURCES Not enough resources to complete the operation + + TODO: as soon as this method operates only with Instance, we probably + should introduce a new method (execute_stop_instance()) in Instance and + just call it from here. */ int Stop_instance::execute_impl(st_net * /* net */, Instance *instance) { - int err_code; + if (!instance->is_active()) + return ER_INSTANCE_IS_NOT_STARTED; - if (!(instance->options.nonguarded)) - instance_map->guardian->stop_guard(instance); + instance->set_state(Instance::STOPPED); - if ((err_code= instance->stop())) - return err_code; - - return 0; + return instance->stop_mysqld() ? ER_STOP_INSTANCE : 0; } @@ -582,12 +582,12 @@ int Stop_instance::send_ok_response(st_net *net, ulong connection_id) **************************************************************************/ Create_instance::Create_instance(const LEX_STRING *instance_name_arg) - :instance_name(instance_name_arg) + :Instance_cmd(instance_name_arg) { } -/* +/** This operation initializes Create_instance object. SYNOPSIS @@ -604,7 +604,7 @@ bool Create_instance::init(const char **text) } -/* +/** This operation parses CREATE INSTANCE options. SYNOPSIS @@ -724,7 +724,7 @@ bool Create_instance::parse_args(const char **text) } -/* +/** Implementation of CREATE INSTANCE statement. Possible error codes: @@ -736,6 +736,7 @@ bool Create_instance::parse_args(const char **text) int Create_instance::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; /* Check that the name is valid and there is no instance with such name. */ @@ -761,17 +762,26 @@ int Create_instance::execute(st_net *net, ulong connection_id) return err_code; } + instance= instance_map->find(get_instance_name()); + DBUG_ASSERT(instance); + if ((err_code= create_instance_in_file(get_instance_name(), &options))) { - Instance *instance= instance_map->find(get_instance_name()); - - if (instance) - instance_map->remove_instance(instance); /* instance is deleted here. */ + instance_map->remove_instance(instance); /* instance is deleted here. */ instance_map->unlock(); return err_code; } + /* + CREATE INSTANCE must not lead to start instance, even if it guarded. + + TODO: The problem however is that if Instance Manager restarts after + creating instance, the instance will be restarted (see also BUG#19718). + */ + + instance->set_state(Instance::STOPPED); + /* That's all. */ instance_map->unlock(); @@ -790,12 +800,12 @@ int Create_instance::execute(st_net *net, ulong connection_id) **************************************************************************/ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) - :Abstract_instance_cmd(instance_name_arg) + :Instance_cmd(instance_name_arg) { } -/* +/** Implementation of DROP INSTANCE statement. Possible error codes: @@ -804,14 +814,38 @@ Drop_instance::Drop_instance(const LEX_STRING *instance_name_arg) ER_OUT_OF_RESOURCES Not enough resources to complete the operation */ -int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) +int Drop_instance::execute(st_net *net, ulong connection_id) { int err_code; + Instance *instance; + + /* Lock Guardian, then Instance_map. */ + + instance_map->lock(); + + /* Find an instance. */ + + instance= instance_map->find(get_instance_name()); + + if (!instance) + { + instance_map->unlock(); + return ER_BAD_INSTANCE_NAME; + } + + instance->lock(); /* Check that the instance is offline. */ - if (instance_map->guardian->is_active(instance)) + if (instance->is_active()) + { + instance->unlock(); + instance_map->unlock(); + return ER_DROP_ACTIVE_INSTANCE; + } + + /* Try to remove instance from the file. */ err_code= modify_defaults_file(Options::Main::config_file, NULL, NULL, get_instance_name()->str, MY_REMOVE_SECTION); @@ -824,27 +858,30 @@ int Drop_instance::execute_impl(st_net * /* net */, Instance *instance) (const char *) get_instance_name()->str, (const char *) Options::Main::config_file, (int) err_code); - } - if (err_code) + instance->unlock(); + instance_map->unlock(); + return modify_defaults_to_im_error[err_code]; + } - /* Remove instance from the instance map hash and Guardian's list. */ + /* Unlock the instance before destroy. */ - if (!instance->options.nonguarded) - instance_map->guardian->stop_guard(instance); + instance->unlock(); - if ((err_code= instance->stop())) - return err_code; + /* + Remove instance from the instance map + (the instance will be also destroyed here). + */ instance_map->remove_instance(instance); - return 0; -} + /* Unlock the instance map. */ + instance_map->unlock(); + + /* That's all: send ok. */ -int Drop_instance::send_ok_response(st_net *net, ulong connection_id) -{ if (net_send_ok(net, connection_id, "Instance dropped")) return ER_OUT_OF_RESOURCES; @@ -867,7 +904,7 @@ Show_instance_log::Show_instance_log(const LEX_STRING *instance_name_arg, } -/* +/** Implementation of SHOW INSTANCE LOG statement. Possible error codes: @@ -1012,7 +1049,7 @@ Show_instance_log_files::Show_instance_log_files } -/* +/** Implementation of SHOW INSTANCE LOG FILES statement. Possible error codes: @@ -1133,7 +1170,7 @@ int Show_instance_log_files::write_data(st_net *net, Instance *instance) Implementation of Abstract_option_cmd. **************************************************************************/ -/* +/** Instance_options_list -- a data class representing a list of options for some instance. */ @@ -1251,7 +1288,7 @@ bool Abstract_option_cmd::init(const char **text) } -/* +/** Correct the option file. The "skip" option is used to remove the found option. @@ -1290,8 +1327,8 @@ int Abstract_option_cmd::correct_file(Instance *instance, Named_value *option, } -/* - Implementation of SET statement. +/** + Lock Instance Map and call execute_impl(). Possible error codes: ER_BAD_INSTANCE_NAME The instance with the given name does not exist @@ -1341,6 +1378,11 @@ Abstract_option_cmd::get_instance_options_list(const LEX_STRING *instance_name) } +/** + Skeleton implementation of option-management command. + + MT-NOTE: Instance Map is locked before calling this operation. +*/ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) { int err_code= 0; @@ -1352,12 +1394,18 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Instance_options_list *lst= (Instance_options_list *) hash_element(&instance_options_map, i); + bool instance_is_active; + lst->instance= instance_map->find(lst->get_instance_name()); if (!lst->instance) return ER_BAD_INSTANCE_NAME; - if (instance_map->guardian->is_active(lst->instance)) + lst->instance->lock(); + instance_is_active= lst->instance->is_active(); + lst->instance->unlock(); + + if (instance_is_active) return ER_INSTANCE_IS_ACTIVE; } @@ -1368,6 +1416,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Instance_options_list *lst= (Instance_options_list *) hash_element(&instance_options_map, i); + lst->instance->lock(); + for (int j= 0; j < lst->options.get_size(); ++j) { Named_value option= lst->options.get_element(j); @@ -1377,6 +1427,8 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) break; } + lst->instance->unlock(); + if (err_code) break; } @@ -1392,7 +1444,7 @@ int Abstract_option_cmd::execute_impl(st_net *net, ulong connection_id) Implementation of Set_option. **************************************************************************/ -/* +/** This operation parses SET options. SYNOPSIS @@ -1566,7 +1618,7 @@ int Set_option::process_option(Instance *instance, Named_value *option) Implementation of Unset_option. **************************************************************************/ -/* +/** This operation parses UNSET options. SYNOPSIS @@ -1662,7 +1714,7 @@ bool Unset_option::parse_args(const char **text) } -/* +/** Implementation of UNSET statement. Possible error codes: diff --git a/server-tools/instance-manager/commands.h b/server-tools/instance-manager/commands.h index 8768aaab121..9b5d27b0982 100644 --- a/server-tools/instance-manager/commands.h +++ b/server-tools/instance-manager/commands.h @@ -30,7 +30,7 @@ #endif -/* +/** Print all instances of this instance manager. Grammar: SHOW INSTANCES */ @@ -50,7 +50,7 @@ private: }; -/* +/** Reread configuration file and refresh internal cache. Grammar: FLUSH INSTANCES */ @@ -66,11 +66,50 @@ public: }; -/* +/** + Base class for Instance-specific commands + (commands that operate on one instance). + + Instance_cmd extends Command class by: + - an attribute for storing instance name; + - code to initialize instance name in constructor; + - an accessor to get instance name. +*/ + +class Instance_cmd : public Command +{ +public: + Instance_cmd(const LEX_STRING *instance_name_arg); + +protected: + inline const LEX_STRING *get_instance_name() const + { + return instance_name.get_str(); + } + +private: + Instance_name instance_name; +}; + + +/** Abstract class for Instance-specific commands. + + Abstract_instance_cmd extends Instance_cmd by providing a common + framework for writing command-implementations. Basically, the class + implements Command::execute() pure virtual function in the following + way: + - Lock Instance_map; + - Get an instance by name. Return an error, if there is no such + instance; + - Lock the instance; + - Unlock Instance_map; + - Call execute_impl(), which should be implemented in derived class; + - Unlock the instance; + - Send response to the client and return error status. */ -class Abstract_instance_cmd: public Command +class Abstract_instance_cmd: public Instance_cmd { public: Abstract_instance_cmd(const LEX_STRING *instance_name_arg); @@ -79,29 +118,24 @@ public: virtual int execute(st_net *net, ulong connection_id); protected: - /* MT-NOTE: this operation is called under acquired Instance_map's lock. */ + /** + This operation is intended to contain command-specific implementation. + + MT-NOTE: this operation is called under acquired Instance's lock. + */ virtual int execute_impl(st_net *net, Instance *instance) = 0; - /* + /** This operation is invoked on successful return of execute_impl() and is intended to send closing data. - MT-NOTE: this operation is called under released Instance_map's lock. + MT-NOTE: this operation is called under released Instance's lock. */ virtual int send_ok_response(st_net *net, ulong connection_id) = 0; - -protected: - inline const LEX_STRING *get_instance_name() const - { - return instance_name.get_str(); - } - -private: - Instance_name instance_name; }; -/* +/** Print status of an instance. Grammar: SHOW INSTANCE STATUS <instance_name> */ @@ -121,7 +155,7 @@ private: }; -/* +/** Print options of chosen instance. Grammar: SHOW INSTANCE OPTIONS <instance_name> */ @@ -141,7 +175,7 @@ private: }; -/* +/** Start an instance. Grammar: START INSTANCE <instance_name> */ @@ -157,7 +191,7 @@ protected: }; -/* +/** Stop an instance. Grammar: STOP INSTANCE <instance_name> */ @@ -173,12 +207,12 @@ protected: }; -/* +/** Create an instance. Grammar: CREATE INSTANCE <instance_name> [<options>] */ -class Create_instance: public Command +class Create_instance: public Instance_cmd { public: Create_instance(const LEX_STRING *instance_name_arg); @@ -189,22 +223,15 @@ public: protected: virtual int execute(st_net *net, ulong connection_id); - inline const LEX_STRING *get_instance_name() const - { - return instance_name.get_str(); - } - private: bool parse_args(const char **text); private: - Instance_name instance_name; - Named_value_arr options; }; -/* +/** Drop an instance. Grammar: DROP INSTANCE <instance_name> @@ -213,18 +240,17 @@ private: is removed from the instance map. */ -class Drop_instance: public Abstract_instance_cmd +class Drop_instance: public Instance_cmd { public: Drop_instance(const LEX_STRING *instance_name_arg); protected: - virtual int execute_impl(st_net *net, Instance *instance); - virtual int send_ok_response(st_net *net, ulong connection_id); + virtual int execute(st_net *net, ulong connection_id); }; -/* +/** Print requested part of the log. Grammar: SHOW <instance_name> LOG {ERROR | SLOW | GENERAL} size[, offset_from_end] @@ -252,7 +278,7 @@ private: }; -/* +/** Shows the list of the log files, used by an instance. Grammar: SHOW <instance_name> LOG FILES */ @@ -272,7 +298,7 @@ private: }; -/* +/** Abstract class for option-management commands. */ @@ -312,7 +338,7 @@ private: }; -/* +/** Set an option for the instance. Grammar: SET instance_name.option[=option_value][, ...] */ @@ -329,7 +355,7 @@ protected: }; -/* +/** Remove option of the instance. Grammar: UNSET instance_name.option[, ...] */ @@ -346,7 +372,7 @@ protected: }; -/* +/** Syntax error command. This command is issued if parser reported a syntax error. We need it to diff --git a/server-tools/instance-manager/guardian.cc b/server-tools/instance-manager/guardian.cc index e601ce0111c..1b451cd9933 100644 --- a/server-tools/instance-manager/guardian.cc +++ b/server-tools/instance-manager/guardian.cc @@ -28,101 +28,126 @@ #include "instance_map.h" #include "log.h" #include "mysql_manager_error.h" +#include "options.h" -const char * -Guardian::get_instance_state_name(enum_instance_state state) -{ - switch (state) { - case NOT_STARTED: - return "offline"; - - case STARTING: - return "starting"; - - case STARTED: - return "online"; - case JUST_CRASHED: - return "failed"; +/************************************************************************* + {{{ Constructor & destructor. +*************************************************************************/ - case CRASHED: - return "crashed"; - - case CRASHED_AND_ABANDONED: - return "abandoned"; - - case STOPPING: - return "stopping"; - } +/** + Guardian constructor. - return NULL; /* just to ignore compiler warning. */ -} + SYNOPSIS + Guardian() + thread_registry_arg + instance_map_arg -/* {{{ Constructor & destructor. */ + DESCRIPTION + Nominal contructor intended for assigning references and initialize + trivial objects. Real initialization is made by init() method. +*/ Guardian::Guardian(Thread_registry *thread_registry_arg, - Instance_map *instance_map_arg, - uint monitoring_interval_arg) - :stopped(FALSE), - monitoring_interval(monitoring_interval_arg), + Instance_map *instance_map_arg) + :shutdown_requested(FALSE), + stopped(FALSE), thread_registry(thread_registry_arg), - instance_map(instance_map_arg), - shutdown_requested(FALSE) + instance_map(instance_map_arg) { pthread_mutex_init(&LOCK_guardian, 0); pthread_cond_init(&COND_guardian, 0); - init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0); } Guardian::~Guardian() { - /* delay guardian destruction to the moment when no one needs it */ - pthread_mutex_lock(&LOCK_guardian); - free_root(&alloc, MYF(0)); - pthread_mutex_unlock(&LOCK_guardian); + /* + NOTE: it's necessary to synchronize here, because Guiardian thread can be + still alive an hold the mutex (because it is detached and we have no + control over it). + */ + + lock(); + unlock(); + pthread_mutex_destroy(&LOCK_guardian); pthread_cond_destroy(&COND_guardian); } -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ +/** + Send request to stop Guardian. + + SYNOPSIS + request_shutdown() +*/ + void Guardian::request_shutdown() { - pthread_mutex_lock(&LOCK_guardian); - /* STOP Instances or just clean up Guardian repository */ stop_instances(); + + lock(); shutdown_requested= TRUE; - pthread_mutex_unlock(&LOCK_guardian); + unlock(); + + ping(); } -void Guardian::process_instance(Instance *instance, - GUARD_NODE *current_node, - LIST **guarded_instances, - LIST *node) +/** + Process an instance. + + SYNOPSIS + process_instance() + instance a pointer to the instance for processing + + MT-NOTE: + - the given instance must be locked before calling this operation; + - Guardian must be locked before calling this operation. +*/ + +void Guardian::process_instance(Instance *instance) { - uint waitchild= (uint) Instance::DEFAULT_SHUTDOWN_DELAY; - /* The amount of times, Guardian attempts to restart an instance */ int restart_retry= 100; time_t current_time= time(NULL); - if (current_node->state == STOPPING) + if (instance->get_state() == Instance::STOPPING) { - waitchild= instance->options.get_shutdown_delay(); + /* This brach is executed during shutdown. */ - /* this returns TRUE if and only if an instance was stopped for sure */ + /* This returns TRUE if and only if an instance was stopped for sure. */ if (instance->is_crashed()) - *guarded_instances= list_delete(*guarded_instances, node); - else if ( (uint) (current_time - current_node->last_checked) > waitchild) { + log_info("Guardian: '%s' stopped.", + (const char *) instance->get_name()->str); + + instance->set_state(Instance::STOPPED); + } + else if ((uint) (current_time - instance->last_checked) >= + instance->options.get_shutdown_delay()) + { + log_info("Guardian: '%s' hasn't stopped within %d secs.", + (const char *) instance->get_name()->str, + (int) instance->options.get_shutdown_delay()); + instance->kill_mysqld(SIGKILL); - /* - Later we do node= node->next. This is ok, as we are only removing - the node from the list. The pointer to the next one is still valid. - */ - *guarded_instances= list_delete(*guarded_instances, node); + + log_info("Guardian: pretend that '%s' is killed.", + (const char *) instance->get_name()->str); + + instance->set_state(Instance::STOPPED); + } + else + { + log_info("Guardian: waiting for '%s' to stop (%d secs left).", + (const char *) instance->get_name()->str, + (int) (instance->options.get_shutdown_delay() - + current_time + instance->last_checked)); } return; @@ -133,83 +158,90 @@ void Guardian::process_instance(Instance *instance, /* The instance can be contacted on it's port */ /* If STARTING also check that pidfile has been created */ - if (current_node->state == STARTING && - current_node->instance->options.load_pid() == 0) + if (instance->get_state() == Instance::STARTING && + instance->options.load_pid() == 0) { /* Pid file not created yet, don't go to STARTED state yet */ } - else if (current_node->state != STARTED) + else if (instance->get_state() != Instance::STARTED) { /* clear status fields */ log_info("Guardian: '%s' is running, set state to STARTED.", (const char *) instance->options.instance_name.str); - current_node->restart_counter= 0; - current_node->crash_moment= 0; - current_node->state= STARTED; + instance->reset_stat(); + instance->set_state(Instance::STARTED); } } else { - switch (current_node->state) { - case NOT_STARTED: + switch (instance->get_state()) { + case Instance::NOT_STARTED: log_info("Guardian: starting '%s'...", (const char *) instance->options.instance_name.str); - /* NOTE, set state to STARTING _before_ start() is called */ - current_node->state= STARTING; - instance->start(); - current_node->last_checked= current_time; - break; - case STARTED: /* fallthrough */ - case STARTING: /* let the instance start or crash */ - if (instance->is_crashed()) - { - current_node->crash_moment= current_time; - current_node->last_checked= current_time; - current_node->state= JUST_CRASHED; - /* fallthrough -- restart an instance immediately */ - } - else - break; - case JUST_CRASHED: - if (current_time - current_node->crash_moment <= 2) + /* NOTE: set state to STARTING _before_ start() is called. */ + instance->set_state(Instance::STARTING); + instance->last_checked= current_time; + + instance->start_mysqld(); + + return; + + case Instance::STARTED: /* fallthrough */ + case Instance::STARTING: /* let the instance start or crash */ + if (!instance->is_crashed()) + return; + + instance->crash_moment= current_time; + instance->last_checked= current_time; + instance->set_state(Instance::JUST_CRASHED); + /* fallthrough -- restart an instance immediately */ + + case Instance::JUST_CRASHED: + if (current_time - instance->crash_moment <= 2) { if (instance->is_crashed()) { - instance->start(); + instance->start_mysqld(); log_info("Guardian: starting '%s'...", (const char *) instance->options.instance_name.str); } } else - current_node->state= CRASHED; - break; - case CRASHED: /* just regular restarts */ - if (current_time - current_node->last_checked > - monitoring_interval) + instance->set_state(Instance::CRASHED); + + return; + + case Instance::CRASHED: /* just regular restarts */ + if (current_time - instance->last_checked <= + Options::Main::monitoring_interval) + return; + + if (instance->restart_counter < restart_retry) { - if ((current_node->restart_counter < restart_retry)) - { - if (instance->is_crashed()) - { - instance->start(); - current_node->last_checked= current_time; - current_node->restart_counter++; - log_info("Guardian: restarting '%s'...", - (const char *) instance->options.instance_name.str); - } - } - else + if (instance->is_crashed()) { - log_info("Guardian: can not start '%s'. " - "Abandoning attempts to (re)start it", + instance->start_mysqld(); + instance->last_checked= current_time; + + log_info("Guardian: restarting '%s'...", (const char *) instance->options.instance_name.str); - current_node->state= CRASHED_AND_ABANDONED; } } - break; - case CRASHED_AND_ABANDONED: - break; /* do nothing */ + else + { + log_info("Guardian: can not start '%s'. " + "Abandoning attempts to (re)start it", + (const char *) instance->options.instance_name.str); + + instance->set_state(Instance::CRASHED_AND_ABANDONED); + } + + return; + + case Instance::CRASHED_AND_ABANDONED: + return; /* do nothing */ + default: DBUG_ASSERT(0); } @@ -217,56 +249,78 @@ void Guardian::process_instance(Instance *instance, } -/* +/** Main function of Guardian thread. SYNOPSIS run() DESCRIPTION - Check for all guarded instances and restart them if needed. If everything - is fine go and sleep for some time. + Check for all guarded instances and restart them if needed. */ void Guardian::run() { - Instance *instance; - LIST *node; struct timespec timeout; log_info("Guardian: started."); thread_registry->register_thread(&thread_info); - pthread_mutex_lock(&LOCK_guardian); + /* Loop, until all instances were shut down at the end. */ - /* loop, until all instances were shut down at the end */ - while (!(shutdown_requested && (guarded_instances == NULL))) + while (true) { - node= guarded_instances; + Instance_map::Iterator instances_it(instance_map); + Instance *instance; + bool all_instances_stopped= TRUE; + + instance_map->lock(); - while (node != NULL) + while ((instance= instances_it.next())) { - GUARD_NODE *current_node= (GUARD_NODE *) node->data; - instance= ((GUARD_NODE *) node->data)->instance; - process_instance(instance, current_node, &guarded_instances, node); + instance->lock(); - node= node->next; + if (!instance->is_guarded() || + instance->get_state() == Instance::STOPPED) + { + instance->unlock(); + continue; + } + + process_instance(instance); + + if (instance->get_state() != Instance::STOPPED) + all_instances_stopped= FALSE; + + instance->unlock(); } - timeout.tv_sec= time(NULL) + monitoring_interval; + + instance_map->unlock(); + + lock(); + + if (shutdown_requested && all_instances_stopped) + { + log_info("Guardian: all guarded mysqlds stopped."); + + stopped= TRUE; + unlock(); + break; + } + + timeout.tv_sec= time(NULL) + Options::Main::monitoring_interval; timeout.tv_nsec= 0; - /* check the loop predicate before sleeping */ - if (!(shutdown_requested && (!(guarded_instances)))) - thread_registry->cond_timedwait(&thread_info, &COND_guardian, - &LOCK_guardian, &timeout); + thread_registry->cond_timedwait(&thread_info, &COND_guardian, + &LOCK_guardian, &timeout); + unlock(); } log_info("Guardian: stopped."); - stopped= TRUE; - pthread_mutex_unlock(&LOCK_guardian); - /* now, when the Guardian is stopped we can stop the IM */ + /* Now, when the Guardian is stopped we can stop the IM. */ + thread_registry->unregister_thread(&thread_info); thread_registry->request_shutdown(); @@ -274,129 +328,65 @@ void Guardian::run() } -int Guardian::is_stopped() +/** + Return the value of stopped flag. +*/ + +bool Guardian::is_stopped() { int var; - pthread_mutex_lock(&LOCK_guardian); + + lock(); var= stopped; - pthread_mutex_unlock(&LOCK_guardian); + unlock(); + return var; } -/* - Initialize the list of guarded instances: loop through the Instance_map and - add all of the instances, which don't have 'nonguarded' option specified. - - SYNOPSIS - Guardian::init() +/** + Wake up Guardian thread. - NOTE: The operation should be invoked with the following locks acquired: - - Guardian; - - Instance_map; - - RETURN - 0 - ok - 1 - error occurred + MT-NOTE: though usually the mutex associated with condition variable should + be acquired before signalling the variable, here this is not needed. + Signalling under locked mutex is used to avoid lost signals. In the current + logic however locking mutex does not guarantee that the signal will not be + lost. */ -int Guardian::init() +void Guardian::ping() { - Instance *instance; - Instance_map::Iterator iterator(instance_map); - - /* clear the list of guarded instances */ - free_root(&alloc, MYF(0)); - init_alloc_root(&alloc, MEM_ROOT_BLOCK_SIZE, 0); - guarded_instances= NULL; - - while ((instance= iterator.next())) - { - if (instance->options.nonguarded) - continue; - - if (guard(instance, TRUE)) /* do not lock guardian */ - return 1; - } - - return 0; + pthread_cond_signal(&COND_guardian); } -/* - Add instance to the Guardian list +/** + Prepare list of instances. SYNOPSIS - guard() - instance the instance to be guarded - nolock whether we prefer do not lock Guardian here, - but use external locking instead - - DESCRIPTION - - The instance is added to the guarded instances list. Usually guard() is - called after we start an instance. + init() - RETURN - 0 - ok - 1 - error occurred + MT-NOTE: Instance Map must be locked before calling the operation. */ -int Guardian::guard(Instance *instance, bool nolock) +void Guardian::init() { - LIST *node; - GUARD_NODE *content; - - node= (LIST *) alloc_root(&alloc, sizeof(LIST)); - content= (GUARD_NODE *) alloc_root(&alloc, sizeof(GUARD_NODE)); - - if ((!(node)) || (!(content))) - return 1; - /* we store the pointers to instances from the instance_map's MEM_ROOT */ - content->instance= instance; - content->restart_counter= 0; - content->crash_moment= 0; - content->state= NOT_STARTED; - node->data= (void*) content; - - if (nolock) - guarded_instances= list_add(guarded_instances, node); - else - { - pthread_mutex_lock(&LOCK_guardian); - guarded_instances= list_add(guarded_instances, node); - pthread_mutex_unlock(&LOCK_guardian); - } - - return 0; -} - - -/* - TODO: perhaps it would make sense to create a pool of the LIST nodeents - and give them upon request. Now we are loosing a bit of memory when - guarded instance was stopped and then restarted (since we cannot free just - a piece of the MEM_ROOT). -*/ - -int Guardian::stop_guard(Instance *instance) -{ - LIST *node; - - pthread_mutex_lock(&LOCK_guardian); - - node= find_instance_node(instance); + Instance *instance; + Instance_map::Iterator iterator(instance_map); - if (node != NULL) - guarded_instances= list_delete(guarded_instances, node); + while ((instance= iterator.next())) + { + instance->lock(); - pthread_mutex_unlock(&LOCK_guardian); + instance->reset_stat(); + instance->set_state(Instance::NOT_STARTED); - /* if there is nothing to delete it is also fine */ - return 0; + instance->unlock(); + } } -/* + +/** An internal method which is called at shutdown to unregister instances and attempt to stop them if requested. @@ -409,86 +399,71 @@ int Guardian::stop_guard(Instance *instance) accordingly. NOTE - Guardian object should be locked by the calling function. + Guardian object should be locked by the caller. - RETURN - 0 - ok - 1 - error occurred */ -int Guardian::stop_instances() +void Guardian::stop_instances() { - LIST *node; - node= guarded_instances; - while (node != NULL) + Instance_map::Iterator instances_it(instance_map); + Instance *instance; + + instance_map->lock(); + + while ((instance= instances_it.next())) { - GUARD_NODE *current_node= (GUARD_NODE *) node->data; + instance->lock(); + + if (!instance->is_guarded() || + instance->get_state() == Instance::STOPPED) + { + instance->unlock(); + continue; + } + /* If instance is running or was running (and now probably hanging), request stop. */ - if (current_node->instance->is_mysqld_running() || - (current_node->state == STARTED)) + + if (instance->is_mysqld_running() || + instance->get_state() == Instance::STARTED) { - current_node->state= STOPPING; - current_node->last_checked= time(NULL); + instance->set_state(Instance::STOPPING); + instance->last_checked= time(NULL); } else - /* otherwise remove it from the list */ - guarded_instances= list_delete(guarded_instances, node); - /* But try to kill it anyway. Just in case */ - current_node->instance->kill_mysqld(SIGTERM); - node= node->next; + { + /* Otherwise mark it as STOPPED. */ + instance->set_state(Instance::STOPPED); + } + + /* Request mysqld to stop. */ + + instance->kill_mysqld(SIGTERM); + + instance->unlock(); } - return 0; + + instance_map->unlock(); } +/** + Lock Guardian. +*/ + void Guardian::lock() { pthread_mutex_lock(&LOCK_guardian); } +/** + Unlock Guardian. +*/ + void Guardian::unlock() { pthread_mutex_unlock(&LOCK_guardian); } - - -LIST *Guardian::find_instance_node(Instance *instance) -{ - LIST *node= guarded_instances; - - while (node != NULL) - { - /* - We compare only pointers, as we always use pointers from the - instance_map's MEM_ROOT. - */ - if (((GUARD_NODE *) node->data)->instance == instance) - return node; - - node= node->next; - } - - return NULL; -} - - -bool Guardian::is_active(Instance *instance) -{ - bool guarded; - - lock(); - - guarded= find_instance_node(instance) != NULL; - - /* is_running() can take a long time, so let's unlock mutex first. */ - unlock(); - - if (guarded) - return true; - - return instance->is_mysqld_running(); -} diff --git a/server-tools/instance-manager/guardian.h b/server-tools/instance-manager/guardian.h index 0eee1dc631d..2c7987f4565 100644 --- a/server-tools/instance-manager/guardian.h +++ b/server-tools/instance-manager/guardian.h @@ -17,10 +17,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "thread_registry.h" +#include <my_global.h> #include <my_sys.h> #include <my_list.h> +#include "thread_registry.h" + #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #pragma interface #endif @@ -28,7 +30,6 @@ class Instance; class Instance_map; class Thread_registry; -struct GUARD_NODE; /** The guardian thread is responsible for monitoring and restarting of guarded @@ -38,97 +39,73 @@ struct GUARD_NODE; class Guardian: public Thread { public: - /* states of an instance */ - enum enum_instance_state { NOT_STARTED= 1, STARTING, STARTED, JUST_CRASHED, - CRASHED, CRASHED_AND_ABANDONED, STOPPING }; - - /* - The Guardian list node structure. Guardian utilizes it to store - guarded instances plus some additional info. - */ + Guardian(Thread_registry *thread_registry_arg, + Instance_map *instance_map_arg); + ~Guardian(); - struct GUARD_NODE - { - Instance *instance; - /* state of an instance (i.e. STARTED, CRASHED, etc.) */ - enum_instance_state state; - /* the amount of attemts to restart instance (cleaned up at success) */ - int restart_counter; - /* triggered at a crash */ - time_t crash_moment; - /* General time field. Used to provide timeouts (at shutdown and restart) */ - time_t last_checked; - }; - - /* Return client state name. */ - static const char *get_instance_state_name(enum_instance_state state); + void init(); - Guardian(Thread_registry *thread_registry_arg, - Instance_map *instance_map_arg, - uint monitoring_interval_arg); - virtual ~Guardian(); - /* Initialize or refresh the list of guarded instances */ - int init(); - /* Request guardian shutdown. Stop instances if needed */ +public: void request_shutdown(); - /* Start instance protection */ - int guard(Instance *instance, bool nolock= FALSE); - /* Stop instance protection */ - int stop_guard(Instance *instance); - /* Returns TRUE if guardian thread is stopped */ - int is_stopped(); + + bool is_stopped(); + void lock(); void unlock(); - /* - Return an internal list node for the given instance if the instance is - managed by Guardian. Otherwise, return NULL. + void ping(); - MT-NOTE: must be called under acquired lock. - */ - LIST *find_instance_node(Instance *instance); +protected: + virtual void run(); + +private: + void stop_instances(); - /* The operation is used to check if the instance is active or not. */ - bool is_active(Instance *instance); + void process_instance(Instance *instance); +private: /* - Return state of the given instance list node. The pointer must specify - a valid list node. + LOCK_guardian protectes the members in this section: + - shutdown_requested; + - stopped; + + Also, it is used for COND_guardian. */ - inline enum_instance_state get_instance_state(LIST *instance_node); -protected: - /* Main funtion of the thread */ - virtual void run(); + pthread_mutex_t LOCK_guardian; -public: + /* + Guardian's main loop waits on this condition. So, it should be signalled + each time, when instance state has been changed and we want Guardian to + wake up. + + TODO: Change this to having data-scoped conditions, i.e. conditions, + which indicate that some data has been changed. + */ pthread_cond_t COND_guardian; -private: - /* Prepares Guardian shutdown. Stops instances is needed */ - int stop_instances(); - /* check instance state and act accordingly */ - void process_instance(Instance *instance, GUARD_NODE *current_node, - LIST **guarded_instances, LIST *elem); + /* + This variable is set to TRUE, when Manager thread is shutting down. + The flag is used by Guardian thread to understand that it's time to + finish. + */ + bool shutdown_requested; + + /* + This flag is set to TRUE on shutdown by Guardian thread, when all guarded + mysqlds are stopped. - int stopped; + The flag is used in the Manager thread to wait for Guardian to stop all + mysqlds. + */ + bool stopped; -private: - pthread_mutex_t LOCK_guardian; Thread_info thread_info; - int monitoring_interval; Thread_registry *thread_registry; Instance_map *instance_map; - LIST *guarded_instances; - MEM_ROOT alloc; - /* this variable is set to TRUE when we want to stop Guardian thread */ - bool shutdown_requested; -}; - -inline Guardian::enum_instance_state -Guardian::get_instance_state(LIST *instance_node) -{ - return ((GUARD_NODE *) instance_node->data)->state; -} +private: + Guardian(const Guardian &); + Guardian&operator =(const Guardian &); +}; #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_GUARDIAN_H */ diff --git a/server-tools/instance-manager/instance.cc b/server-tools/instance-manager/instance.cc index 6b4289c5b29..b852a8323e5 100644 --- a/server-tools/instance-manager/instance.cc +++ b/server-tools/instance-manager/instance.cc @@ -36,7 +36,9 @@ #include "thread_registry.h" #include "instance_map.h" -/* {{{ Platform-specific functions. */ +/************************************************************************* + {{{ Platform-specific functions. +*************************************************************************/ #ifndef __WIN__ typedef pid_t My_process_info; @@ -45,34 +47,6 @@ typedef PROCESS_INFORMATION My_process_info; #endif /* - Proxy thread is a simple way to avoid all pitfalls of the threads - implementation in the OS (e.g. LinuxThreads). With such a thread we - don't have to process SIGCHLD, which is a tricky business if we want - to do it in a portable way. -*/ - -class Instance_monitor: public Thread -{ -public: - Instance_monitor(Instance *instance_arg) :instance(instance_arg) {} -protected: - virtual void run(); - void start_and_monitor_instance(Instance_options *old_instance_options, - Instance_map *instance_map, - Thread_registry *thread_registry); -private: - Instance *instance; -}; - -void Instance_monitor::run() -{ - start_and_monitor_instance(&instance->options, - Manager::get_instance_map(), - Manager::get_thread_registry()); - delete this; -} - -/* Wait for an instance SYNOPSIS @@ -285,113 +259,149 @@ int kill(pid_t pid, int signum) } #endif -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ + -/* {{{ Static constants. */ +/************************************************************************* + {{{ Static constants. +*************************************************************************/ const LEX_STRING Instance::DFLT_INSTANCE_NAME= { C_STRING_WITH_LEN("mysqld") }; -/* }}} */ +/************************************************************************* + }}} +*************************************************************************/ -/* - Fork child, exec an instance and monitor it. +/************************************************************************* + {{{ Instance Monitor thread. +*************************************************************************/ - SYNOPSIS - start_and_monitor_instance() - old_instance_options Pointer to the options of the instance to be - launched. This info is likely to become obsolete - when function returns from wait_process() - instance_map Pointer to the instance_map. We use it to protect - the instance from deletion, while we are working - with it. +/** + Proxy thread is a simple way to avoid all pitfalls of the threads + implementation in the OS (e.g. LinuxThreads). With such a thread we + don't have to process SIGCHLD, which is a tricky business if we want + to do it in a portable way. - DESCRIPTION - Fork a child, then exec and monitor it. When the child is dead, - find appropriate instance (for this purpose we save its name), - set appropriate flags and wake all threads waiting for instance - to stop. - - NOTE - A separate thread for starting/monitoring instance is a simple way - to avoid all pitfalls of the threads implementation in the OS (e.g. - LinuxThreads). For one, with such a thread we don't have to process - SIGCHLD, which is a tricky business if we want to do it in a - portable way. + Instance Monitor Thread forks a child process, execs mysqld and waits for + the child to die. - RETURN - Function returns no value + Instance Monitor assumes that the monitoring instance will not be dropped. + This is guaranteed by having flag monitoring_thread_active and + Instance::is_active() operation. */ -void -Instance_monitor:: -start_and_monitor_instance(Instance_options *old_instance_options, - Instance_map *instance_map, - Thread_registry *thread_registry) +class Instance_monitor: public Thread { - Instance_name instance_name(&old_instance_options->instance_name); - Instance *current_instance; - My_process_info process_info; - Thread_info thread_info; +public: + Instance_monitor(Instance *instance_arg) :instance(instance_arg) {} +protected: + virtual void run(); + void start_and_monitor_instance(); +private: + Instance *instance; +}; + + +void Instance_monitor::run() +{ + start_and_monitor_instance(); + delete this; +} + + +void Instance_monitor::start_and_monitor_instance() +{ + Thread_registry *thread_registry= Manager::get_thread_registry(); + Guardian *guardian= Manager::get_guardian(); + + My_process_info mysqld_process_info; + Thread_info monitor_thread_info; log_info("Instance '%s': Monitor: started.", (const char *) instance->get_name()->str); - if (!old_instance_options->nonguarded) - { - /* - Register thread in Thread_registry to wait for it to stop on shutdown - only if instance is guarded. If instance is guarded, the thread will not - finish, because nonguarded instances are not stopped on shutdown. - */ - thread_registry->register_thread(&thread_info, FALSE); - } - /* - Lock instance map to guarantee that no instances are deleted during - strmake() and execv() calls. + For guarded instance register the thread in Thread_registry to wait for + the thread to stop on shutdown (nonguarded instances are not stopped on + shutdown, so the thread will no finish). */ - instance_map->lock(); - /* - Save the instance name in the case if Instance object we - are using is destroyed. (E.g. by "FLUSH INSTANCES") - */ + if (instance->is_guarded()) + { + thread_registry->register_thread(&monitor_thread_info, FALSE); + } + + /* Starting mysqld. */ log_info("Instance '%s': Monitor: starting mysqld...", (const char *) instance->get_name()->str); - if (start_process(old_instance_options, &process_info)) + if (start_process(&instance->options, &mysqld_process_info)) { - instance_map->unlock(); - return; /* error is logged */ + instance->lock(); + instance->monitoring_thread_active= FALSE; + instance->unlock(); + + return; } - /* allow users to delete instances */ - instance_map->unlock(); + /* Waiting for mysqld to die. */ log_info("Instance '%s': Monitor: waiting for mysqld to stop...", (const char *) instance->get_name()->str); - wait_process(&process_info); /* Don't check for return value. */ + wait_process(&mysqld_process_info); /* Don't check for return value. */ - instance_map->lock(); + log_info("Instance '%s': Monitor: mysqld stopped.", + (const char *) instance->get_name()->str); - current_instance= instance_map->find(instance_name.get_str()); + /* Update instance status. */ - if (current_instance) - current_instance->set_crash_flag_n_wake_all(); + instance->lock(); - instance_map->unlock(); + if (instance->is_guarded()) + thread_registry->unregister_thread(&monitor_thread_info); - if (!old_instance_options->nonguarded) - thread_registry->unregister_thread(&thread_info); + instance->crashed= TRUE; + instance->monitoring_thread_active= FALSE; log_info("Instance '%s': Monitor: finished.", (const char *) instance->get_name()->str); + + instance->unlock(); + + /* Wake up guardian. */ + + guardian->ping(); } +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Static operations. +**************************************************************************/ + +/** + The operation is intended to check whether string is a well-formed + instance name or not. + + SYNOPSIS + is_name_valid() + name string to check + + RETURN + TRUE string is a valid instance name + FALSE string is not a valid instance name + + TODO: Move to Instance_name class: Instance_name::is_valid(). +*/ bool Instance::is_name_valid(const LEX_STRING *name) { @@ -405,21 +415,83 @@ bool Instance::is_name_valid(const LEX_STRING *name) } +/** + The operation is intended to check if the given instance name is + mysqld-compatible or not. + + SYNOPSIS + is_mysqld_compatible_name() + name name to check + + RETURN + TRUE name is mysqld-compatible + FALSE otherwise + + TODO: Move to Instance_name class: Instance_name::is_mysqld_compatible(). +*/ + bool Instance::is_mysqld_compatible_name(const LEX_STRING *name) { return strcmp(name->str, DFLT_INSTANCE_NAME.str) == 0; } +/** + Return client state name. Must not be used outside the class. + Use Instance::get_state_name() instead. +*/ + +const char * Instance::get_instance_state_name(enum_instance_state state) +{ + switch (state) { + case STOPPED: + return "offline"; + + case NOT_STARTED: + return "not started"; + + case STARTING: + return "starting"; + + case STARTED: + return "online"; + + case JUST_CRASHED: + return "failed"; + + case CRASHED: + return "crashed"; -/* {{{ Constructor & destructor */ + case CRASHED_AND_ABANDONED: + return "abandoned"; + + case STOPPING: + return "stopping"; + } + + return NULL; /* just to ignore compiler warning. */ +} + +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Initialization & deinitialization. +**************************************************************************/ Instance::Instance() - :crashed(FALSE), - configured(FALSE) + :monitoring_thread_active(FALSE), + crashed(FALSE), + configured(FALSE), + /* mysqld_compatible is initialized in init() */ + state(NOT_STARTED), + restart_counter(0), + crash_moment(0), + last_checked(0) { pthread_mutex_init(&LOCK_instance, 0); - pthread_cond_init(&COND_instance_stopped, 0); } @@ -427,13 +499,11 @@ Instance::~Instance() { log_info("Instance '%s': destroying...", (const char *) get_name()->str); - pthread_cond_destroy(&COND_instance_stopped); pthread_mutex_destroy(&LOCK_instance); } -/* }}} */ -/* +/** Initialize instance options. SYNOPSIS @@ -453,7 +523,7 @@ bool Instance::init(const LEX_STRING *name_arg) } -/* +/** Complete instance options initialization. SYNOPSIS @@ -474,7 +544,47 @@ bool Instance::complete_initialization() */ } -/* +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Instance: public interface implementation. +**************************************************************************/ + +/** + Determine if there is some activity with the instance. + + SYNOPSIS + is_active() + + DESCRIPTION + An instance is active if one of the following conditions is true: + - Instance-monitoring thread is running; + - Instance is guarded and its state is other than STOPPED; + - Corresponding mysqld-server accepts connections. + + MT-NOTE: instance must be locked before calling the operation. + + RETURN + TRUE - instance is active + FALSE - otherwise. +*/ + +bool Instance::is_active() +{ + if (monitoring_thread_active) + return TRUE; + + if (is_guarded() && get_state() != STOPPED) + return TRUE; + + return is_mysqld_running(); +} + + +/** Determine if mysqld is accepting connections. SYNOPSIS @@ -484,7 +594,7 @@ bool Instance::complete_initialization() Try to connect to mysqld with fake login/password to check whether it is accepting connections or not. - MT-NOTE: this operation must be called under acquired LOCK_instance. + MT-NOTE: instance must be locked before calling the operation. RETURN TRUE - mysqld is alive and accept connections @@ -508,8 +618,6 @@ bool Instance::is_mysqld_running() if (!port && !options.mysqld_socket) port= SERVER_DEFAULT_PORT; - pthread_mutex_lock(&LOCK_instance); - mysql_init(&mysql); /* try to connect to a server with a fake username/password pair */ if (mysql_real_connect(&mysql, LOCAL_HOST, username, @@ -523,7 +631,6 @@ bool Instance::is_mysqld_running() */ log_error("Instance '%s': was able to log into mysqld.", (const char *) get_name()->str); - pthread_mutex_unlock(&LOCK_instance); return_val= TRUE; /* server is alive */ } else @@ -531,145 +638,145 @@ bool Instance::is_mysqld_running() sizeof(access_denied_message) - 1)); mysql_close(&mysql); - pthread_mutex_unlock(&LOCK_instance); return return_val; } -/* - The method starts an instance. + +/** + Start mysqld. SYNOPSIS - start() + start_mysqld() + + DESCRIPTION + Reset flags and start Instance Monitor thread, which will start mysqld. + + MT-NOTE: instance must be locked before calling the operation. RETURN - 0 ok - ER_CANNOT_START_INSTANCE Cannot start instance - ER_INSTANCE_ALREADY_STARTED The instance on the specified port/socket - is already started + FALSE - ok + TRUE - could not start instance */ -int Instance::start() +bool Instance::start_mysqld() { - /* clear crash flag */ - pthread_mutex_lock(&LOCK_instance); - crashed= FALSE; - pthread_mutex_unlock(&LOCK_instance); + Instance_monitor *instance_monitor; + /* + Prepare instance to start Instance Monitor thread. - if (configured && !is_mysqld_running()) - { - Instance_monitor *instance_monitor; - remove_pid(); + NOTE: It's important to set these actions here in order to avoid + race conditions -- these actions must be done under acquired lock on + Instance. + */ - instance_monitor= new Instance_monitor(this); + crashed= FALSE; + monitoring_thread_active= TRUE; - if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED)) - { - delete instance_monitor; - log_error("Instance::start(): failed to create the monitoring thread" - " to start an instance"); - return ER_CANNOT_START_INSTANCE; - } - /* The monitoring thread will delete itself when it's finished. */ + remove_pid(); - return 0; - } + /* Create and start the Instance Monitor thread. */ - /* The instance is started already or misconfigured. */ - return configured ? ER_INSTANCE_ALREADY_STARTED : ER_INSTANCE_MISCONFIGURED; -} + instance_monitor= new Instance_monitor(this); -/* - The method sets the crash flag and wakes all waiters on - COND_instance_stopped and COND_guardian + if (instance_monitor == NULL || instance_monitor->start(Thread::DETACHED)) + { + delete instance_monitor; + monitoring_thread_active= FALSE; - SYNOPSIS - set_crash_flag_n_wake_all() + log_error("Instance '%s': can not create instance monitor thread.", + (const char *) get_name()->str); - DESCRIPTION - The method is called when an instance is crashed or terminated. - In the former case it might indicate that guardian probably should - restart it. + return TRUE; + } - RETURN - Function returns no value -*/ + ++restart_counter; -void Instance::set_crash_flag_n_wake_all() -{ - /* set instance state to crashed */ - pthread_mutex_lock(&LOCK_instance); - crashed= TRUE; - pthread_mutex_unlock(&LOCK_instance); + /* The Instance Monitor thread will delete itself when it's finished. */ - /* - Wake connection threads waiting for an instance to stop. This - is needed if a user issued command to stop an instance via - mysql connection. This is not the case if Guardian stop the thread. - */ - pthread_cond_signal(&COND_instance_stopped); - /* wake guardian */ - pthread_cond_signal(&Manager::get_guardian()->COND_guardian); + return FALSE; } -/* - Stop an instance. +/** + Stop mysqld. SYNOPSIS - stop() + stop_mysqld() - RETURN: - 0 ok - ER_INSTANCE_IS_NOT_STARTED Looks like the instance it is not started - ER_STOP_INSTANCE mysql_shutdown reported an error -*/ + DESCRIPTION + Try to stop mysqld gracefully. Otherwise kill it with SIGKILL. -int Instance::stop() -{ - struct timespec timeout; - uint waitchild= (uint) DEFAULT_SHUTDOWN_DELAY; + MT-NOTE: instance must be locked before calling the operation. - if (is_mysqld_running()) - { - waitchild= options.get_shutdown_delay(); + RETURN + FALSE - ok + TRUE - could not stop the instance +*/ - kill_mysqld(SIGTERM); - /* sleep on condition to wait for SIGCHLD */ +bool Instance::stop_mysqld() +{ + log_info("Instance '%s': stopping mysqld...", + (const char *) get_name()->str); - timeout.tv_sec= time(NULL) + waitchild; - timeout.tv_nsec= 0; - if (pthread_mutex_lock(&LOCK_instance)) - return ER_STOP_INSTANCE; + kill_mysqld(SIGTERM); - while (options.load_pid() != 0) /* while server isn't stopped */ - { - int status; + if (!wait_for_stop()) + { + log_info("Instance '%s': mysqld stopped gracefully.", + (const char *) get_name()->str); + return FALSE; + } - status= pthread_cond_timedwait(&COND_instance_stopped, - &LOCK_instance, - &timeout); - if (status == ETIMEDOUT || status == ETIME) - break; - } + log_info("Instance '%s': mysqld failed to stop gracefully within %d seconds.", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); - pthread_mutex_unlock(&LOCK_instance); + log_info("Instance'%s': killing mysqld...", + (const char *) get_name()->str); - kill_mysqld(SIGKILL); + kill_mysqld(SIGKILL); - return 0; + if (!wait_for_stop()) + { + log_info("Instance '%s': mysqld has been killed.", + (const char *) get_name()->str); + return FALSE; } - return ER_INSTANCE_IS_NOT_STARTED; + log_info("Instance '%s': can not kill mysqld within %d seconds.", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); + + return TRUE; } -/* +/** Send signal to mysqld. SYNOPSIS kill_mysqld() + + DESCRIPTION + Load pid from the pid file and send the given signal to that process. + If the signal is SIGKILL, remove the pid file after sending the signal. + + MT-NOTE: instance must be locked before calling the operation. + + TODO + This too low-level and OS-specific operation for public interface. + Also, it has some implicit behaviour for SIGKILL signal. Probably, we + should have the following public operations instead: + - start_mysqld() -- as is; + - stop_mysqld -- request mysqld to shutdown gracefully (send SIGTERM); + don't wait for complete shutdown; + - wait_for_stop() (or join_mysqld()) -- wait for mysqld to stop within + time interval; + - kill_mysqld() -- request to terminate mysqld; don't wait for + completion. + These operations should also be used in Guardian to manage instances. */ void Instance::kill_mysqld(int signum) @@ -707,27 +814,91 @@ void Instance::kill_mysqld(int signum) } } -/* - Return crashed flag. - SYNOPSIS - is_crashed() - - RETURN - TRUE - mysqld crashed - FALSE - mysqld hasn't crashed yet +/** + Lock instance. */ -bool Instance::is_crashed() +void Instance::lock() { - bool val; pthread_mutex_lock(&LOCK_instance); - val= crashed; +} + + +/** + Unlock instance. +*/ + +void Instance::unlock() +{ pthread_mutex_unlock(&LOCK_instance); - return val; } -/* + +/** + Return instance state name. + + SYNOPSIS + get_state_name() + + DESCRIPTION + The operation returns user-friendly state name. The operation can be + used both for guarded and non-guarded instances. + + MT-NOTE: instance must be locked before calling the operation. + + TODO: Replace with the static get_state_name(state_code) function. +*/ + +const char *Instance::get_state_name() +{ + if (!is_configured()) + return "misconfigured"; + + if (is_guarded()) + { + /* The instance is managed by Guardian: we can report precise state. */ + + return get_instance_state_name(get_state()); + } + + /* The instance is not managed by Guardian: we can report status only. */ + + return is_active() ? "online" : "offline"; +} + + +/** + Reset statistics. + + SYNOPSIS + reset_stat() + + DESCRIPTION + The operation resets statistics used for guarding the instance. + + MT-NOTE: instance must be locked before calling the operation. + + TODO: Make private. +*/ + +void Instance::reset_stat() +{ + restart_counter= 0; + crash_moment= 0; + last_checked= 0; +} + +/************************************************************************** + }}} +**************************************************************************/ + + +/************************************************************************** + {{{ Instance: implementation of private operations. +**************************************************************************/ + +/** Remove pid file. */ @@ -744,3 +915,36 @@ void Instance::remove_pid() (const char *) options.instance_name.str); } } + + +/** + Wait for mysqld to stop within shutdown interval. +*/ + +bool Instance::wait_for_stop() +{ + int start_time= time(NULL); + int finish_time= start_time + options.get_shutdown_delay(); + + log_info("Instance '%s': waiting for mysqld to stop " + "(timeout: %d seconds)...", + (const char *) get_name()->str, + (int) options.get_shutdown_delay()); + + while (true) + { + if (options.load_pid() == 0 && !is_mysqld_running()) + return FALSE; + + if (time(NULL) >= finish_time) + return TRUE; + + /* Sleep for 0.3 sec and check again. */ + + my_sleep(300000); + } +} + +/************************************************************************** + }}} +**************************************************************************/ diff --git a/server-tools/instance-manager/instance.h b/server-tools/instance-manager/instance.h index 412d01acc46..5bdd8d61d2f 100644 --- a/server-tools/instance-manager/instance.h +++ b/server-tools/instance-manager/instance.h @@ -30,7 +30,7 @@ class Instance_map; class Thread_registry; -/* +/** Instance_name -- the class represents instance name -- a string of length less than MAX_INSTANCE_NAME_SIZE. @@ -68,72 +68,127 @@ private: class Instance { public: - /* - The following two constants defines name of the default mysqld-instance - ("mysqld"). + /* States of an instance. */ + enum enum_instance_state + { + STOPPED, + NOT_STARTED, + STARTING, + STARTED, + JUST_CRASHED, + CRASHED, + CRASHED_AND_ABANDONED, + STOPPING + }; + +public: + /** + The constant defines name of the default mysqld-instance ("mysqld"). */ static const LEX_STRING DFLT_INSTANCE_NAME; public: - /* - The operation is intended to check whether string is a well-formed - instance name or not. - */ static bool is_name_valid(const LEX_STRING *name); - - /* - The operation is intended to check if the given instance name is - mysqld-compatible or not. - */ static bool is_mysqld_compatible_name(const LEX_STRING *name); public: Instance(); - ~Instance(); + bool init(const LEX_STRING *name_arg); bool complete_initialization(); +public: + bool is_active(); + bool is_mysqld_running(); - int start(); - int stop(); - /* send a signal to the instance */ + + bool start_mysqld(); + bool stop_mysqld(); void kill_mysqld(int signo); - bool is_crashed(); - void set_crash_flag_n_wake_all(); - /* + void lock(); + void unlock(); + + const char *get_state_name(); + + void reset_stat(); + +public: + /** The operation is intended to check if the instance is mysqld-compatible or not. */ inline bool is_mysqld_compatible() const; - /* + /** The operation is intended to check if the instance is configured properly or not. Misconfigured instances are not managed. */ inline bool is_configured() const; + /** + The operation returns TRUE if the instance is guarded and FALSE otherwise. + */ + inline bool is_guarded() const; + + /** + The operation returns name of the instance. + */ inline const LEX_STRING *get_name() const; + /** + The operation returns the current state of the instance. + + NOTE: At the moment should be used only for guarded instances. + */ + inline enum_instance_state get_state() const; + + /** + The operation changes the state of the instance. + + NOTE: At the moment should be used only for guarded instances. + TODO: Make private. + */ + inline void set_state(enum_instance_state new_state); + + /** + The operation returns crashed flag. + */ + inline bool is_crashed(); + public: - enum { DEFAULT_SHUTDOWN_DELAY= 35 }; + /** + This attributes contains instance options. + + TODO: Make private. + */ Instance_options options; private: - /* This attributes is a flag, specifies if the instance has been crashed. */ + /** + monitoring_thread_active is TRUE if there is a thread that monitors the + corresponding mysqld-process. + */ + bool monitoring_thread_active; + + /** + crashed is TRUE when corresponding mysqld-process has been died after + start. + */ bool crashed; - /* - This attribute specifies if the instance is configured properly or not. + /** + configured is TRUE when the instance is configured and FALSE otherwise. Misconfigured instances are not managed. */ bool configured; /* - This attribute specifies whether the instance is mysqld-compatible or not. - Mysqld-compatible instances can contain only mysqld-specific options. - At the moment an instance is mysqld-compatible if its name is "mysqld". + mysqld_compatible specifies whether the instance is mysqld-compatible + or not. Mysqld-compatible instances can contain only mysqld-specific + options. At the moment an instance is mysqld-compatible if its name is + "mysqld". The idea is that [mysqld] section should contain only mysqld-specific options (no Instance Manager-specific options) to be readable by mysqld @@ -142,18 +197,36 @@ private: bool mysqld_compatible; /* - Mutex protecting the instance. Currently we use it to avoid the - double start of the instance. This happens when the instance is starting - and we issue the start command once more. + Mutex protecting the instance. */ pthread_mutex_t LOCK_instance; - /* - This condition variable is used to wake threads waiting for instance to - stop in Instance::stop() - */ - pthread_cond_t COND_instance_stopped; - void remove_pid(); +private: + /* Guarded-instance attributes. */ + + /* state of an instance (i.e. STARTED, CRASHED, etc.) */ + enum_instance_state state; + +public: + /* the amount of attemts to restart instance (cleaned up at success) */ + int restart_counter; + + /* triggered at a crash */ + time_t crash_moment; + + /* General time field. Used to provide timeouts (at shutdown and restart) */ + time_t last_checked; + +private: + static const char *get_instance_state_name(enum_instance_state state); + +private: + void remove_pid(); + + bool wait_for_stop(); + +private: + friend class Instance_monitor; }; @@ -169,9 +242,33 @@ inline bool Instance::is_configured() const } +inline bool Instance::is_guarded() const +{ + return !options.nonguarded; +} + + inline const LEX_STRING *Instance::get_name() const { return &options.instance_name; } + +inline Instance::enum_instance_state Instance::get_state() const +{ + return state; +} + + +inline void Instance::set_state(enum_instance_state new_state) +{ + state= new_state; +} + + +inline bool Instance::is_crashed() +{ + return crashed; +} + #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_H */ diff --git a/server-tools/instance-manager/instance_map.cc b/server-tools/instance-manager/instance_map.cc index a356e308e44..a9108eae763 100644 --- a/server-tools/instance-manager/instance_map.cc +++ b/server-tools/instance-manager/instance_map.cc @@ -25,26 +25,18 @@ #include <mysql_com.h> #include "buffer.h" -#include "guardian.h" #include "instance.h" #include "log.h" -#include "manager.h" #include "mysqld_error.h" #include "mysql_manager_error.h" #include "options.h" #include "priv.h" -/* - Note: As we are going to suppost different types of connections, - we shouldn't have connection-specific functions. To avoid it we could - put such functions to the Command-derived class instead. - The command could be easily constructed for a specific connection if - we would provide a special factory for each connection. -*/ - C_MODE_START -/* Procedure needed for HASH initialization */ +/** + HASH-routines: get key of instance for storing in hash. +*/ static byte* get_instance_key(const byte* u, uint* len, my_bool __attribute__((unused)) t) @@ -54,14 +46,18 @@ static byte* get_instance_key(const byte* u, uint* len, return (byte *) instance->options.instance_name.str; } +/** + HASH-routines: cleanup handler. +*/ + static void delete_instance(void *u) { Instance *instance= (Instance *) u; delete instance; } -/* - The option handler to pass to the process_default_option_files finction. +/** + The option handler to pass to the process_default_option_files function. SYNOPSIS process_option() @@ -96,8 +92,8 @@ static int process_option(void *ctx, const char *group, const char *option) C_MODE_END -/* - Parse option string. +/** + Parse option string. SYNOPSIS parse_option() @@ -137,7 +133,7 @@ static void parse_option(const char *option_str, } -/* +/** Process one option from the configuration file. SYNOPSIS @@ -151,6 +147,10 @@ static void parse_option(const char *option_str, process_option(). The caller ensures proper locking of the instance map object. */ + /* + Process a given option and assign it to appropricate instance. This is + required for the option handler, passed to my_search_option_files(). + */ int Instance_map::process_one_option(const LEX_STRING *group, const char *option) @@ -213,92 +213,97 @@ int Instance_map::process_one_option(const LEX_STRING *group, } +/** + Instance_map constructor. +*/ + Instance_map::Instance_map() { pthread_mutex_init(&LOCK_instance_map, 0); } +/** + Initialize Instance_map internals. +*/ + bool Instance_map::init() { return hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0, get_instance_key, delete_instance, 0); } + +/** + Reset Instance_map data. +*/ + +bool Instance_map::reset() +{ + hash_free(&hash); + return init(); +} + + +/** + Instance_map destructor. +*/ + Instance_map::~Instance_map() { - pthread_mutex_lock(&LOCK_instance_map); + lock(); + + /* + NOTE: it's necessary to synchronize on each instance before removal, + because Instance-monitoring thread can be still alive an hold the mutex + (because it is detached and we have no control over it). + */ + + while (true) + { + Iterator it(this); + Instance *instance= it.next(); + + if (!instance) + break; + + instance->lock(); + instance->unlock(); + + remove_instance(instance); + } + hash_free(&hash); - pthread_mutex_unlock(&LOCK_instance_map); + unlock(); + pthread_mutex_destroy(&LOCK_instance_map); } +/** + Lock Instance_map. +*/ + void Instance_map::lock() { pthread_mutex_lock(&LOCK_instance_map); } +/** + Unlock Instance_map. +*/ + void Instance_map::unlock() { pthread_mutex_unlock(&LOCK_instance_map); } -/* - Re-read instance configuration file. - - SYNOPSIS - Instance_map::flush_instances() - DESCRIPTION - This function will: - - clear the current list of instances. This removes both - running and stopped instances. - - load a new instance configuration from the file. - - pass on the new map to the guardian thread: it will start - all instances that are marked `guarded' and not yet started. - Note, as the check whether an instance is started is currently - very simple (returns TRUE if there is a MySQL server running - at the given port), this function has some peculiar - side-effects: - * if the port number of a running instance was changed, the - old instance is forgotten, even if it was running. The new - instance will be started at the new port. - * if the configuration was changed in a way that two - instances swapped their port numbers, the guardian thread - will not notice that and simply report that both instances - are configured successfully and running. - In order to avoid such side effects one should never call - FLUSH INSTANCES without prior stop of all running instances. - - NOTE: The operation should be invoked with the following locks acquired: - - Guardian; - - Instance_map; +/** + Check if there is an active instance or not. */ -int Instance_map::flush_instances() -{ - int rc; - - /* - Guardian thread relies on the instance map repository for guarding - instances. This is why refreshing instance map, we need (1) to stop - guardian (2) reload the instance map (3) reinitialize the guardian - with new instances. - */ - hash_free(&hash); - hash_init(&hash, default_charset_info, START_HASH_SIZE, 0, 0, - get_instance_key, delete_instance, 0); - - rc= load(); - /* don't init guardian if we failed to load instances */ - if (!rc) - guardian->init(); // TODO: check error status. - return rc; -} - - bool Instance_map::is_there_active_instance() { Instance *instance; @@ -306,29 +311,50 @@ bool Instance_map::is_there_active_instance() while ((instance= iterator.next())) { - if (guardian->find_instance_node(instance) != NULL || - instance->is_mysqld_running()) - { + bool active_instance_found; + + instance->lock(); + active_instance_found= instance->is_active(); + instance->unlock(); + + if (active_instance_found) return TRUE; - } } return FALSE; } +/** + Add an instance into the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + int Instance_map::add_instance(Instance *instance) { return my_hash_insert(&hash, (byte *) instance); } +/** + Remove instance from the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + int Instance_map::remove_instance(Instance *instance) { return hash_delete(&hash, (byte *) instance); } +/** + Create a new instance and register it in the internal hash. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + int Instance_map::create_instance(const LEX_STRING *instance_name, const Named_value_arr *options) { @@ -392,12 +418,22 @@ int Instance_map::create_instance(const LEX_STRING *instance_name, } +/** + Return a pointer to the instance or NULL, if there is no such instance. + + MT-NOTE: Instance Map must be locked before calling the operation. +*/ + Instance * Instance_map::find(const LEX_STRING *name) { return (Instance *) hash_search(&hash, (byte *) name->str, name->length); } +/** + Init instances command line arguments after all options have been loaded. +*/ + bool Instance_map::complete_initialization() { bool mysqld_found; @@ -455,7 +491,10 @@ bool Instance_map::complete_initialization() } -/* load options from config files and create appropriate instance structures */ +/** + Load options from config files and create appropriate instance + structures. +*/ int Instance_map::load() { @@ -505,8 +544,9 @@ int Instance_map::load() } -/*--- Implementaton of the Instance map iterator class ---*/ - +/************************************************************************* + {{{ Instance_map::Iterator implementation. +*************************************************************************/ void Instance_map::Iterator::go_to_first() { @@ -522,29 +562,12 @@ Instance *Instance_map::Iterator::next() return NULL; } - -const char *Instance_map::get_instance_state_name(Instance *instance) -{ - LIST *instance_node; - - if (!instance->is_configured()) - return "misconfigured"; - - if ((instance_node= guardian->find_instance_node(instance)) != NULL) - { - /* The instance is managed by Guardian: we can report precise state. */ - - return Guardian::get_instance_state_name( - guardian->get_instance_state(instance_node)); - } - - /* The instance is not managed by Guardian: we can report status only. */ - - return instance->is_mysqld_running() ? "online" : "offline"; -} +/************************************************************************* + }}} +*************************************************************************/ -/* +/** Create a new configuration section for mysqld-instance in the config file. SYNOPSIS diff --git a/server-tools/instance-manager/instance_map.h b/server-tools/instance-manager/instance_map.h index 69d225c89f7..cdbac7635c2 100644 --- a/server-tools/instance-manager/instance_map.h +++ b/server-tools/instance-manager/instance_map.h @@ -37,14 +37,17 @@ extern int create_instance_in_file(const LEX_STRING *instance_name, const Named_value_arr *options); -/* +/** Instance_map - stores all existing instances */ class Instance_map { public: - /* Instance_map iterator */ + /** + Instance_map iterator + */ + class Iterator { private: @@ -58,79 +61,43 @@ public: void go_to_first(); Instance *next(); }; - friend class Iterator; + public: - /* - Return a pointer to the instance or NULL, if there is no such instance. - MT-NOTE: must be called under acquired lock. - */ Instance *find(const LEX_STRING *name); - /* Clear the configuration cache and reload the configuration file. */ - int flush_instances(); - - /* The operation is used to check if there is an active instance or not. */ bool is_there_active_instance(); void lock(); void unlock(); bool init(); + bool reset(); - /* - Process a given option and assign it to appropricate instance. This is - required for the option handler, passed to my_search_option_files(). - */ - int process_one_option(const LEX_STRING *group, const char *option); + int load(); - /* - Add an instance into the internal hash. + int process_one_option(const LEX_STRING *group, const char *option); - MT-NOTE: the operation must be called under acquired lock. - */ int add_instance(Instance *instance); - /* - Remove instance from the internal hash. - - MT-NOTE: the operation must be called under acquired lock. - */ int remove_instance(Instance *instance); - /* - Create a new instance and register it in the internal hash. - - MT-NOTE: the operation must be called under acquired lock. - */ int create_instance(const LEX_STRING *instance_name, const Named_value_arr *options); +public: Instance_map(); ~Instance_map(); - /* - Retrieve client state name of the given instance. - - MT-NOTE: the options must be called under acquired locks of the following - objects: - - Instance_map; - - Guardian; - */ - const char *get_instance_state_name(Instance *instance); - -public: - const char *mysqld_path; - Guardian *guardian; - private: - /* loads options from config files */ - int load(); - /* inits instances argv's after all options have been loaded */ bool complete_initialization(); + private: enum { START_HASH_SIZE = 16 }; pthread_mutex_t LOCK_instance_map; HASH hash; + +private: + friend class Iterator; }; #endif /* INCLUDES_MYSQL_INSTANCE_MANAGER_INSTANCE_MAP_H */ diff --git a/server-tools/instance-manager/instance_options.h b/server-tools/instance-manager/instance_options.h index 7c1e1a8dcf3..38de839d762 100644 --- a/server-tools/instance-manager/instance_options.h +++ b/server-tools/instance-manager/instance_options.h @@ -46,7 +46,6 @@ public: Instance_options(); ~Instance_options(); - /* fills in argv */ bool complete_initialization(); bool set_option(Named_value *option); diff --git a/server-tools/instance-manager/manager.cc b/server-tools/instance-manager/manager.cc index a002902bd56..da36982f510 100644 --- a/server-tools/instance-manager/manager.cc +++ b/server-tools/instance-manager/manager.cc @@ -37,6 +37,9 @@ #include "user_map.h" +/********************************************************************** + {{{ Platform-specific implementation. +**********************************************************************/ #ifndef __WIN__ void set_signals(sigset_t *mask) @@ -92,9 +95,13 @@ int my_sigwait(const sigset_t *set, int *sig) #endif +/********************************************************************** + }}} +**********************************************************************/ + /********************************************************************** - Implementation of checking the actual thread model. + {{{ Implementation of checking the actual thread model. ***********************************************************************/ namespace { /* no-indent */ @@ -137,6 +144,10 @@ bool check_if_linux_threads(bool *linux_threads) } +/********************************************************************** + }}} +***********************************************************************/ + /********************************************************************** Manager implementation @@ -152,25 +163,37 @@ bool Manager::linux_threads; #endif // __WIN__ +/** + Request shutdown of guardian and threads registered in Thread_registry. + + SYNOPSIS + stop_all_threads() +*/ + void Manager::stop_all_threads() { /* - Let guardian thread know that it should break it's processing cycle, + Let Guardian thread know that it should break it's processing cycle, once it wakes up. */ p_guardian->request_shutdown(); - /* wake guardian */ - pthread_cond_signal(&p_guardian->COND_guardian); - /* stop all threads */ + + /* Stop all threads. */ p_thread_registry->deliver_shutdown(); } -/* - manager - entry point to the main instance manager process: start - listener thread, write pid file and enter into signal handling. - See also comments in mysqlmanager.cc to picture general Instance Manager - architecture. +/** + Main manager function. + + SYNOPSIS + main() + + DESCRIPTION + This is an entry point to the main instance manager process: + start listener thread, write pid file and enter into signal handling. + See also comments in mysqlmanager.cc to picture general Instance Manager + architecture. TODO: how about returning error status. */ @@ -194,22 +217,33 @@ int Manager::main() (const char *) (linux_threads ? "LINUX threads" : "POSIX threads")); #endif // __WIN__ - Thread_registry thread_registry; /* - All objects created in the manager() function live as long as - thread_registry lives, and thread_registry is alive until there are - working threads. + All objects created in the Manager object live as long as thread_registry + lives, and thread_registry is alive until there are working threads. + + There are two main purposes of the Thread Registry: + 1. Interrupt blocking I/O and signal condition variables in case of + shutdown; + 2. Wait for detached threads before shutting down the main thread. + + NOTE: + 1. Handling shutdown can be done in more elegant manner by introducing + Event (or Condition) object with support of logical operations. + 2. Using Thread Registry to wait for detached threads is definitely not + the best way, because when Thread Registry unregisters an thread, the + thread is still alive. Accurate way to wait for threads to stop is + not using detached threads and join all threads before shutdown. */ + Thread_registry thread_registry; User_map user_map; Instance_map instance_map; - Guardian guardian(&thread_registry, &instance_map, - Options::Main::monitoring_interval); + Guardian guardian(&thread_registry, &instance_map); Listener listener(&thread_registry, &user_map); p_instance_map= &instance_map; - p_guardian= instance_map.guardian= &guardian; + p_guardian= &guardian; p_thread_registry= &thread_registry; p_user_map= &user_map; @@ -249,7 +283,7 @@ int Manager::main() } } - /* write Instance Manager pid file */ + /* Write Instance Manager pid file. */ log_info("IM pid file: '%s'; PID: %d.", (const char *) Options::Main::pid_file_name, @@ -290,6 +324,7 @@ int Manager::main() permitted to process instances. And before flush_instances() has completed, there are no instances to guard. */ + if (guardian.start(Thread::DETACHED)) { log_error("Can not start Guardian thread."); @@ -298,21 +333,11 @@ int Manager::main() /* Load instances. */ + if (Manager::flush_instances()) { - instance_map.guardian->lock(); - instance_map.lock(); - - int flush_instances_status= instance_map.flush_instances(); - - instance_map.unlock(); - instance_map.guardian->unlock(); - - if (flush_instances_status) - { - log_error("Can not init instances repository."); - stop_all_threads(); - goto err; - } + log_error("Can not init instances repository."); + stop_all_threads(); + goto err; } /* Initialize the Listener. */ @@ -328,7 +353,8 @@ int Manager::main() After the list of guarded instances have been initialized, Guardian should start them. */ - pthread_cond_signal(&guardian.COND_guardian); + + guardian.ping(); /* Main loop. */ @@ -381,7 +407,6 @@ int Manager::main() if (!guardian.is_stopped()) { guardian.request_shutdown(); - pthread_cond_signal(&guardian.COND_guardian); } else { @@ -406,3 +431,64 @@ err: #endif return rc; } + + +/** + Re-read instance configuration file. + + SYNOPSIS + flush_instances() + + DESCRIPTION + This function will: + - clear the current list of instances. This removes both + running and stopped instances. + - load a new instance configuration from the file. + - pass on the new map to the guardian thread: it will start + all instances that are marked `guarded' and not yet started. + + Note, as the check whether an instance is started is currently + very simple (returns TRUE if there is a MySQL server running + at the given port), this function has some peculiar + side-effects: + * if the port number of a running instance was changed, the + old instance is forgotten, even if it was running. The new + instance will be started at the new port. + * if the configuration was changed in a way that two + instances swapped their port numbers, the guardian thread + will not notice that and simply report that both instances + are configured successfully and running. + + In order to avoid such side effects one should never call + FLUSH INSTANCES without prior stop of all running instances. +*/ + +bool Manager::flush_instances() +{ + p_instance_map->lock(); + + if (p_instance_map->is_there_active_instance()) + { + p_instance_map->unlock(); + return TRUE; + } + + if (p_instance_map->reset()) + { + p_instance_map->unlock(); + return TRUE; + } + + if (p_instance_map->load()) + { + p_instance_map->unlock(); + return TRUE; /* Don't init guardian if we failed to load instances. */ + } + + get_guardian()->init(); /* TODO: check error status. */ + get_guardian()->ping(); + + p_instance_map->unlock(); + + return FALSE; +} diff --git a/server-tools/instance-manager/manager.h b/server-tools/instance-manager/manager.h index a77809cca6d..9a00b9320ce 100644 --- a/server-tools/instance-manager/manager.h +++ b/server-tools/instance-manager/manager.h @@ -19,6 +19,7 @@ #if defined(__GNUC__) && defined(USE_PRAGMA_INTERFACE) #pragma interface #endif + #include <my_global.h> class Guardian; @@ -30,8 +31,12 @@ class Manager { public: static int main(); + + static bool flush_instances(); + +public: /** - These methods return a non-zero value only for the duration + These methods return a non-NULL value only for the duration of main(). */ static Instance_map *get_instance_map() { return p_instance_map; } @@ -39,6 +44,7 @@ public: static Thread_registry *get_thread_registry() { return p_thread_registry; } static User_map *get_user_map() { return p_user_map; } +public: #ifndef __WIN__ static bool is_linux_threads() { return linux_threads; } #endif // __WIN__ diff --git a/server-tools/instance-manager/user_map.cc b/server-tools/instance-manager/user_map.cc index f3a6e3cd76c..7f34195c0b1 100644 --- a/server-tools/instance-manager/user_map.cc +++ b/server-tools/instance-manager/user_map.cc @@ -42,7 +42,7 @@ int User::init(const char *line) if (name_end == 0 || name_end[1] != ':') { log_error("Invalid format (unmatched quote) of user line (%s).", - (const char *) line); + (const char *) line); return 1; } password= name_end + 2; @@ -54,7 +54,7 @@ int User::init(const char *line) if (name_end == 0) { log_error("Invalid format (no delimiter) of user line (%s).", - (const char *) line); + (const char *) line); return 1; } password= name_end + 1; @@ -64,10 +64,10 @@ int User::init(const char *line) if (user_length > USERNAME_LENGTH) { log_error("User name is too long (%d). Max length: %d. " - "User line: '%s'.", - (int) user_length, - (int) USERNAME_LENGTH, - (const char *) line); + "User line: '%s'.", + (int) user_length, + (int) USERNAME_LENGTH, + (const char *) line); return 1; } @@ -75,10 +75,10 @@ int User::init(const char *line) if (password_length > SCRAMBLED_PASSWORD_CHAR_LENGTH) { log_error("Password is too long (%d). Max length: %d." - "User line: '%s'.", - (int) password_length, - (int) SCRAMBLED_PASSWORD_CHAR_LENGTH, - line); + "User line: '%s'.", + (int) password_length, + (int) SCRAMBLED_PASSWORD_CHAR_LENGTH, + (const char *) line); return 1; } |