From f91b91cf659118eaf74b077f900a4bfd3405ed7d Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 25 Sep 2006 09:28:52 +0200 Subject: Bug #22379 im_daemon_life_cycle.test fails on merge of 5.1 -> 5.1-engines Remove race situations that occur when removing pidfiles. Primarily each process should remove its own pidfile, secondly it should be removed by the process that created it and _only_ if it's certain the process is dead. Third, mysql-test-run.pl will remove the pidfile when process has been killed. - Set state of an instance to STARTING _before_ calling instance->start() - Check that pidfile of instance has been created before changing STARTING => STARTED - Only remove the pidfile if IM kills an instance with SIGKILL, otherwise the instance will remove it itself server-tools/instance-manager/guardian.cc: If state of an instance is STARTING, chech that the instance pidfile has been created before changing state to STARTED Set state to STARTING before calling instance->start(), it can take some time before it is fully started and during that time it should be in state STARTING server-tools/instance-manager/instance.cc: Only remove the pid file of instance manager when a SIGKILL has been performed sucessfully server-tools/instance-manager/instance_options.cc: Check that fscanf returns 1 which is the number of args that should be scanned from the pid file --- server-tools/instance-manager/guardian.cc | 27 ++++++++++++++++++----- server-tools/instance-manager/instance.cc | 25 +++++++++++---------- server-tools/instance-manager/instance_options.cc | 3 ++- 3 files changed, 36 insertions(+), 19 deletions(-) (limited to 'server-tools') diff --git a/server-tools/instance-manager/guardian.cc b/server-tools/instance-manager/guardian.cc index 24844e05776..e2142c97f33 100644 --- a/server-tools/instance-manager/guardian.cc +++ b/server-tools/instance-manager/guardian.cc @@ -110,20 +110,35 @@ void Guardian_thread::process_instance(Instance *instance, if (instance->is_running()) { - /* clear status fields */ - current_node->restart_counter= 0; - current_node->crash_moment= 0; - current_node->state= STARTED; + /* The instance can be contacted on it's port */ + + /* If STARTING also check that pidfile has been created */ + if (current_node->state == STARTING && + current_node->instance->options.get_pid() == 0) + { + /* Pid file not created yet, don't go to STARTED state yet */ + } + else + { + /* clear status fields */ + log_info("guardian: instance %s is running, set state to STARTED", + instance->options.instance_name); + current_node->restart_counter= 0; + current_node->crash_moment= 0; + current_node->state= STARTED; + } } else { switch (current_node->state) { case NOT_STARTED: - instance->start(); - current_node->last_checked= current_time; log_info("guardian: starting instance %s", instance->options.instance_name); + + /* NOTE, set state to STARTING _before_ start() is called */ current_node->state= STARTING; + instance->start(); + current_node->last_checked= current_time; break; case STARTED: /* fallthrough */ case STARTING: /* let the instance start or crash */ diff --git a/server-tools/instance-manager/instance.cc b/server-tools/instance-manager/instance.cc index 39381b457ab..5c47dc87734 100644 --- a/server-tools/instance-manager/instance.cc +++ b/server-tools/instance-manager/instance.cc @@ -571,18 +571,19 @@ void Instance::kill_instance(int signum) /* if there are no pid, everything seems to be fine */ if ((pid= options.get_pid()) != 0) /* get pid from pidfile */ { - /* - If we cannot kill mysqld, then it has propably crashed. - Let us try to remove staled pidfile and return successfully - as mysqld is probably stopped. - */ - if (!kill(pid, signum)) - options.unlink_pidfile(); - else if (signum == SIGKILL) /* really killed instance with SIGKILL */ - log_error("The instance %s is being stopped forsibly. Normally \ - it should not happed. Probably the instance has been \ - hanging. You should also check your IM setup", - options.instance_name); + if (kill(pid, signum) == 0) + { + /* Kill suceeded */ + if (signum == SIGKILL) /* really killed instance with SIGKILL */ + { + log_error("The instance %s is being stopped forcibly. Normally" \ + "it should not happen. Probably the instance has been" \ + "hanging. You should also check your IM setup", + options.instance_name); + /* After sucessful hard kill the pidfile need to be removed */ + options.unlink_pidfile(); + } + } } return; } diff --git a/server-tools/instance-manager/instance_options.cc b/server-tools/instance-manager/instance_options.cc index 72621ed1662..f86f359959b 100644 --- a/server-tools/instance-manager/instance_options.cc +++ b/server-tools/instance-manager/instance_options.cc @@ -377,7 +377,8 @@ pid_t Instance_options::get_pid() { pid_t pid; - fscanf(pid_file_stream, "%i", &pid); + if (fscanf(pid_file_stream, "%i", &pid) != 1) + pid= -1; my_fclose(pid_file_stream, MYF(0)); return pid; } -- cgit v1.2.1