summaryrefslogtreecommitdiff
path: root/deps/rabbitmq_cli/lib/rabbitmq/cli/queues/commands/check_if_node_is_quorum_critical_command.ex
blob: d8f4a34c1c0e83b1049d4a04c9723ba3cc8e7991 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
## This Source Code Form is subject to the terms of the Mozilla Public
## License, v. 2.0. If a copy of the MPL was not distributed with this
## file, You can obtain one at https://mozilla.org/MPL/2.0/.
##
## Copyright (c) 2007-2020 VMware, Inc. or its affiliates.  All rights reserved.

defmodule RabbitMQ.CLI.Queues.Commands.CheckIfNodeIsQuorumCriticalCommand do
  @moduledoc """
  Exits with a non-zero code if there are quorum queues that would lose their quorum
  if the target node is shut down.

  This command is meant to be used as a pre-upgrade (pre-shutdown) check.
  """

  @behaviour RabbitMQ.CLI.CommandBehaviour

  import RabbitMQ.CLI.Core.Platform, only: [line_separator: 0]

  def scopes(), do: [:diagnostics, :queues]

  use RabbitMQ.CLI.Core.AcceptsDefaultSwitchesAndTimeout
  use RabbitMQ.CLI.Core.MergesNoDefaults
  use RabbitMQ.CLI.Core.AcceptsNoPositionalArguments
  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning

  @doc """
  Runs the check against `node_name`, applying `timeout` to every RPC call.

  Returns one of:

    * `{:ok, :single_node_cluster}` if the node reports it is the only cluster member
    * `{:ok, :under_maintenance}` if the node reports it is being drained
    * `{:ok, queues}` where `queues` is the (possibly empty) list returned by
      `:rabbit_quorum_queue.list_with_minimum_quorum_for_cli/0`
    * any other RPC result (for example `{:badrpc, reason}`), passed through unchanged
  """
  def run([], %{node: node_name, timeout: timeout}) do
    case :rabbit_misc.rpc_call(node_name, :rabbit_nodes, :is_single_node_cluster, [], timeout) do
      # if target node is the only one in the cluster, the check makes little sense
      # and false positives can be misleading
      true -> {:ok, :single_node_cluster}
      false -> check_maintenance_status(node_name, timeout)
      other -> other
    end
  end

  # JSON formatter: the check does not apply or passed.
  def output({:ok, :single_node_cluster}, %{formatter: "json"}) do
    {:ok, %{
      "result"  => "ok",
      "message" => "Target node seems to be the only one in a single node cluster, the check does not apply"
    }}
  end
  def output({:ok, :under_maintenance}, %{formatter: "json"}) do
    {:ok, %{
      "result"  => "ok",
      "message" => "Target node seems to be in maintenance mode, the check does not apply"
    }}
  end
  def output({:ok, []}, %{formatter: "json"}) do
    {:ok, %{"result" => "ok"}}
  end
  # Silent mode: report success without any message.
  def output({:ok, :single_node_cluster}, %{silent: true}) do
    {:ok, :check_passed}
  end
  def output({:ok, :under_maintenance}, %{silent: true}) do
    {:ok, :check_passed}
  end
  def output({:ok, []}, %{silent: true}) do
    {:ok, :check_passed}
  end
  # Default (human-readable) formatter: the check does not apply or passed.
  def output({:ok, :single_node_cluster}, %{node: node_name}) do
    {:ok, "Node #{node_name} seems to be the only one in a single node cluster, the check does not apply"}
  end
  def output({:ok, :under_maintenance}, %{node: node_name}) do
    {:ok, "Node #{node_name} seems to be in maintenance mode, the check does not apply"}
  end
  def output({:ok, []}, %{node: node_name}) do
    {:ok, "Node #{node_name} reported no quorum queues with minimum quorum"}
  end
  # A non-empty list reaches the clauses below (the empty list is matched above),
  # which means the check failed. Clause order matters: JSON first, then silent,
  # then the human-readable default.
  def output({:ok, qs}, %{node: node_name, formatter: "json"}) when is_list(qs) do
    {:error, :check_failed,
     %{
       "result" => "error",
       "queues" => qs,
       "message" => "Node #{node_name} reported local queues with minimum online quorum"
     }}
  end
  def output({:ok, qs}, %{silent: true}) when is_list(qs) do
    {:error, :check_failed}
  end
  def output({:ok, qs}, %{node: node_name}) when is_list(qs) do
    message =
      qs
      |> queue_lines(node_name)
      |> Enum.join(line_separator())

    {:error, :check_failed, message}
  end
  # Must directly follow the clauses above: results not matched there
  # (e.g. RPC errors passed through by run/2) are left to RabbitMQ.CLI.DefaultOutput.
  use RabbitMQ.CLI.DefaultOutput

  def help_section(), do: :observability_and_health_checks

  def description() do
    "Health check that exits with a non-zero code if there are queues " <>
    "with minimum online quorum (queues that would lose their quorum if the target node is shut down)"
  end

  def usage, do: "check_if_node_is_quorum_critical"

  def banner([], %{node: node_name}) do
    "Checking if node #{node_name} is critical for quorum of any quorum queues ..."
  end

  #
  # Implementation
  #

  @doc """
  Builds one human-readable line per queue in `qs`.

  Each element of `qs` is accessed with the `"readable_name"` key.
  """
  def queue_lines(qs, node_name) do
    Enum.map(qs, fn q ->
      "#{q["readable_name"]} would lose quorum if node #{node_name} is stopped"
    end)
  end

  # Asks the target node whether it is being drained (put into maintenance mode).
  # The CLI timeout is passed explicitly so that this step is bounded like the other
  # RPC calls, and any unexpected result (e.g. {:badrpc, reason}) is passed through
  # instead of raising a CaseClauseError.
  defp check_maintenance_status(node_name, timeout) do
    case :rabbit_misc.rpc_call(
           node_name,
           :rabbit_maintenance,
           :is_being_drained_local_read,
           [node_name],
           timeout
         ) do
      # if target node is under maintenance, it has already transferred all of its quorum queue
      # replicas. Don't consider it to be quorum critical. See rabbitmq/rabbitmq-server#2469
      true -> {:ok, :under_maintenance}
      false -> list_queues_with_minimum_quorum(node_name, timeout)
      other -> other
    end
  end

  # Fetches the queues the target node reports as having minimum quorum.
  # A list (empty or not) is wrapped in an :ok tuple; anything else is passed through.
  defp list_queues_with_minimum_quorum(node_name, timeout) do
    case :rabbit_misc.rpc_call(
           node_name,
           :rabbit_quorum_queue,
           :list_with_minimum_quorum_for_cli,
           [],
           timeout
         ) do
      qs when is_list(qs) -> {:ok, qs}
      other -> other
    end
  end
end