summary refs log tree commit diff
diff options
context:
space:
mode:
author Will Holley <willholley@apache.org> 2023-04-03 13:22:50 +0100
committer GitHub <noreply@github.com> 2023-04-03 13:22:50 +0100
commit a9bce2f598edc8ef843baa9412c60d22157eeabf (patch)
tree 96bff7d3b47e1a608060ac6d8c884c07145fd9ad
parent d1412552f4a88df3b783199a61030568973b152f (diff)
parent 8c1ef5bfeaa6fbf36dc8147eeff2349ec9edff6e (diff)
download couchdb-a9bce2f598edc8ef843baa9412c60d22157eeabf.tar.gz
Merge pull request #4507 from apache/prometheus_metrics
feat: additional prometheus metrics
-rw-r--r-- .devcontainer/devcontainer.json 2
-rw-r--r-- src/couch/src/couch.app.src 1
-rw-r--r-- src/couch_prometheus/src/couch_prometheus.app.src 2
-rw-r--r-- src/couch_prometheus/src/couch_prometheus_server.erl 51
-rw-r--r-- src/couch_prometheus/src/couch_prometheus_util.erl 21
-rw-r--r-- src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl 32
6 files changed, 82 insertions, 27 deletions
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index a14b7b1e6..5e577d96d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -26,4 +26,4 @@
"postCreateCommand": "./configure && make",
"extensions": ["erlang-ls.erlang-ls"]
-}
+}
\ No newline at end of file
diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src
index 8b0ddfb8e..af2165f5d 100644
--- a/src/couch/src/couch.app.src
+++ b/src/couch/src/couch.app.src
@@ -47,7 +47,6 @@
ioq,
couch_stats,
hyper,
- couch_prometheus,
couch_dist
]},
{env, [
diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src
index bf49e59d2..9d3a36582 100644
--- a/src/couch_prometheus/src/couch_prometheus.app.src
+++ b/src/couch_prometheus/src/couch_prometheus.app.src
@@ -14,7 +14,7 @@
{description, "Aggregated metrics info for Prometheus consumption"},
{vsn, git},
{registered, []},
- {applications, [kernel, stdlib, folsom, couch_stats, couch_log]},
+ {applications, [kernel, stdlib, folsom, couch_stats, couch_log, mem3, couch]},
{mod, {couch_prometheus_app, []}},
{env, []}
]}.
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 70c4790ab..05cd26265 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -112,12 +112,33 @@ get_system_stats() ->
get_message_queue_stats(),
get_run_queue_stats(),
get_vm_stats(),
- get_ets_stats()
+ get_ets_stats(),
+ get_internal_replication_jobs_stat(),
+ get_membership_stat()
]).
get_uptime_stat() ->
to_prom(uptime_seconds, counter, "couchdb uptime", couch_app:uptime() div 1000).
+get_internal_replication_jobs_stat() ->
+ to_prom(
+ internal_replication_jobs,
+ gauge,
+ "count of internal replication changes to process",
+ mem3_sync:get_backlog()
+ ).
+
+get_membership_stat() ->
+ % expected nodes
+ ClusterNodes = mem3:nodes(),
+ % connected nodes
+ AllNodes = nodes([this, visible]),
+ Labels = [
+ {[{nodes, "cluster_nodes"}], length(ClusterNodes)},
+ {[{nodes, "all_nodes"}], length(AllNodes)}
+ ],
+ to_prom(membership, gauge, "count of nodes in the cluster", Labels).
+
get_vm_stats() ->
MemLabels = lists:map(
fun({Type, Value}) ->
@@ -177,24 +198,27 @@ get_io_stats() ->
].
get_message_queue_stats() ->
- QLenFun = fun(Name) -> message_queue_len(whereis(Name)) end,
- Queues = lists:map(QLenFun, registered()),
+ QFun = fun(Name) -> {Name, message_queue_len(whereis(Name))} end,
+ Queues = lists:map(QFun, registered()),
+ QueueLens = lists:map(fun({_, Len}) -> Len end, Queues),
+ QueueLenByLabel = lists:map(fun({Name, Len}) -> {[{queue_name, Name}], Len} end, Queues),
[
to_prom(
- erlang_message_queues, gauge, "total size of all message queues", lists:sum(Queues)
+ erlang_message_queues, gauge, "total size of all message queues", lists:sum(QueueLens)
),
to_prom(
erlang_message_queue_min,
gauge,
"minimum size across all message queues",
- lists:min(Queues)
+ lists:min(QueueLens)
),
to_prom(
erlang_message_queue_max,
gauge,
"maximum size across all message queues",
- lists:max(Queues)
- )
+ lists:max(QueueLens)
+ ),
+ to_prom(erlang_message_queue_size, gauge, "size of message queue", QueueLenByLabel)
].
message_queue_len(undefined) ->
@@ -247,19 +271,6 @@ update_refresh_timer() ->
-include_lib("couch/include/couch_eunit.hrl").
-system_stats_test() ->
- lists:foreach(
- fun(Line) ->
- ?assert(is_binary(Line)),
- Trimmed = string:trim(Line),
- ?assert(starts_with(<<"couchdb_">>, Trimmed) orelse starts_with(<<"# ">>, Trimmed))
- end,
- get_system_stats()
- ).
-
-starts_with(Prefix, Line) when is_binary(Prefix), is_binary(Line) ->
- binary:longest_common_prefix([Prefix, Line]) > 0.
-
message_queue_len_test() ->
self() ! refresh,
?assert(message_queue_len(self()) >= 1),
diff --git a/src/couch_prometheus/src/couch_prometheus_util.erl b/src/couch_prometheus/src/couch_prometheus_util.erl
index 9e4a74e49..5775b9693 100644
--- a/src/couch_prometheus/src/couch_prometheus_util.erl
+++ b/src/couch_prometheus/src/couch_prometheus_util.erl
@@ -71,6 +71,19 @@ couch_to_prom([couchdb, httpd_status_codes, 200], Info, _All) ->
});
couch_to_prom([couchdb, httpd_status_codes, Code], Info, _All) ->
to_prom(httpd_status_codes, {[{code, Code}], val(Info)});
+% Convert to gauge in prometheus type. This is required because
+% prometheus assumes that counters are cumulative and should be
+% rated by default, whereas folsom (the library CouchDB uses for
+% metrics) allows counters to be decremented as well. Folsom supports
+% gauges but does not track their state to allow increment/decrement.
+% Basically, anywhere we use couch_stats:decrement_count we should
+% be converting to a prometheus gauge.
+couch_to_prom([couchdb, open_databases], Info, _All) ->
+ to_prom(open_databases, gauge, desc(Info), val(Info));
+couch_to_prom([couchdb, open_os_files], Info, _All) ->
+ to_prom(open_os_files, gauge, desc(Info), val(Info));
+couch_to_prom([couchdb, httpd, clients_requesting_changes], Info, _All) ->
+ to_prom(httpd_clients_requesting_changes, gauge, desc(Info), val(Info));
couch_to_prom([ddoc_cache, hit], Info, All) ->
Total = val(Info) + val([ddoc_cache, miss], All),
to_prom(ddoc_cache_requests_total, counter, "number of design doc cache requests", Total);
@@ -109,9 +122,13 @@ type_def(Metric, Type, Desc) ->
to_bin(io_lib:format("# TYPE ~s ~s", [Name, Type]))
].
-to_prom(Metric, Type, Desc, Data) ->
+% support creating a metric series with multiple label/values.
+% Instances is of the form [{[{LabelName, LabelValue}], Value}, ...]
+to_prom(Metric, Type, Desc, Instances) when is_list(Instances) ->
TypeStr = type_def(Metric, Type, Desc),
- [TypeStr] ++ to_prom(Metric, Data).
+ [TypeStr] ++ lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances);
+to_prom(Metric, Type, Desc, Data) ->
+ to_prom(Metric, Type, Desc, [Data]).
to_prom(Metric, Instances) when is_list(Instances) ->
lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances);
diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
index 9b1c47633..2a1016099 100644
--- a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
+++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
@@ -40,7 +40,8 @@ e2e_test_() ->
?TDEF_FE(t_chttpd_port),
?TDEF_FE(t_prometheus_port),
?TDEF_FE(t_metric_updated),
- ?TDEF_FE(t_no_duplicate_metrics)
+ ?TDEF_FE(t_no_duplicate_metrics),
+ ?TDEF_FE(t_starts_with_couchdb)
]
}
}
@@ -70,7 +71,7 @@ reject_test_() ->
}.
setup_prometheus(WithAdditionalPort) ->
- Ctx = test_util:start_couch([chttpd]),
+ Ctx = test_util:start_couch([mem3, chttpd, couch_prometheus]),
Persist = false,
Hashed = couch_passwords:hash_admin_password(?PASS),
ok = config:set("admins", ?USER, binary_to_list(Hashed), Persist),
@@ -145,6 +146,33 @@ t_metric_updated(Port) ->
end
).
+t_starts_with_couchdb(Port) ->
+ Url = node_local_url(Port),
+ Stats = get_stats(Url),
+ Lines = re:split(Stats, "\n"),
+ lists:foreach(
+ fun(Line) ->
+ ?assert(is_binary(Line)),
+ Trimmed = string:trim(Line),
+ Expect = "^(#|couchdb_|$)",
+ case re:run(Trimmed, Expect) of
+ {match, _} ->
+ ok;
+ nomatch ->
+ erlang:error(
+ {assertRegexp_failed, [
+ {module, ?MODULE},
+ {line, ?LINE},
+ {regexp, (Trimmed)},
+ {expected_to_match, Expect},
+ {result, nomatch}
+ ]}
+ )
+ end
+ end,
+ Lines
+ ).
+
node_local_url(Port) ->
Addr = config:get("chttpd", "bind_address", "127.0.0.1"),
lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]).