summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Will Holley <willholley@apache.org> 2023-03-10 18:10:35 +0000
committer Will Holley <will.holley@uk.ibm.com> 2023-03-31 16:28:53 +0100
commit f4fd6b5d337c272664b371addaa5e285f924e931 (patch)
tree 800396603906f5248e16dfde32e533ab39f0d1ac
parent bef20f3571847fa3a3dbdc6b2591ad38f75dd215 (diff)
download couchdb-f4fd6b5d337c272664b371addaa5e285f924e931.tar.gz
feat (prometheus): internal_replication_jobs metric
Adds an internal replication backlog metric. In the `_system` endpoint this is called `internal_replication_jobs`, so I've preserved the name, though it appears to represent the backlog of changes.

Adding a dependency on mem3 to `couch_prometheus` requires some changes to the tests and dependency tree:

- `couch.app.src` no longer lists a dependency on `couch_prometheus`. I don't know why this was needed previously - it doesn't appear to be required.
- `couch_prometheus` now has dependencies on `couch` and `mem3`. This both ensures that `couch_prometheus` doesn't crash if mem3 isn't running and also resolves a race condition on startup where the `_prometheus` endpoint returns incomplete stats.
- `couch_prometheus_server:system_stats_test/0` is moved to `couch_prometheus_e2e_tests:t_starts_with_couchdb/1`. It is really an integration test, since it depends on the `_prometheus` endpoint being able to collect data for all the metrics, and it tests only that the metric names begin with `couchdb_`.
-rw-r--r-- src/couch/src/couch.app.src | 1
-rw-r--r-- src/couch_prometheus/src/couch_prometheus.app.src | 2
-rw-r--r-- src/couch_prometheus/src/couch_prometheus_server.erl | 24
-rw-r--r-- src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl | 32
4 files changed, 41 insertions, 18 deletions
diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src
index 8b0ddfb8e..af2165f5d 100644
--- a/src/couch/src/couch.app.src
+++ b/src/couch/src/couch.app.src
@@ -47,7 +47,6 @@
ioq,
couch_stats,
hyper,
- couch_prometheus,
couch_dist
]},
{env, [
diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src
index bf49e59d2..9d3a36582 100644
--- a/src/couch_prometheus/src/couch_prometheus.app.src
+++ b/src/couch_prometheus/src/couch_prometheus.app.src
@@ -14,7 +14,7 @@
{description, "Aggregated metrics info for Prometheus consumption"},
{vsn, git},
{registered, []},
- {applications, [kernel, stdlib, folsom, couch_stats, couch_log]},
+ {applications, [kernel, stdlib, folsom, couch_stats, couch_log, mem3, couch]},
{mod, {couch_prometheus_app, []}},
{env, []}
]}.
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 939ce00cf..0fd169404 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -112,12 +112,21 @@ get_system_stats() ->
get_message_queue_stats(),
get_run_queue_stats(),
get_vm_stats(),
- get_ets_stats()
+ get_ets_stats(),
+ get_internal_replication_jobs_stat()
]).
get_uptime_stat() ->
to_prom(uptime_seconds, counter, "couchdb uptime", couch_app:uptime() div 1000).
+get_internal_replication_jobs_stat() ->
+ to_prom(
+ internal_replication_jobs,
+ gauge,
+ "count of internal replication changes to process",
+ mem3_sync:get_backlog()
+ ).
+
get_vm_stats() ->
MemLabels = lists:map(
fun({Type, Value}) ->
@@ -250,19 +259,6 @@ update_refresh_timer() ->
-include_lib("couch/include/couch_eunit.hrl").
-system_stats_test() ->
- lists:foreach(
- fun(Line) ->
- ?assert(is_binary(Line)),
- Trimmed = string:trim(Line),
- ?assert(starts_with(<<"couchdb_">>, Trimmed) orelse starts_with(<<"# ">>, Trimmed))
- end,
- get_system_stats()
- ).
-
-starts_with(Prefix, Line) when is_binary(Prefix), is_binary(Line) ->
- binary:longest_common_prefix([Prefix, Line]) > 0.
-
message_queue_len_test() ->
self() ! refresh,
?assert(message_queue_len(self()) >= 1),
diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
index 9b1c47633..2a1016099 100644
--- a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
+++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
@@ -40,7 +40,8 @@ e2e_test_() ->
?TDEF_FE(t_chttpd_port),
?TDEF_FE(t_prometheus_port),
?TDEF_FE(t_metric_updated),
- ?TDEF_FE(t_no_duplicate_metrics)
+ ?TDEF_FE(t_no_duplicate_metrics),
+ ?TDEF_FE(t_starts_with_couchdb)
]
}
}
@@ -70,7 +71,7 @@ reject_test_() ->
}.
setup_prometheus(WithAdditionalPort) ->
- Ctx = test_util:start_couch([chttpd]),
+ Ctx = test_util:start_couch([mem3, chttpd, couch_prometheus]),
Persist = false,
Hashed = couch_passwords:hash_admin_password(?PASS),
ok = config:set("admins", ?USER, binary_to_list(Hashed), Persist),
@@ -145,6 +146,33 @@ t_metric_updated(Port) ->
end
).
+t_starts_with_couchdb(Port) ->
+ Url = node_local_url(Port),
+ Stats = get_stats(Url),
+ Lines = re:split(Stats, "\n"),
+ lists:foreach(
+ fun(Line) ->
+ ?assert(is_binary(Line)),
+ Trimmed = string:trim(Line),
+ Expect = "^(#|couchdb_|$)",
+ case re:run(Trimmed, Expect) of
+ {match, _} ->
+ ok;
+ nomatch ->
+ erlang:error(
+ {assertRegexp_failed, [
+ {module, ?MODULE},
+ {line, ?LINE},
+ {regexp, (Trimmed)},
+ {expected_to_match, Expect},
+ {result, nomatch}
+ ]}
+ )
+ end
+ end,
+ Lines
+ ).
+
node_local_url(Port) ->
Addr = config:get("chttpd", "bind_address", "127.0.0.1"),
lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]).