diff options
author | Will Holley <willholley@apache.org> | 2023-03-10 18:10:35 +0000 |
---|---|---|
committer | Will Holley <will.holley@uk.ibm.com> | 2023-03-31 16:28:53 +0100 |
commit | f4fd6b5d337c272664b371addaa5e285f924e931 (patch) | |
tree | 800396603906f5248e16dfde32e533ab39f0d1ac | |
parent | bef20f3571847fa3a3dbdc6b2591ad38f75dd215 (diff) | |
download | couchdb-f4fd6b5d337c272664b371addaa5e285f924e931.tar.gz |
feat (prometheus): internal_replication_jobs metric
Adds an internal replication backlog metric. In the `_system` endpoint
this is called `internal_replication_jobs`, so I've preserved the name,
though it appears to represent the backlog of changes.
Adding a dependency on mem3 to `couch_prometheus` requires some changes
to the tests and dependency tree:
- `couch.app.src` no longer lists a dependency on `couch_prometheus`.
I don't know why this was needed previously - it doesn't appear to be
required.
- `couch_prometheus` now has dependencies on `couch` and `mem3`.
This both ensures that `couch_prometheus` doesn't crash if mem3 isn't
running and also resolves a race condition on startup where the
`_prometheus` endpoint returns incomplete stats.
- `couch_prometheus_server:system_stats_test/0` is moved to
`couch_prometheus_e2e_tests:t_starts_with_couchdb/1`. It is really
an integration test, since it depends on the `_prometheus` endpoint
being able to collect data for all the metrics, and it tests only
that the metric names begin with `couchdb_`.
-rw-r--r-- | src/couch/src/couch.app.src | 1 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus.app.src | 2 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_server.erl | 24 | ||||
-rw-r--r-- | src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl | 32 |
4 files changed, 41 insertions, 18 deletions
diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 8b0ddfb8e..af2165f5d 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -47,7 +47,6 @@ ioq, couch_stats, hyper, - couch_prometheus, couch_dist ]}, {env, [ diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src index bf49e59d2..9d3a36582 100644 --- a/src/couch_prometheus/src/couch_prometheus.app.src +++ b/src/couch_prometheus/src/couch_prometheus.app.src @@ -14,7 +14,7 @@ {description, "Aggregated metrics info for Prometheus consumption"}, {vsn, git}, {registered, []}, - {applications, [kernel, stdlib, folsom, couch_stats, couch_log]}, + {applications, [kernel, stdlib, folsom, couch_stats, couch_log, mem3, couch]}, {mod, {couch_prometheus_app, []}}, {env, []} ]}. diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl index 939ce00cf..0fd169404 100644 --- a/src/couch_prometheus/src/couch_prometheus_server.erl +++ b/src/couch_prometheus/src/couch_prometheus_server.erl @@ -112,12 +112,21 @@ get_system_stats() -> get_message_queue_stats(), get_run_queue_stats(), get_vm_stats(), - get_ets_stats() + get_ets_stats(), + get_internal_replication_jobs_stat() ]). get_uptime_stat() -> to_prom(uptime_seconds, counter, "couchdb uptime", couch_app:uptime() div 1000). +get_internal_replication_jobs_stat() -> + to_prom( + internal_replication_jobs, + gauge, + "count of internal replication changes to process", + mem3_sync:get_backlog() + ). + get_vm_stats() -> MemLabels = lists:map( fun({Type, Value}) -> @@ -250,19 +259,6 @@ update_refresh_timer() -> -include_lib("couch/include/couch_eunit.hrl"). -system_stats_test() -> - lists:foreach( - fun(Line) -> - ?assert(is_binary(Line)), - Trimmed = string:trim(Line), - ?assert(starts_with(<<"couchdb_">>, Trimmed) orelse starts_with(<<"# ">>, Trimmed)) - end, - get_system_stats() - ). 
- -starts_with(Prefix, Line) when is_binary(Prefix), is_binary(Line) -> - binary:longest_common_prefix([Prefix, Line]) > 0. - message_queue_len_test() -> self() ! refresh, ?assert(message_queue_len(self()) >= 1), diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl index 9b1c47633..2a1016099 100644 --- a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl +++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl @@ -40,7 +40,8 @@ e2e_test_() -> ?TDEF_FE(t_chttpd_port), ?TDEF_FE(t_prometheus_port), ?TDEF_FE(t_metric_updated), - ?TDEF_FE(t_no_duplicate_metrics) + ?TDEF_FE(t_no_duplicate_metrics), + ?TDEF_FE(t_starts_with_couchdb) ] } } @@ -70,7 +71,7 @@ reject_test_() -> }. setup_prometheus(WithAdditionalPort) -> - Ctx = test_util:start_couch([chttpd]), + Ctx = test_util:start_couch([mem3, chttpd, couch_prometheus]), Persist = false, Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, binary_to_list(Hashed), Persist), @@ -145,6 +146,33 @@ t_metric_updated(Port) -> end ). +t_starts_with_couchdb(Port) -> + Url = node_local_url(Port), + Stats = get_stats(Url), + Lines = re:split(Stats, "\n"), + lists:foreach( + fun(Line) -> + ?assert(is_binary(Line)), + Trimmed = string:trim(Line), + Expect = "^(#|couchdb_|$)", + case re:run(Trimmed, Expect) of + {match, _} -> + ok; + nomatch -> + erlang:error( + {assertRegexp_failed, [ + {module, ?MODULE}, + {line, ?LINE}, + {regexp, (Trimmed)}, + {expected_to_match, Expect}, + {result, nomatch} + ]} + ) + end + end, + Lines + ). + node_local_url(Port) -> Addr = config:get("chttpd", "bind_address", "127.0.0.1"), lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]). |