diff options
author | Will Holley <willholley@apache.org> | 2023-04-03 13:22:50 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-03 13:22:50 +0100 |
commit | a9bce2f598edc8ef843baa9412c60d22157eeabf (patch) | |
tree | 96bff7d3b47e1a608060ac6d8c884c07145fd9ad | |
parent | d1412552f4a88df3b783199a61030568973b152f (diff) | |
parent | 8c1ef5bfeaa6fbf36dc8147eeff2349ec9edff6e (diff) | |
download | couchdb-a9bce2f598edc8ef843baa9412c60d22157eeabf.tar.gz |
Merge pull request #4507 from apache/prometheus_metrics
feat: additional prometheus metrics
-rw-r--r-- | .devcontainer/devcontainer.json | 2 | ||||
-rw-r--r-- | src/couch/src/couch.app.src | 1 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus.app.src | 2 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_server.erl | 51 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_util.erl | 21 | ||||
-rw-r--r-- | src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl | 32 |
6 files changed, 82 insertions, 27 deletions
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a14b7b1e6..5e577d96d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -26,4 +26,4 @@ "postCreateCommand": "./configure && make", "extensions": ["erlang-ls.erlang-ls"] -} +}
\ No newline at end of file diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 8b0ddfb8e..af2165f5d 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -47,7 +47,6 @@ ioq, couch_stats, hyper, - couch_prometheus, couch_dist ]}, {env, [ diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src index bf49e59d2..9d3a36582 100644 --- a/src/couch_prometheus/src/couch_prometheus.app.src +++ b/src/couch_prometheus/src/couch_prometheus.app.src @@ -14,7 +14,7 @@ {description, "Aggregated metrics info for Prometheus consumption"}, {vsn, git}, {registered, []}, - {applications, [kernel, stdlib, folsom, couch_stats, couch_log]}, + {applications, [kernel, stdlib, folsom, couch_stats, couch_log, mem3, couch]}, {mod, {couch_prometheus_app, []}}, {env, []} ]}. diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl index 70c4790ab..05cd26265 100644 --- a/src/couch_prometheus/src/couch_prometheus_server.erl +++ b/src/couch_prometheus/src/couch_prometheus_server.erl @@ -112,12 +112,33 @@ get_system_stats() -> get_message_queue_stats(), get_run_queue_stats(), get_vm_stats(), - get_ets_stats() + get_ets_stats(), + get_internal_replication_jobs_stat(), + get_membership_stat() ]). get_uptime_stat() -> to_prom(uptime_seconds, counter, "couchdb uptime", couch_app:uptime() div 1000). +get_internal_replication_jobs_stat() -> + to_prom( + internal_replication_jobs, + gauge, + "count of internal replication changes to process", + mem3_sync:get_backlog() + ). + +get_membership_stat() -> + % expected nodes + ClusterNodes = mem3:nodes(), + % connected nodes + AllNodes = nodes([this, visible]), + Labels = [ + {[{nodes, "cluster_nodes"}], length(ClusterNodes)}, + {[{nodes, "all_nodes"}], length(AllNodes)} + ], + to_prom(membership, gauge, "count of nodes in the cluster", Labels). + get_vm_stats() -> MemLabels = lists:map( fun({Type, Value}) -> @@ -177,24 +198,27 @@ get_io_stats() -> ]. get_message_queue_stats() -> - QLenFun = fun(Name) -> message_queue_len(whereis(Name)) end, - Queues = lists:map(QLenFun, registered()), + QFun = fun(Name) -> {Name, message_queue_len(whereis(Name))} end, + Queues = lists:map(QFun, registered()), + QueueLens = lists:map(fun({_, Len}) -> Len end, Queues), + QueueLenByLabel = lists:map(fun({Name, Len}) -> {[{queue_name, Name}], Len} end, Queues), [ to_prom( - erlang_message_queues, gauge, "total size of all message queues", lists:sum(Queues) + erlang_message_queues, gauge, "total size of all message queues", lists:sum(QueueLens) ), to_prom( erlang_message_queue_min, gauge, "minimum size across all message queues", - lists:min(Queues) + lists:min(QueueLens) ), to_prom( erlang_message_queue_max, gauge, "maximum size across all message queues", - lists:max(Queues) - ) + lists:max(QueueLens) + ), + to_prom(erlang_message_queue_size, gauge, "size of message queue", QueueLenByLabel) ]. message_queue_len(undefined) -> @@ -247,19 +271,6 @@ update_refresh_timer() -> -include_lib("couch/include/couch_eunit.hrl"). -system_stats_test() -> - lists:foreach( - fun(Line) -> - ?assert(is_binary(Line)), - Trimmed = string:trim(Line), - ?assert(starts_with(<<"couchdb_">>, Trimmed) orelse starts_with(<<"# ">>, Trimmed)) - end, - get_system_stats() - ). - -starts_with(Prefix, Line) when is_binary(Prefix), is_binary(Line) -> - binary:longest_common_prefix([Prefix, Line]) > 0. - message_queue_len_test() -> self() ! refresh, ?assert(message_queue_len(self()) >= 1), diff --git a/src/couch_prometheus/src/couch_prometheus_util.erl b/src/couch_prometheus/src/couch_prometheus_util.erl index 9e4a74e49..5775b9693 100644 --- a/src/couch_prometheus/src/couch_prometheus_util.erl +++ b/src/couch_prometheus/src/couch_prometheus_util.erl @@ -71,6 +71,19 @@ couch_to_prom([couchdb, httpd_status_codes, 200], Info, _All) -> }); couch_to_prom([couchdb, httpd_status_codes, Code], Info, _All) -> to_prom(httpd_status_codes, {[{code, Code}], val(Info)}); +% Convert to gauge in prometheus type. This is required because +% prometheus assumes that counters are cumulative and should be +% rated by default, whereas folsom (the library CouchDB uses for +% metrics) allows counters to be decremented as well. Folsom supports +% gauges but does not track their state to allow increment/decrement. +% Basically, anywhere we use couch_stats:decrement_count we should +% be converting to a prometheus gauge. +couch_to_prom([couchdb, open_databases], Info, _All) -> + to_prom(open_databases, gauge, desc(Info), val(Info)); +couch_to_prom([couchdb, open_os_files], Info, _All) -> + to_prom(open_os_files, gauge, desc(Info), val(Info)); +couch_to_prom([couchdb, httpd, clients_requesting_changes], Info, _All) -> + to_prom(httpd_clients_requesting_changes, gauge, desc(Info), val(Info)); couch_to_prom([ddoc_cache, hit], Info, All) -> Total = val(Info) + val([ddoc_cache, miss], All), to_prom(ddoc_cache_requests_total, counter, "number of design doc cache requests", Total); @@ -109,9 +122,13 @@ type_def(Metric, Type, Desc) -> to_bin(io_lib:format("# TYPE ~s ~s", [Name, Type])) ]. -to_prom(Metric, Type, Desc, Data) -> +% support creating a metric series with multiple label/values. +% Instances is of the form [{[{LabelName, LabelValue}], Value}, ...] +to_prom(Metric, Type, Desc, Instances) when is_list(Instances) -> TypeStr = type_def(Metric, Type, Desc), - [TypeStr] ++ to_prom(Metric, Data). + [TypeStr] ++ lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances); +to_prom(Metric, Type, Desc, Data) -> + to_prom(Metric, Type, Desc, [Data]). to_prom(Metric, Instances) when is_list(Instances) -> lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances); diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl index 9b1c47633..2a1016099 100644 --- a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl +++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl @@ -40,7 +40,8 @@ e2e_test_() -> ?TDEF_FE(t_chttpd_port), ?TDEF_FE(t_prometheus_port), ?TDEF_FE(t_metric_updated), - ?TDEF_FE(t_no_duplicate_metrics) + ?TDEF_FE(t_no_duplicate_metrics), + ?TDEF_FE(t_starts_with_couchdb) ] } } @@ -70,7 +71,7 @@ reject_test_() -> }. setup_prometheus(WithAdditionalPort) -> - Ctx = test_util:start_couch([chttpd]), + Ctx = test_util:start_couch([mem3, chttpd, couch_prometheus]), Persist = false, Hashed = couch_passwords:hash_admin_password(?PASS), ok = config:set("admins", ?USER, binary_to_list(Hashed), Persist), @@ -145,6 +146,33 @@ t_metric_updated(Port) -> end ). +t_starts_with_couchdb(Port) -> + Url = node_local_url(Port), + Stats = get_stats(Url), + Lines = re:split(Stats, "\n"), + lists:foreach( + fun(Line) -> + ?assert(is_binary(Line)), + Trimmed = string:trim(Line), + Expect = "^(#|couchdb_|$)", + case re:run(Trimmed, Expect) of + {match, _} -> + ok; + nomatch -> + erlang:error( + {assertRegexp_failed, [ + {module, ?MODULE}, + {line, ?LINE}, + {regexp, (Trimmed)}, + {expected_to_match, Expect}, + {result, nomatch} + ]} + ) + end + end, + Lines + ). + node_local_url(Port) -> Addr = config:get("chttpd", "bind_address", "127.0.0.1"), lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]). |