summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Sun <tony.sun427@gmail.com>2021-07-09 14:53:02 -0700
committerGitHub <noreply@github.com>2021-07-09 14:53:02 -0700
commitc05fd08308bee5c7c44a7596bb406dadb9981617 (patch)
tree932a2a187800e1a3f1fa6e0fa6b43bf439ee9016
parent3245e9bcce9e640406a22de24db940898bc0386a (diff)
downloadcouchdb-c05fd08308bee5c7c44a7596bb406dadb9981617.tar.gz
Port prometheus 3.x (#3506)
* Add new app couch_prometheus This will be a new app add a _prometheus endpoint which will return metrics information that adheres to the format described at https://prometheus.io/. Initial implementation of new _prometheus endpoint. A gen_server waits for scraping calls while polling couch_stats:fetch and other system info. The return value is constructed to adhere to prometheus format and returned as text/plain. The format code was originally written by @davisp. We add an option to spawn a new mochiweb_http server to allow for an additional port for scraping which does not require authentication. The default ports are 17986, 27986, 37986 across 3 nodes. make release Co-authored-by: Joan Touzet <wohali@users.noreply.github.com>
-rwxr-xr-xconfigure1
-rwxr-xr-xdev/run11
-rw-r--r--rebar.config.script1
-rw-r--r--rel/overlay/etc/default.ini5
-rw-r--r--rel/reltool.config3
-rw-r--r--setup_eunit.template1
-rw-r--r--src/chttpd/src/chttpd_node.erl8
-rw-r--r--src/couch/src/couch.app.src3
-rw-r--r--src/couch_prometheus/src/couch_prometheus.app.src20
-rw-r--r--src/couch_prometheus/src/couch_prometheus.hrl15
-rw-r--r--src/couch_prometheus/src/couch_prometheus_app.erl23
-rw-r--r--src/couch_prometheus/src/couch_prometheus_http.erl102
-rw-r--r--src/couch_prometheus/src/couch_prometheus_server.erl174
-rw-r--r--src/couch_prometheus/src/couch_prometheus_sup.erl39
-rw-r--r--src/couch_prometheus/src/couch_prometheus_util.erl166
-rw-r--r--src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl147
-rw-r--r--src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl65
17 files changed, 781 insertions, 3 deletions
diff --git a/configure b/configure
index 83be1c260..7a749442a 100755
--- a/configure
+++ b/configure
@@ -202,6 +202,7 @@ cat > rel/couchdb.config << EOF
{node_name, "-name couchdb@127.0.0.1"}.
{cluster_port, 5984}.
{backend_port, 5986}.
+{prometheus_port, 17986}.
EOF
cat > install.mk << EOF
diff --git a/dev/run b/dev/run
index 66d990a09..0c77707ef 100755
--- a/dev/run
+++ b/dev/run
@@ -280,7 +280,9 @@ def check_boot_script(ctx):
@log("Prepare configuration files")
def setup_configs(ctx):
for idx, node in enumerate(ctx["nodes"]):
- cluster_port, backend_port = get_ports(ctx, idx + ctx["node_number"])
+ cluster_port, backend_port, prometheus_port = get_ports(
+ ctx, idx + ctx["node_number"]
+ )
env = {
"prefix": toposixpath(ctx["rootdir"]),
"package_author_name": "The Apache Software Foundation",
@@ -293,6 +295,7 @@ def setup_configs(ctx):
"node_name": "-name %s@127.0.0.1" % node,
"cluster_port": cluster_port,
"backend_port": backend_port,
+ "prometheus_port": prometheus_port,
"uuid": "fake_uuid_for_dev",
"_default": "",
}
@@ -353,7 +356,11 @@ def apply_config_overrides(ctx, content):
def get_ports(ctx, idnode):
assert idnode
if idnode <= 5 and not ctx["auto_ports"]:
- return ((10000 * idnode) + 5984, (10000 * idnode) + 5986)
+ return (
+ (10000 * idnode) + 5984,
+ (10000 * idnode) + 5986,
+ (10000 * idnode) + 7986,
+ )
else:
return tuple(get_available_ports(2))
diff --git a/rebar.config.script b/rebar.config.script
index 52e4e4167..8267fc13e 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -140,6 +140,7 @@ SubDirs = [
"src/setup",
"src/smoosh",
"src/weatherreport",
+ "src/couch_prometheus",
"rel"
].
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index ac4972f76..5f916a0af 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -705,3 +705,8 @@ partitioned||* = true
;source_close_timeout_sec = 600
;require_node_param = false
;require_range_param = false
+
+[prometheus]
+additional_port = false
+bind_address = 127.0.0.1
+port = {{prometheus_port}}
diff --git a/rel/reltool.config b/rel/reltool.config
index 924ecd6b2..010a262ac 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -62,6 +62,8 @@
smoosh,
snappy,
weatherreport,
+ couch_prometheus,
+
%% extra
recon
]},
@@ -123,6 +125,7 @@
{app, smoosh, [{incl_cond, include}]},
{app, snappy, [{incl_cond, include}]},
{app, weatherreport, [{incl_cond, include}]},
+ {app, couch_prometheus, [{incl_cond, include}]},
%% extra
{app, recon, [{incl_cond, include}]}
diff --git a/setup_eunit.template b/setup_eunit.template
index 97bee466c..3625441bd 100644
--- a/setup_eunit.template
+++ b/setup_eunit.template
@@ -2,6 +2,7 @@
{package_author_name, "The Apache Software Foundation"},
{cluster_port, 5984},
{backend_port, 5986},
+ {prometheus_port, 17986},
{node_name, "-name couchdbtest@127.0.0.1"},
{data_dir, "/tmp"},
diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl
index 99407da72..d3d3ff7f8 100644
--- a/src/chttpd/src/chttpd_node.erl
+++ b/src/chttpd/src/chttpd_node.erl
@@ -117,6 +117,14 @@ handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_stats">> | Path]}=
chttpd:send_json(Req, EJSON1);
handle_node_req(#httpd{path_parts=[_, _Node, <<"_stats">>]}=Req) ->
send_method_not_allowed(Req, "GET");
+handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_prometheus">>]}=Req) ->
+ Metrics = call_node(Node, couch_prometheus_server, scrape, []),
+ Version = call_node(Node, couch_prometheus_server, version, []),
+ Type = "text/plain; version=" ++ Version,
+ Header = [{<<"Content-Type">>, ?l2b(Type)}],
+ chttpd:send_response(Req, 200, Header, Metrics);
+handle_node_req(#httpd{path_parts=[_, _Node, <<"_prometheus">>]}=Req) ->
+ send_method_not_allowed(Req, "GET");
% GET /_node/$node/_system
handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_system">>]}=Req) ->
Stats = call_node(Node, chttpd_node, get_stats, []),
diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src
index 6116c79ba..74674bbb5 100644
--- a/src/couch/src/couch.app.src
+++ b/src/couch/src/couch.app.src
@@ -45,7 +45,8 @@
couch_event,
ioq,
couch_stats,
- hyper
+ hyper,
+ couch_prometheus
]},
{env, [
{ httpd_global_handlers, [
diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src
new file mode 100644
index 000000000..bf49e59d2
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus.app.src
@@ -0,0 +1,20 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+{application, couch_prometheus, [
+ {description, "Aggregated metrics info for Prometheus consumption"},
+ {vsn, git},
+ {registered, []},
+ {applications, [kernel, stdlib, folsom, couch_stats, couch_log]},
+ {mod, {couch_prometheus_app, []}},
+ {env, []}
+]}.
diff --git a/src/couch_prometheus/src/couch_prometheus.hrl b/src/couch_prometheus/src/couch_prometheus.hrl
new file mode 100644
index 000000000..0970f4469
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus.hrl
@@ -0,0 +1,15 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-define(REFRESH_INTERVAL, 5).
+-define(PROMETHEUS_VERSION, "2.0").
+
diff --git a/src/couch_prometheus/src/couch_prometheus_app.erl b/src/couch_prometheus/src/couch_prometheus_app.erl
new file mode 100644
index 000000000..232c16a8a
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus_app.erl
@@ -0,0 +1,23 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_app).
+
+-behaviour(application).
+
+-export([start/2, stop/1]).
+
+start(_StartType, _StartArgs) ->
+ couch_prometheus_sup:start_link().
+
+stop(_State) ->
+ ok.
diff --git a/src/couch_prometheus/src/couch_prometheus_http.erl b/src/couch_prometheus/src/couch_prometheus_http.erl
new file mode 100644
index 000000000..bd0c4c6f9
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus_http.erl
@@ -0,0 +1,102 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_http).
+
+-compile(tuple_calls).
+
+-export([
+ start_link/0,
+ handle_request/1
+]).
+
+-include("couch_prometheus.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+start_link() ->
+ IP = case config:get("prometheus", "bind_address", "any") of
+ "any" -> any;
+ Else -> Else
+ end,
+ Port = config:get("prometheus", "port"),
+ ok = couch_httpd:validate_bind_address(IP),
+
+ Options = [
+ {name, ?MODULE},
+ {loop, fun ?MODULE:handle_request/1},
+ {ip, IP},
+ {port, Port}
+ ],
+ case mochiweb_http:start(Options) of
+ {ok, Pid} ->
+ {ok, Pid};
+ {error, Reason} ->
+ io:format("Failure to start Mochiweb: ~s~n", [Reason]),
+ {error, Reason}
+ end.
+
+handle_request(MochiReq) ->
+ RawUri = MochiReq:get(raw_path),
+ {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri),
+ PathParts = string:tokens(Path, "/"),
+ try
+ case PathParts of
+ ["_node", Node, "_prometheus"] ->
+ send_prometheus(MochiReq, Node);
+ _ ->
+ send_error(MochiReq, 404, <<"not_found">>, <<>>)
+ end
+ catch T:R ->
+ Body = list_to_binary(io_lib:format("~p:~p", [T, R])),
+ send_error(MochiReq, 500, <<"server_error">>, Body)
+ end.
+
+send_prometheus(MochiReq, Node) ->
+ Type = "text/plain; version=" ++ ?PROMETHEUS_VERSION,
+ Headers = couch_httpd:server_header() ++ [
+ {<<"Content-Type">>, ?l2b(Type)}
+ ],
+ Body = call_node(Node, couch_prometheus_server, scrape, []),
+ send_resp(MochiReq, 200, Headers, Body).
+
+send_resp(MochiReq, Status, ExtraHeaders, Body) ->
+ Headers = couch_httpd:server_header() ++ ExtraHeaders,
+ MochiReq:respond({Status, Headers, Body}).
+
+send_error(MochiReq, Code, Error, Reason) ->
+ Headers = couch_httpd:server_header() ++ [
+ {<<"Content-Type">>, <<"application/json">>}
+ ],
+ JsonError = {[{<<"error">>, Error},
+ {<<"reason">>, Reason}]},
+ Body = ?JSON_ENCODE(JsonError),
+ MochiReq:respond({Code, Headers, Body}).
+
+call_node("_local", Mod, Fun, Args) ->
+ call_node(node(), Mod, Fun, Args);
+call_node(Node0, Mod, Fun, Args) when is_list(Node0) ->
+ Node1 = try
+ list_to_existing_atom(Node0)
+ catch
+ error:badarg ->
+ NoNode = list_to_binary(Node0),
+ throw({not_found, <<"no such node: ", NoNode/binary>>})
+ end,
+ call_node(Node1, Mod, Fun, Args);
+call_node(Node, Mod, Fun, Args) when is_atom(Node) ->
+ case rpc:call(Node, Mod, Fun, Args) of
+ {badrpc, nodedown} ->
+ Reason = list_to_binary(io_lib:format("~s is down", [Node])),
+ throw({error, {nodedown, Reason}});
+ Else ->
+ Else
+ end.
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
new file mode 100644
index 000000000..e97df04a4
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -0,0 +1,174 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_server).
+
+-behaviour(gen_server).
+
+-import(couch_prometheus_util, [
+ couch_to_prom/3,
+ to_prom/3,
+ to_prom_summary/2
+]).
+
+-export([
+ scrape/0,
+ version/0
+]).
+
+-export([
+ start_link/0,
+ init/1,
+ handle_call/3,
+ handle_cast/2,
+ handle_info/2,
+ code_change/3,
+ terminate/2
+]).
+
+-include("couch_prometheus.hrl").
+
+start_link() ->
+ gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+-record(st, {
+ metrics,
+ refresh
+}).
+
+init([]) ->
+ Metrics = refresh_metrics(),
+ RT = update_refresh_timer(),
+ {ok, #st{metrics=Metrics, refresh=RT}}.
+
+scrape() ->
+ {ok, Metrics} = gen_server:call(?MODULE, scrape),
+ Metrics.
+
+version() ->
+ ?PROMETHEUS_VERSION.
+
+handle_call(scrape, _from, #st{metrics = Metrics}=State) ->
+ {reply, {ok, Metrics}, State};
+handle_call(refresh, _from, #st{refresh=OldRT} = State) ->
+ timer:cancel(OldRT),
+ Metrics = refresh_metrics(),
+ RT = update_refresh_timer(),
+ {reply, ok, State#st{metrics=Metrics, refresh=RT}};
+handle_call(Msg, _From, State) ->
+ {stop, {unknown_call, Msg}, error, State}.
+
+handle_cast(Msg, State) ->
+ {stop, {unknown_cast, Msg}, State}.
+
+handle_info(refresh, State) ->
+ Metrics = refresh_metrics(),
+ RT = update_refresh_timer(),
+ {noreply, State#st{metrics=Metrics, refresh=RT}};
+handle_info(Msg, State) ->
+ {stop, {unknown_info, Msg}, State}.
+
+terminate(_Reason, _State) ->
+ ok.
+
+code_change(_OldVsn, State, _Extra) ->
+ {ok, State}.
+
+refresh_metrics() ->
+ CouchDB = get_couchdb_stats(),
+ System = couch_stats_httpd:to_ejson(get_system_stats()),
+ couch_prometheus_util:to_bin(lists:map(fun(Line) ->
+ io_lib:format("~s~n", [Line])
+ end, CouchDB ++ System)).
+
+get_couchdb_stats() ->
+ Stats = lists:sort(couch_stats:fetch()),
+ lists:flatmap(fun({Path, Info}) ->
+ couch_to_prom(Path, Info, Stats)
+ end, Stats).
+
+get_system_stats() ->
+ lists:flatten([
+ get_uptime_stat(),
+ get_vm_stats(),
+ get_io_stats(),
+ get_message_queue_stats(),
+ get_run_queue_stats(),
+ get_vm_stats(),
+ get_ets_stats()
+ ]).
+
+get_uptime_stat() ->
+ to_prom(uptime_seconds, counter, couch_app:uptime() div 1000).
+
+get_vm_stats() ->
+ MemLabels = lists:map(fun({Type, Value}) ->
+ {[{memory_type, Type}], Value}
+ end, erlang:memory()),
+ {NumGCs, WordsReclaimed, _} = erlang:statistics(garbage_collection),
+ CtxSwitches = element(1, erlang:statistics(context_switches)),
+ Reds = element(1, erlang:statistics(reductions)),
+ ProcCount = erlang:system_info(process_count),
+ ProcLimit = erlang:system_info(process_limit),
+ [
+ to_prom(erlang_memory_bytes, gauge, MemLabels),
+ to_prom(erlang_gc_collections_total, counter, NumGCs),
+ to_prom(erlang_gc_words_reclaimed_total, counter, WordsReclaimed),
+ to_prom(erlang_context_switches_total, counter, CtxSwitches),
+ to_prom(erlang_reductions_total, counter, Reds),
+ to_prom(erlang_processes, gauge, ProcCount),
+ to_prom(erlang_process_limit, gauge, ProcLimit)
+ ].
+
+get_io_stats() ->
+ {{input, In}, {output, Out}} = erlang:statistics(io),
+ [
+ to_prom(erlang_io_recv_bytes_total, counter, In),
+ to_prom(erlang_io_sent_bytes_total, counter, Out)
+ ].
+
+get_message_queue_stats() ->
+ Queues = lists:map(fun(Name) ->
+ case process_info(whereis(Name), message_queue_len) of
+ {message_queue_len, N} ->
+ N;
+ _ ->
+ 0
+ end
+ end, registered()),
+ [
+ to_prom(erlang_message_queues, gauge, lists:sum(Queues)),
+ to_prom(erlang_message_queue_min, gauge, lists:min(Queues)),
+ to_prom(erlang_message_queue_max, gauge, lists:max(Queues))
+ ].
+
+get_run_queue_stats() ->
+ %% Workaround for https://bugs.erlang.org/browse/ERL-1355
+ {Normal, Dirty} = case erlang:system_info(dirty_cpu_schedulers) > 0 of
+ false ->
+ {statistics(run_queue), 0};
+ true ->
+ [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)),
+ {lists:sum(SQs), DCQ}
+ end,
+ [
+ to_prom(erlang_scheduler_queues, gauge, Normal),
+ to_prom(erlang_dirty_cpu_scheduler_queues, gauge, Dirty)
+ ].
+
+get_ets_stats() ->
+ NumTabs = length(ets:all()),
+ to_prom(erlang_ets_table, gauge, NumTabs).
+
+update_refresh_timer() ->
+ RefreshTime = 1000 * config:get_integer("couch_prometheus", "interval", ?REFRESH_INTERVAL),
+ erlang:send_after(RefreshTime, self(), refresh).
diff --git a/src/couch_prometheus/src/couch_prometheus_sup.erl b/src/couch_prometheus/src/couch_prometheus_sup.erl
new file mode 100644
index 000000000..8d8c7e078
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus_sup.erl
@@ -0,0 +1,39 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_sup).
+
+-behaviour(supervisor).
+
+-export([
+ start_link/0,
+ init/1
+]).
+
+-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}).
+
+start_link() ->
+ supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+init([]) ->
+ {ok, {
+ {one_for_one, 5, 10}, [
+ ?CHILD(couch_prometheus_server, worker)
+ ] ++ maybe_start_prometheus_http()
+ }}.
+
+maybe_start_prometheus_http() ->
+ case config:get("prometheus", "additional_port", "false") of
+ "false" -> [];
+ "true" -> [?CHILD(couch_prometheus_http, worker)];
+ _ -> []
+ end.
diff --git a/src/couch_prometheus/src/couch_prometheus_util.erl b/src/couch_prometheus/src/couch_prometheus_util.erl
new file mode 100644
index 000000000..c3b58cb3a
--- /dev/null
+++ b/src/couch_prometheus/src/couch_prometheus_util.erl
@@ -0,0 +1,166 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_util ).
+
+-export([
+ couch_to_prom/3,
+ to_bin/1,
+ to_prom/3,
+ to_prom_summary/2
+]).
+
+-include("couch_prometheus.hrl").
+
+couch_to_prom([couch_log, level, alert], Info, _All) ->
+ to_prom(couch_log_requests_total, counter, {[{level, alert}], val(Info)});
+couch_to_prom([couch_log, level, Level], Info, _All) ->
+ to_prom(couch_log_requests_total, {[{level, Level}], val(Info)});
+
+couch_to_prom([couch_replicator, checkpoints, failure], Info, _All) ->
+ to_prom(couch_replicator_checkpoints_failure_total, counter, val(Info));
+couch_to_prom([couch_replicator, checkpoints, success], Info, All) ->
+ Total = val(Info) + val([couch_replicator, checkpoints, failure], All),
+ to_prom(couch_replicator_checkpoints_total, counter, Total);
+couch_to_prom([couch_replicator, responses, failure], Info, _All) ->
+ to_prom(couch_replicator_responses_failure_total, counter, val(Info));
+couch_to_prom([couch_replicator, responses, success], Info, All) ->
+ Total = val(Info) + val([couch_replicator, responses, failure], All),
+ to_prom(couch_replicator_responses_total, counter, Total);
+couch_to_prom([couch_replicator, stream_responses, failure], Info, _All) ->
+ to_prom(couch_replicator_stream_responses_failure_total, counter, val(Info));
+couch_to_prom([couch_replicator, stream_responses, success], Info, All) ->
+ Total = val(Info) + val([couch_replicator, stream_responses, failure], All),
+ to_prom(couch_replicator_stream_responses_total, counter, Total);
+
+couch_to_prom([couchdb, auth_cache_hits], Info, All) ->
+ Total = val(Info) + val([couchdb, auth_cache_misses], All),
+ to_prom(auth_cache_requests_total, counter, Total);
+couch_to_prom([couchdb, auth_cache_misses], Info, _All) ->
+ to_prom(auth_cache_misses_total, counter, val(Info));
+couch_to_prom([couchdb, httpd_request_methods, 'COPY'], Info, _All) ->
+ to_prom(httpd_request_methods, counter, {[{method, 'COPY'}], val(Info)});
+couch_to_prom([couchdb, httpd_request_methods, Method], Info, _All) ->
+ to_prom(httpd_request_methods, {[{method, Method}], val(Info)});
+couch_to_prom([couchdb, httpd_status_codes, Code], Info, _All) ->
+ to_prom(httpd_status_codes, {[{code, Code}], val(Info)});
+
+couch_to_prom([ddoc_cache, hit], Info, All) ->
+ Total = val(Info) + val([ddoc_cache, miss], All),
+ to_prom(ddoc_cache_requests_total, counter, Total);
+couch_to_prom([ddoc_cache, miss], Info, _All) ->
+ to_prom(ddoc_cache_requests_failures_total, counter, val(Info));
+couch_to_prom([ddoc_cache, recovery], Info, _All) ->
+ to_prom(ddoc_cache_requests_recovery_total, counter, val(Info));
+
+couch_to_prom([fabric, read_repairs, failure], Info, _All) ->
+ to_prom(fabric_read_repairs_failures_total, counter, val(Info));
+couch_to_prom([fabric, read_repairs, success], Info, All) ->
+ Total = val(Info) + val([fabric, read_repairs, failure], All),
+ to_prom(fabric_read_repairs_total, counter, Total);
+
+couch_to_prom([rexi, streams, timeout, init_stream], Info, _All) ->
+ to_prom(rexi_streams_timeout_total, counter, {[{stage, init_stream}], val(Info)});
+couch_to_prom([rexi_streams, timeout, Stage], Info, _All) ->
+ to_prom(rexi_streams_timeout_total, {[{stage, Stage}], val(Info)});
+
+couch_to_prom([couchdb | Rest], Info, All) ->
+ couch_to_prom(Rest, Info, All);
+
+couch_to_prom(Path, Info, _All) ->
+ case lists:keyfind(type, 1, Info) of
+ {type, counter} ->
+ Metric = counter_metric(Path),
+ to_prom(Metric, counter, val(Info));
+ {type, gauge} ->
+ to_prom(path_to_name(Path), gauge, val(Info));
+ {type, histogram} ->
+ to_prom_summary(Path, Info)
+ end.
+
+to_prom(Metric, Type, Data) ->
+ TypeStr = to_bin(io_lib:format("# TYPE ~s ~s", [to_prom_name(Metric), Type])),
+ [TypeStr] ++ to_prom(Metric, Data).
+
+to_prom(Metric, Instances) when is_list(Instances) ->
+ lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances);
+to_prom(Metric, {Labels, Value}) ->
+ LabelParts = lists:map(fun({K, V}) ->
+ lists:flatten(io_lib:format("~s=\"~s\"", [to_bin(K), to_bin(V)]))
+ end, Labels),
+ MetricStr = case length(LabelParts) > 0 of
+ true ->
+ LabelStr = string:join(LabelParts, ", "),
+ lists:flatten(io_lib:format("~s{~s}", [to_prom_name(Metric), LabelStr]));
+ false ->
+ lists:flatten(io_lib:format("~s", [to_prom_name(Metric)]))
+ end,
+ [to_bin(io_lib:format("~s ~p", [MetricStr, Value]))];
+to_prom(Metric, Value) ->
+ [to_bin(io_lib:format("~s ~p", [to_prom_name(Metric), Value]))].
+
+to_prom_summary(Path, Info) ->
+ Metric = path_to_name(Path ++ ["seconds"]),
+ {value, Value} = lists:keyfind(value, 1, Info),
+ {arithmetic_mean, Mean} = lists:keyfind(arithmetic_mean, 1, Value),
+ {percentile, Percentiles} = lists:keyfind(percentile, 1, Value),
+ {n, Count} = lists:keyfind(n, 1, Value),
+ Quantiles = lists:map(fun({Perc, Val0}) ->
+ % Prometheus uses seconds, so we need to covert milliseconds to seconds
+ Val = Val0/1000,
+ case Perc of
+ 50 -> {[{quantile, <<"0.5">>}], Val};
+ 75 -> {[{quantile, <<"0.75">>}], Val};
+ 90 -> {[{quantile, <<"0.9">>}], Val};
+ 95 -> {[{quantile, <<"0.95">>}], Val};
+ 99 -> {[{quantile, <<"0.99">>}], Val};
+ 999 -> {[{quantile, <<"0.999">>}], Val}
+ end
+ end, Percentiles),
+ SumMetric = path_to_name(Path ++ ["seconds", "sum"]),
+ SumStat = to_prom(SumMetric, Count * Mean),
+ CountMetric = path_to_name(Path ++ ["seconds", "count"]),
+ CountStat = to_prom(CountMetric, Count),
+ to_prom(Metric, summary, Quantiles) ++ [SumStat, CountStat].
+
+to_prom_name(Metric) ->
+ to_bin(io_lib:format("couchdb_~s", [Metric])).
+
+path_to_name(Path) ->
+ Parts = lists:map(fun(Part) ->
+ io_lib:format("~s", [Part])
+ end, Path),
+ string:join(Parts, "_").
+
+counter_metric(Path) ->
+ Name = path_to_name(Path),
+ case string:find(Name, <<"_total">>, trailing) == <<"_total">> of
+ true -> Name;
+ false -> to_bin(io_lib:format("~s_total", [Name]))
+ end.
+
+to_bin(Data) when is_list(Data) ->
+ iolist_to_binary(Data);
+to_bin(Data) when is_atom(Data) ->
+ atom_to_binary(Data, utf8);
+to_bin(Data) when is_integer(Data) ->
+ integer_to_binary(Data);
+to_bin(Data) when is_binary(Data) ->
+ Data.
+
+val(Data) ->
+ {value, V} = lists:keyfind(value, 1, Data),
+ V.
+
+val(Key, Stats) ->
+ {Key, Data} = lists:keyfind(Key, 1, Stats),
+ val(Data). \ No newline at end of file
diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
new file mode 100644
index 000000000..c862b9a9f
--- /dev/null
+++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl
@@ -0,0 +1,147 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_e2e_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+
+-define(USER, "prometheus_test_admin").
+-define(PASS, "pass").
+-define(AUTH, {basic_auth, {?USER, ?PASS}}).
+-define(PROM_PORT, "17986").
+-define(CONTENT_JSON, {"Content-Type", "application/json"}).
+
+start() ->
+ test_util:start_couch([chttpd, couch_prometheus]).
+
+setup() ->
+ Hashed = couch_passwords:hash_admin_password(?PASS),
+ ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false),
+ ok = config:set_integer("stats", "interval", 2),
+ ok = config:set_integer("couch_prometheus", "interval", 1),
+ Port = mochiweb_socket_server:get(chttpd, port),
+ construct_url(Port).
+
+teardown(_) ->
+ ok.
+
+couch_prometheus_e2e_test_() ->
+ {
+ "Prometheus E2E Tests",
+ {
+ setup,
+ fun start/0, fun test_util:stop_couch/1,
+ {
+ foreach,
+ fun setup/0, fun teardown/1,
+ [
+ fun node_call_chttpd/1,
+ fun node_call_prometheus_http/1,
+ fun deny_prometheus_http/1,
+ fun node_see_updated_metrics/1
+ ]
+ }
+ }
+ }.
+
+% normal chttpd path via cluster port
+node_call_chttpd(Url) ->
+ {ok, RC1, _, _} = test_request:get(
+ Url,
+ [?CONTENT_JSON, ?AUTH],
+ []
+ ),
+ ?_assertEqual(200, RC1).
+
+% normal chttpd path via cluster port
+node_see_updated_metrics(Url) ->
+ TmpDb = ?tempdb(),
+ Addr = config:get("chttpd", "bind_address", "127.0.0.1"),
+ Port = mochiweb_socket_server:get(chttpd, port),
+ DbUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]),
+ create_db(DbUrl),
+ [create_doc(DbUrl, "testdoc" ++ integer_to_binary(I)) || I <- lists:seq(1, 100)],
+ delete_db(DbUrl),
+ InitMetrics = wait_for_metrics(Url, "couchdb_httpd_requests_total 0", 5000),
+ UpdatedMetrics = wait_for_metrics(Url, "couchdb_httpd_requests_total", 10000),
+ % since the puts happen so fast, we can't have an exact
+ % total requests given the scraping interval. so we just want to acknowledge
+ % a change as occurred
+ ?_assertNotEqual(InitMetrics, UpdatedMetrics).
+
+% normal chttpd path via cluster port
+node_call_prometheus_http(_) ->
+ maybe_start_http_server("true"),
+ Url = construct_url(?PROM_PORT),
+ {ok, RC1, _, _} = test_request:get(
+ Url,
+ [?CONTENT_JSON, ?AUTH],
+ []
+ ),
+ % since this port doesn't require auth, this should work
+ {ok, RC2, _, _} = test_request:get(
+ Url,
+ [?CONTENT_JSON],
+ []
+ ),
+ delete_db(Url),
+ ?_assertEqual(200, RC2).
+
+% we don't start the http server
+deny_prometheus_http(_) ->
+ maybe_start_http_server("false"),
+ Url = construct_url(?PROM_PORT),
+ Response = test_request:get(
+ Url,
+ [?CONTENT_JSON, ?AUTH],
+ []
+ ),
+ ?_assertEqual({error,{conn_failed,{error,econnrefused}}}, Response).
+
+maybe_start_http_server(Additional) ->
+ test_util:stop_applications([couch_prometheus, chttpd]),
+ Hashed = couch_passwords:hash_admin_password(?PASS),
+ ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false),
+ ok = config:set("prometheus", "additional_port", Additional),
+ ok = config:set("prometheus", "port", ?PROM_PORT),
+ test_util:start_applications([couch_prometheus, chttpd]).
+
+construct_url(Port) ->
+ Addr = config:get("chttpd", "bind_address", "127.0.0.1"),
+ lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]).
+
+create_db(Url) ->
+ Addr = config:get("chttpd", "bind_address", "127.0.0.1"),
+ Port = mochiweb_socket_server:get(chttpd, port),
+ {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"),
+ ?assert(Status =:= 201 orelse Status =:= 202).
+
+delete_db(Url) ->
+ {ok, 200, _, _} = test_request:delete(Url, [?AUTH]).
+
+create_doc(Url, Id) ->
+ test_request:put(Url ++ "/" ++ Id,
+ [?CONTENT_JSON, ?AUTH], "{\"mr\": \"rockoartischocko\"}").
+
+wait_for_metrics(Url, Value, Timeout) ->
+ test_util:wait(fun() ->
+ {ok, _, _, Body} = test_request:get(
+ Url,
+ [?CONTENT_JSON, ?AUTH],
+ []
+ ),
+ case string:find(Body, Value) of
+ nomatch -> wait;
+ M -> M
+ end
+ end, Timeout).
diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl
new file mode 100644
index 000000000..8fe17e561
--- /dev/null
+++ b/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl
@@ -0,0 +1,65 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_prometheus_util_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+-import(couch_prometheus_util, [
+ to_prom/3,
+ to_prom_summary/2
+]).
+
+couch_prometheus_util_test_() ->
+ [
+ ?_assertEqual(<<"couchdb_ddoc_cache 10">>,
+ test_to_prom_output(ddoc_cache, counter, 10)),
+ ?_assertEqual(<<"couchdb_httpd_status_codes{code=\"200\"} 3">>,
+ test_to_prom_output(httpd_status_codes, counter, {[{code, 200}], 3})),
+ ?_assertEqual(<<"couchdb_temperature_celsius 36">>,
+ test_to_prom_output(temperature_celsius, gauge, 36)),
+ ?_assertEqual(<<"couchdb_mango_query_time_seconds{quantile=\"0.75\"} 4.5">>,
+ test_to_prom_sum_output([mango_query_time], [
+ {value,
+ [
+ {min,0.0},
+ {max,0.0},
+ {arithmetic_mean,0.0},
+ {geometric_mean,0.0},
+ {harmonic_mean,0.0},
+ {median,0.0},{variance,0.0},
+ {standard_deviation,0.0},
+ {skewness,0.0},{kurtosis,0.0},
+ {percentile,[
+ {50,0.0},
+ {75, 4500},
+ {90,0.0},
+ {95,0.0},
+ {99,0.0},
+ {999,0.0}]},
+ {histogram,[
+ {0,0}]},
+ {n,0}
+ ]
+ },
+ {type,histogram},
+ {desc, <<"length of time processing a mango query">>}
+ ]))
+ ].
+
+test_to_prom_output(Metric, Type, Val) ->
+ Out = to_prom(Metric, Type, Val),
+ lists:nth(2, Out).
+
+test_to_prom_sum_output(Metric, Info) ->
+ Out = to_prom_summary(Metric, Info),
+ lists:nth(3, Out). \ No newline at end of file