diff options
author | Tony Sun <tony.sun427@gmail.com> | 2021-07-09 14:53:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-09 14:53:02 -0700 |
commit | c05fd08308bee5c7c44a7596bb406dadb9981617 (patch) | |
tree | 932a2a187800e1a3f1fa6e0fa6b43bf439ee9016 | |
parent | 3245e9bcce9e640406a22de24db940898bc0386a (diff) | |
download | couchdb-c05fd08308bee5c7c44a7596bb406dadb9981617.tar.gz |
Port prometheus 3.x (#3506)
* Add new app couch_prometheus
This will be a new app add a _prometheus endpoint which will
return metrics information that adheres to the format described at
https://prometheus.io/.
Initial implementation of new _prometheus endpoint. A gen_server
waits for scraping calls while polling couch_stats:fetch and
other system info. The return value is constructed to adhere to
prometheus format and returned as text/plain. The format code
was originally written by @davisp.
We add an option to spawn a new mochiweb_http server to allow for an
additional port for scraping which does not require authentication.
The default ports are 17986, 27986, 37986 across 3 nodes.
make release
Co-authored-by: Joan Touzet <wohali@users.noreply.github.com>
-rwxr-xr-x | configure | 1 | ||||
-rwxr-xr-x | dev/run | 11 | ||||
-rw-r--r-- | rebar.config.script | 1 | ||||
-rw-r--r-- | rel/overlay/etc/default.ini | 5 | ||||
-rw-r--r-- | rel/reltool.config | 3 | ||||
-rw-r--r-- | setup_eunit.template | 1 | ||||
-rw-r--r-- | src/chttpd/src/chttpd_node.erl | 8 | ||||
-rw-r--r-- | src/couch/src/couch.app.src | 3 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus.app.src | 20 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus.hrl | 15 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_app.erl | 23 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_http.erl | 102 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_server.erl | 174 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_sup.erl | 39 | ||||
-rw-r--r-- | src/couch_prometheus/src/couch_prometheus_util.erl | 166 | ||||
-rw-r--r-- | src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl | 147 | ||||
-rw-r--r-- | src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl | 65 |
17 files changed, 781 insertions, 3 deletions
@@ -202,6 +202,7 @@ cat > rel/couchdb.config << EOF {node_name, "-name couchdb@127.0.0.1"}. {cluster_port, 5984}. {backend_port, 5986}. +{prometheus_port, 17986}. EOF cat > install.mk << EOF @@ -280,7 +280,9 @@ def check_boot_script(ctx): @log("Prepare configuration files") def setup_configs(ctx): for idx, node in enumerate(ctx["nodes"]): - cluster_port, backend_port = get_ports(ctx, idx + ctx["node_number"]) + cluster_port, backend_port, prometheus_port = get_ports( + ctx, idx + ctx["node_number"] + ) env = { "prefix": toposixpath(ctx["rootdir"]), "package_author_name": "The Apache Software Foundation", @@ -293,6 +295,7 @@ def setup_configs(ctx): "node_name": "-name %s@127.0.0.1" % node, "cluster_port": cluster_port, "backend_port": backend_port, + "prometheus_port": prometheus_port, "uuid": "fake_uuid_for_dev", "_default": "", } @@ -353,7 +356,11 @@ def apply_config_overrides(ctx, content): def get_ports(ctx, idnode): assert idnode if idnode <= 5 and not ctx["auto_ports"]: - return ((10000 * idnode) + 5984, (10000 * idnode) + 5986) + return ( + (10000 * idnode) + 5984, + (10000 * idnode) + 5986, + (10000 * idnode) + 7986, + ) else: return tuple(get_available_ports(2)) diff --git a/rebar.config.script b/rebar.config.script index 52e4e4167..8267fc13e 100644 --- a/rebar.config.script +++ b/rebar.config.script @@ -140,6 +140,7 @@ SubDirs = [ "src/setup", "src/smoosh", "src/weatherreport", + "src/couch_prometheus", "rel" ]. diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index ac4972f76..5f916a0af 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -705,3 +705,8 @@ partitioned||* = true ;source_close_timeout_sec = 600 ;require_node_param = false ;require_range_param = false + +[prometheus] +additional_port = false +bind_address = 127.0.0.1 +port = {{prometheus_port}} diff --git a/rel/reltool.config b/rel/reltool.config index 924ecd6b2..010a262ac 100644 --- a/rel/reltool.config +++ b/rel/reltool.config @@ -62,6 +62,8 @@ smoosh, snappy, weatherreport, + couch_prometheus, + %% extra recon ]}, @@ -123,6 +125,7 @@ {app, smoosh, [{incl_cond, include}]}, {app, snappy, [{incl_cond, include}]}, {app, weatherreport, [{incl_cond, include}]}, + {app, couch_prometheus, [{incl_cond, include}]}, %% extra {app, recon, [{incl_cond, include}]} diff --git a/setup_eunit.template b/setup_eunit.template index 97bee466c..3625441bd 100644 --- a/setup_eunit.template +++ b/setup_eunit.template @@ -2,6 +2,7 @@ {package_author_name, "The Apache Software Foundation"}, {cluster_port, 5984}, {backend_port, 5986}, + {prometheus_port, 17986}, {node_name, "-name couchdbtest@127.0.0.1"}, {data_dir, "/tmp"}, diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl index 99407da72..d3d3ff7f8 100644 --- a/src/chttpd/src/chttpd_node.erl +++ b/src/chttpd/src/chttpd_node.erl @@ -117,6 +117,14 @@ handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_stats">> | Path]}= chttpd:send_json(Req, EJSON1); handle_node_req(#httpd{path_parts=[_, _Node, <<"_stats">>]}=Req) -> send_method_not_allowed(Req, "GET"); +handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_prometheus">>]}=Req) -> + Metrics = call_node(Node, couch_prometheus_server, scrape, []), + Version = call_node(Node, couch_prometheus_server, version, []), + Type = "text/plain; version=" ++ Version, + Header = [{<<"Content-Type">>, ?l2b(Type)}], + chttpd:send_response(Req, 200, Header, Metrics); +handle_node_req(#httpd{path_parts=[_, _Node, <<"_prometheus">>]}=Req) -> + send_method_not_allowed(Req, "GET"); % GET /_node/$node/_system handle_node_req(#httpd{method='GET', path_parts=[_, Node, <<"_system">>]}=Req) -> Stats = call_node(Node, chttpd_node, get_stats, []), diff --git a/src/couch/src/couch.app.src b/src/couch/src/couch.app.src index 6116c79ba..74674bbb5 100644 --- a/src/couch/src/couch.app.src +++ b/src/couch/src/couch.app.src @@ -45,7 +45,8 @@ couch_event, ioq, couch_stats, - hyper + hyper, + couch_prometheus ]}, {env, [ { httpd_global_handlers, [ diff --git a/src/couch_prometheus/src/couch_prometheus.app.src b/src/couch_prometheus/src/couch_prometheus.app.src new file mode 100644 index 000000000..bf49e59d2 --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus.app.src @@ -0,0 +1,20 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +{application, couch_prometheus, [ + {description, "Aggregated metrics info for Prometheus consumption"}, + {vsn, git}, + {registered, []}, + {applications, [kernel, stdlib, folsom, couch_stats, couch_log]}, + {mod, {couch_prometheus_app, []}}, + {env, []} +]}. diff --git a/src/couch_prometheus/src/couch_prometheus.hrl b/src/couch_prometheus/src/couch_prometheus.hrl new file mode 100644 index 000000000..0970f4469 --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus.hrl @@ -0,0 +1,15 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-define(REFRESH_INTERVAL, 5). +-define(PROMETHEUS_VERSION, "2.0"). + diff --git a/src/couch_prometheus/src/couch_prometheus_app.erl b/src/couch_prometheus/src/couch_prometheus_app.erl new file mode 100644 index 000000000..232c16a8a --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus_app.erl @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_app). + +-behaviour(application). + +-export([start/2, stop/1]). + +start(_StartType, _StartArgs) -> + couch_prometheus_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/couch_prometheus/src/couch_prometheus_http.erl b/src/couch_prometheus/src/couch_prometheus_http.erl new file mode 100644 index 000000000..bd0c4c6f9 --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus_http.erl @@ -0,0 +1,102 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_http). + +-compile(tuple_calls). + +-export([ + start_link/0, + handle_request/1 +]). + +-include("couch_prometheus.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +start_link() -> + IP = case config:get("prometheus", "bind_address", "any") of + "any" -> any; + Else -> Else + end, + Port = config:get("prometheus", "port"), + ok = couch_httpd:validate_bind_address(IP), + + Options = [ + {name, ?MODULE}, + {loop, fun ?MODULE:handle_request/1}, + {ip, IP}, + {port, Port} + ], + case mochiweb_http:start(Options) of + {ok, Pid} -> + {ok, Pid}; + {error, Reason} -> + io:format("Failure to start Mochiweb: ~s~n", [Reason]), + {error, Reason} + end. + +handle_request(MochiReq) -> + RawUri = MochiReq:get(raw_path), + {"/" ++ Path, _, _} = mochiweb_util:urlsplit_path(RawUri), + PathParts = string:tokens(Path, "/"), + try + case PathParts of + ["_node", Node, "_prometheus"] -> + send_prometheus(MochiReq, Node); + _ -> + send_error(MochiReq, 404, <<"not_found">>, <<>>) + end + catch T:R -> + Body = list_to_binary(io_lib:format("~p:~p", [T, R])), + send_error(MochiReq, 500, <<"server_error">>, Body) + end. + +send_prometheus(MochiReq, Node) -> + Type = "text/plain; version=" ++ ?PROMETHEUS_VERSION, + Headers = couch_httpd:server_header() ++ [ + {<<"Content-Type">>, ?l2b(Type)} + ], + Body = call_node(Node, couch_prometheus_server, scrape, []), + send_resp(MochiReq, 200, Headers, Body). + +send_resp(MochiReq, Status, ExtraHeaders, Body) -> + Headers = couch_httpd:server_header() ++ ExtraHeaders, + MochiReq:respond({Status, Headers, Body}). + +send_error(MochiReq, Code, Error, Reason) -> + Headers = couch_httpd:server_header() ++ [ + {<<"Content-Type">>, <<"application/json">>} + ], + JsonError = {[{<<"error">>, Error}, + {<<"reason">>, Reason}]}, + Body = ?JSON_ENCODE(JsonError), + MochiReq:respond({Code, Headers, Body}). + +call_node("_local", Mod, Fun, Args) -> + call_node(node(), Mod, Fun, Args); +call_node(Node0, Mod, Fun, Args) when is_list(Node0) -> + Node1 = try + list_to_existing_atom(Node0) + catch + error:badarg -> + NoNode = list_to_binary(Node0), + throw({not_found, <<"no such node: ", NoNode/binary>>}) + end, + call_node(Node1, Mod, Fun, Args); +call_node(Node, Mod, Fun, Args) when is_atom(Node) -> + case rpc:call(Node, Mod, Fun, Args) of + {badrpc, nodedown} -> + Reason = list_to_binary(io_lib:format("~s is down", [Node])), + throw({error, {nodedown, Reason}}); + Else -> + Else + end. diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl new file mode 100644 index 000000000..e97df04a4 --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus_server.erl @@ -0,0 +1,174 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_server). + +-behaviour(gen_server). + +-import(couch_prometheus_util, [ + couch_to_prom/3, + to_prom/3, + to_prom_summary/2 +]). + +-export([ + scrape/0, + version/0 +]). + +-export([ + start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + code_change/3, + terminate/2 +]). + +-include("couch_prometheus.hrl"). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +-record(st, { + metrics, + refresh +}). + +init([]) -> + Metrics = refresh_metrics(), + RT = update_refresh_timer(), + {ok, #st{metrics=Metrics, refresh=RT}}. + +scrape() -> + {ok, Metrics} = gen_server:call(?MODULE, scrape), + Metrics. + +version() -> + ?PROMETHEUS_VERSION. + +handle_call(scrape, _from, #st{metrics = Metrics}=State) -> + {reply, {ok, Metrics}, State}; +handle_call(refresh, _from, #st{refresh=OldRT} = State) -> + timer:cancel(OldRT), + Metrics = refresh_metrics(), + RT = update_refresh_timer(), + {reply, ok, State#st{metrics=Metrics, refresh=RT}}; +handle_call(Msg, _From, State) -> + {stop, {unknown_call, Msg}, error, State}. + +handle_cast(Msg, State) -> + {stop, {unknown_cast, Msg}, State}. + +handle_info(refresh, State) -> + Metrics = refresh_metrics(), + RT = update_refresh_timer(), + {noreply, State#st{metrics=Metrics, refresh=RT}}; +handle_info(Msg, State) -> + {stop, {unknown_info, Msg}, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +refresh_metrics() -> + CouchDB = get_couchdb_stats(), + System = couch_stats_httpd:to_ejson(get_system_stats()), + couch_prometheus_util:to_bin(lists:map(fun(Line) -> + io_lib:format("~s~n", [Line]) + end, CouchDB ++ System)). + +get_couchdb_stats() -> + Stats = lists:sort(couch_stats:fetch()), + lists:flatmap(fun({Path, Info}) -> + couch_to_prom(Path, Info, Stats) + end, Stats). + +get_system_stats() -> + lists:flatten([ + get_uptime_stat(), + get_vm_stats(), + get_io_stats(), + get_message_queue_stats(), + get_run_queue_stats(), + get_vm_stats(), + get_ets_stats() + ]). + +get_uptime_stat() -> + to_prom(uptime_seconds, counter, couch_app:uptime() div 1000). + +get_vm_stats() -> + MemLabels = lists:map(fun({Type, Value}) -> + {[{memory_type, Type}], Value} + end, erlang:memory()), + {NumGCs, WordsReclaimed, _} = erlang:statistics(garbage_collection), + CtxSwitches = element(1, erlang:statistics(context_switches)), + Reds = element(1, erlang:statistics(reductions)), + ProcCount = erlang:system_info(process_count), + ProcLimit = erlang:system_info(process_limit), + [ + to_prom(erlang_memory_bytes, gauge, MemLabels), + to_prom(erlang_gc_collections_total, counter, NumGCs), + to_prom(erlang_gc_words_reclaimed_total, counter, WordsReclaimed), + to_prom(erlang_context_switches_total, counter, CtxSwitches), + to_prom(erlang_reductions_total, counter, Reds), + to_prom(erlang_processes, gauge, ProcCount), + to_prom(erlang_process_limit, gauge, ProcLimit) + ]. + +get_io_stats() -> + {{input, In}, {output, Out}} = erlang:statistics(io), + [ + to_prom(erlang_io_recv_bytes_total, counter, In), + to_prom(erlang_io_sent_bytes_total, counter, Out) + ]. + +get_message_queue_stats() -> + Queues = lists:map(fun(Name) -> + case process_info(whereis(Name), message_queue_len) of + {message_queue_len, N} -> + N; + _ -> + 0 + end + end, registered()), + [ + to_prom(erlang_message_queues, gauge, lists:sum(Queues)), + to_prom(erlang_message_queue_min, gauge, lists:min(Queues)), + to_prom(erlang_message_queue_max, gauge, lists:max(Queues)) + ]. + +get_run_queue_stats() -> + %% Workaround for https://bugs.erlang.org/browse/ERL-1355 + {Normal, Dirty} = case erlang:system_info(dirty_cpu_schedulers) > 0 of + false -> + {statistics(run_queue), 0}; + true -> + [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)), + {lists:sum(SQs), DCQ} + end, + [ + to_prom(erlang_scheduler_queues, gauge, Normal), + to_prom(erlang_dirty_cpu_scheduler_queues, gauge, Dirty) + ]. + +get_ets_stats() -> + NumTabs = length(ets:all()), + to_prom(erlang_ets_table, gauge, NumTabs). + +update_refresh_timer() -> + RefreshTime = 1000 * config:get_integer("couch_prometheus", "interval", ?REFRESH_INTERVAL), + erlang:send_after(RefreshTime, self(), refresh). diff --git a/src/couch_prometheus/src/couch_prometheus_sup.erl b/src/couch_prometheus/src/couch_prometheus_sup.erl new file mode 100644 index 000000000..8d8c7e078 --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus_sup.erl @@ -0,0 +1,39 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_sup). + +-behaviour(supervisor). + +-export([ + start_link/0, + init/1 +]). + +-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}). + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +init([]) -> + {ok, { + {one_for_one, 5, 10}, [ + ?CHILD(couch_prometheus_server, worker) + ] ++ maybe_start_prometheus_http() + }}. + +maybe_start_prometheus_http() -> + case config:get("prometheus", "additional_port", "false") of + "false" -> []; + "true" -> [?CHILD(couch_prometheus_http, worker)]; + _ -> [] + end. diff --git a/src/couch_prometheus/src/couch_prometheus_util.erl b/src/couch_prometheus/src/couch_prometheus_util.erl new file mode 100644 index 000000000..c3b58cb3a --- /dev/null +++ b/src/couch_prometheus/src/couch_prometheus_util.erl @@ -0,0 +1,166 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_util ). + +-export([ + couch_to_prom/3, + to_bin/1, + to_prom/3, + to_prom_summary/2 +]). + +-include("couch_prometheus.hrl"). + +couch_to_prom([couch_log, level, alert], Info, _All) -> + to_prom(couch_log_requests_total, counter, {[{level, alert}], val(Info)}); +couch_to_prom([couch_log, level, Level], Info, _All) -> + to_prom(couch_log_requests_total, {[{level, Level}], val(Info)}); + +couch_to_prom([couch_replicator, checkpoints, failure], Info, _All) -> + to_prom(couch_replicator_checkpoints_failure_total, counter, val(Info)); +couch_to_prom([couch_replicator, checkpoints, success], Info, All) -> + Total = val(Info) + val([couch_replicator, checkpoints, failure], All), + to_prom(couch_replicator_checkpoints_total, counter, Total); +couch_to_prom([couch_replicator, responses, failure], Info, _All) -> + to_prom(couch_replicator_responses_failure_total, counter, val(Info)); +couch_to_prom([couch_replicator, responses, success], Info, All) -> + Total = val(Info) + val([couch_replicator, responses, failure], All), + to_prom(couch_replicator_responses_total, counter, Total); +couch_to_prom([couch_replicator, stream_responses, failure], Info, _All) -> + to_prom(couch_replicator_stream_responses_failure_total, counter, val(Info)); +couch_to_prom([couch_replicator, stream_responses, success], Info, All) -> + Total = val(Info) + val([couch_replicator, stream_responses, failure], All), + to_prom(couch_replicator_stream_responses_total, counter, Total); + +couch_to_prom([couchdb, auth_cache_hits], Info, All) -> + Total = val(Info) + val([couchdb, auth_cache_misses], All), + to_prom(auth_cache_requests_total, counter, Total); +couch_to_prom([couchdb, auth_cache_misses], Info, _All) -> + to_prom(auth_cache_misses_total, counter, val(Info)); +couch_to_prom([couchdb, httpd_request_methods, 'COPY'], Info, _All) -> + to_prom(httpd_request_methods, counter, {[{method, 'COPY'}], val(Info)}); +couch_to_prom([couchdb, httpd_request_methods, Method], Info, _All) -> + to_prom(httpd_request_methods, {[{method, Method}], val(Info)}); +couch_to_prom([couchdb, httpd_status_codes, Code], Info, _All) -> + to_prom(httpd_status_codes, {[{code, Code}], val(Info)}); + +couch_to_prom([ddoc_cache, hit], Info, All) -> + Total = val(Info) + val([ddoc_cache, miss], All), + to_prom(ddoc_cache_requests_total, counter, Total); +couch_to_prom([ddoc_cache, miss], Info, _All) -> + to_prom(ddoc_cache_requests_failures_total, counter, val(Info)); +couch_to_prom([ddoc_cache, recovery], Info, _All) -> + to_prom(ddoc_cache_requests_recovery_total, counter, val(Info)); + +couch_to_prom([fabric, read_repairs, failure], Info, _All) -> + to_prom(fabric_read_repairs_failures_total, counter, val(Info)); +couch_to_prom([fabric, read_repairs, success], Info, All) -> + Total = val(Info) + val([fabric, read_repairs, failure], All), + to_prom(fabric_read_repairs_total, counter, Total); + +couch_to_prom([rexi, streams, timeout, init_stream], Info, _All) -> + to_prom(rexi_streams_timeout_total, counter, {[{stage, init_stream}], val(Info)}); +couch_to_prom([rexi_streams, timeout, Stage], Info, _All) -> + to_prom(rexi_streams_timeout_total, {[{stage, Stage}], val(Info)}); + +couch_to_prom([couchdb | Rest], Info, All) -> + couch_to_prom(Rest, Info, All); + +couch_to_prom(Path, Info, _All) -> + case lists:keyfind(type, 1, Info) of + {type, counter} -> + Metric = counter_metric(Path), + to_prom(Metric, counter, val(Info)); + {type, gauge} -> + to_prom(path_to_name(Path), gauge, val(Info)); + {type, histogram} -> + to_prom_summary(Path, Info) + end. + +to_prom(Metric, Type, Data) -> + TypeStr = to_bin(io_lib:format("# TYPE ~s ~s", [to_prom_name(Metric), Type])), + [TypeStr] ++ to_prom(Metric, Data). + +to_prom(Metric, Instances) when is_list(Instances) -> + lists:flatmap(fun(Inst) -> to_prom(Metric, Inst) end, Instances); +to_prom(Metric, {Labels, Value}) -> + LabelParts = lists:map(fun({K, V}) -> + lists:flatten(io_lib:format("~s=\"~s\"", [to_bin(K), to_bin(V)])) + end, Labels), + MetricStr = case length(LabelParts) > 0 of + true -> + LabelStr = string:join(LabelParts, ", "), + lists:flatten(io_lib:format("~s{~s}", [to_prom_name(Metric), LabelStr])); + false -> + lists:flatten(io_lib:format("~s", [to_prom_name(Metric)])) + end, + [to_bin(io_lib:format("~s ~p", [MetricStr, Value]))]; +to_prom(Metric, Value) -> + [to_bin(io_lib:format("~s ~p", [to_prom_name(Metric), Value]))]. + +to_prom_summary(Path, Info) -> + Metric = path_to_name(Path ++ ["seconds"]), + {value, Value} = lists:keyfind(value, 1, Info), + {arithmetic_mean, Mean} = lists:keyfind(arithmetic_mean, 1, Value), + {percentile, Percentiles} = lists:keyfind(percentile, 1, Value), + {n, Count} = lists:keyfind(n, 1, Value), + Quantiles = lists:map(fun({Perc, Val0}) -> + % Prometheus uses seconds, so we need to covert milliseconds to seconds + Val = Val0/1000, + case Perc of + 50 -> {[{quantile, <<"0.5">>}], Val}; + 75 -> {[{quantile, <<"0.75">>}], Val}; + 90 -> {[{quantile, <<"0.9">>}], Val}; + 95 -> {[{quantile, <<"0.95">>}], Val}; + 99 -> {[{quantile, <<"0.99">>}], Val}; + 999 -> {[{quantile, <<"0.999">>}], Val} + end + end, Percentiles), + SumMetric = path_to_name(Path ++ ["seconds", "sum"]), + SumStat = to_prom(SumMetric, Count * Mean), + CountMetric = path_to_name(Path ++ ["seconds", "count"]), + CountStat = to_prom(CountMetric, Count), + to_prom(Metric, summary, Quantiles) ++ [SumStat, CountStat]. + +to_prom_name(Metric) -> + to_bin(io_lib:format("couchdb_~s", [Metric])). + +path_to_name(Path) -> + Parts = lists:map(fun(Part) -> + io_lib:format("~s", [Part]) + end, Path), + string:join(Parts, "_"). + +counter_metric(Path) -> + Name = path_to_name(Path), + case string:find(Name, <<"_total">>, trailing) == <<"_total">> of + true -> Name; + false -> to_bin(io_lib:format("~s_total", [Name])) + end. + +to_bin(Data) when is_list(Data) -> + iolist_to_binary(Data); +to_bin(Data) when is_atom(Data) -> + atom_to_binary(Data, utf8); +to_bin(Data) when is_integer(Data) -> + integer_to_binary(Data); +to_bin(Data) when is_binary(Data) -> + Data. + +val(Data) -> + {value, V} = lists:keyfind(value, 1, Data), + V. + +val(Key, Stats) -> + {Key, Data} = lists:keyfind(Key, 1, Stats), + val(Data).
\ No newline at end of file diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl new file mode 100644 index 000000000..c862b9a9f --- /dev/null +++ b/src/couch_prometheus/test/eunit/couch_prometheus_e2e_tests.erl @@ -0,0 +1,147 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_e2e_tests). + +-include_lib("couch/include/couch_eunit.hrl"). +-include_lib("couch/include/couch_db.hrl"). + +-define(USER, "prometheus_test_admin"). +-define(PASS, "pass"). +-define(AUTH, {basic_auth, {?USER, ?PASS}}). +-define(PROM_PORT, "17986"). +-define(CONTENT_JSON, {"Content-Type", "application/json"}). + +start() -> + test_util:start_couch([chttpd, couch_prometheus]). + +setup() -> + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + ok = config:set_integer("stats", "interval", 2), + ok = config:set_integer("couch_prometheus", "interval", 1), + Port = mochiweb_socket_server:get(chttpd, port), + construct_url(Port). + +teardown(_) -> + ok. + +couch_prometheus_e2e_test_() -> + { + "Prometheus E2E Tests", + { + setup, + fun start/0, fun test_util:stop_couch/1, + { + foreach, + fun setup/0, fun teardown/1, + [ + fun node_call_chttpd/1, + fun node_call_prometheus_http/1, + fun deny_prometheus_http/1, + fun node_see_updated_metrics/1 + ] + } + } + }. + +% normal chttpd path via cluster port +node_call_chttpd(Url) -> + {ok, RC1, _, _} = test_request:get( + Url, + [?CONTENT_JSON, ?AUTH], + [] + ), + ?_assertEqual(200, RC1). + +% normal chttpd path via cluster port +node_see_updated_metrics(Url) -> + TmpDb = ?tempdb(), + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + DbUrl = lists:concat(["http://", Addr, ":", Port, "/", ?b2l(TmpDb)]), + create_db(DbUrl), + [create_doc(DbUrl, "testdoc" ++ integer_to_binary(I)) || I <- lists:seq(1, 100)], + delete_db(DbUrl), + InitMetrics = wait_for_metrics(Url, "couchdb_httpd_requests_total 0", 5000), + UpdatedMetrics = wait_for_metrics(Url, "couchdb_httpd_requests_total", 10000), + % since the puts happen so fast, we can't have an exact + % total requests given the scraping interval. so we just want to acknowledge + % a change as occurred + ?_assertNotEqual(InitMetrics, UpdatedMetrics). + +% normal chttpd path via cluster port +node_call_prometheus_http(_) -> + maybe_start_http_server("true"), + Url = construct_url(?PROM_PORT), + {ok, RC1, _, _} = test_request:get( + Url, + [?CONTENT_JSON, ?AUTH], + [] + ), + % since this port doesn't require auth, this should work + {ok, RC2, _, _} = test_request:get( + Url, + [?CONTENT_JSON], + [] + ), + delete_db(Url), + ?_assertEqual(200, RC2). + +% we don't start the http server +deny_prometheus_http(_) -> + maybe_start_http_server("false"), + Url = construct_url(?PROM_PORT), + Response = test_request:get( + Url, + [?CONTENT_JSON, ?AUTH], + [] + ), + ?_assertEqual({error,{conn_failed,{error,econnrefused}}}, Response). + +maybe_start_http_server(Additional) -> + test_util:stop_applications([couch_prometheus, chttpd]), + Hashed = couch_passwords:hash_admin_password(?PASS), + ok = config:set("admins", ?USER, ?b2l(Hashed), _Persist=false), + ok = config:set("prometheus", "additional_port", Additional), + ok = config:set("prometheus", "port", ?PROM_PORT), + test_util:start_applications([couch_prometheus, chttpd]). + +construct_url(Port) -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + lists:concat(["http://", Addr, ":", Port, "/_node/_local/_prometheus"]). + +create_db(Url) -> + Addr = config:get("chttpd", "bind_address", "127.0.0.1"), + Port = mochiweb_socket_server:get(chttpd, port), + {ok, Status, _, _} = test_request:put(Url, [?CONTENT_JSON, ?AUTH], "{}"), + ?assert(Status =:= 201 orelse Status =:= 202). + +delete_db(Url) -> + {ok, 200, _, _} = test_request:delete(Url, [?AUTH]). + +create_doc(Url, Id) -> + test_request:put(Url ++ "/" ++ Id, + [?CONTENT_JSON, ?AUTH], "{\"mr\": \"rockoartischocko\"}"). + +wait_for_metrics(Url, Value, Timeout) -> + test_util:wait(fun() -> + {ok, _, _, Body} = test_request:get( + Url, + [?CONTENT_JSON, ?AUTH], + [] + ), + case string:find(Body, Value) of + nomatch -> wait; + M -> M + end + end, Timeout). diff --git a/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl b/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl new file mode 100644 index 000000000..8fe17e561 --- /dev/null +++ b/src/couch_prometheus/test/eunit/couch_prometheus_util_tests.erl @@ -0,0 +1,65 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(couch_prometheus_util_tests). + +-include_lib("couch/include/couch_eunit.hrl"). + +-import(couch_prometheus_util, [ + to_prom/3, + to_prom_summary/2 +]). + +couch_prometheus_util_test_() -> + [ + ?_assertEqual(<<"couchdb_ddoc_cache 10">>, + test_to_prom_output(ddoc_cache, counter, 10)), + ?_assertEqual(<<"couchdb_httpd_status_codes{code=\"200\"} 3">>, + test_to_prom_output(httpd_status_codes, counter, {[{code, 200}], 3})), + ?_assertEqual(<<"couchdb_temperature_celsius 36">>, + test_to_prom_output(temperature_celsius, gauge, 36)), + ?_assertEqual(<<"couchdb_mango_query_time_seconds{quantile=\"0.75\"} 4.5">>, + test_to_prom_sum_output([mango_query_time], [ + {value, + [ + {min,0.0}, + {max,0.0}, + {arithmetic_mean,0.0}, + {geometric_mean,0.0}, + {harmonic_mean,0.0}, + {median,0.0},{variance,0.0}, + {standard_deviation,0.0}, + {skewness,0.0},{kurtosis,0.0}, + {percentile,[ + {50,0.0}, + {75, 4500}, + {90,0.0}, + {95,0.0}, + {99,0.0}, + {999,0.0}]}, + {histogram,[ + {0,0}]}, + {n,0} + ] + }, + {type,histogram}, + {desc, <<"length of time processing a mango query">>} + ])) + ]. + +test_to_prom_output(Metric, Type, Val) -> + Out = to_prom(Metric, Type, Val), + lists:nth(2, Out). + +test_to_prom_sum_output(Metric, Info) -> + Out = to_prom_summary(Metric, Info), + lists:nth(3, Out).
\ No newline at end of file |