diff options
author | Jay Doane <jaydoane@apache.org> | 2021-04-20 00:11:00 -0700 |
---|---|---|
committer | Jay Doane <jaydoane@apache.org> | 2021-04-20 00:11:00 -0700 |
commit | 41288124af048837814474f71d897b9da2ac9f2b (patch) | |
tree | d744f6e4505c9548a22db351ab7a1cc3441b7934 | |
parent | ffe608a8ed2b74ef35838ebbf90216b96721bf2b (diff) | |
parent | 3afa803fd19ee6a7b9037b0e7e586ee40de22513 (diff) | |
download | couchdb-41288124af048837814474f71d897b9da2ac9f2b.tar.gz |
Merge remote-tracking branch 'weatherreport/riaknostic-squash' into weatherreport
34 files changed, 3353 insertions, 0 deletions
diff --git a/src/weatherreport/.gitignore b/src/weatherreport/.gitignore new file mode 100644 index 000000000..d6cf1d58f --- /dev/null +++ b/src/weatherreport/.gitignore @@ -0,0 +1,13 @@ +doc/ +deps/ +ebin/* +log/ +edoc/ +index.html +weatherreport +*.png +pkg/ +erl_crash.dump +.eunit/ +*~ +#*# diff --git a/src/weatherreport/.manifest b/src/weatherreport/.manifest new file mode 100644 index 000000000..73b293867 --- /dev/null +++ b/src/weatherreport/.manifest @@ -0,0 +1,5 @@ +src +weatherreport +doc +LICENSE +README.md diff --git a/src/weatherreport/.travis.yml b/src/weatherreport/.travis.yml new file mode 100644 index 000000000..ec6b937b0 --- /dev/null +++ b/src/weatherreport/.travis.yml @@ -0,0 +1,8 @@ +language: erlang +notifications: + disabled: true +env: + - R15B + - R14B04 + - R14B03 + - R14B02 diff --git a/src/weatherreport/DEVELOPMENT.md b/src/weatherreport/DEVELOPMENT.md new file mode 100644 index 000000000..129e96499 --- /dev/null +++ b/src/weatherreport/DEVELOPMENT.md @@ -0,0 +1,73 @@ +# WeatherReport Development + +WeatherReport requires a sane GNU build system and a recent version of +Erlang. It has `twig` and `getopt` as dependencies, so those must be +compatible with your version of Erlang. Release versions are currently +built with Erlang version R14B03, while development versions are targeted at Erlang version R14B04. + +See the `rebar.config` file for more details. + +To build WeatherReport, simply run `make`: + +```bash +$ make +./rebar get-deps +==> weatherreport (get-deps) +Pulling getopt from {git,"git://github.com/jcomellas/getopt.git", + {tag,"v0.4.3"}} +Cloning into 'getopt'... +Pulling twig from {git,"https://github.com/cloudant/twig.git",master} +Cloning into 'twig'... +Pulling config from {git,"git://github.com/cloudant/config.git",{tag,"0.2.5"}} +Cloning into 'config'... 
+==> getopt (get-deps) +==> twig (get-deps) +==> config (get-deps) +./rebar compile +==> getopt (compile) +Compiled src/getopt.erl +==> twig (compile) +Compiled src/twig_sup.erl +Compiled src/twig_util.erl +Compiled src/twig_app.erl +Compiled src/twig_event_handler.erl +Compiled src/twig.erl +Compiled src/twig_monitor.erl +Compiled src/trunc_io.erl +==> config (compile) +Compiled src/config_listener.erl +Compiled src/config_sup.erl +Compiled src/config_app.erl +Compiled src/config_util.erl +Compiled src/config_writer.erl +Compiled src/config.erl +==> weatherreport (compile) +Compiled src/weatherreport_check.erl +Compiled src/weatherreport_config.erl +Compiled src/weatherreport_util.erl +Compiled src/weatherreport_node.erl +Compiled src/weatherreport_check_nodes_connected.erl +Compiled src/weatherreport_check_memory_use.erl +Compiled src/weatherreport_check_membership.erl +Compiled src/weatherreport.erl +./rebar escriptize +==> getopt (escriptize) +==> twig (escriptize) +==> config (escriptize) +==> weatherreport (escriptize) +``` + +Now you can invoke the script manually via the below command: + +```bash +$ ./weatherreport --etc /path/to/etc [other options] +``` + +To generate the edoc reference, use `make docs` and then open the +`doc/index.html` file in your browser. Detailed discussion of the +internal APIs that you can use in developing new diagnostics is found +in the edocs. + +## Contributing + +We want your code! Fork the [github repository](https://github.com/cloudant/weatherreport) and send a pull request if you'd like to add a new check, contribute improvements to existing checks or improve the way WeatherReport works. diff --git a/src/weatherreport/LICENSE b/src/weatherreport/LICENSE new file mode 100644 index 000000000..e454a5258 --- /dev/null +++ b/src/weatherreport/LICENSE @@ -0,0 +1,178 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + diff --git a/src/weatherreport/Makefile b/src/weatherreport/Makefile new file mode 100644 index 000000000..f816ad068 --- /dev/null +++ b/src/weatherreport/Makefile @@ -0,0 +1,27 @@ +.PHONY: rel deps test + +all: deps compile + +compile: + rebar compile + +deps: + rebar get-deps + +clean: + rebar clean + +distclean: clean + rebar delete-deps + +test: + rebar compile eunit + +escriptize: + rebar escriptize + +## +## Doc targets +## +docs: + rebar doc skip_deps=true diff --git a/src/weatherreport/README.md b/src/weatherreport/README.md new file mode 100644 index 000000000..013ba42c7 --- /dev/null +++ b/src/weatherreport/README.md @@ -0,0 +1,86 @@ +# Weather Report + +`weatherreport` is an escript and set of tools that diagnoses common problems which could affect a CouchDB node or cluster. + +## Overview + +Here is a basic example of using `weatherreport` followed immediately by the command's output: + +```bash +$ ./weatherreport --etc /path/to/etc +[warning] Cluster member node3@127.0.0.1 is not connected to this node. Please check whether it is down. +``` + +## Installation + +WeatherReport depends on features introduced by Erlang version R14B04, so verify that you've installed this version of Erlang before proceeding with installation. + +Installation is currently a matter of cloning the git repository and running `make`. + +## Usage + +For most cases, you can just run the `weatherreport` command as given at the top of this README. However, sometimes you might want to know some extra detail or run only specific checks. For that, there are command-line options. Execute `weatherreport --help` to learn more about these options: + +```bash +weatherreport --help +Usage: weatherreport [-d <level>] [-e] [-h] [-l] [check_name ...] 
+ + -d, --level Minimum message severity level (default: notice) + -l, --list Describe available diagnostic tasks + -e, --expert Perform more detailed diagnostics + -h, --help Display help/usage + check_name A specific check to run +``` + +To get an idea of what checks will be run, use the `--list` option: + +```bash +weatherreport --list +Available diagnostic checks: + + membership Cluster membership validity + memory_use Measure memory usage + nodes_connected Cluster node liveness +``` + +If you want all the gory details about what WeatherReport is doing, you can run the checks at a more verbose logging level with the --level option: + +```bash +$ ./weatherreport --etc /path/to/etc -d debug +[debug] Not connected to the local cluster node, trying to connect. alive:false connect_failed:undefined +[debug] Starting distributed Erlang. +[debug] Connected to local cluster node 'node1@127.0.0.1'. +[debug] Local RPC: mem3:nodes([]) [5000] +[debug] Local RPC: os:getpid([]) [5000] +[debug] Running shell command: ps -o pmem,rss -p 73905 +[debug] Shell command output: +%MEM RSS + 0.3 25116 + + +[debug] Local RPC: erlang:nodes([]) [5000] +[debug] Local RPC: mem3:nodes([]) [5000] +[warning] Cluster member node3@127.0.0.1 is not connected to this node. Please check whether it is down. +[info] Process is using 0.3% of available RAM, totalling 25116 KB of real memory. +``` + +Most times you'll want to use the defaults, but any Syslog severity name will do (from most to least verbose): `debug, info, notice, warning, error, critical, alert, emergency`. + +Finally, if you want to run just a single diagnostic or a list of specific ones, you can pass their name(s): + +```bash +$ ./weatherreport --etc /path/to/etc nodes_connected +[warning] Cluster member node3@127.0.0.1 is not connected to this node. Please check whether it is down. +``` + +## Contributing + +0. Read DEVELOPMENT.md +1. Fork the project on [Github](https://github.com/cloudant/weatherreport). +2.
Make your changes or additions on a "topic" branch, test and + document them. If you are making a new diagnostic, make sure you + give some module-level information about the checks it + performs. *Note*: diagnostics _should not_ make modifications, only + inspect things. +3. Push to your fork and send a pull-request. +4. A project committer will review your pull-request and get back to you. diff --git a/src/weatherreport/doc/overview.edoc b/src/weatherreport/doc/overview.edoc new file mode 100644 index 000000000..30dbd18af --- /dev/null +++ b/src/weatherreport/doc/overview.edoc @@ -0,0 +1,23 @@ +@author Basho Technologies, Inc. +@copyright 2011 Basho Technologies, Inc. +@version 1.0.0 +@title riaknostic: Automated diagnostic tools for Riak +@doc <p>riaknostic is an escript and set of tools that diagnoses common problems which could affect a Riak node or cluster. When experiencing any problem with Riak, riaknostic should be the first thing run during troubleshooting. The tool is integrated with Riak via the <code>riak-admin</code> script.</p> + +<pre>$ riak-admin diag</pre> + +<p>This documentation describes the <code>riaknostic</code> API and user interface commands. 
The application's core consists of 5 modules:</p> + +<ul> +<li><strong>riaknostic</strong> - the core of the script, including CLI parsing and dispatching commands.</li> +<li><strong>riaknostic_check</strong> - the behaviour module that all diagnostics must implement, including some general + functions that support the <code>riaknostic</code> module.</li> +<li><strong>riaknostic_config</strong> - convenience functions for inspecting the configuration of the local Riak node.</li> +<li><strong>riaknostic_node</strong> - functions for sending commands to or inspecting the local Riak node or all members of the cluster.</li> +<li><strong>riaknostic_util</strong> - utility functions, including for running shell programs</li> +</ul> + +<p>All other included modules are generally prefixed with <code>riaknostic_check_</code> and are individual diagnostics that can be run.</p> + +<p>riaknostic is licensed under the Apache v2 license.</p> +@end
\ No newline at end of file diff --git a/src/weatherreport/how_to_add_a_check.md b/src/weatherreport/how_to_add_a_check.md new file mode 100644 index 000000000..b78640e61 --- /dev/null +++ b/src/weatherreport/how_to_add_a_check.md @@ -0,0 +1,113 @@ +# How to add a check + +A new check can be added by creating a new file in the src/ directory named +`weatherreport_check_NAME.erl` where `NAME` is a short descriptive check name +(e.g. memory_use). + +The file must be an erlang module which implements the `weatherreport_check` +behaviour. This requires the following four functions to be implemented (see +the documentation of the `weatherreport_check` module for more details): + + - `description/0` Return a short description of what the check does. This will + be printed to the console when `weatherreport` is run with the `-l` option. + + - `valid/0` Check that running the diagnostic check is valid. Any preconditions + required by the check (e.g. cluster connectivity) should be carried out here. + If a check has no prerequisites then this function can just return `true`. + + - `check/0` The function that actually performs the check. Typically this will + involve either calls to the local OS (via `weatherreport_util:run_command/1`), + calls to the local cluster node (via `weatherreport_node:local_command/3`) or + calls to the cluster (via `weatherreport_node:cluster_command/3`). This + command should return a list of tuples of the form `{LogLevel, Message}` + where `LogLevel` is an atom that specifies a supported log level (e.g. + `warning` or `info`) and `Message` is any erlang term that is matched by the + `format/1` function. + + - `format/1` This function is used to format the messages returned by `check/0` + and its clauses must match all possible messages returnable by `check/0`. It + should return a tuple of the form `{String, Args}` where `String` is the + format string and `Args` is the list of formatting arguments.
The format string + should be a human-readable description of the message. + +## Annotated example + +The following annotated example is based on `weatherreport_check_memory_use.erl` +and the file header and licence are omitted. + +```erlang +%% @doc Diagnostic that checks the current memory usage. If memory +%% usage is high, a warning message will be sent, otherwise only +%% informational messages. +``` + +The module begins with an edoc declaration which provides a full description of +the check. Any relevant details which cannot be communicated in the one-line +string returned by the `description/0` function should be included here. + +```erlang +-module(weatherreport_check_memory_use). +-behaviour(weatherreport_check). + +-export([description/0, + valid/0, + check/0, + format/1]). +``` + +The module name is specified, the `weatherreport_check` behaviour is set and the +functions required by that behaviour are exported. + +```erlang +-spec description() -> string(). +description() -> + "Measure memory usage". +``` + +Define `description/0` which returns a concise description for inclusion in +command line output. + +```erlang +-spec valid() -> boolean(). +valid() -> + weatherreport_node:can_connect(). +``` + +Define `valid/0` which is used to check that we can connect to the local cluster +node. Connectivity to the local node is required in this check so that the OS +process ID can be obtained. + +```erlang +-spec check() -> [{atom(), term()}]. +check() -> + Pid = weatherreport_node:pid(), + Output = weatherreport_util:run_command("ps -o pmem,rss -p " ++ Pid), + [_,_,Percent, RealSize| _] = string:tokens(Output, "/n \n"), + Messages = [{info, {process_usage, Percent, RealSize}}], + case weatherreport_util:binary_to_float(list_to_binary(Percent)) >= 90 of + false -> + Messages; + true -> + [{critical, {high_memory, Percent}} | Messages] + end. +``` + +The actual code that carries out the check.
Note that an `info` message is +always returned and a `critical` message is prepended to the `Messages` list +only if memory usage exceeds a hard-coded threshold. Note also that there are +two message forms: `{process_usage, Percent, RealSize}` and +`{high_memory, Percent}`. When `format/1` is defined it must match both of +these message forms. + +```erlang +-spec format(term()) -> {io:format(), [term()]}. +format({high_memory, Percent}) -> + {"Memory usage is HIGH: ~s% of available RAM", [Percent]}; +format({process_usage, Percent, Real}) -> + {"Process is using ~s% of available RAM, totalling ~s KB of real memory.", [Percent, Real]}. +``` + +Finally `format/1` is defined. There are two function clauses, one to match each +of the message forms that can be returned by check. The tuple returned by this +function will eventually be used to generate the text displayed in the console +output. diff --git a/src/weatherreport/rebar.config b/src/weatherreport/rebar.config new file mode 100644 index 000000000..5a8f69bad --- /dev/null +++ b/src/weatherreport/rebar.config @@ -0,0 +1,31 @@ +%% ------------------------------------------------------------------- +%% +%% riaknostic - automated diagnostic tools for Riak +%% +%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License.
+%% +%% ------------------------------------------------------------------- +%% +%% Modified to handle dependencies for weatherreport +%% Copyright (c) 2014 Cloudant +%% +%% ------------------------------------------------------------------- + +{escript_shebang, "#!/usr/bin/env escript\n"}. +{escript_comment, "%% -nocookie\n"}. + +{escript_incl_apps, [config]}. diff --git a/src/weatherreport/src/weatherreport.app.src b/src/weatherreport/src/weatherreport.app.src new file mode 100644 index 000000000..dda1cd03e --- /dev/null +++ b/src/weatherreport/src/weatherreport.app.src @@ -0,0 +1,39 @@ +%% ------------------------------------------------------------------- +%% +%% riaknostic - automated diagnostic tools for Riak +%% +%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% +%% File renamed from riaknostic.app.src to weatherreport.app.src and +%% modified to work with Apache CouchDB. +%% +%% Copyright (c) 2014 Cloudant +%% +%% ------------------------------------------------------------------- + +{application, weatherreport, [ + {description, "Diagnostic tools for Apache CouchDB"}, + {vsn, git}, + {registered, []}, + {applications, [ + kernel, + stdlib, + inets + ]} + ]}. 
diff --git a/src/weatherreport/src/weatherreport.erl b/src/weatherreport/src/weatherreport.erl new file mode 100644 index 000000000..bd7d65cca --- /dev/null +++ b/src/weatherreport/src/weatherreport.erl @@ -0,0 +1,181 @@ +%% ------------------------------------------------------------------- +%% +%% riaknostic - automated diagnostic tools for Riak +%% +%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% +%% File renamed from riaknostic.erl to weatherreport.erl and modified +%% to work with Apache CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% ------------------------------------------------------------------- + +%% @doc <p>The <code>weatherreport</code> module is the entry point for +%% the escript. It is responsible for parsing command-line arguments +%% and switches, printing the available checks, listing the help text, +%% or running all or the specified checks, depending on the command +%% line.</p> +%% +%% <p>The <code>getopt</code> application and module is used +%% for command-line parsing. 
The defined switches and arguments are:</p> +%% <pre>$ ./weatherreport --etc etc [-d level] [-l] [-h] [check_name...]</pre> +%% +%% <table class="options"> +%% <tr><td><code>--etc etc</code></td><td>the location of the CouchDB +%% configuration directory</td></tr> +%% <tr><td><code>-d, --level level</code>  </td><td>the severity of +%% messages you want to see, defaulting to 'notice'. Equivalent to +%% syslog severity levels.</td></tr> +%% <tr><td><code>-l, --list</code></td><td>lists available checks, +%% that is, modules that implement <code>weatherreport_check</code>. A +%% "short name" will be given for ease-of-use.</td></tr> +%% <tr><td><code>-h, --help</code></td><td> - print command usage +%% ("help")</td></tr> +%% <tr><td><code>check_name</code></td><td>when given, a specific +%% check or list of checks to run</td></tr> +%% </table> +%% @end +-module(weatherreport). +-export([main/1]). + +-define(OPTS, [ + {etc, undefined, "etc", string, undefined }, + {level, $d, "level", {atom, notice}, "Minimum message severity level (default: notice)"}, + {expert, $e, "expert", undefined, "Perform more detailed diagnostics" }, + {usage, $h, "help", undefined, "Display help/usage" }, + {list, $l, "list", undefined, "Describe available diagnostic tasks" }, + {all_nodes, $a, "all-nodes", undefined, "Run weatherreport on all cluster nodes" }, + {timeout, $t, "timeout", integer, "Timeout value (in ms) for each diagnostic check" } + ]). + +-define(USAGE_OPTS, [ O || O <- ?OPTS, + element(5,O) =/= undefined]). + +%% @doc The main entry point for the weatherreport escript. +-spec main(CommandLineArguments::[string()]) -> any(). +main(Args) -> + application:load(weatherreport), + + case weatherreport_getopt:parse(?OPTS, Args) of + {ok, {Opts, NonOptArgs}} -> + case process_opts(Opts) of + list -> list_checks(); + usage -> usage(); + run -> run(NonOptArgs) + end; + {error, Error} -> + io:format("Invalid option sequence given: ~w~n", [Error]), + usage() + end. 
+ +list_checks() -> + Descriptions = [ {weatherreport_util:short_name(Mod), Mod:description()} || + Mod <- weatherreport_check:modules() ], + io:format("Available diagnostic checks:~n~n"), + lists:foreach(fun({Mod, Desc}) -> + io:format(" ~.20s ~s~n", [Mod, Desc]) + end, lists:sort(Descriptions)). + +usage() -> + weatherreport_getopt:usage(?USAGE_OPTS, "weatherreport ", "[check_name ...]", [{"check_name", "A specific check to run"}]). + +run(InputChecks) -> + case weatherreport_config:prepare() of + {error, Reason} -> + io:format("Fatal error: ~s~n", [Reason]), + halt(1); + _ -> + ok + end, + Checks = case InputChecks of + [] -> + weatherreport_check:modules(); + _ -> + ShortNames = [{weatherreport_util:short_name(Mod), Mod} || Mod <- weatherreport_check:modules() ], + element(1, lists:foldr(fun validate_checks/2, {[], ShortNames}, InputChecks)) + end, + Messages = case application:get_env(weatherreport, all_nodes) of + {ok, true} -> + weatherreport_runner:run(Checks, all); + _ -> + weatherreport_runner:run(Checks) + end, + case Messages of + [] -> + io:format("No diagnostic messages to report.~n"), + halt(0); + _ -> + %% Print the most critical messages first + FilteredMessages = lists:filter(fun({_,Level,_,_}) -> + weatherreport_log:should_log(Level) + end, Messages), + SortedMessages = lists:sort(fun({_, ALevel, _, _}, {_, BLevel, _, _}) -> + weatherreport_log:level(ALevel) =< weatherreport_log:level(BLevel) + end, FilteredMessages), + case SortedMessages of + [] -> + io:format("No diagnostic messages to report.~n"), + halt(0); + _ -> + lists:foreach(fun weatherreport_check:print/1, SortedMessages), + weatherreport_util:flush_stdout(), + halt(1) + end, + halt(1) + end. + +validate_checks(Check, {Mods, SNames}) -> + case lists:keyfind(Check, 1, SNames) of + {Check, Mod} -> + {[Mod|Mods], lists:delete({Check, Mod}, SNames)}; + _ -> + io:format("Unknown check '~s' specified, skipping.~n", [Check]), + {Mods, SNames} + end. 
%% @doc Applies every parsed option in order and returns the action the
%% escript should take: run (the default), list or usage.
process_opts(Opts) ->
    lists:foldl(fun process_option/2, run, Opts).

%% @doc Applies a single option. Settings are stored in the weatherreport
%% application environment and leave the pending action untouched; the
%% list and usage flags replace the pending action.
process_option({etc, Path}, Action) ->
    remember(etc, filename:absname(Path), Action);
process_option({level, Level}, Action) ->
    remember(log_level, Level, Action);
process_option({timeout, Timeout}, Action) ->
    remember(timeout, Timeout, Action);
process_option(expert, Action) ->
    remember(expert, true, Action);
process_option(all_nodes, Action) ->
    remember(all_nodes, true, Action);
%% Help should have precedence over listing checks
process_option(list, usage) ->
    usage;
process_option(list, _) ->
    list;
process_option(usage, _) ->
    usage.

%% Stores Key/Value in the application environment and hands back Action.
remember(Key, Value, Action) ->
    application:set_env(weatherreport, Key, Value),
    Action.
diff --git a/src/weatherreport/src/weatherreport_check.erl b/src/weatherreport/src/weatherreport_check.erl new file mode 100644 index 000000000..e01cb8cc1 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check.erl @@ -0,0 +1,107 @@
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_check.erl to weatherreport_check.erl
%% and modified to work with Apache CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc <p>Enforces a common API among all diagnostic modules and
%% provides some automation around their execution.</p>
%% <h2>Behaviour Specification</h2>
%%
%% <h3>description/0</h3>
%% <pre>-spec description() -> iodata().</pre>
%% <p>A short description of what the diagnostic does, which will be
%% printed when the script is given the <code>-l</code> flag.</p>
%%
%% <h3>valid/0</h3>
%% <pre>-spec valid() -> boolean().</pre>
%% <p>Whether the diagnostic is valid to run. For example, some checks
%% require connectivity to the cluster node and hence call {@link
%% weatherreport_node:can_connect/0. weatherreport_node:can_connect()}.</p>
%%
%% <h3>check/1</h3>
%% <pre>-spec check(list()) -> [{atom(), term()}].</pre>
%% <p>Runs the diagnostic with the given list of options, returning a
%% list of pairs, where the first is a severity level and the second is
%% any term that is understood by the <code>format/1</code> callback.</p>
%%
%% <h3>format/1</h3>
%% <pre>-spec format(term()) -> iodata() | {io:format(), [term()]}.</pre>
%% <p>Formats terms that were returned from <code>check/1</code> for
%% output to the console. Valid return values are an iolist (string,
%% binary, etc) or a pair of a format string and a list of terms, as
%% you would pass to {@link io:format/2. io:format/2}.</p>
%% @end

-module(weatherreport_check).
-export([behaviour_info/1]).
-export([check/2,
         modules/0,
         print/1]).

%% @doc The behaviour definition for diagnostic modules.
%% Returns the callback list for the key `callbacks' and `undefined'
%% for every other key.
-spec behaviour_info(atom()) -> 'undefined' | [{atom(), arity()}].
behaviour_info(callbacks) ->
    [{description, 0},
     {valid, 0},
     {check, 1},
     {format, 1}];
behaviour_info(_) ->
    undefined.
%% @doc Runs the diagnostic in the given module, if it is valid. Returns a
%% list of messages that will be printed later using print/1.
-spec check(Module::module(), list()) -> [{atom(), module(), term()}].
check(Module, Opts) ->
    run_if_valid(Module:valid(), Module, Opts).

%% Tags each {Level, Message} pair with the module that produced it;
%% a diagnostic that is not valid contributes no messages.
run_if_valid(true, Module, Opts) ->
    [{Level, Module, Message} || {Level, Message} <- Module:check(Opts)];
run_if_valid(_NotValid, _Module, _Opts) ->
    [].

%% @doc Collects a list of diagnostic modules included in the
%% weatherreport application.
-spec modules() -> [module()].
modules() ->
    {ok, AppModules} = application:get_key(weatherreport, modules),
    [Mod || Mod <- AppModules,
            Attribute <- Mod:module_info(attributes),
            declares_check_behaviour(Attribute)].

%% True for a module attribute that declares this behaviour, in either
%% spelling.
declares_check_behaviour({behaviour, [?MODULE]}) -> true;
declares_check_behaviour({behavior, [?MODULE]}) -> true;
declares_check_behaviour(_Other) -> false.


%% @doc Formats and prints the given message. The diagnostic
%% module's format/1 function will be called to provide a
%% human-readable message. It should return an iolist() or a 2-tuple
%% consisting of a format string and a list of terms.
-spec print({Node::atom(), Level::atom(), Module::module(), Data::term()}) -> ok.
print({Node, Level, Mod, Data}) ->
    log_formatted(Node, Level, Mod:format(Data)).

%% Logs either a {Format, Terms} pair or a ready-made string.
log_formatted(Node, Level, {Format, Terms}) ->
    weatherreport_log:log(Node, Level, Format, Terms);
log_formatted(Node, Level, String) ->
    weatherreport_log:log(Node, Level, String).
diff --git a/src/weatherreport/src/weatherreport_check_custodian.erl b/src/weatherreport/src/weatherreport_check_custodian.erl new file mode 100644 index 000000000..36a6a4c8b --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_custodian.erl @@ -0,0 +1,82 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% @doc Diagnostic that performs safety and liveness checks on
%% cluster shards. Shard safety is determined by the availability of
%% the nodes that contain copies of that shard. A shard is considered
%% unsafe if one or more nodes containing copies are unavailable.
%% Shard liveness is similar but also requires nodes containing copies
%% to be actively participating in the cluster. If one or more nodes
%% containing copies are in maintenance mode then liveness is impaired.
%% Messages are also returned for any databases where there are
%% conflicting shard maps.

-module(weatherreport_check_custodian).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

-include_lib("eunit/include/eunit.hrl").

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Shard safety/liveness checks".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% Maps the count N from a custodian report entry to a severity:
%% 0 -> critical, 1 -> error, 2 -> warning, anything else -> info.
n_to_level(0) -> critical;
n_to_level(1) -> error;
n_to_level(2) -> warning;
n_to_level(_) -> info.

%% Converts one custodian report entry into a {Level, Term} message.
%% Conflicted shard maps are always reported as warnings.
report_to_message({DbName, {conflicted, Count}}) ->
    {warning, {conflicted, Count, DbName}};
report_to_message({DbName, ShardRange, {Type, Count}}) ->
    {n_to_level(Count), {Type, Count, DbName, ShardRange}}.

%% @doc Turns the custodian report into messages; an empty report yields
%% a single informational `ok' message.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    case custodian:report() of
        [] ->
            [{info, ok}];
        Entries ->
            [report_to_message(Entry) || Entry <- Entries]
    end.

-spec format(term()) -> {io:format(), [term()]}.
%% Renders a message term produced by check/1 as {FormatString, Args}.
format(ok) ->
    {"All shards available and alive.", []};
format({conflicted, 1, Db}) ->
    {"1 conflicted shard map for Db: ~s", [Db]};
format({conflicted, Count, Db}) ->
    {"~w conflicted shard maps for Db: ~s", [Count, Db]};
format({Kind, Count, Db, Range}) ->
    {"~w ~w shards for Db: ~s Range: ~w.", [Count, Kind, Db, Range]}.
diff --git a/src/weatherreport/src/weatherreport_check_disk.erl b/src/weatherreport/src/weatherreport_check_disk.erl new file mode 100644 index 000000000..cf05f5907 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_disk.erl @@ -0,0 +1,172 @@
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_check_disk.erl to
%% weatherreport_check_disk.erl and modified to work with Apache
%% CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks permissions on data directories and
%% whether noatime is set. It will only check data directories of
%% known storage backends.
-module(weatherreport_check_disk).
-behaviour(weatherreport_check).
%% The file that we will attempt to create and read under each data directory.
-define(TEST_FILE, "weatherreport.tmp").

%% A dependent chain of permissions checking functions.
-define(CHECKPERMFUNS, [fun check_is_dir/1,
                        fun check_is_writeable/1,
                        fun check_is_readable/1,
                        fun check_is_file_readable/1,
                        fun check_atime/1]).

-include_lib("kernel/include/file.hrl").

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Data directory permissions and atime".

%% @doc This diagnostic can always be run.
-spec valid() -> true.
valid() ->
    true.

%% @doc Runs the permission check chain against every directory returned
%% by weatherreport_config:data_directories/0 and concatenates the
%% resulting messages.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    DataDirs = weatherreport_config:data_directories(),
    %% Add additional disk checks in the function below
    lists:flatmap(fun check_directory_permissions/1, DataDirs).

%% @doc Renders a message term produced by the checks as
%% {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({disk_full, DataDir}) ->
    %% The message names CouchDB: this tool diagnoses CouchDB, and the
    %% earlier "Riak" wording was left over from riaknostic.
    {"Disk containing data directory ~s is full! "
     "Please check that it is set to the correct location and that there are not "
     "other files using up space intended for CouchDB.", [DataDir]};
format({no_data_dir, DataDir}) ->
    {"Data directory ~s does not exist. Please create it.", [DataDir]};
format({no_write, DataDir}) ->
    User = weatherreport_config:user(),
    {"No write access to data directory ~s. Please make it writeable by the '~s' user.", [DataDir, User]};
format({no_read, DataDir}) ->
    User = weatherreport_config:user(),
    {"No read access to data directory ~s. Please make it readable by the '~s' user.", [DataDir, User]};
format({write_check, File}) ->
    {"Write-test file ~s is a directory! Please remove it so this test can continue.", [File]};
format({atime, Dir}) ->
    {"Data directory ~s is not mounted with 'noatime'. "
     "Please remount its disk with the 'noatime' flag to improve performance.", [Dir]}.

%%% Private functions

%% Runs the ?CHECKPERMFUNS chain against one directory.
check_directory_permissions(Directory) ->
    check_directory(Directory, ?CHECKPERMFUNS).
%% Run a list of check functions against the given directory,
%% returning the first non-ok result.
%% The result is a list: empty when every check passes, otherwise a
%% single-element list holding the first failing check's message.
check_directory(_, []) ->
    [];
check_directory(Directory, [Check | Checks]) ->
    case Check(Directory) of
        ok ->
            check_directory(Directory, Checks);
        Message ->
            [Message]
    end.

%% Check if the path is actually a directory
check_is_dir(Directory) ->
    case filelib:is_dir(Directory) of
        true ->
            ok;
        _ ->
            {error, {no_data_dir, Directory}}
    end.

%% Check if the directory is writeable
%% Writes the test file into the directory; the file is left in place
%% for the checks that run after this one.
check_is_writeable(Directory) ->
    File = filename:join([Directory, ?TEST_FILE]),
    case file:write_file(File, <<"ok">>) of
        ok ->
            ok;
        {error, Error} when Error == enoent orelse Error == eacces ->
            {error, {no_write, Directory}};
        {error, enospc} ->
            {critical, {disk_full, Directory}};
        {error, eisdir} ->
            {error, {write_check, File}};
        {error, _Other} ->
            %% Any other failure (a read-only filesystem, for example)
            %% still means the directory cannot be written; report it
            %% instead of crashing the whole check with a case_clause.
            {error, {no_write, Directory}}
    end.

%% Check if the directory is readable
check_is_readable(Directory) ->
    case file:read_file_info(Directory) of
        {ok, #file_info{access=Access}} when Access == read orelse
                                             Access == read_write ->
            ok;
        {error, eacces} ->
            {error, {no_read, Directory}};
        {error, Error} when Error == enoent orelse
                            Error == enotdir ->
            {error, {no_data_dir, Directory}};
        _ ->
            {error, {no_read, Directory}}
    end.

%% Check if the file we created is readable
check_is_file_readable(Directory) ->
    File = filename:join([Directory, ?TEST_FILE]),
    case file:read_file(File) of
        {error, Error} when Error == eacces orelse
                            Error == enotdir ->
            {error, {no_read, Directory}};
        {error, enoent} ->
            {error, {write_check, File}};
        _ -> ok
    end.
%% Check if the directory is mounted with 'noatime'
%% Sets the test file's access time to a fixed date, reads one line from
%% it, and compares the access time before and after the read. The test
%% file is deleted afterwards. A changed access time yields a notice.
check_atime(Directory) ->
    Probe = filename:join([Directory, ?TEST_FILE]),
    weatherreport_util:run_command("touch -at 201401010000.00 " ++ Probe),
    {ok, #file_info{atime = AtimeBefore}} = file:read_file_info(Probe),
    {ok, Handle} = file:open(Probe, [read]),
    io:get_line(Handle, ''),
    file:close(Handle),
    {ok, #file_info{atime = AtimeAfter}} = file:read_file_info(Probe),
    file:delete(Probe),
    case AtimeBefore =:= AtimeAfter of
        true ->
            ok;
        false ->
            {notice, {atime, Directory}}
    end.
diff --git a/src/weatherreport/src/weatherreport_check_internal_replication.erl b/src/weatherreport/src/weatherreport_check_internal_replication.erl new file mode 100644 index 000000000..7cfdea09e --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_internal_replication.erl @@ -0,0 +1,57 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks the current size of the mem3_sync
%% backlog. The size is printed as an info message if under a defined
%% threshold, or as a warning if above the threshold.
-module(weatherreport_check_internal_replication).
-behaviour(weatherreport_check).
-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% Backlog size above which the message is raised to a warning.
-define(THRESHOLD, 1000000).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Check the number of pending internal replication jobs".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% Severity for a backlog size: warning strictly above ?THRESHOLD,
%% info otherwise.
-spec total_to_level(integer()) -> atom().
total_to_level(Total) ->
    case Total > ?THRESHOLD of
        true -> warning;
        false -> info
    end.

%% @doc Reports the current mem3_sync backlog as a single message.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Pending = mem3_sync:get_backlog(),
    [{total_to_level(Pending), Pending}].

%% @doc Renders the backlog size as {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format(Pending) ->
    {"Total number of pending internal replication jobs: ~w", [Pending]}.
diff --git a/src/weatherreport/src/weatherreport_check_ioq.erl b/src/weatherreport/src/weatherreport_check_ioq.erl new file mode 100644 index 000000000..de0d8b155 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_ioq.erl @@ -0,0 +1,78 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks the total number of IOQ requests. If
%% the total exceeds a configured threshold a warning message will be
%% sent, otherwise only an information message.
-module(weatherreport_check_ioq).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% Total request count above which the message is raised to a warning.
-define(THRESHOLD, 500).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Check the total number of active IOQ requests".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% Severity for a request total: warning strictly above ?THRESHOLD,
%% info otherwise.
-spec total_to_level(integer()) -> atom().
total_to_level(Total) when Total > ?THRESHOLD ->
    warning;
total_to_level(_Total) ->
    info.

%% Adds the sum of every channel's list of counts to the running total.
%% The accumulator and the result are numbers, not lists.
-spec sum_channels([{term(), [number()]}], number()) -> number().
sum_channels([], Acc) ->
    Acc;
sum_channels([{_Name, Value} | Rest], Acc) ->
    sum_channels(Rest, Acc + lists:sum(Value)).

%% Adds every queue's count to the running total. The `channels' entry
%% wraps a list of per-channel count lists and is summed through
%% sum_channels/2; every other entry carries a plain number.
-spec sum_queues([{term(), term()}], number()) -> number().
sum_queues([], Acc) ->
    Acc;
sum_queues([{channels, {Channels}} | Rest], Acc) ->
    sum_queues(Rest, sum_channels(Channels, Acc));
sum_queues([{_Name, Value} | Rest], Acc) ->
    sum_queues(Rest, Acc + Value).

%% @doc Sums the disk queue sizes reported by ioq:get_disk_queues/0.
%% A non-list result is reported as a warning carrying that result.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    case ioq:get_disk_queues() of
        Queues when is_list(Queues) ->
            Total = sum_queues(Queues, 0),
            [{total_to_level(Total), {ioq_requests, Total, Queues}}];
        Error ->
            [{warning, {ioq_requests_unknown, Error}}]
    end.

%% @doc Renders a message term produced by check/1 as
%% {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({ioq_requests_unknown, Error}) ->
    {"Could not determine total number of IOQ requests: ~w~n", [Error]};
format({ioq_requests, Total, Queues}) ->
    {"Total number of active IOQ requests is: ~w ~w", [Total, Queues]}.
diff --git a/src/weatherreport/src/weatherreport_check_mem3_sync.erl b/src/weatherreport/src/weatherreport_check_mem3_sync.erl new file mode 100644 index 000000000..8dfe41c02 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_mem3_sync.erl @@ -0,0 +1,55 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks for the presence of the mem3_sync
%% registered process. If this is not found a warning message will be
%% sent, otherwise only informational messages.
-module(weatherreport_check_mem3_sync).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Check there is a registered mem3_sync process".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Looks up the process registered as mem3_sync and reports
%% whether it exists.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    [lookup_message(erlang:whereis(mem3_sync))].

%% Warning when nothing is registered, info with the pid otherwise.
lookup_message(undefined) ->
    {warning, mem3_sync_not_found};
lookup_message(Pid) ->
    {info, {mem3_sync_found, Pid}}.

-spec format(term()) -> {io:format(), [term()]}.
%% Renders a message term produced by check/1 as {FormatString, Args}.
format({mem3_sync_found, SyncPid}) ->
    {"mem3_sync process found on local node with pid ~w", [SyncPid]};
format(mem3_sync_not_found) ->
    {"No mem3_sync process found on local node.", []}.
diff --git a/src/weatherreport/src/weatherreport_check_membership.erl b/src/weatherreport/src/weatherreport_check_membership.erl new file mode 100644 index 000000000..c3b865174 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_membership.erl @@ -0,0 +1,65 @@
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_check_ring_membership.erl to
%% weatherreport_check_membership.erl and modified to work with Apache
%% CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks whether the local node is a member of
%% the ring. This might arise when the node name in vm.args has
%% changed but the node has not been renamed in the ring.
-module(weatherreport_check_membership).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

-include_lib("eunit/include/eunit.hrl").
%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Cluster membership validity".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Warns when the local node name is absent from mem3:nodes/0;
%% returns no messages when it is present.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Self = node(),
    membership_messages(lists:member(Self, mem3:nodes()), Self).

%% No message for a member, one warning for a non-member.
membership_messages(true, _Self) ->
    [];
membership_messages(false, Self) ->
    [{warning, {not_ring_member, Self}}].

%% @doc Renders the non-membership warning as {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({not_ring_member, Self}) ->
    {"Local node ~w is not a member of the cluster. Please check that the -name setting in vm.args is correct.", [Self]}.
diff --git a/src/weatherreport/src/weatherreport_check_memory_use.erl b/src/weatherreport/src/weatherreport_check_memory_use.erl new file mode 100644 index 000000000..0d93f9d1b --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_memory_use.erl @@ -0,0 +1,67 @@
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_check_memory_use.erl to
%% weatherreport_check_memory_use.erl and modified to work with Apache
%% CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks the current memory usage. If memory
%% usage is high, a warning message will be sent, otherwise only
%% informational messages.
-module(weatherreport_check_memory_use).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Measure memory usage".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Runs `ps -o pmem,rss' for the pid from weatherreport_node:pid/0
%% and reports the memory percentage and resident size. A percentage of
%% 90 or more adds a critical message in front of the info message.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Pid = weatherreport_node:pid(),
    Output = weatherreport_util:run_command("ps -o pmem,rss -p " ++ Pid),
    %% Split on blanks and newlines only. The previous separator list
    %% "/n \n" also split on the characters '/' and 'n'.
    %% The first two tokens are skipped (presumably the ps column
    %% headers - confirm against run_command/1 output).
    [_, _, Percent, RealSize | _] = string:tokens(Output, " \n"),
    Messages = [{info, {process_usage, Percent, RealSize}}],
    case weatherreport_util:binary_to_float(list_to_binary(Percent)) >= 90 of
        false ->
            Messages;
        true ->
            [{critical, {high_memory, Percent}} | Messages]
    end.

%% @doc Renders a message term produced by check/1 as
%% {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({high_memory, Percent}) ->
    {"Memory usage is HIGH: ~s% of available RAM", [Percent]};
format({process_usage, Percent, Real}) ->
    {"Process is using ~s% of available RAM, totalling ~s KB of real memory.", [Percent, Real]}.
diff --git a/src/weatherreport/src/weatherreport_check_message_queues.erl b/src/weatherreport/src/weatherreport_check_message_queues.erl new file mode 100644 index 000000000..ae99ff4dc --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_message_queues.erl @@ -0,0 +1,57 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks for processes with large mailboxes
%% and sends a warning message if one or more processes exceed the
%% threshold.
-module(weatherreport_check_message_queues).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% Mailbox length handed to weatherreport_util:check_proc_count/3 as the
%% threshold.
-define(THRESHOLD, 1000).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Check for processes with large mailboxes".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Delegates to weatherreport_util:check_proc_count/3 for the
%% message_queue_len process attribute.
-spec check(list()) -> [{atom(), term()}].
check(Opts) ->
    weatherreport_util:check_proc_count(message_queue_len, ?THRESHOLD, Opts).

-spec format(term()) -> {io:format(), [term()]}.
%% Renders a message term produced by check/1 as {FormatString, Args}.
%% The four-element `high' form carries extra process info.
format({ok, {Proc, Len, Summary}}) ->
    {"Process ~w has mailbox size of ~w: ~w", [Proc, Len, Summary]};
format({high, {Proc, Len, Summary}}) ->
    {"Process ~w has excessive mailbox size of ~w: ~w", [Proc, Len, Summary]};
format({high, {Proc, Len, Summary, Details}}) ->
    {"Process ~w has excessive mailbox size of ~w: ~w ~w", [Proc, Len, Summary, Details]}.
diff --git a/src/weatherreport/src/weatherreport_check_node_stats.erl b/src/weatherreport/src/weatherreport_check_node_stats.erl new file mode 100644 index 000000000..27b77cefd --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_node_stats.erl @@ -0,0 +1,66 @@
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic that checks various erlang VM statistics that are
%% useful for diagnostics. A warning message is printed if certain stats
%% rise above pre-determined thresholds, otherwise an info message is sent.
-module(weatherreport_check_node_stats).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

-define(SAMPLES, 10).
-define(T_RUN_QUEUE, 40).
-define(T_PROCESS_COUNT, 100000).

-spec description() -> string().
description() ->
    "Check useful erlang statistics for diagnostics".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% Fold function for recon:node_stats/4: adds one sample's absolute
%% statistics to the running sums. The second element of the sample is
%% ignored, and a statistic without a previous sum starts from 0.
-spec sum_absolute_stats({list(), list()}, list()) -> list().
sum_absolute_stats({Absolutes, _Ignored}, Totals) ->
    [{Name, Value + proplists:get_value(Name, Totals, 0)}
     || {Name, Value} <- Absolutes].

%% Tags a {Statistic, Mean} pair with a severity: warning when run_queue
%% or process_count exceeds its threshold, info otherwise.
-spec mean_to_message({atom(), integer()}) -> {atom(), {atom(), integer()}}.
mean_to_message({Statistic, Mean} = Sample) ->
    {level_for(Statistic, Mean), Sample}.

level_for(run_queue, Mean) when Mean > ?T_RUN_QUEUE -> warning;
level_for(process_count, Mean) when Mean > ?T_PROCESS_COUNT -> warning;
level_for(_Statistic, _Mean) -> info.

%% @doc Takes ?SAMPLES samples through recon:node_stats/4 with an
%% interval argument of 100 and reports the rounded mean of each
%% absolute statistic.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Totals = recon:node_stats(?SAMPLES, 100, fun sum_absolute_stats/2, []),
    [mean_to_message({Name, erlang:round(Total / ?SAMPLES)})
     || {Name, Total} <- Totals].

%% @doc Renders a {Statistic, Value} pair as {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({Statistic, Value}) ->
    {"Mean ~w over one second is ~w", [Statistic, Value]}.
diff --git a/src/weatherreport/src/weatherreport_check_nodes_connected.erl b/src/weatherreport/src/weatherreport_check_nodes_connected.erl new file mode 100644 index 000000000..136b5d069 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_nodes_connected.erl @@ -0,0 +1,59 @@
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_check_nodes_connected.erl to
%% weatherreport_check_nodes_connected.erl and modified to work with
%% Apache CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Diagnostic check that detects cluster members that are down.
-module(weatherreport_check_nodes_connected).
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

%% @doc One-line summary of this diagnostic.
-spec description() -> string().
description() ->
    "Cluster node liveness".

%% @doc The check is valid when weatherreport_node:can_connect/0 says so.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Emits one warning for every entry of mem3:nodes/0 that is
%% neither the local node nor listed by erlang:nodes/0.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Self = node(),
    Reachable = [Self | erlang:nodes()],
    [{warning, {node_disconnected, Member}}
     || Member <- mem3:nodes(),
        Member =/= Self,
        not lists:member(Member, Reachable)].

%% @doc Renders the disconnected-node warning as {FormatString, Args}.
-spec format(term()) -> {io:format(), [term()]}.
format({node_disconnected, Member}) ->
    {"Cluster member ~s is not connected to this node. Please check whether it is down.", [Member]}.
diff --git a/src/weatherreport/src/weatherreport_check_process_calls.erl b/src/weatherreport/src/weatherreport_check_process_calls.erl new file mode 100644 index 000000000..75dcf7a54 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_process_calls.erl @@ -0,0 +1,98 @@ +%% ------------------------------------------------------------------- +%% +%% weatherreport - automated diagnostic tools for CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Diagnostic that checks for large numbers of processes sharing +%% the same current or initial function call +-module(weatherreport_check_process_calls). +-behaviour(weatherreport_check). + +-export([description/0, + valid/0, + check/1, + format/1]). + +-define(THRESHOLD, 1000). + +-spec description() -> string(). +description() -> + "Check for large numbers of processes with the same current/initial call". + +-spec valid() -> boolean(). +valid() -> + weatherreport_node:can_connect(). + +-spec total_to_level(integer()) -> atom(). +total_to_level(Total) when Total > ?THRESHOLD -> + notice; +total_to_level(_Total) -> + info. 
%% Walks a list of {Count, Call} entries, turning at most Lim of them
%% into {Level, {process_count, Details}} messages that are prepended
%% to Acc (so the result is in reverse input order).
fold_processes([], Acc, _Lim, _CallType, _Opts) ->
    Acc;
fold_processes(_, Acc, 0, _CallType, _Opts) ->
    Acc;
fold_processes([{Count, undefined} | Rest], Acc, Lim, CallType, Opts) ->
    Level = total_to_level(Count),
    Entry = {Level, {process_count, {CallType, Count, undefined}}},
    fold_processes(Rest, [Entry | Acc], Lim - 1, CallType, Opts);
fold_processes([{Count, {M, F, A}} | Rest], Acc, Lim, CallType, Opts) ->
    Level = total_to_level(Count),
    Details = case proplists:get_value(expert, Opts) of
        true ->
            %% Expert mode additionally attaches process info for a
            %% sample of the matching processes.
            {CallType, Count, M, F, A, sample_pinfos(CallType, M, F)};
        _ ->
            {CallType, Count, M, F, A}
    end,
    fold_processes(Rest, [{Level, {process_count, Details}} | Acc], Lim - 1, CallType, Opts).

%% Looks up the pids via recon:find_by_<CallType>_call(M, F) and pairs
%% the first ten of them with their recon:info/1 result.
sample_pinfos(CallType, M, F) ->
    PidFun = list_to_atom("find_by_" ++ CallType ++ "_call"),
    Pids = erlang:apply(recon, PidFun, [M, F]),
    [{Pid, recon:info(Pid)} || Pid <- lists:sublist(Pids, 10)].

%% @doc Reports up to ten current-call entries followed by up to ten
%% first-call entries, as provided by recon.
-spec check(list()) -> [{atom(), term()}].
check(Opts) ->
    CurrentCounts = recon:show_current_call_counts(),
    CurrentMessages = fold_processes(CurrentCounts, [], 10, "current", Opts),
    FirstCounts = recon:show_first_call_counts(),
    AllMessages = fold_processes(FirstCounts, CurrentMessages, 10, "first", Opts),
    lists:reverse(AllMessages).

%% @doc Renders a process_count message; the tuple size decides how
%% much detail (call MFA, process infos) is printed.
-spec format(term()) -> {io:format(), [term()]}.
format({process_count, {Kind, Total, undefined}}) ->
    {"~w processes with ~s call ~w", [Total, Kind, undefined]};
format({process_count, {Kind, Total, Mod, Fun, Arity}}) ->
    {"~w processes with ~s call ~w:~w/~w", [Total, Kind, Mod, Fun, Arity]};
format({process_count, {Kind, Total, Mod, Fun, Arity, Infos}}) ->
    {"~w processes with ~s call ~w:~w/~w ~w", [Total, Kind, Mod, Fun, Arity, Infos]}.
diff --git a/src/weatherreport/src/weatherreport_check_process_memory.erl b/src/weatherreport/src/weatherreport_check_process_memory.erl new file mode 100644 index 000000000..2f766cdfe --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_process_memory.erl @@ -0,0 +1,57 @@ +%% ------------------------------------------------------------------- +%% +%% weatherreport - automated diagnostic tools for CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Diagnostic that checks for processes with high memory usage +%% and sends a warning message if one or more processes exceed the +%% threshold. +-module(weatherreport_check_process_memory). +-behaviour(weatherreport_check). + +-export([description/0, + valid/0, + check/1, + format/1]). + +-define(THRESHOLD, 104857600). + +-spec description() -> string(). +description() -> + "Check for processes with high memory usage". + +-spec valid() -> boolean(). +valid() -> + weatherreport_node:can_connect(). + +-spec check(list()) -> [{atom(), term()}]. +check(Opts) -> + weatherreport_util:check_proc_count( + memory, + ?THRESHOLD, + Opts). + +-spec format(term()) -> {io:format(), [term()]}. 
+format({high, {Pid, Memory, Info, Pinfo}}) -> + {"Process ~w has excessive memory usage of ~w: ~w ~w", [Pid, Memory, Info, Pinfo]}; +format({high, {Pid, Memory, Info}}) -> + {"Process ~w has excessive memory usage of ~w: ~w", [Pid, Memory, Info]}; +format({ok, {Pid, Memory, Info}}) -> + {"Process ~w has memory usage of ~w: ~w", [Pid, Memory, Info]}. diff --git a/src/weatherreport/src/weatherreport_check_safe_to_rebuild.erl b/src/weatherreport/src/weatherreport_check_safe_to_rebuild.erl new file mode 100644 index 000000000..a7c46c979 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_safe_to_rebuild.erl @@ -0,0 +1,116 @@ +%% ------------------------------------------------------------------- +%% +%% weatherreport - automated diagnostic tools for CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Diagnostic that checks whether the current node can be +%% safely rebuilt (i.e. taken out of service). +-module(weatherreport_check_safe_to_rebuild). +-behaviour(weatherreport_check). + +-export([description/0, + valid/0, + check/1, + format/1]). + +-spec description() -> string(). +description() -> + "Check whether the node can safely be taken out of service". + +-spec valid() -> boolean(). +valid() -> + weatherreport_node:can_connect(). + +%% @doc Check if rebuilding a node is safe. 
%% Safe in this context means
%% that no shard would end up with N<Threshold when the node is offline
-spec safe_to_rebuild(atom(), integer()) -> [list()].
safe_to_rebuild(Node, RawThreshold) ->
    %% When couchdb/maintenance_mode is "true" the threshold is lowered
    %% by one.
    Threshold = case config:get("couchdb", "maintenance_mode") of
        "true" -> RawThreshold - 1;
        _ -> RawThreshold
    end,

    %% Index the custodian report entries whose count is at or below
    %% the threshold by [Db, Range].
    AtRisk = dict:from_list([
        {[Db, Range], Status}
        || {Db, Range, {_, Copies} = Status} <- custodian:report(),
           Copies =< Threshold
    ]),

    %% Collect a printable name for every shard hosted on Node that is
    %% present in the at-risk index.
    Collect = fun
        ({shard, _, ShardNode, Db, [Start, End], _}, Acc) when ShardNode =:= Node ->
            case dict:is_key([Db, [Start, End]], AtRisk) of
                true -> [pretty_shard(Db, Start, End) | Acc];
                false -> Acc
            end;
        (_Other, Acc) ->
            Acc
    end,
    mem3_shards:fold(Collect, []).

%% Formats a shard as "Db StartHex-EndHex", with both range ends encoded
%% as 32-bit integers.
pretty_shard(Db, Start, End) ->
    HexRange = [
        couch_util:to_hex(<<Start:32/integer>>),
        couch_util:to_hex(<<End:32/integer>>)
    ],
    lists:flatten(io_lib:format("~s ~s-~s", [Db | HexRange])).

%% Maps a shard list to a message: info when the list is empty,
%% error for n1 shards and crit for n0 shards otherwise.
-spec shards_to_message(atom(), list()) -> {atom(), {atom(), list()}}.
shards_to_message(Kind, []) when Kind =:= n1; Kind =:= n0 ->
    {info, {Kind, []}};
shards_to_message(n1, Shards) ->
    {error, {n1, Shards}};
shards_to_message(n0, Shards) ->
    {crit, {n0, Shards}}.

%% @doc Runs safe_to_rebuild/2 for the local node with thresholds 1
%% and 2; shards already reported for threshold 1 are removed from the
%% threshold 2 result.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    N0Shards = safe_to_rebuild(node(), 1),
    N1Candidates = safe_to_rebuild(node(), 2),
    N1Shards = lists:subtract(N1Candidates, N0Shards),
    [shards_to_message(n0, N0Shards), shards_to_message(n1, N1Shards)].

-spec format(term()) -> {io:format(), [term()]}.
+format({n1, []}) -> + {"This node can be rebuilt without causing any shards to become N=1", []}; +format({n1, Shards}) -> + { + "Rebuilding this node will leave the following shards with only one live copy: ~s", + [string:join(Shards, ", ")] + }; +format({n0, []}) -> + {"This node can be rebuilt without causing any shards to become N=0", []}; +format({n0, Shards}) -> + { + "Rebuilding this node will leave the following shard with NO live copies: ~s", + [string:join(Shards, ", ")] + }. diff --git a/src/weatherreport/src/weatherreport_check_search.erl b/src/weatherreport/src/weatherreport_check_search.erl new file mode 100644 index 000000000..29fe05029 --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_search.erl @@ -0,0 +1,57 @@ +%% ------------------------------------------------------------------- +%% +%% weatherreport - automated diagnostic tools for CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Diagnostic that checks the local clouseau node is responsive. +%% If clouseau is unresponsive then search will not work. An info +%% message is returned if clouseau responds to pings and an error +%% otherwise. +-module(weatherreport_check_search). +-behaviour(weatherreport_check). + +-export([description/0, + valid/0, + check/1, + format/1]). + +-spec description() -> string(). 
+description() -> + "Check the local search node is responsive". + +-spec valid() -> boolean(). +valid() -> + weatherreport_node:can_connect(). + +-spec check(list()) -> [{atom(), term()}]. +check(_Opts) -> + SearchNode = 'clouseau@127.0.0.1', + case net_adm:ping(SearchNode) of + pong -> + [{info, {clouseau_ok, SearchNode}}]; + Error -> + [{error, {clouseau_error, SearchNode, Error}}] + end. + +-spec format(term()) -> {io:format(), [term()]}. +format({clouseau_ok, SearchNode}) -> + {"Local search node at ~w responding ok", [SearchNode]}; +format({clouseau_error, SearchNode, Error}) -> + {"Local search node at ~w not responding: ~w", [SearchNode, Error]}. diff --git a/src/weatherreport/src/weatherreport_check_tcp_queues.erl b/src/weatherreport/src/weatherreport_check_tcp_queues.erl new file mode 100644 index 000000000..8e161487f --- /dev/null +++ b/src/weatherreport/src/weatherreport_check_tcp_queues.erl @@ -0,0 +1,89 @@ +%% ------------------------------------------------------------------- +%% +%% weatherreport - automated diagnostic tools for CouchDB +%% +%% Copyright (c) 2014 Cloudant +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +%% @doc Diagnostic that checks the current tcp recv and send queues. +%% If the queues are high a warning message will be send, otherwise +%% only an informational message. +-module(weatherreport_check_tcp_queues). 
-behaviour(weatherreport_check).

-export([description/0,
         valid/0,
         check/1,
         format/1]).

-define(THRESHOLD, 1000000).

%% @doc Short human-readable name of this check.
-spec description() -> string().
description() ->
    "Measure the length of tcp queues in the kernel".

%% @doc The check is only runnable when weatherreport_node reports
%% that it can connect.
-spec valid() -> boolean().
valid() ->
    weatherreport_node:can_connect().

%% @doc Converts the raw text output of netstat into the sum of the
%% tcp recv and send queues.
-spec sum_queues(string()) -> {integer(), integer()}.
sum_queues(Netstats) ->
    sum_queues(string:tokens(Netstats, "\n"), {0, 0}).

%% @doc Converts the rows of text output of netstat into the sum of
%% the tcp recv and send queues. Note that this function is tightly coupled
%% to the output of the netstat command provided by the system OS (tested
%% with netstat 1.42).
-spec sum_queues([string()], {integer(), integer()}) -> {integer(), integer()}.
sum_queues([], Acc) ->
    Acc;
sum_queues([Row | Rest], {SumRecvQ, SumSendQ}) ->
    {RecvQ, SendQ} = row_queues(string:tokens(Row, " ")),
    sum_queues(Rest, {RecvQ + SumRecvQ, SendQ + SumSendQ}).

%% Extracts {RecvQ, SendQ} from the space-separated columns of one row.
%% Only rows whose first column starts with "tcp" are counted. Rows
%% with fewer than three columns, or whose second/third column is not
%% a plain integer, contribute {0, 0} instead of crashing the check,
%% because the text comes from an external command.
-spec row_queues([string()]) -> {integer(), integer()}.
row_queues(["tcp" ++ _, RecvCol, SendCol | _]) ->
    case {string:to_integer(RecvCol), string:to_integer(SendCol)} of
        {{RecvQ, []}, {SendQ, []}} ->
            {RecvQ, SendQ};
        _ ->
            {0, 0}
    end;
row_queues(_Cols) ->
    {0, 0}.

%% @doc Converts the sum of queue lengths to a log message at the appropriate
%% level, given ?THRESHOLD
-spec sum_to_message(integer(), string()) -> {atom(), term()}.
sum_to_message(Sum, Prefix) when Sum > ?THRESHOLD ->
    {warning, {list_to_atom(Prefix ++ "_high"), Sum}};
sum_to_message(Sum, Prefix) ->
    {info, {list_to_atom(Prefix ++ "_ok"), Sum}}.

%% @doc Runs "netstat" through weatherreport_util:run_command/1 and
%% reports one message for the summed recv queue and one for the
%% summed send queue.
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
    Netstats = weatherreport_util:run_command("netstat"),
    {SumRecvQ, SumSendQ} = sum_queues(Netstats),
    [sum_to_message(SumRecvQ, "recv_q"), sum_to_message(SumSendQ, "send_q")].

-spec format(term()) -> {io:format(), [term()]}.
+format({recv_q_high, QLen}) -> + {"Total TCP Recv-Q is HIGH: ~w", [QLen]}; +format({recv_q_ok, QLen}) -> + {"Total TCP Recv-Q is ok: ~w", [QLen]}; +format({send_q_high, QLen}) -> + {"Total TCP Send-Q is HIGH: ~w", [QLen]}; +format({send_q_ok, QLen}) -> + {"Total TCP Send-Q is ok: ~w", [QLen]}. diff --git a/src/weatherreport/src/weatherreport_config.erl b/src/weatherreport/src/weatherreport_config.erl new file mode 100644 index 000000000..267562d21 --- /dev/null +++ b/src/weatherreport/src/weatherreport_config.erl @@ -0,0 +1,196 @@ +%% ------------------------------------------------------------------- +%% +%% riaknostic - automated diagnostic tools for Riak +%% +%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved. +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- +%% +%% File renamed from riaknostic_config.erl to weatherreport_config.erl +%% Copyright (c) 2014 Cloudant +%% +%% ------------------------------------------------------------------- + +%% @doc Provides convenient access to configuration values. When +%% the {@link weatherreport. weatherreport} module calls {@link +%% prepare/0. prepare/0}, CouchDB's <code>default.ini</code>, +%% <code>local.ini</code> and <code>vm.args</code> files will be +%% parsed and memoized. +%% @end + +-module(weatherreport_config). 

-export([prepare/0,
         data_directories/0,
         get_vm_env/1,
         etc_dir/0,
         timeout/0,
         node_name/0,
         cookie/0,
         user/0]).

%% @doc Prepares the configuration so that the weatherreport script
%% can run. This is called by the weatherreport module and you do
%% not need to invoke it.
-spec prepare() -> ok | {error, iodata()}.
prepare() ->
    prepare([fun load_app_config/0, fun load_vm_args/0]).

%% Runs the preparation steps in order and stops at the first step
%% that returns {error, Reason}; any other return value counts as
%% success.
prepare([]) ->
    ok;
prepare([Step | Remaining]) ->
    case Step() of
        {error, _Reason} = Error ->
            Error;
        _ ->
            prepare(Remaining)
    end.

%% @doc Determines where CouchDB is configured to store data. Returns
%% the configured couchdb/view_index_dir and couchdb/database_dir
%% values, in that order.
-spec data_directories() -> [ file:filename() ].
data_directories() ->
    [config:get("couchdb","view_index_dir"), config:get("couchdb","database_dir")].

%% @doc Get an -env flag out of the vm.args file. Returns undefined
%% when no vm_env entries have been stored or the key is absent.
-spec get_vm_env(string()) -> string() | undefined.
get_vm_env(Key) ->
    case application:get_env(weatherreport, vm_env) of
        undefined ->
            undefined;
        {ok, PList} ->
            proplists:get_value(Key, PList)
    end.

%% @doc Determines the user/uid that the script is running as, using
%% the output of "whoami" with its final character removed. Returns
%% undefined when the command produces no output.
-spec user() -> string() | undefined.
user() ->
    case weatherreport_util:run_command("whoami") of
        [] ->
            undefined;
        Output ->
            %% Drop the last character (the trailing newline).
            [_Newline | ReversedName] = lists:reverse(Output),
            lists:reverse(ReversedName)
    end.

%% @doc The specified timeout value for diagnostic checks run via RPC.
%% Falls back to 300000 when the weatherreport application has no
%% timeout configured.
-spec timeout() -> integer().
timeout() ->
    case application:get_env(weatherreport, timeout) of
        {ok, Timeout} ->
            Timeout;
        _ ->
            300000
    end.

%% @doc The CouchDB configuration directory. Uses the weatherreport
%% 'etc' setting when present, otherwise "../etc" relative to the
%% directory of the running escript.
-spec etc_dir() -> file:filename().
etc_dir() ->
    case application:get_env(weatherreport, etc) of
        undefined ->
            ExecDir = filename:absname(filename:dirname(escript:script_name())),
            filename:join(ExecDir, "../etc");
        {ok, Path} ->
            filename:absname(Path, "/")
    end.

%% @doc The local node name.
%% Includes whether the node uses short
%% or long nodenames for distributed Erlang. Returns undefined when
%% no node name has been stored in the weatherreport application env.
-spec node_name() -> {shortnames | longnames, Name::string()} | undefined.
node_name() ->
    case application:get_env(weatherreport, node_name) of
        undefined ->
            undefined;
        {ok, Node} ->
            Node
    end.

%% @doc The node's distributed Erlang cookie, or undefined when no
%% cookie has been stored in the weatherreport application env.
-spec cookie() -> atom().
cookie() ->
    case application:get_env(weatherreport, cookie) of
        undefined ->
            undefined;
        {ok, Cookie} ->
            %% The cookie is stored as a string and converted here.
            list_to_atom(Cookie)
    end.

%% Starts the config application on default.ini and local.ini from the
%% etc directory, logging the file list and the resulting config at
%% debug level.
load_app_config() ->
    Etc = ?MODULE:etc_dir(),
    IniFiles = [
        filename:join(Etc, "default.ini"),
        filename:join(Etc, "local.ini")
    ],
    weatherreport_log:log(node(), debug, "Reading config from files: ~p", [IniFiles]),
    config:start_link(IniFiles),
    weatherreport_log:log(node(), debug, "Local node config: ~p~n", [config:all()]).

%% Locates vm.args (from the -vm_args init argument, or inside the etc
%% directory as a fallback), reads it and hands the contents to
%% load_vm_args/1. Returns {error, Message} when the file cannot be
%% read.
load_vm_args() ->
    VmArgs = case init:get_argument(vm_args) of
        {ok, [[X]]} -> X;
        _ ->
            %% This is a backup. If for some reason -vm_args isn't specified
            %% then assume it lives in the same dir as app.config
            filename:absname("./vm.args", ?MODULE:etc_dir())
    end,

    case file:read_file(VmArgs) of
        {error, Reason} ->
            {error, io_lib:format("Could not read ~s, received error ~w!", [VmArgs, Reason])};
        {ok, Binary} ->
            load_vm_args(Binary)
    end.
+ +load_vm_args(Bin) when is_binary(Bin) -> + load_vm_args(re:split(Bin, "\s*\r?\n\s*", [{return, list}, trim])); +load_vm_args([]) -> + ok; +load_vm_args([[$#|_]|T]) -> + load_vm_args(T); +load_vm_args([""|T]) -> + load_vm_args(T); +load_vm_args(["-sname " ++ NodeName|T]) -> + application:set_env(weatherreport, node_name, {shortnames, string:strip(NodeName)}), + load_vm_args(T); +load_vm_args(["-name " ++ NodeName|T]) -> + application:set_env(weatherreport, node_name, {longnames, string:strip(NodeName)}), + load_vm_args(T); +load_vm_args(["-setcookie " ++ Cookie|T]) -> + application:set_env(weatherreport, cookie, string:strip(Cookie)), + load_vm_args(T); +load_vm_args(["-env " ++ Env|T]) -> + [Key, Value] = re:split(Env, "\s+", [{return, list}, trim]), + add_or_insert_env(vm_env, {Key, Value}), + load_vm_args(T); +load_vm_args([[$+|EmuFlags]|T]) -> + [Flag|Rest] = re:split(EmuFlags, "\s+", [{return,list}, trim]), + add_or_insert_env(emu_flags, {[$+|Flag], Rest}), + load_vm_args(T); +load_vm_args([[$-|InitFlags]|T]) -> + [Flag|Rest] = re:split(InitFlags, "\s+", [{return,list}, trim]), + add_or_insert_env(init_flags, {[$-|Flag], Rest}), + load_vm_args(T); +load_vm_args([Line|_]) -> + {error, io_lib:format("Erroneous line in vm.args: ~s", [Line])}. + +add_or_insert_env(Key, Value) -> + case application:get_env(weatherreport, Key) of + undefined -> + application:set_env(weatherreport, Key, [Value]); + {ok, List} -> + application:set_env(weatherreport, Key, [Value|List]) + end. diff --git a/src/weatherreport/src/weatherreport_getopt.erl b/src/weatherreport/src/weatherreport_getopt.erl new file mode 100644 index 000000000..cbee63cd6 --- /dev/null +++ b/src/weatherreport/src/weatherreport_getopt.erl @@ -0,0 +1,621 @@ +%%%------------------------------------------------------------------- +%%% @author Juan Jose Comellas <juanjo@comellas.org> +%%% @copyright (C) 2009 Juan Jose Comellas +%%% @doc Parses command line options with a format similar to that of GNU getopt. 
+%%% @end +%%% +%%% This source file is subject to the New BSD License. You should have received +%%% a copy of the New BSD license with this software. If not, it can be +%%% retrieved from: http://www.opensource.org/licenses/bsd-license.php +%%%------------------------------------------------------------------- +-module(weatherreport_getopt). +-author('juanjo@comellas.org'). + +-export([parse/2, usage/2, usage/3, usage/4]). + +-export_type([arg_type/0, + arg_value/0, + arg_spec/0, + simple_option/0, + compound_option/0, + option/0, + option_spec/0]). + +-define(TAB_LENGTH, 8). +%% Indentation of the help messages in number of tabs. +-define(INDENTATION, 3). + +%% Position of each field in the option specification tuple. +-define(OPT_NAME, 1). +-define(OPT_SHORT, 2). +-define(OPT_LONG, 3). +-define(OPT_ARG, 4). +-define(OPT_HELP, 5). + +-define(IS_OPT_SPEC(Opt), (tuple_size(Opt) =:= ?OPT_HELP)). + + +%% Atom indicating the data type that an argument can be converted to. +-type arg_type() :: 'atom' | 'binary' | 'boolean' | 'float' | 'integer' | 'string'. +%% Data type that an argument can be converted to. +-type arg_value() :: atom() | binary() | boolean() | float() | integer() | string(). +%% Argument specification. +-type arg_spec() :: arg_type() | {arg_type(), arg_value()} | undefined. +%% Option type and optional default argument. +-type simple_option() :: atom(). +-type compound_option() :: {atom(), arg_value()}. +-type option() :: simple_option() | compound_option(). +%% Command line option specification. +-type option_spec() :: { + Name :: atom(), + Short :: char() | undefined, + Long :: string() | undefined, + ArgSpec :: arg_spec(), + Help :: string() | undefined + }. +%% Output streams +-type output_stream() :: 'standard_io' | 'standard_error'. + + +%% @doc Parse the command line options and arguments returning a list of tuples +%% and/or atoms using the Erlang convention for sending options to a +%% function. 
-spec parse([option_spec()], string() | [string()]) ->
                   {ok, {[option()], [string()]}} | {error, {Reason :: atom(), Data :: any()}}.
parse(OptSpecList, CmdLine) ->
    try
        %% A flat string (first element is a character) is split into
        %% words on whitespace; anything else is used as given.
        Args = case CmdLine of
                   [Char | _] when is_integer(Char) ->
                       string:tokens(CmdLine, " \t\n");
                   _ ->
                       CmdLine
               end,
        parse(OptSpecList, [], [], 0, Args)
    catch
        throw: {error, {_Reason, _Data}} = Error ->
            Error
    end.


%% Main parsing loop. OptAcc and ArgAcc collect options and plain
%% arguments in reverse order; ArgPos counts the positional arguments
%% that have been bound to option specifications so far.
-spec parse([option_spec()], [option()], [string()], integer(), [string()]) ->
                   {ok, {[option()], [string()]}}.
%% Process the option terminator.
parse(OptSpecList, OptAcc, ArgAcc, _ArgPos, ["--" | Rest]) ->
    %% Any argument present after the terminator is not considered an option.
    Options = lists:reverse(append_default_options(OptSpecList, OptAcc)),
    {ok, {Options, lists:reverse(ArgAcc, Rest)}};
%% Process long options.
parse(OptSpecList, OptAcc, ArgAcc, ArgPos, ["--" ++ OptArg = OptStr | Rest]) ->
    parse_long_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Rest, OptStr, OptArg);
%% Process short options.
parse(OptSpecList, OptAcc, ArgAcc, ArgPos, ["-" ++ ([_Char | _] = OptArg) = OptStr | Rest]) ->
    parse_short_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Rest, OptStr, OptArg);
%% Process non-option arguments.
parse(OptSpecList, OptAcc, ArgAcc, ArgPos, [Arg | Rest]) ->
    case find_non_option_arg(OptSpecList, ArgPos) of
        {value, OptSpec} when ?IS_OPT_SPEC(OptSpec) ->
            NewOptAcc = add_option_with_arg(OptSpec, Arg, OptAcc),
            parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos + 1, Rest);
        false ->
            parse(OptSpecList, OptAcc, [Arg | ArgAcc], ArgPos, Rest)
    end;
parse(OptSpecList, OptAcc, ArgAcc, _ArgPos, []) ->
    %% Once we have completed gathering the options we add the ones that were
    %% not present but had default arguments in the specification.
    Options = lists:reverse(append_default_options(OptSpecList, OptAcc)),
    {ok, {Options, lists:reverse(ArgAcc)}}.


%% @doc Parse a long option, add it to the option accumulator and continue
%% parsing the rest of the arguments recursively.
%% A long option can have the following syntax:
%%   --foo      Single option 'foo', no argument
%%   --foo=bar  Single option 'foo', argument "bar"
%%   --foo bar  Single option 'foo', argument "bar"
-spec parse_long_option([option_spec()], [option()], [string()], integer(), [string()], string(), string()) ->
                               {ok, {[option()], [string()]}}.
parse_long_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, OptArg) ->
    case split_assigned_arg(OptArg) of
        {Long, Arg} ->
            %% Get option that has its argument within the same string
            %% separated by an equal ('=') character (e.g. "--port=1000").
            parse_long_option_assigned_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, Long, Arg);
        Long ->
            parse_long_option_bare(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, Long)
    end.

%% Handles a long option written without '='. Options whose
%% specification has no argument are added by name; for the others the
%% argument is looked up in the next command-line string
%% (e.g. ["--port", "1000"]). Unknown options raise invalid_option.
parse_long_option_bare(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, Long) ->
    case lists:keyfind(Long, ?OPT_LONG, OptSpecList) of
        {Name, _Short, Long, undefined, _Help} ->
            parse(OptSpecList, [Name | OptAcc], ArgAcc, ArgPos, Args);
        {_Name, _Short, Long, _ArgSpec, _Help} = OptSpec ->
            parse_long_option_next_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptSpec);
        false ->
            throw({error, {invalid_option, OptStr}})
    end.


%% @doc Parse an option where the argument is 'assigned' in the same string using
%% the '=' character, add it to the option accumulator and continue parsing the
%% rest of the arguments recursively. This syntax is only valid for long options.
-spec parse_long_option_assigned_arg([option_spec()], [option()], [string()], integer(),
                                     [string()], string(), string(), string()) ->
                                            {ok, {[option()], [string()]}}.
parse_long_option_assigned_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, Long, Arg) ->
    case lists:keyfind(Long, ?OPT_LONG, OptSpecList) of
        {_Name, _Short, Long, undefined, _Help} ->
            %% The option takes no argument, so "--opt=value" is invalid.
            throw({error, {invalid_option_arg, OptStr}});
        {_Name, _Short, Long, _ArgSpec, _Help} = OptSpec ->
            NewOptAcc = add_option_with_assigned_arg(OptSpec, Arg, OptAcc),
            parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args);
        false ->
            throw({error, {invalid_option, OptStr}})
    end.


%% @doc Split an option string that may contain an option with its argument
%% separated by an equal ('=') character (e.g. "port=1000"). Returns the
%% unchanged string when it contains no '='.
-spec split_assigned_arg(string()) -> {Name :: string(), Arg :: string()} | string().
split_assigned_arg(OptStr) ->
    split_assigned_arg(OptStr, OptStr, []).

%% Scans for the first '='; NameAcc holds the characters seen so far
%% in reverse order.
split_assigned_arg(_OptStr, [$= | Arg], NameAcc) ->
    {lists:reverse(NameAcc), Arg};
split_assigned_arg(OptStr, [Char | Rest], NameAcc) ->
    split_assigned_arg(OptStr, Rest, [Char | NameAcc]);
split_assigned_arg(OptStr, [], _NameAcc) ->
    OptStr.


%% @doc Retrieve the argument for an option from the next string in the list of
%% command-line parameters or set the value of the argument from the argument
%% specification (for boolean and integer arguments), if possible.
parse_long_option_next_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, {Name, _Short, _Long, ArgSpec, _Help} = OptSpec) ->
    Type = arg_spec_type(ArgSpec),
    UseImplicit = Args =:= [] orelse is_implicit_arg(Type, hd(Args)),
    case UseImplicit of
        true ->
            NewOptAcc = add_option_with_implicit_arg(OptSpec, OptAcc),
            parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args);
        false ->
            [Arg | Rest] = Args,
            try
                parse(OptSpecList, [{Name, to_type(Type, Arg)} | OptAcc], ArgAcc, ArgPos, Rest)
            catch
                error:_ ->
                    throw({error, {invalid_option_arg, {Name, Arg}}})
            end
    end.


%% @doc Parse a short option, add it to the option accumulator and continue
%% parsing the rest of the arguments recursively.
%% A short option can have the following syntax:
%%   -a       Single option 'a', no argument or implicit boolean argument
%%   -a foo   Single option 'a', argument "foo"
%%   -afoo    Single option 'a', argument "foo"
%%   -abc     Multiple options: 'a'; 'b'; 'c'
%%   -bcafoo  Multiple options: 'b'; 'c'; 'a' with argument "foo"
%%   -aaa     Multiple repetitions of option 'a' (only valid for options with integer arguments)
-spec parse_short_option([option_spec()], [option()], [string()], integer(), [string()], string(), string()) ->
                                {ok, {[option()], [string()]}}.
parse_short_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, OptArg) ->
    parse_short_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, first, OptArg).

%% Consumes the characters of a short-option string one at a time.
%% OptPos is 'first' until an option with an argument specification
%% has been given an implicit value, after which it is 'next'.
parse_short_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, _OptStr, _OptPos, []) ->
    parse(OptSpecList, OptAcc, ArgAcc, ArgPos, Args);
parse_short_option(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptStr, OptPos, [Short | Rest]) ->
    case lists:keyfind(Short, ?OPT_SHORT, OptSpecList) of
        {Name, Short, _Long, undefined, _Help} ->
            parse_short_option(OptSpecList, [Name | OptAcc], ArgAcc, ArgPos, Args, OptStr, first, Rest);

        {_Name, Short, _Long, _ArgSpec, _Help} = OptSpec when Rest =:= [] ->
            %% The option argument string is empty, but the option requires
            %% an argument, so we look into the next string in the list.
            parse_short_option_next_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, OptSpec, OptPos);

        {_Name, Short, _Long, ArgSpec, _Help} = OptSpec ->
            %% The remaining characters are either this option's argument
            %% or further options.
            case is_valid_arg(ArgSpec, Rest) of
                true ->
                    NewOptAcc = add_option_with_arg(OptSpec, Rest, OptAcc),
                    parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args);
                _ ->
                    NewOptAcc = implicit_short_arg(OptPos, OptSpec, OptAcc),
                    parse_short_option(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args, OptStr, next, Rest)
            end;

        false ->
            throw({error, {invalid_option, OptStr}})
    end.

%% Adds an option with an implicit value; only options seen after the
%% first position use the incrementing variant.
implicit_short_arg(first, OptSpec, OptAcc) ->
    add_option_with_implicit_arg(OptSpec, OptAcc);
implicit_short_arg(_OptPos, OptSpec, OptAcc) ->
    add_option_with_implicit_incrementable_arg(OptSpec, OptAcc).


%% @doc Retrieve the argument for an option from the next string in the list of
%% command-line parameters or set the value of the argument from the argument
%% specification (for boolean and integer arguments), if possible.
parse_short_option_next_arg(OptSpecList, OptAcc, ArgAcc, ArgPos, Args, {Name, _Short, _Long, ArgSpec, _Help} = OptSpec, OptPos) ->
    UseImplicit = Args =:= [] orelse is_implicit_arg(ArgSpec, hd(Args)),
    case {UseImplicit, OptPos} of
        {true, first} ->
            NewOptAcc = add_option_with_implicit_arg(OptSpec, OptAcc),
            parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args);
        {true, _} ->
            NewOptAcc = add_option_with_implicit_incrementable_arg(OptSpec, OptAcc),
            parse(OptSpecList, NewOptAcc, ArgAcc, ArgPos, Args);
        {false, _} ->
            [Arg | Rest] = Args,
            try
                parse(OptSpecList, [{Name, to_type(ArgSpec, Arg)} | OptAcc], ArgAcc, ArgPos, Rest)
            catch
                error:_ ->
                    throw({error, {invalid_option_arg, {Name, Arg}}})
            end
    end.


%% @doc Find the option for the discrete argument in position specified in the
%% Pos argument. Only specifications with neither a short nor a long
%% name are counted as positions.
-spec find_non_option_arg([option_spec()], integer()) -> {value, option_spec()} | false.
find_non_option_arg([], _Pos) ->
    false;
find_non_option_arg([{_Name, undefined, undefined, _ArgSpec, _Help} = OptSpec | _Rest], 0) ->
    {value, OptSpec};
find_non_option_arg([{_Name, undefined, undefined, _ArgSpec, _Help} | Rest], Pos) ->
    find_non_option_arg(Rest, Pos - 1);
find_non_option_arg([_Named | Rest], Pos) ->
    find_non_option_arg(Rest, Pos).


%% @doc Append options that were not present in the command line arguments with
%% their default arguments.
-spec append_default_options([option_spec()], [option()]) -> [option()].
append_default_options([{Name, _Short, _Long, {_Type, DefaultArg}, _Help} | Rest], OptAcc) ->
    NewOptAcc = case lists:keymember(Name, 1, OptAcc) of
                    false ->
                        [{Name, DefaultArg} | OptAcc];
                    _ ->
                        OptAcc
                end,
    append_default_options(Rest, NewOptAcc);
%% For options with no default argument.
append_default_options([_NoDefault | Rest], OptAcc) ->
    append_default_options(Rest, OptAcc);
append_default_options([], OptAcc) ->
    OptAcc.


%% @doc Add an option with argument converting it to the data type indicated by the
%% argument specification. When the argument is not valid for the
%% specification the option is added with an implicit argument instead.
-spec add_option_with_arg(option_spec(), string(), [option()]) -> [option()].
add_option_with_arg({Name, _Short, _Long, ArgSpec, _Help} = OptSpec, Arg, OptAcc) ->
    case is_valid_arg(ArgSpec, Arg) of
        true ->
            try to_type(ArgSpec, Arg) of
                Value ->
                    [{Name, Value} | OptAcc]
            catch
                error:_ ->
                    throw({error, {invalid_option_arg, {Name, Arg}}})
            end;
        false ->
            add_option_with_implicit_arg(OptSpec, OptAcc)
    end.


%% @doc Add an option with argument that was part of an assignment expression
%% (e.g. "--verbose=3") converting it to the data type indicated by the
%% argument specification.
-spec add_option_with_assigned_arg(option_spec(), string(), [option()]) -> [option()].
add_option_with_assigned_arg({Name, _Short, _Long, ArgSpec, _Help}, Arg, OptAcc) ->
    try to_type(ArgSpec, Arg) of
        Value ->
            [{Name, Value} | OptAcc]
    catch
        error:_ ->
            throw({error, {invalid_option_arg, {Name, Arg}}})
    end.


%% @doc Add an option that required an argument but did not have one. Some data
%% types (boolean, integer) allow implicit or assumed arguments.
-spec add_option_with_implicit_arg(option_spec(), [option()]) -> [option()].
add_option_with_implicit_arg({Name, _Short, _Long, ArgSpec, _Help}, OptAcc) ->
    case arg_spec_type(ArgSpec) of
        boolean ->
            %% Special case for boolean arguments: if there is no argument we
            %% set the value to 'true'.
            [{Name, true} | OptAcc];
        integer ->
            %% Special case for integer arguments: if the option had not been set
            %% before we set the value to 1. This is needed to support options like
            %% "-v" to return something like {verbose, 1}.
            [{Name, 1} | OptAcc];
        _ ->
            throw({error, {missing_option_arg, Name}})
    end.


%% @doc Add an option with an implicit or assumed argument.
+-spec add_option_with_implicit_incrementable_arg(option_spec() | arg_spec(), [option()]) -> [option()]. +add_option_with_implicit_incrementable_arg({Name, _Short, _Long, ArgSpec, _Help}, OptAcc) -> + case arg_spec_type(ArgSpec) of + boolean -> + %% Special case for boolean arguments: if there is no argument we + %% set the value to 'true'. + [{Name, true} | OptAcc]; + integer -> + %% Special case for integer arguments: if the option had not been set + %% before we set the value to 1; if not we increment the previous value + %% the option had. This is needed to support options like "-vvv" to + %% return something like {verbose, 3}. + case OptAcc of + [{Name, Count} | Tail] -> + [{Name, Count + 1} | Tail]; + _ -> + [{Name, 1} | OptAcc] + end; + _ -> + throw({error, {missing_option_arg, Name}}) + end. + + +%% @doc Retrieve the data type form an argument specification. +-spec arg_spec_type(arg_spec()) -> arg_type() | undefined. +arg_spec_type({Type, _DefaultArg}) -> + Type; +arg_spec_type(Type) when is_atom(Type) -> + Type. + + +%% @doc Convert an argument string to its corresponding data type. +-spec to_type(arg_spec() | arg_type(), string()) -> arg_value(). +to_type({Type, _DefaultArg}, Arg) -> + to_type(Type, Arg); +to_type(binary, Arg) -> + list_to_binary(Arg); +to_type(atom, Arg) -> + list_to_atom(Arg); +to_type(integer, Arg) -> + list_to_integer(Arg); +to_type(float, Arg) -> + list_to_float(Arg); +to_type(boolean, Arg) -> + LowerArg = string:to_lower(Arg), + case is_arg_true(LowerArg) of + true -> + true; + _ -> + case is_arg_false(LowerArg) of + true -> + false; + false -> + erlang:error(badarg) + end + end; +to_type(_Type, Arg) -> + Arg. + + +-spec is_arg_true(string()) -> boolean(). +is_arg_true(Arg) -> + (Arg =:= "true") orelse (Arg =:= "t") orelse + (Arg =:= "yes") orelse (Arg =:= "y") orelse + (Arg =:= "on") orelse (Arg =:= "enabled") orelse + (Arg =:= "1"). + + +-spec is_arg_false(string()) -> boolean(). 
+is_arg_false(Arg) -> + (Arg =:= "false") orelse (Arg =:= "f") orelse + (Arg =:= "no") orelse (Arg =:= "n") orelse + (Arg =:= "off") orelse (Arg =:= "disabled") orelse + (Arg =:= "0"). + + +-spec is_valid_arg(arg_spec(), nonempty_string()) -> boolean(). +is_valid_arg({Type, _DefaultArg}, Arg) -> + is_valid_arg(Type, Arg); +is_valid_arg(boolean, Arg) -> + is_boolean_arg(Arg); +is_valid_arg(integer, Arg) -> + is_non_neg_integer_arg(Arg); +is_valid_arg(float, Arg) -> + is_non_neg_float_arg(Arg); +is_valid_arg(_Type, _Arg) -> + true. + + +-spec is_implicit_arg(arg_spec(), nonempty_string()) -> boolean(). +is_implicit_arg({Type, _DefaultArg}, Arg) -> + is_implicit_arg(Type, Arg); +is_implicit_arg(boolean, Arg) -> + not is_boolean_arg(Arg); +is_implicit_arg(integer, Arg) -> + not is_integer_arg(Arg); +is_implicit_arg(_Type, _Arg) -> + false. + + +-spec is_boolean_arg(string()) -> boolean(). +is_boolean_arg(Arg) -> + LowerArg = string:to_lower(Arg), + is_arg_true(LowerArg) orelse is_arg_false(LowerArg). + + +-spec is_integer_arg(string()) -> boolean(). +is_integer_arg("-" ++ Tail) -> + is_non_neg_integer_arg(Tail); +is_integer_arg(Arg) -> + is_non_neg_integer_arg(Arg). + + +-spec is_non_neg_integer_arg(string()) -> boolean(). +is_non_neg_integer_arg([Head | Tail]) when Head >= $0, Head =< $9 -> + is_non_neg_integer_arg(Tail); +is_non_neg_integer_arg([_Head | _Tail]) -> + false; +is_non_neg_integer_arg([]) -> + true. + + +-spec is_non_neg_float_arg(string()) -> boolean(). +is_non_neg_float_arg([Head | Tail]) when (Head >= $0 andalso Head =< $9) orelse Head =:= $. -> + is_non_neg_float_arg(Tail); +is_non_neg_float_arg([_Head | _Tail]) -> + false; +is_non_neg_float_arg([]) -> + true. + + +%% @doc Show a message on standard_error indicating the command line options and +%% arguments that are supported by the program. +-spec usage([option_spec()], string()) -> ok. +usage(OptSpecList, ProgramName) -> + usage(OptSpecList, ProgramName, standard_error). 
%% @doc Show a usage message listing the command line options and arguments
%%      supported by the program. When the third argument is an atom it is
%%      the output stream (standard_error or standard_io); otherwise it is
%%      CmdLineTail, a string added to the end of the usage command line, and
%%      the message goes to standard_error.
-spec usage([option_spec()], string(), output_stream() | string()) -> ok.
usage(OptSpecList, ProgramName, OutputStream) when is_atom(OutputStream) ->
    io:format(OutputStream, "Usage: ~s~s~n~n~s~n",
              [ProgramName, usage_cmd_line(OptSpecList), usage_options(OptSpecList)]);
usage(OptSpecList, ProgramName, CmdLineTail) ->
    usage(OptSpecList, ProgramName, CmdLineTail, standard_error).


%% @doc Show a usage message whose command line ends with CmdLineTail. When
%%      the fourth argument is an atom it is the output stream; otherwise it
%%      is OptionsTail, a list of {Prefix, Help} tuples added after the
%%      options' help lines, and the message goes to standard_error.
-spec usage([option_spec()], string(), string(), output_stream() | [{string(), string()}]) -> ok.
usage(OptSpecList, ProgramName, CmdLineTail, OutputStream) when is_atom(OutputStream) ->
    io:format(OutputStream, "Usage: ~s~s ~s~n~n~s~n",
              [ProgramName, usage_cmd_line(OptSpecList), CmdLineTail, usage_options(OptSpecList)]);
usage(OptSpecList, ProgramName, CmdLineTail, OptionsTail) ->
    usage(OptSpecList, ProgramName, CmdLineTail, OptionsTail, standard_error).


%% @doc Show a usage message on OutputStream. CmdLineTail is a string added
%%      to the end of the usage command line and OptionsTail is a list of
%%      {Prefix, Help} tuples added after the options' help lines.
-spec usage([option_spec()], string(), string(), [{string(), string()}], output_stream()) -> ok.
usage(OptSpecList, ProgramName, CmdLineTail, OptionsTail, OutputStream) ->
    %% The help lines are accumulated in reverse order, so the extra entries
    %% are folded onto the reversed list and everything is reversed once.
    UsageOptions = lists:foldl(
                     fun ({Prefix, Help}, Acc) ->
                             add_option_help(Prefix, Help, Acc)
                     end, usage_options_reverse(OptSpecList, []), OptionsTail),
    io:format(OutputStream, "Usage: ~s~s ~s~n~n~s~n",
              [ProgramName, usage_cmd_line(OptSpecList), CmdLineTail,
               lists:flatten(lists:reverse(UsageOptions))]).


%% @doc Return a string with the syntax for the command line options and
%%      arguments.
-spec usage_cmd_line([option_spec()]) -> string().
usage_cmd_line(OptSpecList) ->
    usage_cmd_line(OptSpecList, []).

%% Builds one fragment per specification; fragments are collected in reverse
%% order and flattened into a single string at the end.
usage_cmd_line([{Name, Short, Long, ArgSpec, _Help} | Tail], Acc) ->
    CmdLine =
        case ArgSpec of
            undefined ->
                if
                    %% For options with short form and no argument.
                    Short =/= undefined ->
                        [$\s, $[, $-, Short, $]];
                    %% For options with only long form and no argument.
                    Long =/= undefined ->
                        [$\s, $[, $-, $-, Long, $]];
                    %% Neither form and no argument: nothing to show.
                    true ->
                        []
                end;
            _ ->
                if
                    %% For options with short form and argument.
                    Short =/= undefined ->
                        [$\s, $[, $-, Short, $\s, $<, atom_to_list(Name), $>, $]];
                    %% For options with only long form and argument.
                    Long =/= undefined ->
                        [$\s, $[, $-, $-, Long, $\s, $<, atom_to_list(Name), $>, $]];
                    %% For options with neither short nor long form and argument.
                    true ->
                        [$\s, $<, atom_to_list(Name), $>]
                end
        end,
    usage_cmd_line(Tail, [CmdLine | Acc]);
usage_cmd_line([], Acc) ->
    lists:flatten(lists:reverse(Acc)).


%% @doc Return a string with the help message for each of the options and
%%      arguments.
-spec usage_options([option_spec()]) -> string().
usage_options(OptSpecList) ->
    lists:flatten(lists:reverse(usage_options_reverse(OptSpecList, []))).

%% Returns the help lines in reverse order so that callers can keep
%% prepending entries before reversing once.
usage_options_reverse([{Name, Short, Long, _ArgSpec, Help} | Tail], Acc) ->
    Prefix =
        case Long of
            undefined ->
                case Short of
                    %% Neither short nor long form (non-option argument).
                    undefined ->
                        [$<, atom_to_list(Name), $>];
                    %% Only short form.
                    _ ->
                        [$-, Short]
                end;
            _ ->
                case Short of
                    %% Only long form.
                    undefined ->
                        [$-, $- | Long];
                    %% Both short and long form.
                    _ ->
                        [$-, Short, $,, $\s, $-, $- | Long]
                end
        end,
    usage_options_reverse(Tail, add_option_help(Prefix, Help, Acc));
usage_options_reverse([], Acc) ->
    Acc.


%% @doc Prepend the help entry for one option to Acc, indenting the help text
%%      to column ?INDENTATION * ?TAB_LENGTH. Entries whose help is not a
%%      non-empty list are skipped and Acc is returned unchanged.
-spec add_option_help(Prefix :: string(), Help :: string(), Acc :: string()) -> string().
add_option_help(Prefix, Help, Acc) when is_list(Help), Help =/= [] ->
    FlatPrefix = lists:flatten(Prefix),
    %% Width left between the two-space indented prefix and the help column.
    case ((?INDENTATION * ?TAB_LENGTH) - 2 - length(FlatPrefix)) of
        TabSize when TabSize > 0 ->
            %% The prefix fits: pad with tabs up to the help column.
            Tab = lists:duplicate(ceiling(TabSize / ?TAB_LENGTH), $\t),
            [[$\s, $\s, FlatPrefix, Tab, Help, $\n] | Acc];
        _ ->
            %% The prefix is too long: put it on its own line and start the
            %% help on the next one. The indentation is derived from the
            %% INDENTATION macro so the two can no longer drift apart.
            Indent = lists:duplicate(?INDENTATION, $\t),
            [[Indent, Help, $\n], [$\s, $\s, FlatPrefix, $\n] | Acc]
    end;
add_option_help(_Prefix, _Help, Acc) ->
    Acc.



%% @doc Return the smallest integral value not less than the argument.
-spec ceiling(float()) -> integer().
ceiling(X) ->
    T = erlang:trunc(X),
    %% trunc/1 rounds towards zero, so only a positive fractional part needs
    %% rounding up; for negative X the truncated value is already the ceiling.
    case (X - T) of
        Pos when Pos > 0 ->
            T + 1;
        _ ->
            T
    end.
%% src/weatherreport/src/weatherreport_log.erl (new file, mode 100644)
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.

%% @doc Minimal level-filtered logging to standard output.
-module(weatherreport_log).
-export([
    level/1,
    log/3,
    log/4,
    should_log/1
]).


%% @doc Map a level name, or an integer between 0 and 7, to its numeric
%% severity (0 is most severe, 7 is debug). Anything else maps to 3.
level(Severity) when is_integer(Severity), Severity >= 0, Severity =< 7 ->
    Severity;
level(LevelName) ->
    case lists:keyfind(LevelName, 1, level_names()) of
        {_, Severity} -> Severity;
        false -> 3
    end.

%% Table of the recognised level names and their severities.
level_names() ->
    [
        {debug, 7},
        {info, 6},
        {notice, 5},
        {warn, 4},
        {warning, 4},
        {err, 3},
        {error, 3},
        {crit, 2},
        {alert, 1},
        {emerg, 0},
        {panic, 0}
    ].


%% @doc Format Terms with Format and print the result, prefixed with the node
%% and level, when Level passes the configured threshold.
log(Node, Level, Format, Terms) ->
    case should_log(Level) of
        true -> write_line(Node, Level, io_lib:format(Format, Terms));
        false -> ok
    end.

%% @doc Print String as-is, prefixed with the node and level, when Level
%% passes the configured threshold.
log(Node, Level, String) ->
    case should_log(Level) of
        true -> write_line(Node, Level, String);
        false -> ok
    end.

%% @doc Whether a message of the given level should be printed: its severity
%% number must not exceed that of the weatherreport 'log_level' application
%% environment value ('info' when unset).
should_log(Level) ->
    Threshold =
        case application:get_env(weatherreport, log_level) of
            {ok, Configured} -> Configured;
            undefined -> info
        end,
    level(Level) =< level(Threshold).

%% Emit a single "[Node] [Level] Text" line on standard output.
write_line(Node, Level, Text) ->
    io:format("~s ~s~n", [get_prefix(Node, Level), Text]).

get_prefix(Node, Level) ->
    io_lib:format("[~w] [~w]", [Node, Level]).
%% src/weatherreport/src/weatherreport_node.erl (new file, mode 100644)
%%
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_node.erl to weatherreport_node.erl and
%% modified to work with Apache CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Functions that help diagnostics interact with the local
%% node or other members of the cluster.
-module(weatherreport_node).

-export([can_connect/0,
         can_connect_all/0,
         pid/0,
         local_command/2,
         local_command/3,
         local_command/4,
         multicall/5,
         nodename/0
        ]).

%% @doc Calls the given 0-arity module and function on the local
%% node and returns the result of that call.
%% @equiv local_command(Module, Function, [])
%% @see can_connect/0.
-spec local_command(Module::atom(), Function::atom()) -> term().
local_command(Module, Function) ->
    local_command(Module, Function, []).

%% @doc Calls the given module and function with the given arguments
%% on the local node and returns the result of that call, using the
%% timeout returned by weatherreport_config:timeout/0.
%% @equiv local_command(Module, Function, Args, weatherreport_config:timeout())
%% @see can_connect/0
-spec local_command(Module::atom(), Function::atom(), Args::[term()]) -> term().
local_command(Module, Function, Args) ->
    local_command(Module, Function, Args, weatherreport_config:timeout()).

%% @doc Calls the given module and function with the given arguments
%% on the local node and returns the result of that call. When this
%% VM is itself the cluster node the function is applied directly and
%% Timeout is not used; otherwise the call is made with
%% rpc:call(nodename(), Module, Function, Args, Timeout).
%% @see can_connect/0
-spec local_command(Module::atom(), Function::atom(), Args::[term()], Timeout::integer()) -> term().
local_command(Module, Function, Args, Timeout) ->
    case is_cluster_node() of
        true ->
            weatherreport_log:log(
                node(),
                debug,
                "Local function call: ~p:~p(~p)",
                [Module, Function, Args]
            ),
            erlang:apply(Module, Function, Args);
        _ ->
            weatherreport_log:log(
                node(),
                debug,
                "Local RPC: ~p:~p(~p) [~p]",
                [Module, Function, Args, Timeout]
            ),
            rpc:call(nodename(), Module, Function, Args, Timeout)
    end.

%% @doc Call rpc:multicall/5 from the local cluster node rather than the
%% escript. A failure of the outer call is reshaped into the
%% {Results, BadNodes} form that rpc:multicall/5 returns.
-spec multicall([node()], Module::atom(), Function::atom(), Args::[term()], Timeout::integer()) -> term().
multicall(Nodes, Module, Function, Args, Timeout) ->
    case local_command(rpc, multicall, [Nodes, Module, Function, Args, Timeout]) of
        {badrpc, Reason} ->
            {[{badrpc, Reason}], []};
        Resp ->
            Resp
    end.

%% @doc Retrieves the operating system's process ID of the local
%% node.
%% @equiv local_command(os, getpid)
%% @see can_connect/0
-spec pid() -> string().
pid() ->
    local_command(os, getpid).

%% @doc Attempts to connect to the local node if it is not
%% already, and returns whether connection was successful.
-spec can_connect() -> true | false.
can_connect() ->
    %% Short-circuit: the node name lookup behind is_cluster_node/0 is only
    %% needed when we are not already connected.
    case is_connected() orelse is_cluster_node() of
        true -> true;
        false ->
            weatherreport_log:log(
                node(),
                debug,
                "Not connected to the local cluster node, trying to connect. alive:~p connect_failed:~p",
                [is_alive(), connect_failed()]
            ),
            maybe_connect()
    end.

%% @doc Returns true when this VM is connected to the local cluster node and
%% weatherreport_check_nodes_connected:check/0 returns an empty list.
-spec can_connect_all() -> true | false.
can_connect_all() ->
    case is_connected() of
        true ->
            case weatherreport_check_nodes_connected:check() of
                [] -> true;
                _ -> false
            end;
        false -> false
    end.

%% @doc Returns the name of the local cluster node as an atom. The name comes
%% from weatherreport_config:node_name/0, falling back to node() when that
%% is undefined; a name without a host part gets the host part of node().
nodename() ->
    Name = case weatherreport_config:node_name() of
        undefined ->
            atom_to_list(node());
        {_, NodeName} ->
            NodeName
    end,
    case string:tokens(Name, "@") of
        [_Node, _Host] ->
            list_to_atom(Name);
        [Node] ->
            [_, Host] = string:tokens(atom_to_list(node()), "@"),
            list_to_atom(lists:concat([Node, "@", Host]))
    end.

%% Private functions

%% Whether this VM is the cluster node itself rather than a separate escript.
is_cluster_node() ->
    nodename() =:= node().

%% Distribution is running and no failed connection attempt has been recorded.
is_connected() ->
    is_alive() andalso connect_failed() =/= true.

%% Try to connect unless a previous attempt already failed.
maybe_connect() ->
    case connect_failed() of
        true -> false;
        _ -> try_connect()
    end.

%% Start distribution if necessary, connect to the cluster node as a hidden
%% node and record the outcome in the application environment.
try_connect() ->
    TargetNode = nodename(),
    case is_alive() of
        true -> ok;
        _ -> start_net()
    end,
    case {net_kernel:hidden_connect_node(TargetNode), net_adm:ping(TargetNode)} of
        {true, pong} ->
            application:set_env(weatherreport, connect_failed, false),
            weatherreport_log:log(
                node(),
                debug,
                "Connected to local cluster node ~p.",
                [TargetNode]
            ),
            true;
        _ ->
            application:set_env(weatherreport, connect_failed, true),
            weatherreport_log:log(
                node(),
                warning,
                "Could not connect to the local cluster node ~p, some checks will not run.",
                [TargetNode]
            ),
            false
    end.

%% Tri-state view of the recorded connection outcome: true after a failed
%% attempt, undefined when nothing has been recorded, false otherwise.
connect_failed() ->
    case application:get_env(weatherreport, connect_failed) of
        {ok, true} -> true;
        undefined -> undefined;
        _ -> false
    end.
%% Start distributed Erlang for this VM under a node name derived from the
%% configured cluster node name, then set the configured cookie.
start_net() ->
    weatherreport_log:log(node(), debug, "Starting distributed Erlang."),
    {Type, NodeName} = weatherreport_config:node_name(),
    ThisNode = append_node_suffix(NodeName, "_diag"),
    {ok, _} = net_kernel:start([ThisNode, Type]),
    erlang:set_cookie(node(), weatherreport_config:cookie()).

%% Insert Suffix and this VM's OS process id after the node part of Name,
%% keeping the host part when there is one.
append_node_suffix(Name, Suffix) ->
    case string:tokens(Name, "@") of
        [Node, Host] ->
            list_to_atom(lists:concat([Node, Suffix, os:getpid(), "@", Host]));
        [Node] ->
            list_to_atom(lists:concat([Node, Suffix, os:getpid()]))
    end.

%% src/weatherreport/src/weatherreport_runner.erl (new file, mode 100644)
%%
%% -------------------------------------------------------------------
%%
%% weatherreport - automated diagnostic tools for CouchDB
%%
%% Copyright (c) 2014 Cloudant
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc <p>The <code>weatherreport_runner</code> module provides
%% utility functions for running checks either on a single node or
%% multiple nodes.</p>

-module(weatherreport_runner).

-export([run/1, run/2, format/1]).

%% @doc Run the supplied list of checks on the local node
-spec run([Module::atom()]) -> [tuple()].
run(Checks) ->
    weatherreport_node:can_connect(),
    run(Checks, [weatherreport_node:nodename()]).

%% @doc Run the supplied list of checks on the supplied list of cluster nodes.
%% Passing <code>all</code> runs them on every node returned by
%% <code>mem3:nodes()</code> on the local cluster node. The result is a flat
%% list of <code>{Node, Level, Module, Message}</code> tuples.
-spec run([Module::atom()], [node()] | all) -> [tuple()].
run(Checks, all) ->
    weatherreport_node:can_connect(),
    case weatherreport_node:local_command(mem3, nodes, []) of
        ClusterNodes when is_list(ClusterNodes) ->
            run(Checks, ClusterNodes);
        Error ->
            %% 'crit' is the level name weatherreport_log:level/1 knows and
            %% the one used for the other failures reported by this module.
            [{node(), crit, weatherreport_runner, {checks_failed, Error}}]
    end;
run(Checks, Nodes) ->
    CheckOpts = get_check_options(),
    lists:flatten(lists:foldl(fun(Mod, Acc) ->
        [run_check(Mod, Nodes, CheckOpts) | Acc]
    end, [], Checks)).

%% @doc Part of the weatherreport_check behaviour. This means that any messages
%% returned by this module can be handled via the existing message reporting
%% code.
format({checks_failed, Error}) ->
    {"Could not run checks - received error: ~w", [Error]};
format({check_failed, Check, Node}) ->
    {"Could not run check ~w on cluster node ~w", [Check, Node]};
format({badrpc, Check, Error}) ->
    {"Bad rpc call executing check ~w: ~w", [Check, Error]}.

%% Private functions

%% Run one check module on every node and return its messages, followed by a
%% 'crit' message for each node the multicall reported as bad.
run_check(Mod, Nodes, CheckOpts) ->
    {Resps, BadNodes} = weatherreport_node:multicall(
        Nodes,
        erlang,
        apply,
        [fun() -> {node(), weatherreport_check:check(Mod, CheckOpts)} end, []],
        weatherreport_config:timeout()
    ),
    FailedChecks = [
        {node(), crit, weatherreport_runner, {check_failed, Mod, Node}}
     || Node <- BadNodes
    ],
    TransformResponse = fun
        ({badrpc, Error}) ->
            [{node(), crit, weatherreport_runner, {badrpc, Mod, Error}}];
        ({Node, Messages}) ->
            %% Tag every message of the check with the node that produced it.
            [{Node, Lvl, Module, Msg} || {Lvl, Module, Msg} <- Messages]
    end,
    Responses = [TransformResponse(Resp) || Resp <- Resps],
    Responses ++ FailedChecks.

%% Options handed to every check; 'expert' is true only when the weatherreport
%% application environment sets it to exactly true.
get_check_options() ->
    Expert = case application:get_env(weatherreport, expert) of
        {ok, true} ->
            true;
        _ ->
            false
    end,
    [{expert, Expert}].
%% src/weatherreport/src/weatherreport_util.erl (new file, mode 100644)
%%
%% -------------------------------------------------------------------
%%
%% riaknostic - automated diagnostic tools for Riak
%%
%% Copyright (c) 2011 Basho Technologies, Inc. All Rights Reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%
%% File renamed from riaknostic_util.erl to weatherreport_util.erl
%% Copyright (c) 2014 Cloudant
%%
%% -------------------------------------------------------------------

%% @doc Utility functions for weatherreport.
%% @end
-module(weatherreport_util).
-export([short_name/1,
         run_command/1,
         binary_to_float/1,
         flush_stdout/0,
         check_proc_count/3]).

%% @doc Converts a check module name into a short name that can be
%% used to refer to a check on the command line.  For example,
%% <code>weatherreport_check_memory_use</code> becomes
%% <code>"memory_use"</code>.
-spec short_name(module()) -> iodata() | unicode:charlist().
short_name(Mod) when is_atom(Mod) ->
    re:replace(atom_to_list(Mod), "weatherreport_check_", "", [{return, list}]).

%% @doc Runs a shell command and returns the output. stderr is
%% redirected to stdout so its output will be included.
-spec run_command(Command::iodata()) -> StdOut::iodata().
run_command(Command) ->
    weatherreport_log:log(
        node(),
        debug,
        "Running shell command: ~s",
        [Command]
    ),
    Port = erlang:open_port({spawn,Command},[exit_status, stderr_to_stdout]),
    do_read(Port, []).

%% Collect the port's output until its exit status arrives. Chunks are
%% prepended to the accumulator and concatenated once at the end, so long
%% outputs are not re-copied on every chunk.
do_read(Port, ChunksRev) ->
    receive
        {Port, {data, StdOut}} ->
            weatherreport_log:log(
                node(),
                debug,
                "Shell command output: ~n~s~n",
                [StdOut]
            ),
            do_read(Port, [StdOut | ChunksRev]);
        {Port, {exit_status, _}} ->
            lists:append(lists:reverse(ChunksRev));
        Other ->
            %% Any unrelated message in the mailbox is printed and dropped.
            io:format("~w", [Other]),
            do_read(Port, ChunksRev)
    end.

%% @doc Converts a binary containing a text representation of a float
%% into a float type.
-spec binary_to_float(binary()) -> float().
binary_to_float(Bin) ->
    list_to_float(binary_to_list(Bin)).

%% @doc Sleeps for one second.
%% NOTE(review): presumably this gives pending output time to be written
%% before the caller halts - confirm against the call sites.
flush_stdout() ->
    timer:sleep(1000).

%% @doc Utility function to check processes based on an attribute returned
%% by recon:proc_count/2. The ten entries returned for Key are compared
%% against Threshold.
-spec check_proc_count(atom(), integer(), list()) -> [{atom(), term()}].
check_proc_count(Key, Threshold, Opts) ->
    Processes = recon:proc_count(Key, 10),
    procs_to_messages(Processes, Threshold, [], Opts).

%% @doc Utility function to convert the list of process info returned by
%% recon:proc_count/2 into a list of diagnostic messages. Values above
%% Threshold become warnings (with recon:info/1 attached when the expert
%% option is true) and the rest become info messages. Messages are
%% accumulated by prepending, so the result is in reverse input order.
-spec procs_to_messages(list(), integer(), list(), list()) -> [{atom(), term()}].
procs_to_messages([], _Threshold, Acc, _Opts) ->
    Acc;
procs_to_messages([{Pid, Value, Info} | T], Threshold, Acc, Opts) ->
    Level = case Value > Threshold of
        true -> warning;
        _ -> info
    end,
    Message = case {Level, proplists:get_value(expert, Opts)} of
        {warning, true} ->
            Pinfo = recon:info(Pid),
            {warning, {high, {Pid, Value, Info, Pinfo}}};
        {warning, _} ->
            {warning, {high, {Pid, Value, Info}}};
        {info, _} ->
            {info, {ok, {Pid, Value, Info}}}
    end,
    procs_to_messages(T, Threshold, [Message | Acc], Opts).