From 926f740563b1ba187338e119acbbfbc608883274 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Thu, 17 Oct 2013 17:41:33 -0700 Subject: ceph: behave more kindly when we can't connect to the cluster Set up a check_for_local_options() function and a try_failure_options() function. If we have "local options" (presently just "--help"), set a 2-second timeout on the cluster connection; if we fail, go into try_failure_options(). try_failure_options() will do whatever is appropriate given the passed-in commands; again, right now it just prints out the help text if that was asked for. To facilitate this and let people work around extreme network issues, add a "--connect-timeout" option and pass that into the cluster connect() function. This can now be used to override our new default 2-second timeout on --help, or to set a timeout after which other commands will give up (they presently will try forever). Signed-off-by: Greg Farnum --- src/ceph.in | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/ceph.in b/src/ceph.in index 29fae7273cf..3bf4312af0e 100755 --- a/src/ceph.in +++ b/src/ceph.in @@ -159,6 +159,9 @@ def parse_cmdargs(args=None, target=''): parser.add_argument('-f', '--format', choices=['json', 'json-pretty', 'xml', 'xml-pretty', 'plain'], dest='output_format') + parser.add_argument('--connect-timeout', dest='cluster_timeout', + help='set a timeout on the cluster connection; fail if we exceed it') + # returns a Namespace with the parsed args, and a list of all extras parsed_args, extras = parser.parse_known_args(args) @@ -412,6 +415,28 @@ def new_style_command(parsed_args, cmdargs, target, sigdict, inbuf, verbose): return json_command(cluster_handle, target=target, argdict=valid_dict, inbuf=inbuf) +def check_for_local_options(parsed_args): + if parsed_args.help: + return True + return False + +def try_failure_options(timeout, parsed_args, parser, args): + handle_it = False + if parsed_args.help: 
+ handle_it = True + + if handle_it: + if timeout is not None: + print "\nCould not connect to cluster within {t} seconds, but doing what we can locally".format(t=timeout) + print "(You can control that timeout with the --connect-timeout option if you want to try longer)" + else: + print "Could not set up cluster, but doing what we can locally" + + print "The following help text is incomplete as we could not fetch the monitor commands" + do_help(parser, args) + + return handle_it + def complete(sigdict, args, target): """ Command completion. Match as much of [args] as possible, @@ -577,14 +602,30 @@ def main(): print >> sys.stderr, '"{0} tell" is deprecated; try "tell {0}." instead (id can be "*") '.format(childargs[0]) return 1 + timeout = None + if check_for_local_options(parsed_args): + timeout = 2 + if parsed_args.cluster_timeout: + timeout = parsed_args.cluster_timeout + try: - cluster_handle.connect() + cluster_handle.connect(timeout=timeout) except KeyboardInterrupt: print >> sys.stderr, 'Cluster connection aborted' return 1 + except rados.ObjectNotFound as e: + handled = try_failure_options(timeout=None, parsed_args=parsed_args,\ + parser=parser, args=childargs) + if not handled: + print >> sys.stderr, 'Error connecting to cluster: {0}'.\ + format(e.__class__.__name__) + return 1 except Exception as e: - print >> sys.stderr, 'Error connecting to cluster: {0}'.\ - format(e.__class__.__name__) + handled = try_failure_options(timeout=timeout, parsed_args=parsed_args,\ + parser=parser, args=childargs) + if not handled: + print >> sys.stderr, 'Error connecting to cluster: {0}'.\ + format(e.__class__.__name__) return 1 if parsed_args.help: -- cgit v1.2.1