diff options
author | Sheldon Sandbekkhaug <sheldon.sandbekkhaug@calxeda.com> | 2013-06-21 16:09:06 -0500 |
---|---|---|
committer | Sheldon Sandbekkhaug <sheldon.sandbekkhaug@calxeda.com> | 2013-06-21 16:22:16 -0500 |
commit | 5b3e94d50784235616ca2383592bea71d50a6239 (patch) | |
tree | 18e1ad8e06a4a0b0b902ab428931536fc319c093 | |
parent | 1df4df675dbf81358880f529a49db01d40c0a248 (diff) | |
download | cxmanage-5b3e94d50784235616ca2383592bea71d50a6239.tar.gz |
(CXMAN-194) Create a command to get all data
coredump.py
Added cxmanage command to get a lot of data and store it in a .tar
cxmanage
Added the coredump command
-rw-r--r-- | cxmanage/commands/coredump.py | 507 | ||||
-rwxr-xr-x | scripts/cxmanage | 18 |
2 files changed, 525 insertions, 0 deletions
diff --git a/cxmanage/commands/coredump.py b/cxmanage/commands/coredump.py new file mode 100644 index 0000000..b18b9bc --- /dev/null +++ b/cxmanage/commands/coredump.py @@ -0,0 +1,507 @@ +#!/usr/bin/env python + +# Copyright 2013 Calxeda, Inc. All Rights Reserved. + + +import os +import subprocess + +from subprocess import call + +from cxmanage import get_tftp, get_nodes, run_command + + +def coredump_command(args): + """Get information pertaining to each node. This includes: + IP addresses (ECME and server) + Version info (like cxmanage info) + MAC addresses + Sensor readings + Sensor data records + Firmware info + Boot order + SELs (System Event Logs), + Depth charts + Routing Tables + + All nodes in the fabric should be powered on before running this command. + + This data will be written to a set of files. Each node will get its own + file. All of these files will be archived and saved to the user's current + directory. + + Internally, this command is called from: + ~/virtual_testenv/workspace/cx_manage_util/scripts/cxmanage + """ + + tftp = get_tftp(args) + nodes = get_nodes(args, tftp) + + temp_dir = "coredump" + + try: + os.mkdir(temp_dir) + except: + """ Create a new directory with a unique name. The number + associated with the directory (e.g. the "5" in "coredump5/") + should match the number of the tar file (e.g. "coredump5.tar"). + + """ + temp_dir, _ = get_unused_directory_and_file_names(".", "coredump.tar") + + os.mkdir(temp_dir) + + os.chdir(temp_dir) + + quiet = args.quiet + + write_ip_addresses(args, nodes) + + if not quiet: + print("Getting boot order...") + write_boot_order(args, nodes) + + if not quiet: + print("Getting version information...") + write_version_info(args, nodes) + + if not quiet: + print("Getting MAC addresses...") + write_mac_addrs(args, nodes) + + if not quiet: + print("Getting sensor information...") + write_sensor_info(args, nodes) + print("Done!") + + if not quiet: + print("Getting sensor data records...") + write_sdr(args, nodes) + print("Done!") + + if not quiet: + print("Getting firmware information...") + write_fwinfo(args, nodes) + + if not quiet: + print("Getting system event logs...") + write_sel(args, nodes) + print("Done!") + + if not quiet: + print("Getting depth charts...") + write_depth_chart(args, nodes) + + if not quiet: + print("Getting routing tables...") + write_routing_table(args, nodes) + + # Archive the files + os.chdir("..") + archive(temp_dir) + + # The original files are already archived. + delete_files(temp_dir) + + return 0 + + +def write_ip_addresses(args, nodes): + """ Write the ECME and server IP addresses for each node + to their respective files. + + """ + ip_discover_results, ip_discover_errors = run_command( + args, + nodes, + 'get_server_ip') + + if (len(ip_discover_errors) > 0 and (not args.quiet)): + print("WARNING: Error discovering server IP addresses on " + "the following nodes:") + for error in ip_discover_errors: + print(node_ip(error)) + + for node in nodes: + write_to_file( + node, + ["Node " + str(node.node_id)], + False + ) + + if node in ip_discover_results: + write_to_file( + node, + ["[ ECME : Server ]", + node_ip(node) + " : " + ip_discover_results[node]] + ) + else: + write_to_file( + node, + ["[ ECME : Server ]", + node_ip(node) + " : (Unknown)"] + ) + + +def write_version_info(args, nodes): + """ Write the version info (like cxmanage info) for each node + to their respective files. + + """ + info_results, info_errors = run_command(args, nodes, "get_versions") + + # This will be used when writing version info to file + components = [ + ("ecme_version", "ECME version"), + ("cdb_version", "CDB version"), + ("stage2_version", "Stage2boot version"), + ("bootlog_version", "Bootlog version"), + ("a9boot_version", "A9boot version"), + ("uboot_version", "Uboot version"), + ("ubootenv_version", "Ubootenv version"), + ("dtb_version", "DTB version") + ] + + for node in nodes: + lines = [] # The lines of text to write to file + # \n is used here to give a blank line before this section + lines.append( + "\n[ Version Info for Node " + + str(node.node_id) + " ]" + ) + + if node in info_results: + info_result = info_results[node] + lines.append( + "Hardware version : %s" % + info_result.hardware_version + ) + lines.append( + "Firmware version : %s" % + info_result.firmware_version + ) + for var, description in components: + if hasattr(info_result, var): + version = getattr(info_result, var) + lines.append("%s: %s" % (description.ljust(19), version)) + else: + lines.append("No version information could be found.") + + write_to_file(node, lines) + + +def write_mac_addrs(args, nodes): + """ Write the MAC addresses for each node to their respective files. """ + mac_addr_results, mac_addr_errors = run_command( + args, + nodes, + "get_fabric_macaddrs" + ) + + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ MAC Addresses for Node " + str(node.node_id) + " ]") + + if node in mac_addr_results: + for port in mac_addr_results[node][node.node_id]: + for mac_address in mac_addr_results[node][node.node_id][port]: + lines.append("Node %i, Port %i: %s" % + (node.node_id, port, mac_address) + ) + else: + lines.append("\nWARNING: No MAC addresses found!") + write_to_file(node, lines) + + +def write_sensor_info(args, nodes): + """ Write sensor information for each node to their respective files. """ + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ Sensors for Node " + str(node.node_id) + " ]") + + try: + sensor_command = "ipmitool sensor -I lanplus -H " + \ + node_ip(node) + " -U admin -P admin list -v" + sensor_call = subprocess.Popen( + sensor_command.split(), + stdout=subprocess.PIPE + ) + sensor_info = sensor_call.communicate()[0] + lines.append(sensor_info) + except Exception as e: + lines.append("Could not get sensor info! " + str(e)) + if not args.quiet: + print("Failed to get sensor information for " + node_ip(node)) + write_to_file(node, lines) + + +def write_sdr(args, nodes): + """ Write the sensor data record for each node to their + respective files. + + """ + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append( + "\n[ Sensor Data Record for Node " + str( + node.node_id) + " ]") + + try: + sdr_command = "ipmitool sdr -I lanplus -H " + \ + node_ip(node) + " -U admin -P admin info" + sdr_call = subprocess.Popen( + sdr_command.split(), + stdout=subprocess.PIPE + ) + sdr = sdr_call.communicate()[0] + lines.append(sdr) + except Exception as e: + lines.append("Could not get SDR! " + str(e)) + if not args.quiet: + print("Failed to get sensor data record for " + node_ip(node)) + write_to_file(node, lines) + + +def write_fwinfo(args, nodes): + """ Write information about each node's firware partitions + to its respective file. + + """ + results, errors = run_command(args, nodes, "get_firmware_info") + + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ Firmware Info for Node " + str(node.node_id) + " ]") + + if node in results: + for partition in results[node]: + lines.append("\nPartition : %s" % partition.partition) + lines.append("Type : %s" % partition.type) + lines.append("Offset : %s" % partition.offset) + lines.append("Size : %s" % partition.size) + lines.append("Priority : %s" % partition.priority) + lines.append("Daddr : %s" % partition.daddr) + lines.append("Flags : %s" % partition.flags) + lines.append("Version : %s" % partition.version) + lines.append("In Use : %s" % partition.in_use) + else: + lines.append("Could not get firmware info!") + write_to_file(node, lines) + + +def write_boot_order(args, nodes): + """ Write the boot order of each node to their respective files. """ + results, boot_order_errors = run_command(args, nodes, "get_boot_order") + + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ Boot Order for Node " + str(node.node_id) + " ]") + + if node in results: + lines.append(", ".join(results[node])) + else: + lines.append("Could not get boot order!") + + write_to_file(node, lines) + + +def write_sel(args, nodes): + """ Write the SEL for each node to their respective files. """ + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append( + "\n[ System Event Log for Node " + str( + node.node_id) + " ]") + + try: + sel_command = "ipmitool sel -I lanplus -H " + node_ip(node) + \ + " -U admin -P admin list" + sel_call = subprocess.Popen( + sel_command.split(), + stdout=subprocess.PIPE + ) + sel = sel_call.communicate()[0] + lines.append(sel) + except Exception as e: + lines.append("Could not get SEL! " + str(e)) + if not args.quiet: + print("Failed to get system event log for " + node_ip(node)) + + write_to_file(node, lines) + + +def write_depth_chart(args, nodes): + """ Write the depth chart for each node to their respective files. """ + depth_results, depth_errors = run_command(args, nodes, "get_depth_chart") + + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ Depth Chart for Node " + str(node.node_id) + " ]") + + if node in depth_results: + depth_chart = depth_results[node] + for key in depth_chart: + subchart = depth_chart[key] + lines.append("To node " + str(key)) + for subkey in subchart: + lines.append(" " + str(subkey) + + " : " + str(subchart[subkey]) + ) + else: + lines.append("Could not get depth chart!") + + write_to_file(node, lines) + + +def write_routing_table(args, nodes): + """ Write the routing table for each node to their respective files. """ + routing_results, routing_errors = run_command( + args, nodes, "get_routing_table") + + for node in nodes: + lines = [] # Lines of text to write to file + # \n is used here to give a blank line before this section + lines.append("\n[ Routing Table for Node " + str(node.node_id) + " ]") + + if node in routing_results: + table = routing_results[node] + for node_to in table: + lines.append(str(node_to) + " : " + str(table[node_to])) + else: + lines.append("Could not get routing table!") + + write_to_file(node, lines) + + +def write_to_file(node, toWrite, add_newlines=True): + """ Append toWrite to an info file for every node in nodes. + + :param node: Node object to write about + :type node: Nobe object + :param toWrite: Text to write to the files + :type toWrite: List of strings + : +param add_newlines: Whether to add newline characters before + every item in toWrite. True by default. True will add newline + characters. + :type add_newlines: bool + + """ + + with open("node" + str(node.node_id) + ".txt", 'a') as file: + for line in toWrite: + if add_newlines: + file.write("\n" + line) + else: + file.write(line) + + +def node_ip(node): + """ Return a string containing the given node's ECME IP address. + :returns: A string containing the node's ECME IP address. + :rtype: string + :param node: The node to get an IP address from. + :type node: Node object + + """ + return str(node)[6:] + + +def delete_files(directory): + """ Remove all files inside directory, and directory itself. """ + command = "rm -r " + directory + call(command.split()) + + +def archive(directory_to_archive): + """ Creates a .tar containing everything in the directory_to_archive. + The .tar is saved to the current directory under the same name as + the directory, but with .tar appended. + + :param directory_to_archive: A path to the directory to be archived. + :type directory_to_archive: string + + """ + + make_tar_command = "tar -cf " + directory_to_archive + ".tar " + \ + directory_to_archive + + call(make_tar_command.split()) + print("Finished! One archive created:\n" + directory_to_archive + ".tar") + + +def get_unused_directory_and_file_names(directory, name): + """ Given a directory and filename, determine an unused directory name + and an unused file name that share the same name before any extensions. + + Returns: A list containing two strings: an unused directory name, and + an unused file name. + + :param directory: A path to the directory to look in. + :type directory: string + :param name: The original name of a file, possibly including an extension + :type name: string + + """ + dir_name, _, _ = name.partition(".") + + dir_number = 0 + file_number = 0 + + while True: + # Try to get an unused dir starting from file_number + new_dir, dir_number = get_unused_name(directory, dir_name, file_number) + + # Try to get an unused name starting from dir_number + new_file, file_number = get_unused_name(directory, name, dir_number) + + # dir_number and file_number always indicate unused names + if dir_number == file_number: + return [new_dir, new_file] + + +def get_unused_name(directory, name, number=0): + """ Get a new filename. This new filename cannot be the same as any + existing file in directory, and should be based off of name. name should + contain one or zero periods, and should not start with a period. + The return type is a list containing the new filename (string), and the + number attached to it (int). + + :param directory: A path to directory + :type directory: string + :param name: The original name of a file, possibly with an extension + :type name: string + :param number: The number to start searching from + :type number: int + + """ + name, dot, extension = name.partition('.') + + # Create a new string, similar to "name0.extension" + destination = name + str(number) + dot + extension + exit = False + + # Increment the number in name#.ext until we find an unused filename + while exit == False: + command = 'ls' + + a_call = subprocess.Popen( + command.split(), + stdout=subprocess.PIPE + ) + call_result = a_call.communicate()[0] + + if destination in call_result: + number = number + 1 + destination = name + str(number) + dot + extension + else: + exit = True + + return [destination, number] diff --git a/scripts/cxmanage b/scripts/cxmanage index 101b30b..5f38fec 100755 --- a/scripts/cxmanage +++ b/scripts/cxmanage @@ -46,6 +46,7 @@ from cxmanage.commands.config import config_reset_command, config_boot_command from cxmanage.commands.info import info_command from cxmanage.commands.ipmitool import ipmitool_command from cxmanage.commands.ipdiscover import ipdiscover_command +from cxmanage.commands.coredump import coredump_command PYIPMI_VERSION = '0.7.1' @@ -284,6 +285,23 @@ def build_parser(): parser.add_argument('hostname', help='nodes to operate on (see examples below)') + # Coredump command + coredump = subparsers.add_parser('coredump', help='Get all data from each node') + coredump.add_argument('sensor_name', help='Sensor name to read', + nargs='?', default='') + coredump.add_argument('-A', '--aggressive', action='store_true', + help='discover IPs aggressively') + coredump.add_argument('-U', '--server-user', type=str, default='user1', + metavar='USER', help='Server-side Linux username') + coredump.add_argument('-P', '--server-password', type=str, + default='1Password', metavar='PASSWORD', + help='Server-side Linux password') + coredump.add_argument('-6', '--ipv6', action='store_true', + help='Discover IPv6 addresses') + coredump.add_argument('-I', '--interface', type=str, default=None, + help='Network interface to check') + coredump.set_defaults(func=coredump_command) + return parser |