summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: George Kraft <george.kraft@calxeda.com> 2012-09-06 15:01:16 -0500
committer: George Kraft <george.kraft@calxeda.com> 2012-09-06 15:01:16 -0500
commit: 00c18047eece40a672be73abfbf1145c85b0a961 (patch)
tree: 250ea421305400160d1fd3189a1b7185dd202ecd
parent: 9d5039dd141626b2219ebe1d9037d9ca94adf38a (diff)
download: cxmanage-00c18047eece40a672be73abfbf1145c85b0a961.tar.gz
CXMAN-84: Attempt firmware uploads 3 times per partition
This should make firmware updates more resilient under unfavorable conditions where transfers occasionally fail: each partition's update (transfer, CRC check, and activation) is now attempted up to three times before the error is propagated. Also reduces the timeout for a single transfer attempt from 5 minutes to 3 minutes; a single partition should never take longer than about 20 seconds anyway.
-rw-r--r-- cxmanage/target.py | 39
1 file changed, 22 insertions, 17 deletions
diff --git a/cxmanage/target.py b/cxmanage/target.py
index 2b49b8d..af6e031 100644
--- a/cxmanage/target.py
+++ b/cxmanage/target.py
@@ -440,22 +440,27 @@ class Target:
# Upload image to tftp server
filename = image.upload(self.work_dir, tftp, priority, daddr)
- # Send firmware update command
- image_type = image.type
- result = self.bmc.update_firmware(filename,
- partition_id, image_type, tftp_address)
- handle = result.tftp_handle_id
+ errors = 0
+ while True:
+ try:
+ # Update the firmware
+ handle = self.bmc.update_firmware(filename,
+ partition_id, image.type, tftp_address).tftp_handle_id
+ self._wait_for_transfer(handle)
+
+ # Verify crc and activate
+ result = self.bmc.check_firmware(partition_id)
+ if hasattr(result, "crc32") and result.error == None:
+ self.bmc.activate_firmware(partition_id)
+ else:
+ raise CxmanageError("Node reported crc32 check failure")
- # Wait for update to finish
- self._wait_for_transfer(handle)
+ break
- # Verify crc
- result = self.bmc.check_firmware(partition_id)
- if hasattr(result, "crc32") and result.error == None:
- # Activate
- self.bmc.activate_firmware(partition_id)
- else:
- raise CxmanageError("Node reported crc32 check failure")
+ except CxmanageError as e:
+ errors += 1
+ if errors >= 3:
+ raise e
def _download_image(self, tftp, partition):
""" Download an image from the target.
@@ -503,10 +508,10 @@ class Target:
if result.status != "In progress":
break
- # Time out after 5 minutes
+ # Time out after 3 minutes
counter += 1
- if counter >= 300:
- raise CxmanageError("Transfer timed out after 5 minutes")
+ if counter >= 180:
+ raise CxmanageError("Transfer timed out after 3 minutes")
if result.status != "Complete":
raise CxmanageError("Node reported transfer failure")