Limit CaptureRegion sizes in format_inspector for VMDK and VHDX

VMDK: When parsing a VMDK file to calculate its size, the format_inspector determines the location of the Descriptor section by reading two uint64 from the headers of the file and uses them to create the descriptor CaptureRegion. It would be possible to craft a VMDK file that commands the format_inspector to create a very big CaptureRegion, thus exhausting resources on the glance-api process. This patch binds the beginning of the descriptor to 0x200 and limits the size of the CaptureRegion to 1MB, similar to how the VMDK descriptor is parsed by qemu. VHDX: It is a bit more involved, but similar: when looking for the VIRTUAL_DISK_SIZE metadata, the format_inspector was creating an unbounded CaptureRegion. In the same way as it seems to be done in Qemu, we now limit the upper bound of this CaptureRegion. Change-Id: I3ec5a33df20e1cfb6673f4ff1c7c91aacd065532
author: Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com> 2023-01-25 11:53:09 +0100
committer: Dan Smith <dansmith@redhat.com> 2023-02-03 08:04:17 -0800
commit: d4d33ee30f303f783c0640cd72acb31b313e1164 (patch)
tree: 51293010ff075b16781b47cce83d6b19d6df82aa
parent: 907c56265438da43c13bf1a3369d5459b1267e34 (diff)
download: glance-d4d33ee30f303f783c0640cd72acb31b313e1164.tar.gz
2 files changed, 139 insertions, 3 deletions
diff --git a/glance/common/format_inspector.py b/glance/common/format_inspector.py
index 351c300dd..550cceadb 100755
--- a/glance/common/format_inspector.py
+++ b/glance/common/format_inspector.py
@@ -345,6 +345,7 @@ class VHDXInspector(FileInspector):
     """
     METAREGION = '8B7CA206-4790-4B9A-B8FE-575F050F886E'
     VIRTUAL_DISK_SIZE = '2FA54224-CD1B-4876-B211-5DBED83BF4B8'
+    VHDX_METADATA_TABLE_MAX_SIZE = 32 * 2048  # From qemu
 
     def __init__(self, *a, **k):
         super(VHDXInspector, self).__init__(*a, **k)
@@ -459,6 +460,8 @@ class VHDXInspector(FileInspector):
                 item_offset, item_length, _reserved = struct.unpack(
                     '<III',
                     meta_buffer[entry_offset + 16:entry_offset + 28])
+                item_length = min(item_length,
+                                  self.VHDX_METADATA_TABLE_MAX_SIZE)
                 self.region('metadata').length = len(meta_buffer)
                 self._log.debug('Found entry at offset %x', item_offset)
                 # Metadata item offset is from the beginning of the metadata
@@ -516,6 +519,12 @@ class VMDKInspector(FileInspector):
     variable number of 512 byte sectors, but is just text defining the
     layout of the disk.
     """
+
+    # The beginning and max size of the descriptor is also hardcoded in Qemu
+    # at 0x200 and 1MB - 1
+    DESC_OFFSET = 0x200
+    DESC_MAX_SIZE = (1 << 20) - 1
+
     def __init__(self, *a, **k):
         super(VMDKInspector, self).__init__(*a, **k)
         self.new_region('header', CaptureRegion(0, 512))
@@ -532,15 +541,22 @@ class VMDKInspector(FileInspector):
 
         if sig != b'KDMV':
             raise ImageFormatError('Signature KDMV not found: %r' % sig)
-            return
 
         if ver not in (1, 2, 3):
             raise ImageFormatError('Unsupported format version %i' % ver)
-            return
+
+        # Since we parse both desc_sec and desc_num (the location of the
+        # VMDK's descriptor, expressed in 512 bytes sectors) we enforce a
+        # check on the bounds to create a reasonable CaptureRegion. This
+        # is similar to how it's done in qemu.
+        desc_offset = desc_sec * 512
+        desc_size = min(desc_num * 512, self.DESC_MAX_SIZE)
+        if desc_offset != self.DESC_OFFSET:
+            raise ImageFormatError("Wrong descriptor location")
 
         if not self.has_region('descriptor'):
             self.new_region('descriptor', CaptureRegion(
-                desc_sec * 512, desc_num * 512))
+                desc_offset, desc_size))
 
     @property
     def format_match(self):
diff --git a/glance/tests/unit/common/test_format_inspector.py b/glance/tests/unit/common/test_format_inspector.py
index d229d094f..db6a9830b 100644
--- a/glance/tests/unit/common/test_format_inspector.py
+++ b/glance/tests/unit/common/test_format_inspector.py
@@ -16,6 +16,7 @@
 import io
 import os
 import re
+import struct
 import subprocess
 import tempfile
 from unittest import mock
@@ -63,6 +64,28 @@ class TestFormatInspectors(test_utils.BaseTestCase):
             shell=True)
         return fn
 
+    def _create_allocated_vmdk(self, size_mb):
+        # We need a "big" VMDK file to exercise some parts of the code of the
+        # format_inspector. A way to create one is to first create an empty
+        # file, and then to convert it with the -S 0 option.
+        fn = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
+                             suffix='.vmdk')
+        self._created_files.append(fn)
+        zeroes = tempfile.mktemp(prefix='glance-unittest-formatinspector-',
+                                 suffix='.zero')
+        self._created_files.append(zeroes)
+
+        # Create an empty file
+        subprocess.check_output(
+            'dd if=/dev/zero of=%s bs=1M count=%i' % (zeroes, size_mb),
+            shell=True)
+
+        # Convert it to VMDK
+        subprocess.check_output(
+            'qemu-img convert -f raw -O vmdk -S 0 %s %s' % (zeroes, fn),
+            shell=True)
+        return fn
+
     def _test_format_at_block_size(self, format_name, img, block_size):
         fmt = format_inspector.get_inspector(format_name)()
         self.assertIsNotNone(fmt,
@@ -119,6 +142,64 @@ class TestFormatInspectors(test_utils.BaseTestCase):
     def test_vmdk(self):
         self._test_format('vmdk')
 
+    def test_vmdk_bad_descriptor_offset(self):
+        format_name = 'vmdk'
+        image_size = 10 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        BAD_ADDRESS = 0x400
+        img = self._create_img(format_name, image_size)
+
+        # Corrupt the header
+        fd = open(img, 'r+b')
+        fd.seek(descriptorOffsetAddr)
+        fd.write(struct.pack('<Q', BAD_ADDRESS // 512))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(0, fmt.virtual_size,
+                             ('Calculated a virtual size for a corrupt %s at '
+                              'size %i block %i') % (format_name, image_size,
+                                                     block_size))
+
+    def test_vmdk_bad_descriptor_mem_limit(self):
+        format_name = 'vmdk'
+        image_size = 5 * units.Mi
+        virtual_size = 5 * units.Mi
+        descriptorOffsetAddr = 0x1c
+        descriptorSizeAddr = descriptorOffsetAddr + 8
+        twoMBInSectors = (2 << 20) // 512
+        # We need a big VMDK because otherwise we will not have enough data to
+        # fill-up the CaptureRegion.
+        img = self._create_allocated_vmdk(image_size // units.Mi)
+
+        # Corrupt the end of descriptor address so it "ends" at 2MB
+        fd = open(img, 'r+b')
+        fd.seek(descriptorSizeAddr)
+        fd.write(struct.pack('<Q', twoMBInSectors))
+        fd.close()
+
+        # Read the format in various sizes, some of which will read whole
+        # sections in a single read, others will be completely unaligned, etc.
+        for block_size in (64 * units.Ki, 512, 17, 1 * units.Mi):
+            fmt = self._test_format_at_block_size(format_name, img, block_size)
+            self.assertTrue(fmt.format_match,
+                            'Failed to match %s at size %i block %i' % (
+                                format_name, image_size, block_size))
+            self.assertEqual(virtual_size, fmt.virtual_size,
+                             ('Failed to calculate size for %s at size %i '
+                              'block %i') % (format_name, image_size,
+                                             block_size))
+            memory = sum(fmt.context_info.values())
+            self.assertLess(memory, 1.5 * units.Mi,
+                            'Format used more than 1.5MiB of memory: %s' % (
+                                fmt.context_info))
+
     def test_vdi(self):
         self._test_format('vdi')
 
@@ -275,3 +356,42 @@ class TestFormatInspectorInfra(test_utils.BaseTestCase):
         self.assertEqual(format_inspector.QcowInspector,
                          format_inspector.get_inspector('qcow2'))
         self.assertIsNone(format_inspector.get_inspector('foo'))
+
+
+class TestFormatInspectorsTargeted(test_utils.BaseTestCase):
+    def _make_vhd_meta(self, guid_raw, item_length):
+        # Meta region header, padded to 32 bytes
+        data = struct.pack('<8sHH', b'metadata', 0, 1)
+        data += b'0' * 20
+
+        # Metadata table entry, 16-byte GUID, 12-byte information,
+        # padded to 32-bytes
+        data += guid_raw
+        data += struct.pack('<III', 256, item_length, 0)
+        data += b'0' * 6
+
+        return data
+
+    def test_vhd_table_over_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        # This is a poorly-crafted image that specifies a larger table size
+        # than is allowed
+        meta.data = self._make_vhd_meta(desired, 33 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Make sure we clamp to our limit of 32 * 2048
+        self.assertEqual(
+            format_inspector.VHDXInspector.VHDX_METADATA_TABLE_MAX_SIZE,
+            new_region.length)
+
+    def test_vhd_table_under_limit(self):
+        ins = format_inspector.VHDXInspector()
+        meta = format_inspector.CaptureRegion(0, 0)
+        desired = b'012345678ABCDEF0'
+        meta.data = self._make_vhd_meta(desired, 16 * 2048)
+        ins.new_region('metadata', meta)
+        new_region = ins._find_meta_entry(ins._guid(desired))
+        # Table size was under the limit, make sure we get it back
+        self.assertEqual(16 * 2048, new_region.length)
author	Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com>	2023-01-25 11:53:09 +0100
committer	Dan Smith <dansmith@redhat.com>	2023-02-03 08:04:17 -0800
commit	d4d33ee30f303f783c0640cd72acb31b313e1164 (patch)
tree	51293010ff075b16781b47cce83d6b19d6df82aa
parent	907c56265438da43c13bf1a3369d5459b1267e34 (diff)
download	glance-d4d33ee30f303f783c0640cd72acb31b313e1164.tar.gz