diff options
Diffstat (limited to 'swift/obj/header.py')
-rw-r--r-- | swift/obj/header.py | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/swift/obj/header.py b/swift/obj/header.py new file mode 100644 index 000000000..fd442ea83 --- /dev/null +++ b/swift/obj/header.py @@ -0,0 +1,394 @@ +# Copyright (c) 2010-2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import six +import os +import struct + +from swift.common.utils import fdatasync + +PICKLE_PROTOCOL = 2 + +# header version to use for new objects +OBJECT_HEADER_VERSION = 4 +VOLUME_HEADER_VERSION = 1 + +# maximum serialized header length +MAX_OBJECT_HEADER_LEN = 512 +MAX_VOLUME_HEADER_LEN = 128 + +OBJECT_START_MARKER = b"SWIFTOBJ" +VOLUME_START_MARKER = b"SWIFTVOL" + +# object alignment within a volume. +# this is needed so that FALLOC_FL_PUNCH_HOLE can actually return space back +# to the filesystem (tested on XFS and ext4) +# we may not need to align files in volumes dedicated to short-lived files, +# such as tombstones (.ts extension), +# but currently we do align for all volume types. 
ALIGNMENT = 4096

# constants
STATE_OBJ_FILE = 0
STATE_OBJ_QUARANTINED = 1

# mask selecting the low 64 bits of the 128-bit object hash
_UINT64_MASK = 0xffffffffffffffff


class HeaderException(IOError):
    """Raised when an object or volume header cannot be parsed or sized."""

    def __init__(self, message):
        self.message = message
        super(HeaderException, self).__init__(message)


def _unpack_prefix(fmt, buf):
    """
    Unpack ``fmt`` from the beginning of ``buf``.

    :param fmt: struct format string
    :param buf: bytes read from a volume; may be longer than the format
    :return: tuple of unpacked fields
    :raises HeaderException: if buf is too short for fmt (e.g. a short read
                             at the end of a volume), instead of leaking
                             struct.error to callers
    """
    try:
        return struct.unpack(fmt, buf[0:struct.calcsize(fmt)])
    except struct.error as err:
        raise HeaderException('Truncated header: %s' % err)


# struct formats, keyed by header version.
# NOTE: these formats carry no byte-order prefix, so the struct module packs
# them with the native byte order, sizes and alignment of the platform
# (padding is inserted between fields where the platform requires it).
# This is the existing serialized layout and must not be changed.
object_header_formats = {
    1: '8sBQQQ30sQQQQQ',
    2: '8sBQQQ64sQQQQQ',  # 64 characters for the filename
    3: '8sBQQQ64sQQQQQB',  # add state field
    4: '8sBQQQ64sQQQQQB32s'  # add metadata checksum
}


class ObjectHeader(object):
    """
    Serializable header stored in front of each object within a volume.

    Nominal field sizes are listed below; the actual packed layout follows
    native struct alignment (see object_header_formats).

    Version 1:
        Magic string (8 bytes)
        Header version (1 byte)
        Policy index (8 bytes)
        Object hash (16 bytes, stored as two 64-bit halves)
        Filename (30 chars)
        Metadata offset (8 bytes)
        Metadata size (8 bytes)
        Data offset (8 bytes)
        Data size (8 bytes)
        Total object size (8 bytes)

    Version 2: similar but 64 chars for the filename
    Version 3: Adds a "state" field (unsigned char)
    Version 4: Adds a metadata checksum field (32 bytes, ``metastr_md5``)

    Only ``magic_string`` and ``version`` are set by the constructor; every
    other attribute (policy_idx, ohash, filename, metadata_offset,
    metadata_size, data_offset, data_size, total_size, and state /
    metastr_md5 for the versions that have them) must be assigned by the
    caller before calling pack().
    """

    def __init__(self, version=OBJECT_HEADER_VERSION):
        """
        :param version: header version to serialize with
        :raises HeaderException: if the version is not supported
        """
        if version not in object_header_formats:
            raise HeaderException('Unsupported object header version')
        self.magic_string = OBJECT_START_MARKER
        self.version = version

    def __eq__(self, other):
        # Compare attribute by attribute. Objects without a __dict__
        # (None, ints, ...) are simply "not comparable" rather than an
        # AttributeError.
        other_attrs = getattr(other, '__dict__', None)
        if other_attrs is None:
            return NotImplemented
        return self.__dict__ == other_attrs

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __len__(self):
        """
        :return: serialized size, in bytes, for this header's version
        :raises HeaderException: if self.version is not a supported version
        """
        try:
            fmt = object_header_formats[self.version]
        except KeyError:
            raise HeaderException('Unsupported header version')
        return struct.calcsize(fmt)

    def pack(self):
        """
        Serialize the header according to self.version.

        The struct 's' fields pad short values with null bytes and silently
        truncate values longer than the field (30 or 64 bytes for the
        filename, depending on the version).

        :return: packed header, as bytes
        :raises KeyError: if self.version is not a supported version
        """
        fmt = object_header_formats[self.version]
        # the 128-bit hex hash is stored as two unsigned 64-bit integers
        ohash = int(self.ohash, 16)

        fields = [
            self.magic_string, self.version,
            self.policy_idx, ohash >> 64, ohash & _UINT64_MASK,
            str(self.filename).encode('ascii'),
            self.metadata_offset, self.metadata_size,
            self.data_offset, self.data_size, self.total_size,
        ]
        # later versions only ever append fields to the previous layout
        if self.version >= 3:
            fields.append(self.state)
        if self.version >= 4:
            fields.append(self.metastr_md5)

        return struct.pack(fmt, *fields)

    @classmethod
    def unpack(cls, buf):
        """
        Build a header from its serialized form.

        :param buf: bytes starting at the header; extra trailing bytes are
                    ignored
        :return: an ObjectHeader
        :raises HeaderException: if buf does not start with the object
                                 marker, has an unsupported version, or is
                                 too short to hold the header
        """
        if buf[0:8] != OBJECT_START_MARKER:
            raise HeaderException('Not a header')
        version = _unpack_prefix('<B', buf[8:9])[0]
        if version not in object_header_formats:
            raise HeaderException('Unsupported header version')

        raw_header = _unpack_prefix(object_header_formats[version], buf)

        header = cls()
        header.magic_string = raw_header[0]
        header.version = raw_header[1]
        header.policy_idx = raw_header[2]
        header.ohash = "{:032x}".format((raw_header[3] << 64) + raw_header[4])
        # strip the null padding added by the fixed-size 's' field
        filename = raw_header[5].rstrip(b'\0')
        if six.PY2:
            header.filename = filename
        else:
            header.filename = filename.decode('ascii')
        header.metadata_offset = raw_header[6]
        header.metadata_size = raw_header[7]
        header.data_offset = raw_header[8]
        header.data_size = raw_header[9]
        # currently, total_size gets padded to the next 4k boundary, so that
        # fallocate can reclaim the block when hole punching.
        header.total_size = raw_header[10]
        if version >= 3:
            header.state = raw_header[11]
        if version >= 4:
            header.metastr_md5 = raw_header[12]

        return header


# Same remark as for object_header_formats: native byte order, sizes and
# alignment are used, so e.g. 'L' is the platform's unsigned long.
volume_header_formats = {
    1: '8sBQQQQLQ'
}


class VolumeHeader(object):
    """
    Serializable header stored at the beginning of a volume.

    Nominal field sizes are listed below; the actual packed layout follows
    native struct sizes and alignment (see volume_header_formats).

    Version 1:
        Magic string (8 bytes)
        Header version (1 byte)
        Volume index (8 bytes)
        Partition index (8 bytes)
        Volume type (8 bytes)
        First object offset (8 bytes)
        Volume state (4 bytes) (enum from fmgr.proto)
        Volume compaction target (8 bytes)
            (only valid if state is STATE_COMPACTION_SRC)

    The constructor sets magic_string, version, state and
    compaction_target; volume_idx, partition, type and first_obj_offset
    must be assigned by the caller before calling pack().
    """
    def __init__(self, version=VOLUME_HEADER_VERSION):
        self.magic_string = VOLUME_START_MARKER
        self.version = version
        self.state = 0
        self.compaction_target = 0

    def __str__(self):
        prop_list = ['volume_idx', 'partition', 'type',
                     'state', 'compaction_target']
        return "\n".join("{}: {}".format(prop, getattr(self, prop))
                         for prop in prop_list)

    def __len__(self):
        """
        :return: serialized size, in bytes, for this header's version
        :raises HeaderException: if self.version is not a supported version
        """
        try:
            fmt = volume_header_formats[self.version]
        except KeyError:
            raise HeaderException('Unsupported header version')
        return struct.calcsize(fmt)

    def pack(self):
        """
        Serialize the header according to self.version.

        :return: packed header, as bytes
        :raises KeyError: if self.version is not a supported version
        """
        fmt = volume_header_formats[self.version]

        args = (self.magic_string, self.version,
                self.volume_idx, self.partition, self.type,
                self.first_obj_offset, self.state,
                self.compaction_target)

        return struct.pack(fmt, *args)

    @classmethod
    def unpack(cls, buf):
        """
        Build a header from its serialized form.

        :param buf: bytes starting at the header; extra trailing bytes are
                    ignored
        :return: a VolumeHeader
        :raises HeaderException: if buf does not start with the volume
                                 marker, has an unsupported version, or is
                                 too short to hold the header
        """
        if buf[0:8] != VOLUME_START_MARKER:
            raise HeaderException('Not a header')
        version = _unpack_prefix('<B', buf[8:9])[0]
        if version not in volume_header_formats:
            raise HeaderException('Unsupported header version')

        raw_header = _unpack_prefix(volume_header_formats[version], buf)

        header = cls()
        header.magic_string = raw_header[0]
        header.version = raw_header[1]
        header.volume_idx = raw_header[2]
        header.partition = raw_header[3]
        header.type = raw_header[4]
        header.first_obj_offset = raw_header[5]
        header.state = raw_header[6]
        header.compaction_target = raw_header[7]

        return header


def read_volume_header(fp):
    """
    Read volume header
    :param fp: opened file, positioned at the header offset
    :return: a VolumeHeader
    """
    buf = fp.read(MAX_VOLUME_HEADER_LEN)
    return VolumeHeader.unpack(buf)


def write_volume_header(header, fd):
    """
    Write a volume header at the current position of a file descriptor
    :param header: VolumeHeader to write
    :param fd: volume file descriptor
    """
    os.write(fd, header.pack())


def read_object_header(fp):
    """
    Read object header
    :param fp: opened file, positioned at header start
    :return: an ObjectHeader
    """
    buf = fp.read(MAX_OBJECT_HEADER_LEN)
    return ObjectHeader.unpack(buf)


def write_object_header(header, fp):
    """
    Rewrites header in open file
    :param header: header to write
    :param fp: opened volume
    """
    fp.write(header.pack())
    fdatasync(fp.fileno())


def erase_object_header(fd, offset):
    """
    Erase an object header by writing null bytes over it
    :param fd: volume file descriptor
    :param offset: absolute header offset
    """
    os.lseek(fd, offset, os.SEEK_SET)
    os.write(fd, b"\x00" * MAX_OBJECT_HEADER_LEN)