summaryrefslogtreecommitdiff
path: root/pyeclib/ec_iface.py
blob: fa50249a4daf9c1284f1b295d678c0dbe68afd3d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
# Copyright (c) 2013-2014, Kevin Greenan (kmgreen2@gmail.com)
# Copyright (c) 2014, Tushar Gohad (tushar.gohad@intel.com)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.  THIS SOFTWARE IS
# PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from .enum import Enum
from .enum import unique
from .utils import create_instance
from .utils import positive_int_value
from pyeclib_c import get_liberasurecode_version

import warnings

import logging
from logging.handlers import SysLogHandler
# Package-wide logger.  A SysLogHandler built with its default arguments is
# attached as soon as this module is imported, so 'pyeclib' log records are
# forwarded to that handler's default syslog address in every importing
# process.
logger = logging.getLogger('pyeclib')
syslog_handler = SysLogHandler()
logger.addHandler(syslog_handler)


def check_backend_available(backend_name):
    """
    Report whether the named erasure-code backend can be used.

    :param backend_name: an EC type name; every name starting with
                         'flat_xor_hd' is looked up as the single
                         'flat_xor_hd' enum member
    :returns: False if the name is not a PyECLib_EC_Types member, otherwise
              the result of pyeclib_c.check_backend_available(); when that
              function cannot be imported, True if a probe ECDriver can be
              built for the backend and False if it raises ECDriverError
    """
    try:
        # Aliased so the native helper is not confused with this wrapper.
        from pyeclib_c import check_backend_available as _native_check

        enum_name = ('flat_xor_hd'
                     if backend_name.startswith('flat_xor_hd')
                     else backend_name)
        backend_type = PyECLib_EC_Types.get_by_name(enum_name)
        return _native_check(backend_type.value) if backend_type else False
    except ImportError:
        # check_backend_available has been supported since
        # liberasurecode>=1.2.0, so for older liberasurecode versions the
        # backend is probed by instantiating a driver with k, m values the
        # backend is expected to accept.
        probe_k = 10
        probe_m = 5 if backend_name.startswith('flat_xor_hd') else 4
        try:
            ECDriver(ec_type=backend_name, k=probe_k, m=probe_m)
        except ECDriverError:
            return False
        else:
            return True


def PyECLibVersion(z, y, x):
    """
    Pack a three-part version number into one integer.

    :param z: major component, shifted left by 16 bits
    :param y: minor component, shifted left by 8 bits
    :param x: revision component, added unshifted
    :returns: (z << 16) + (y << 8) + x
    """
    major_bits = z << 16
    minor_bits = y << 8
    return major_bits + minor_bits + x


# PyECLib release number.  PYECLIB_VERSION is the same triple packed into a
# single integer by PyECLibVersion().
PYECLIB_MAJOR = 1
PYECLIB_MINOR = 1
PYECLIB_REV = 2
PYECLIB_VERSION = PyECLibVersion(PYECLIB_MAJOR, PYECLIB_MINOR,
                                 PYECLIB_REV)


# NOTE(review): presumably the upper bounds for k (data fragments) and
# m (parity fragments); nothing in the visible part of this module reads
# them -- confirm where they are enforced.
PYECLIB_MAX_DATA = 32
PYECLIB_MAX_PARITY = 32


@unique
class PyECLibEnum(Enum):
    """
    Base class for the PyECLib enums; adds name-based lookup helpers.

    NOTE(review): ``@unique`` decorates this member-less base class only.
    Class decorators are not inherited, so subclasses are presumably not
    checked for duplicate values -- confirm against the enum implementation.
    """

    @classmethod
    def describe(cls):
        """
        Return the supported types, i.e. every member of the enum.

        Works both on the class and on a member; iterating the class (not a
        member, which is not iterable) is what yields the members.
        """
        return list(cls)

    @classmethod
    def has_enum(cls, name):
        """Return True if ``name`` is the name of a member of this enum."""
        return cls.get_by_name(name) is not None

    @classmethod
    def get_by_name(cls, name):
        """
        Return the member called ``name``, or None if there is no such member.

        The lookup goes through the public ``__members__`` mapping rather
        than the metaclass ``__getattr__`` hook, which not every enum
        implementation provides.
        """
        try:
            return cls.__members__.get(name)
        except TypeError:
            # An unhashable name can never be a member name.
            return None

    @classmethod
    def names(cls):
        """Return the member names in definition order."""
        return list(cls.__members__)

    @classmethod
    def values(cls):
        """
        Return the enum members in definition order.

        Note that these are the member objects themselves, not their
        ``.value`` attributes.
        """
        return list(cls.__members__.values())

    def __str__(self):
        return "%s: %d" % (self.name, self.value)


# Erasure Code backends supported as of this PyECLib API rev
class PyECLib_EC_Types(PyECLibEnum):
    # Integer identifiers of the erasure-code backends.
    #
    # Values deliberately start at 1 rather than 0, so no backend id is the
    # falsy integer 0; the enum members themselves always evaluate to True.
    #
    # A single flat_xor_hd member covers the 'flat_xor_hd', 'flat_xor_hd_3'
    # and 'flat_xor_hd_4' type names accepted by ECDriver.
    #
    # NOTE(review): check_backend_available() passes ``.value`` to the
    # pyeclib_c extension, so these numbers presumably have to match the
    # backend ids used by liberasurecode -- confirm before renumbering.
    jerasure_rs_vand = 1
    jerasure_rs_cauchy = 2
    flat_xor_hd = 3
    isa_l_rs_vand = 4
    shss = 5
    liberasurecode_rs_vand = 6
    isa_l_rs_cauchy = 7
    libphazr = 8


# Output of Erasure (en)Coding process are data "fragments".  Fragment data
# integrity checks are provided by a checksum embedded in a header (prepend)
# for each fragment.

# The following Enum defines the schemes supported for fragment checksums.
# The checksum type is "none" unless specified.
class PyECLib_FRAGHDRCHKSUM_Types(PyECLibEnum):
    # Fragment-header checksum schemes; ECDriver accepts the member names as
    # its ``chksum_type`` argument.
    #
    # Values deliberately start at 1 rather than 0, so no scheme id is the
    # falsy integer 0; the enum members themselves always evaluate to True.
    none = 1
    inline_crc32 = 2


# Main ECDriver class
class ECDriver(object):
    '''A driver to encode, decode, and reconstruct erasure-coded data.'''

    def __init__(self, **kwargs):
        '''
        Validate the arguments and instantiate the backend driver.

        :param ec_type: name of the erasure coding type to use for this
                        driver. 'flat_xor_hd' and 'flat_xor_hd_3' select
                        flat_xor_hd with hd=3, 'flat_xor_hd_4' selects it
                        with hd=4, and 'libphazr' uses hd=1; for every other
                        type hd is set to m.
        :param k: number of data fragments to use. Required.
        :param m: number of parity fragments to use. Required.
        :param chksum_type: name of a PyECLib_FRAGHDRCHKSUM_Types member
                            ('none' or 'inline_crc32'). When omitted, None is
                            passed on to the backend driver.
        :param validate: default: False. Forwarded to the backend driver as
                         int(validate); meant as a runtime check that the
                         ec type is available.
        :param library_import_str: default: 'pyeclib.core.ECPyECLibDriver'

        Any other keyword argument is silently ignored.

        You must provide either ``ec_type`` or ``library_import_str``;
        typically you just want to use ``ec_type``. See ALL_EC_TYPES for the
        list of all EC types supported by PyECLib, and VALID_EC_TYPES for the
        list of all EC types currently available on this system.

        :raises: ECDriverError if k or m is missing or rejected by
                 positive_int_value(), if neither ec_type nor
                 library_import_str is given, if chksum_type is unknown, or
                 if the instantiated driver lacks a required method
        :raises: ECBackendNotSupported if ec_type is not a known EC type

        A FutureWarning is issued when a jerasure type is selected.
        '''
        # Sentinel defaults; -1 means "not set yet".
        self.k = -1
        self.m = -1
        self.hd = -1
        self.ec_type = None
        self.chksum_type = None
        self.validate = False

        for required in ('k', 'm'):
            if required not in kwargs:
                raise ECDriverError(
                    "Invalid Argument: %s is required" % required)

        if 'ec_type' not in kwargs and 'library_import_str' not in kwargs:
            raise ECDriverError(
                "Invalid Argument: either ec_type or library_import_str "
                "must be provided")

        # Arguments are processed in the order they appear in kwargs, so that
        # order decides which error is reported when several are invalid.
        for (key, value) in kwargs.items():
            if key == "k":
                try:
                    self.k = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of data fragments (k)")
            elif key == "m":
                try:
                    self.m = positive_int_value(value)
                except ValueError:
                    raise ECDriverError(
                        "Invalid number of parity fragments (m)")
            elif key == "ec_type":
                # The flat_xor_hd variants share one enum member; the suffix
                # only selects hd.
                if value in ["flat_xor_hd", "flat_xor_hd_3", "flat_xor_hd_4"]:
                    if value == "flat_xor_hd" or value == "flat_xor_hd_3":
                        self.hd = 3
                    elif value == "flat_xor_hd_4":
                        self.hd = 4
                    value = "flat_xor_hd"
                elif value == "libphazr":
                    self.hd = 1

                if PyECLib_EC_Types.has_enum(value):
                    self.ec_type = PyECLib_EC_Types.get_by_name(value)
                    if self.ec_type in (PyECLib_EC_Types.jerasure_rs_vand,
                                        PyECLib_EC_Types.jerasure_rs_cauchy):
                        # stacklevel=2 attributes the warning to the code
                        # that constructs the ECDriver.
                        warnings.warn('Jerasure support is deprecated and '
                                      'may be removed in a future release',
                                      FutureWarning, stacklevel=2)

                else:
                    raise ECBackendNotSupported(
                        "%s is not a valid EC type for PyECLib!" % value)
            elif key == "chksum_type":
                if PyECLib_FRAGHDRCHKSUM_Types.has_enum(value):
                    self.chksum_type = \
                        PyECLib_FRAGHDRCHKSUM_Types.get_by_name(value)
                else:
                    raise ECDriverError(
                        "%s is not a valid checksum type for PyECLib!" % value)
            elif key == "validate":
                # validate if the ec type is available (runtime check)
                self.validate = value

        # No type-specific hd was chosen above: fall back to m.
        if self.hd == -1:
            self.hd = self.m

        self.library_import_str = kwargs.pop('library_import_str',
                                             'pyeclib.core.ECPyECLibDriver')
        #
        # Instantiate EC backend driver
        #
        self.ec_lib_reference = create_instance(
            self.library_import_str,
            k=self.k,
            m=self.m,
            hd=self.hd,
            ec_type=self.ec_type,
            chksum_type=self.chksum_type,
            validate=int(self.validate)
        )

        #
        # Verify that the imported library implements the required functions
        #
        required_methods = [
            'decode',
            'encode',
            'reconstruct',
            'fragments_needed',
            'min_parity_fragments_needed',
            'get_metadata',
            'verify_stripe_metadata',
            'get_segment_info',
        ]

        # Space-separated names of the required methods that are absent or
        # not callable; an empty string means the driver is complete.
        missing_methods = ' '.join(
            method for method in required_methods
            if not callable(getattr(self.ec_lib_reference, method, None)))

        if missing_methods:
            raise ECDriverError(
                "The following required methods are not implemented "
                "in %s: %s" % (self.library_import_str, missing_methods))

    def __repr__(self):
        """
        Return a string of the form ``ECDriver(ec_type=..., k=..., m=...)``.

        flat_xor_hd drivers are shown with their hd suffix (for example
        'flat_xor_hd_3').  A driver created with only ``library_import_str``
        has no ec_type and is shown with ``ec_type=None`` instead of failing.
        """
        if self.ec_type is None:
            # No enum member to take a name from.
            type_name = None
        elif self.ec_type.name == 'flat_xor_hd':
            type_name = 'flat_xor_hd_%s' % self.hd
        else:
            type_name = self.ec_type.name
        return '%s(ec_type=%r, k=%r, m=%r)' % (
            type(self).__name__, type_name, self.k, self.m)

    def encode(self, data_bytes):
        """
        Erasure-code a buffer of arbitrary size via the backend driver.

        :param data_bytes: the buffer to encode
        :returns: the backend's result: a list of buffers (first k entries
                  are data and the last m are parity)
        :raises: ECDriverError if there is an error during encoding
        """
        backend = self.ec_lib_reference
        return backend.encode(data_bytes)

    def decode(self, fragment_payloads, ranges=None,
               force_metadata_checks=False):
        """
        Rebuild the buffer originally passed to encode() from a set of
        fragments, delegating to the backend driver.

        :param fragment_payloads: a list of buffers representing a subset of
                                  the list generated by encode()
        :param ranges (optional): a list of byte ranges to return instead of
                                  the entire buffer
        :param force_metadata_checks (optional): validate collective integrity
                                  of the fragments before trying to decode
        :returns: a buffer
        :raises: ECDriverError if there is an error during decoding
        """
        # All three arguments are handed to the backend positionally.
        call_args = (fragment_payloads, ranges, force_metadata_checks)
        return self.ec_lib_reference.decode(*call_args)

    def reconstruct(self, available_fragment_payloads,
                    missing_fragment_indexes):
        """
        Rebuild missing fragments from a subset of the available ones,
        delegating to the backend driver.

        :param available_fragment_payloads: a list of buffers representing
                                            a subset of the list generated
                                            by encode()
        :param missing_fragment_indexes: a list of integers representing
                                         the indexes of the fragments to be
                                         reconstructed.
        :returns: a list of buffers (ordered by fragment index) containing
                  the reconstructed payload associated with the indexes
                  provided in missing_fragment_indexes
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        backend = self.ec_lib_reference
        return backend.reconstruct(available_fragment_payloads,
                                   missing_fragment_indexes)

    def fragments_needed(self, reconstruction_indexes,
                         exclude_indexes=None):
        """
        Ask the backend driver which fragments are required to rebuild a
        given subset of missing fragments.

        :param reconstruction_indexes: a list of integers representing the
                                         indexes of the fragments to be
                                         reconstructed.
        :param exclude_indexes: a list of integers representing the
                                         indexes of the fragments to be
                                         excluded from the reconstruction
                                         equations; None is treated as an
                                         empty list.
        :returns: a list containing fragment indexes that can be used to
                  reconstruct the missing fragments.
        :raises: ECDriverError if there is an error during decoding or there
                 are not sufficient fragments to decode
        """
        excluded = [] if exclude_indexes is None else exclude_indexes
        backend = self.ec_lib_reference
        return backend.fragments_needed(reconstruction_indexes, excluded)

    def min_parity_fragments_needed(self):
        """
        Return the backend driver's min_parity_fragments_needed() result
        unchanged.
        """
        backend = self.ec_lib_reference
        return backend.min_parity_fragments_needed()

    def get_metadata(self, fragment, formatted=0):
        """
        Fetch the opaque metadata of a fragment from the backend driver.

        The metadata is opaque to the client, but meaningful to the
        underlying library.  It is used to verify stripes in
        verify_stripe_metadata().

        :param fragment: a buffer representing a single fragment generated by
                         the encode() function.
        :param formatted: passed through to the backend unchanged
                          (default 0); presumably selects a parsed rather
                          than raw representation -- TODO confirm.
        :returns: an opaque buffer to be passed into verify_stripe_metadata()
        :raises: ECDriverError if there was a problem getting the metadata.
        """
        backend = self.ec_lib_reference
        return backend.get_metadata(fragment, formatted)

    def verify_stripe_metadata(self, fragment_metadata_list):
        """
        Check the metadata of a subset of the fragments generated by
        encode(), delegating to the backend driver.

        :param fragment_metadata_list: a list of buffers representing the
                                       metadata from a subset of the fragments
                                       generated by encode().
        :returns: 'None' if the metadata is consistent.
                  a list of fragment indexes corresponding to inconsistent
                  fragments
        :raises: ECDriverError if there was a problem verifying the metadata
        """
        backend = self.ec_lib_reference
        return backend.verify_stripe_metadata(fragment_metadata_list)

    def get_segment_info(self, data_len, segment_size):
        """
        Ask the backend driver for the segmentation info of a given data
        length and segment size.

        Segment info is a dict with the following keys:

        segment_size: size of the payload to give to encode()
        last_segment_size: size of the payload to give to encode()
        fragment_size: the fragment size returned by encode()
        last_fragment_size: the fragment size returned by encode()
        num_segments: number of segments

        This lets the caller prepare requests when segmenting a data
        stream to be EC'd.

        Since the data length will rarely be aligned to the segment size,
        the last segment will be a different size than the others.

        There are restrictions on the length given to encode(), so calling
        this before encode is highly recommended when segmenting a data
        stream.
        """
        backend = self.ec_lib_reference
        return backend.get_segment_info(data_len, segment_size)

    #
    # Map each requested byte range to the per-segment offsets that cover it
    #
    def get_segment_info_byterange(self, ranges, data_len, segment_size):
        """
        Get segmentation info for a byterange request, given a data length and
        segment size.

        This will return a map-of-maps that represents a recipe describing
        the segments and ranges within each segment needed to satisfy a range
        request.

        Assume a range request is given for an object with segment size 3K and
        a 1 MB file:

        Ranges = (0, 1), (1, 12), (10, 1000), (0, segment_size-1),
                 (1, segment_size+1), (segment_size-1, 2*segment_size)

        This will return a map keyed on the ranges, where there is a recipe
        given for each range:

        {
         (0, 1): {0: (0, 1)},
         (10, 1000): {0: (10, 1000)},
         (1, 12): {0: (1, 12)},
         (0, 3071): {0: (0, 3071)},
         (3071, 6144): {0: (3071, 3071), 1: (0, 3071), 2: (0, 0)},
         (1, 3073): {0: (1, 3071), 1: (0, 1)}
        }

        :param ranges: an iterable of (begin, end) byte offsets; each range
                       is used as a key of the result, so it must be
                       hashable (for example a tuple)
        :param data_len: total length of the data, forwarded to the
                         backend's get_segment_info()
        :param segment_size: requested segment size; the 'segment_size'
                             reported back by the backend is the one used to
                             split the ranges
        :returns: a dict mapping each range to a dict of
                  {segment index: (first offset, last offset)}, with offsets
                  relative to the start of that segment
        """
        segment_info = self.ec_lib_reference.get_segment_info(
            data_len, segment_size)

        # Split on the segment size the backend reports, not the requested
        # one.
        segment_size = segment_info['segment_size']
        last_off_in_segment = segment_size - 1

        recipe = {}

        for r in ranges:
            begin_segment, begin_relative_off = divmod(r[0], segment_size)
            end_segment, end_relative_off = divmod(r[1], segment_size)

            if begin_segment == end_segment:
                # The whole range lives inside a single segment.
                segment_map = {
                    begin_segment: (begin_relative_off, end_relative_off)}
            else:
                # First segment runs to its end, middle segments are taken
                # whole, the last segment is read from its start.
                segment_map = {
                    begin_segment: (begin_relative_off, last_off_in_segment)}
                for middle_segment in range(begin_segment + 1, end_segment):
                    segment_map[middle_segment] = (0, last_off_in_segment)
                segment_map[end_segment] = (0, end_relative_off)

            recipe[r] = segment_map

        return recipe


# PyECLib Exceptions

# Generic ECDriverError: base class of the PyECLib exceptions below
class ECDriverError(Exception):
    """
    Generic PyECLib error carrying a printable message in ``error_str``.
    """

    def __init__(self, error):
        """
        :param error: the message, or any object whose str() is the message;
                      if str() itself fails, a placeholder naming the
                      object's class is stored instead
        """
        try:
            message = str(error)
        except Exception:
            message = ('Error retrieving the error message from %s'
                       % error.__class__.__name__)
        self.error_str = message

    def __str__(self):
        return self.error_str


# More specific exceptions, mapped to liberasurecode error codes

# Specified EC backend is not supported by PyECLib/liberasurecode
class ECBackendNotSupported(ECDriverError):
    """The specified EC backend is not supported by PyECLib/liberasurecode."""


# Unsupported EC method
class ECMethodNotImplemented(ECDriverError):
    """The requested EC method is not supported."""


# liberasurecode backend init error
class ECBackendInitializationError(ECDriverError):
    """The liberasurecode backend failed to initialize."""


# Specified backend instance is invalid/unavailable
class ECBackendInstanceNotAvailable(ECDriverError):
    """The specified backend instance is invalid or unavailable."""


# Specified backend instance is busy
class ECBackendInstanceInUse(ECDriverError):
    """The specified backend instance is busy."""


# Invalid parameter passed to a method
class ECInvalidParameter(ECDriverError):
    """An invalid parameter was passed to a method."""


# Invalid or incompatible fragment metadata
class ECInvalidFragmentMetadata(ECDriverError):
    """The fragment metadata is invalid or incompatible."""


# Fragment checksum verification failed
class ECBadFragmentChecksum(ECDriverError):
    """Verification of a fragment checksum failed."""


# Insufficient fragments specified for decode or reconstruct operation
class ECInsufficientFragments(ECDriverError):
    """Too few fragments were given for a decode or reconstruct operation."""


# Out of memory
class ECOutOfMemory(ECDriverError):
    """An out-of-memory condition was reported."""


# Names of every EC type PyECLib knows about, whether or not its backend is
# usable on this system (VALID_EC_TYPES is the filtered, "available" list).
# flat_xor_hd is listed once per supported hd suffix.
ALL_EC_TYPES = [
    'jerasure_rs_vand',
    'jerasure_rs_cauchy',
    'flat_xor_hd_3',
    'flat_xor_hd_4',
    'isa_l_rs_vand',
    'shss',
    'liberasurecode_rs_vand',
    'isa_l_rs_cauchy',
    'libphazr',
]


def _PyECLibValidECTypes():
    """
    Return the entries of ALL_EC_TYPES, in their original order, for which
    check_backend_available() reports a usable backend.
    """
    return [ec_type for ec_type in ALL_EC_TYPES
            if check_backend_available(ec_type)]


# EC types whose backend is usable on this system; probed once, when this
# module is imported.
VALID_EC_TYPES = _PyECLibValidECTypes()


def _liberasurecode_version():
    version_int = get_liberasurecode_version()
    version_hex_str = hex(version_int)
    version_hex_str = version_hex_str.lstrip('0x')
    major = str(int(version_hex_str[-6:-4]))
    minor = str(int(version_hex_str[-4:-2]))
    rev = str(int(version_hex_str[-2:]))
    version_str = '.'.join([major, minor, rev])

    # liberasurecode < 1.3.1 should be incompatible but
    # just warn until packagers build the required version
    # See https://bugs.launchpad.net/swift/+bug/1639691 in detail
    required_version = ((1 << 16) + (3 << 8) + 1)
    if version_int < required_version:
        logger.warning(
            'DEPRECATED WARNING: your liberasurecode '
            '%s will be deprecated in the near future because of the issue '
            'https://bugs.launchpad.net/swift/+bug/1639691; '
            'Please upgrade to >=1.3.1 and rebuild pyeclib to suppress '
            'this message' % version_str)
    return version_str


# Dotted version string of liberasurecode as reported through pyeclib_c;
# computed once, when this module is imported (which may log a warning).
LIBERASURECODE_VERSION = _liberasurecode_version()