diff options
author | Tushar Gohad <tushar.gohad@intel.com> | 2014-05-30 07:20:58 -0700 |
---|---|---|
committer | Tushar Gohad <tushar.gohad@intel.com> | 2014-05-30 07:20:58 -0700 |
commit | 3481ce1bc2c47635072541114ec9982138d60cd1 (patch) | |
tree | 3a6b97b627773b06053ba0705559caa2aa21bf54 | |
parent | a4a5ab18557f02a0ae10d19623784f12e1840130 (diff) | |
download | pyeclib-isa-l_integration.tar.gz |
c_eclib: Add decode routine for rs_vand_isa_l (branch: isa-l_integration)
Signed-off-by: Tushar Gohad <tushar.gohad@intel.com>
-rw-r--r-- | setup.py | 2 | ||||
-rw-r--r-- | src/c/pyeclib_c/pyeclib_c.c | 268 | ||||
-rw-r--r-- | src/python/pyeclib/core.py | 7 | ||||
-rw-r--r-- | src/python/test/pyeclib_test.py | 20 | ||||
-rw-r--r-- | test/run_tests.py | 2 |
5 files changed, 262 insertions, 37 deletions
@@ -201,7 +201,7 @@ module = Extension('pyeclib_c', runtime_library_dirs=default_library_paths, libraries=['Jerasure', 'Xorcode', 'alg_sig', 'isa-l'], # The extra arguments are for debugging - # extra_compile_args=['-g', '-O0'], + extra_compile_args=['-ggdb', '-O0'], extra_link_args=['-Wl,-rpath,%s' % l for l in default_library_paths], sources=['src/c/pyeclib_c/pyeclib_c.c']) diff --git a/src/c/pyeclib_c/pyeclib_c.c b/src/c/pyeclib_c/pyeclib_c.c index 5bd8e46..9c07530 100644 --- a/src/c/pyeclib_c/pyeclib_c.c +++ b/src/c/pyeclib_c/pyeclib_c.c @@ -42,7 +42,7 @@ #include <erasure_code.h> #include <types.h> -#define talloc(type, num) (type *) malloc(sizeof(type)*(num)) +#define talloc(type, num) (type *) malloc(sizeof(type) * (num)) /* Python 3 compatibility macros */ #if PY_MAJOR_VERSION >= 3 @@ -158,6 +158,27 @@ static unsigned long long convert_list_to_bitmap(int *list) } /* + * Convert an index list int list into a bitmap + * is_idx_in_erasure[] needs to be allocated by the caller + * @returns number of idxs in error + */ +static int convert_idx_list_to_bitvalues( + int *list_idxs, // input idx_list + int *is_idx_in_erasure, // output idx list as boolean values (1/0) + int num_idxs) // total number of indexes +{ + int i = 0, n = 0; + + assert(is_idx_in_erasure != NULL); + for (i = 0; i < num_idxs; i++) + is_idx_in_erasure[i] = 0; + for (i = 0, n = 0; (list_idxs[i] > -1) && (n < num_idxs); i++, n++) + is_idx_in_erasure[list_idxs[i]] = 1; + + return n; +} + +/* * Convert the string ECC type to the enum value */ static pyeclib_type_t get_ecc_type(const char *str_type) @@ -584,19 +605,19 @@ static int get_decoding_info(pyeclib_t *pyeclib_handle, static int pyeclib_c_init_rs_vand_isa_l(pyeclib_t *pyeclib_handle) { - int k = pyeclib_handle->k; - int m = pyeclib_handle->m; + int k = pyeclib_handle->k; + int m = pyeclib_handle->m; - // Force w = 8 for ISA-L - pyeclib_handle->w = 8; + // Force w = 8 for ISA-L + pyeclib_handle->w = 8; - // Generate encode matrix 
encode_matrix - // The matrix generated by gf_gen_rs_matrix - // is not always invertable. - pyeclib_handle->matrix = talloc(int, k * m); - gf_gen_rs_matrix((unsigned char *) pyeclib_handle->matrix, k + m, k); + // Generate encode matrix encode_matrix + // The matrix generated by gf_gen_rs_matrix + // is not always invertable. + pyeclib_handle->matrix = talloc(int, (k * m)); + gf_gen_rs_matrix((unsigned char *) pyeclib_handle->matrix, k + m, k); - return 0; + return 0; } static PyObject * @@ -870,6 +891,29 @@ pyeclib_c_get_segment_info(PyObject *self, PyObject *args) return ret_dict; } +void +pyeclib_c_rs_vand_isa_l_encode( + pyeclib_t *pyeclib_handle, + char **data_to_encode, + char **encoded_parity, + int size) +{ + unsigned char *g_tbls = NULL; + int k = pyeclib_handle->k; + int m = pyeclib_handle->m; + + // Generate g_tbls from encode matrix encode_matrix + g_tbls = talloc(unsigned char, (k * m * 32)); + if (g_tbls == NULL) { + PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode"); + return; + } + ec_init_tables(k, m, (unsigned char *) &pyeclib_handle->matrix[k * k], (unsigned char *) g_tbls); + + // Perform matrix dot_prod for EC encoding using g_tbls + ec_encode_data(size, k, m, g_tbls, (unsigned char **) data_to_encode, (unsigned char **) encoded_parity); +} + static PyObject * pyeclib_c_encode(PyObject *self, PyObject *args) { @@ -967,22 +1011,8 @@ pyeclib_c_encode(PyObject *self, PyObject *args) switch (pyeclib_handle->type) { case PYECC_RS_VAND_ISA_L: - { - unsigned char *g_tbls = NULL; - int k = pyeclib_handle->k; - int m = pyeclib_handle->m; - // Generate g_tbls from encode matrix encode_matrix - g_tbls = talloc(unsigned char, k * m * 32); - if (g_tbls == NULL) { - PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode"); - return NULL; - } - - ec_init_tables(k, m, (unsigned char *) &pyeclib_handle->matrix[k * k], (unsigned char *) g_tbls); - // Encode - ec_encode_data(blocksize, k, m, 
g_tbls, (unsigned char **) data_to_encode, (unsigned char **) encoded_parity); + pyeclib_c_rs_vand_isa_l_encode(pyeclib_handle, data_to_encode, encoded_parity, blocksize); break; - } case PYECC_RS_CAUCHY_ISA_L: break; case PYECC_RS_CAUCHY_ORIG: @@ -1482,10 +1512,10 @@ pyeclib_c_reconstruct(PyObject *self, PyObject *args) unsigned long long realloc_bm = 0; // Identifies symbols that had to be allocated for alignment int orig_data_size = -1; int missing_size; - int *decoding_matrix; + int *decoding_matrix = NULL; int *decoding_row; int *dm_ids; - int ret; + int ret = 0; int i; if (!PyArg_ParseTuple(args, "OOOOii", &pyeclib_obj_handle, &data_list, &parity_list, &missing_idx_list, &destination_idx, &fragment_size)) { @@ -1590,8 +1620,8 @@ pyeclib_c_reconstruct(PyObject *self, PyObject *args) } if (destination_idx < pyeclib_handle->k) { - free(decoding_matrix); - free(dm_ids); + if (decoding_matrix) free(decoding_matrix); + if (dm_ids) free(dm_ids); } break; @@ -1659,6 +1689,183 @@ out: return reconstructed; } +#define MATRIX_NOT_INVERTIBLE -2 +// Generate decode matrix from encode matrix +static int +rs_vand_isa_l_gen_decode_matrix( + int k, int m, + unsigned char *encode_matrix, + unsigned char *decode_matrix, + unsigned char *invert_matrix, + int *decode_idxs, // actual indexes used for decode + int *missing_idxs_list, // missing data indexes as a list + int *missing_idxs_bitmap_as_list, // is this index missing, 1 or 0, size = k + m + int nerrs, // total number of missing indexes + int nsrcerrs) // number of missing data indexes +{ + int i, j, p, r; + unsigned char *backup, *b, s; + int incr = 0; + int err = 0; + + fprintf(stderr, "\nk = %d", k); + fprintf(stderr, "\nm = %d", m); + fprintf(stderr, "\nmissing_idxs_list = "); + for (i = 0; i < k && missing_idxs_list[i] > -1; i++) + fprintf(stderr, "%d ", missing_idxs_list[i]); + + fprintf(stderr, "\nmissing_idxs_bitmap_as_list = "); + for (i = 0; i < k + m; i++) + fprintf(stderr, "%d ", 
missing_idxs_bitmap_as_list[i]); + + b = malloc(m * k); + backup = malloc(m * k); + + if (b == NULL || backup == NULL) { + fprintf(stderr, "Test failure! Error with malloc\n"); + free(b); + free(backup); + return -1; + } + + // Construct matrix b by removing error rows + for (i = 0, r = 0; i < k; i++, r++) { + while (missing_idxs_bitmap_as_list[r]) + r++; + for (j = 0; j < k; j++) { + b[k * i + j] = encode_matrix[k * r + j]; + backup[k * i + j] = encode_matrix[k * r + j]; + } + decode_idxs[i] = r; + fprintf(stderr, "\nnext decode idx = %d", r); + } + incr = 0; + while (gf_invert_matrix(b, invert_matrix, k) < 0) { + if (nerrs == m) { + err = MATRIX_NOT_INVERTIBLE; + goto out; + } + incr++; + memcpy(b, backup, m * k); + for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { + if (missing_idxs_list[i] == (decode_idxs[k - 1] + incr)) { + // skip the erased parity line + incr++; + continue; + } + } + if (decode_idxs[k - 1] + incr >= m) { + err = MATRIX_NOT_INVERTIBLE; + goto out; + } + decode_idxs[k - 1] += incr; + for (j = 0; j < k; j++) + b[k * (k - 1) + j] = encode_matrix[k * decode_idxs[k - 1] + j]; + + }; + + for (i = 0; i < nsrcerrs; i++) { + for (j = 0; j < k; j++) { + decode_matrix[k * i + j] = invert_matrix[k * missing_idxs_list[i] + j]; + } + } + /* missing_idxs_list from encode_matrix * invert of b for parity decoding */ + for (p = nsrcerrs; p < nerrs; p++) { + for (i = 0; i < k; i++) { + s = 0; + for (j = 0; j < k; j++) + s ^= gf_mul(invert_matrix[j * k + i], + encode_matrix[k * missing_idxs_list[p] + j]); + + decode_matrix[k * p + i] = s; + } + } + + fprintf(stderr, "\ndecode_idxs = "); + for (i = 0; i < k; i++) + fprintf(stderr, "%d ", decode_idxs[i]); + + fprintf(stderr, "\n\n"); + +out: + return err; +} + +void +pyeclib_c_rs_vand_isa_l_decode( + pyeclib_t *pyeclib_handle, + int *missing_idxs, + char **data_ptrs, + char **coding_ptrs, + int size) +{ + int i = 0, err = 0; + int num_missing_idxs = 0; + int num_missing_data_idxs = 0; + + char **recov = NULL; + 
unsigned char *g_tbls = NULL; + unsigned char *decode_matrix = NULL, *invert_matrix = NULL; + int *decode_idxs = NULL, *missing_idxs_as_bitvalues = NULL; + + int k = pyeclib_handle->k; + int m = pyeclib_handle->m; + + recov = talloc(char *, k); + decode_idxs = talloc(int, (k + m)); + decode_matrix = talloc(unsigned char, (k * m)); + invert_matrix = talloc(unsigned char, (k * m)); + missing_idxs_as_bitvalues = talloc(int, (k + m)); + + // Convert missing indexes to a list of values where 1 + // indicates a missing index at that position, 0 otherwise + assert(missing_idxs_as_bitvalues != NULL); + num_missing_idxs = convert_idx_list_to_bitvalues(missing_idxs, missing_idxs_as_bitvalues, (k + m)); + + // Calculate the number of missing data elements + // (as opposed to parity elements) + for (i = 0; i < num_missing_idxs; i++) + if (missing_idxs[i] < k) + num_missing_data_idxs++; + + // FIXME pending optimizations for (num_missing_data_idxs == 0) + assert(decode_matrix != NULL); + assert(invert_matrix != NULL); + assert(decode_idxs != NULL); + err = rs_vand_isa_l_gen_decode_matrix(pyeclib_handle->k, pyeclib_handle->m, (unsigned char *) pyeclib_handle->matrix, decode_matrix, invert_matrix, decode_idxs, missing_idxs, missing_idxs_as_bitvalues, num_missing_idxs, num_missing_data_idxs); + if (err) + PyErr_SetString(PyECLibError, "isa_l decode: gen_decode_matrix returned error"); + + // Pack recovery array as list of valid sources + for (i = 0; i < k; i++) { + assert(recov[i] != NULL); + recov[i] = data_ptrs[decode_idxs[i]]; + } + + // Generate g_tbls from encode matrix encode_matrix + g_tbls = talloc(unsigned char, (k * m * 32)); + if (g_tbls == NULL) { + PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode"); + return; + } + ec_init_tables(k, num_missing_idxs, decode_matrix, g_tbls); + + fprintf(stderr, "\nrecov = "); + for (i = 0; i < k; i++) + fprintf(stderr, "0x%x ", recov[i]); + fprintf(stderr, "\n"); + + // Perform matrix dot_prod for 
EC encoding + // using g_tbls generated from encode_matrix + ec_encode_data(size, k, num_missing_idxs, g_tbls, (unsigned char **) recov, (unsigned char **) coding_ptrs); + + // Clean up + free(missing_idxs_as_bitvalues); + free(invert_matrix); + free(decode_matrix); + free(decode_idxs); +} + static PyObject * pyeclib_c_decode(PyObject *self, PyObject *args) { @@ -1732,6 +1939,7 @@ pyeclib_c_decode(PyObject *self, PyObject *args) switch (pyeclib_handle->type) { case PYECC_RS_VAND_ISA_L: + pyeclib_c_rs_vand_isa_l_decode(pyeclib_handle, missing_idxs, data, parity, blocksize); break; case PYECC_RS_CAUCHY_ISA_L: break; diff --git a/src/python/pyeclib/core.py b/src/python/pyeclib/core.py index 2963c76..3ff2ae5 100644 --- a/src/python/pyeclib/core.py +++ b/src/python/pyeclib/core.py @@ -43,6 +43,7 @@ class ECPyECLibDriver(object): def __init__(self, k, m, ec_type, chksum_type="none"): self.ec_rs_vand = "rs_vand" + self.ec_rs_vand = "rs_vand_isa_l" self.ec_rs_cauchy_orig = "rs_cauchy_orig" self.ec_flat_xor_3 = "flat_xor_3" self.ec_flat_xor_4 = "flat_xor_4" @@ -51,6 +52,7 @@ class ECPyECLibDriver(object): self.chksum_algsig = "algsig" self.ec_types = [ self.ec_rs_vand, + self.ec_rs_vand_isa_l, self.ec_rs_cauchy_orig, self.ec_flat_xor_3, self.ec_flat_xor_4] @@ -59,6 +61,8 @@ class ECPyECLibDriver(object): self.chksum_inline, self.chksum_algsig] self.ec_rs_vand_best_w = 16 + # FIXME I am told the best w for ISA-L is 8 + self.ec_rs_vand_isa_l_best_w = 8 self.ec_default_w = 32 self.ec_rs_cauchy_best_w = 4 self.k = k @@ -104,6 +108,9 @@ class ECPyECLibDriver(object): if self.ec_type == self.ec_rs_vand: self.w = self.ec_rs_vand_best_w self.hd = self.m + 1 + elif self.ec_type == self.ec_rs_vand_isa_l: + self.w = self.ec_rs_vand_isa_l_best_w + self.hd = self.m + 1 elif self.ec_type == self.ec_rs_cauchy_orig: self.w = self.ec_rs_cauchy_best_w self.hd = self.m + 1 diff --git a/src/python/test/pyeclib_test.py b/src/python/test/pyeclib_test.py index cabf28e..bc69a23 100644 --- 
a/src/python/test/pyeclib_test.py +++ b/src/python/test/pyeclib_test.py @@ -198,6 +198,7 @@ def test_get_fragment_partition( whole_file_bytes = fp.read() fragments = pyeclib_c.encode(handle, whole_file_bytes) + # print(fragments) for i in range(iterations): missing_fragments = random.sample(fragments, 3) @@ -246,7 +247,8 @@ def test_get_required_fragments(num_data, num_parity, w, ec_type): # # MDS codes need any k fragments # - if ec_type in ["rs_vand", "rs_cauchy_orig"]: + # if ec_type in ["rs_vand_isa_l", "rs_vand", "rs_cauchy_orig"]: + if ec_type in ["rs_vand_isa_l"]: expected_fragments = [i for i in range(num_data + num_parity)] missing_fragments = [] @@ -282,13 +284,17 @@ def get_throughput(avg_time, size_str): return (format(throughput, '.10g')) +#num_datas = [12, 12, 12] +#num_parities = [2, 3, 4] num_datas = [12, 12, 12] -num_parities = [2, 3, 4] +num_parities = [3, 4, 4] iterations = 100 -rs_types = [("rs_vand", 16), ("rs_cauchy_orig", 4)] -xor_types = [("flat_xor_4", 12, 6, 4), ( - "flat_xor_4", 10, 5, 4), ("flat_xor_3", 10, 5, 3)] +# rs_types = [("rs_vand", 16), ("rs_vand_isa_l", 16), ("rs_cauchy_orig", 4)] +rs_types = [("rs_vand_isa_l", 8)] +#xor_types = [("flat_xor_4", 12, 6, 4), ( +# "flat_xor_4", 10, 5, 4), ("flat_xor_3", 10, 5, 3)] +xor_types = [] sizes = ["101-K", "202-K", "303-K"] @@ -319,17 +325,21 @@ for (ec_type, w) in rs_types: for i in range(len(num_datas)): for size_str in sizes: + print("%d encode\n" % i) test_get_fragment_partition( num_datas[i], num_parities[i], w, ec_type, size_str, iterations) + print("1 encode\n") for i in range(len(num_datas)): for size_str in sizes: test_fragments_to_string( num_datas[i], num_parities[i], w, ec_type, size_str) + print("2 encode\n") for i in range(len(num_datas)): test_get_required_fragments(num_datas[i], num_parities[i], w, ec_type) + print("3 encode\n") for i in range(len(num_datas)): for size_str in sizes: avg_time = time_encode( diff --git a/test/run_tests.py b/test/run_tests.py index 
b44e284..3e00c73 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -77,6 +77,6 @@ def pyeclib_core_valgrind(): os.chdir(cur_dir) -test_c_stuff() +# test_c_stuff() pyeclib_core_test() # pyeclib_core_valgrind() |