summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tushar Gohad <tushar.gohad@intel.com> 2014-05-30 07:20:58 -0700
committer: Tushar Gohad <tushar.gohad@intel.com> 2014-05-30 07:20:58 -0700
commit: 3481ce1bc2c47635072541114ec9982138d60cd1 (patch)
tree: 3a6b97b627773b06053ba0705559caa2aa21bf54
parent: a4a5ab18557f02a0ae10d19623784f12e1840130 (diff)
download: pyeclib-isa-l_integration.tar.gz
c_eclib: Add decode routine for rs_vand_isa_l (branch: isa-l_integration)
Signed-off-by: Tushar Gohad <tushar.gohad@intel.com>
-rw-r--r-- setup.py | 2
-rw-r--r-- src/c/pyeclib_c/pyeclib_c.c | 268
-rw-r--r-- src/python/pyeclib/core.py | 7
-rw-r--r-- src/python/test/pyeclib_test.py | 20
-rw-r--r-- test/run_tests.py | 2
5 files changed, 262 insertions, 37 deletions
diff --git a/setup.py b/setup.py
index 7a76fdf..5e53e8d 100644
--- a/setup.py
+++ b/setup.py
@@ -201,7 +201,7 @@ module = Extension('pyeclib_c',
runtime_library_dirs=default_library_paths,
libraries=['Jerasure', 'Xorcode', 'alg_sig', 'isa-l'],
# The extra arguments are for debugging
- # extra_compile_args=['-g', '-O0'],
+ extra_compile_args=['-ggdb', '-O0'],
extra_link_args=['-Wl,-rpath,%s' %
l for l in default_library_paths],
sources=['src/c/pyeclib_c/pyeclib_c.c'])
diff --git a/src/c/pyeclib_c/pyeclib_c.c b/src/c/pyeclib_c/pyeclib_c.c
index 5bd8e46..9c07530 100644
--- a/src/c/pyeclib_c/pyeclib_c.c
+++ b/src/c/pyeclib_c/pyeclib_c.c
@@ -42,7 +42,7 @@
#include <erasure_code.h>
#include <types.h>
-#define talloc(type, num) (type *) malloc(sizeof(type)*(num))
+#define talloc(type, num) (type *) malloc(sizeof(type) * (num))
/* Python 3 compatibility macros */
#if PY_MAJOR_VERSION >= 3
@@ -158,6 +158,27 @@ static unsigned long long convert_list_to_bitmap(int *list)
}
/*
+ * Convert an index list int list into a bitmap
+ * is_idx_in_erasure[] needs to be allocated by the caller
+ * @returns number of idxs in error
+ */
+static int convert_idx_list_to_bitvalues(
+ int *list_idxs, // input idx_list
+ int *is_idx_in_erasure, // output idx list as boolean values (1/0)
+ int num_idxs) // total number of indexes
+{
+ int i = 0, n = 0;
+
+ assert(is_idx_in_erasure != NULL);
+ for (i = 0; i < num_idxs; i++)
+ is_idx_in_erasure[i] = 0;
+ for (i = 0, n = 0; (list_idxs[i] > -1) && (n < num_idxs); i++, n++)
+ is_idx_in_erasure[list_idxs[i]] = 1;
+
+ return n;
+}
+
+/*
* Convert the string ECC type to the enum value
*/
static pyeclib_type_t get_ecc_type(const char *str_type)
@@ -584,19 +605,19 @@ static int get_decoding_info(pyeclib_t *pyeclib_handle,
static int
pyeclib_c_init_rs_vand_isa_l(pyeclib_t *pyeclib_handle)
{
- int k = pyeclib_handle->k;
- int m = pyeclib_handle->m;
+ int k = pyeclib_handle->k;
+ int m = pyeclib_handle->m;
- // Force w = 8 for ISA-L
- pyeclib_handle->w = 8;
+ // Force w = 8 for ISA-L
+ pyeclib_handle->w = 8;
- // Generate encode matrix encode_matrix
- // The matrix generated by gf_gen_rs_matrix
- // is not always invertable.
- pyeclib_handle->matrix = talloc(int, k * m);
- gf_gen_rs_matrix((unsigned char *) pyeclib_handle->matrix, k + m, k);
+ // Generate encode matrix encode_matrix
+ // The matrix generated by gf_gen_rs_matrix
+ // is not always invertable.
+ pyeclib_handle->matrix = talloc(int, (k * m));
+ gf_gen_rs_matrix((unsigned char *) pyeclib_handle->matrix, k + m, k);
- return 0;
+ return 0;
}
static PyObject *
@@ -870,6 +891,29 @@ pyeclib_c_get_segment_info(PyObject *self, PyObject *args)
return ret_dict;
}
+void
+pyeclib_c_rs_vand_isa_l_encode(
+ pyeclib_t *pyeclib_handle,
+ char **data_to_encode,
+ char **encoded_parity,
+ int size)
+{
+ unsigned char *g_tbls = NULL;
+ int k = pyeclib_handle->k;
+ int m = pyeclib_handle->m;
+
+ // Generate g_tbls from encode matrix encode_matrix
+ g_tbls = talloc(unsigned char, (k * m * 32));
+ if (g_tbls == NULL) {
+ PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode");
+ return;
+ }
+ ec_init_tables(k, m, (unsigned char *) &pyeclib_handle->matrix[k * k], (unsigned char *) g_tbls);
+
+ // Perform matrix dot_prod for EC encoding using g_tbls
+ ec_encode_data(size, k, m, g_tbls, (unsigned char **) data_to_encode, (unsigned char **) encoded_parity);
+}
+
static PyObject *
pyeclib_c_encode(PyObject *self, PyObject *args)
{
@@ -967,22 +1011,8 @@ pyeclib_c_encode(PyObject *self, PyObject *args)
switch (pyeclib_handle->type) {
case PYECC_RS_VAND_ISA_L:
- {
- unsigned char *g_tbls = NULL;
- int k = pyeclib_handle->k;
- int m = pyeclib_handle->m;
- // Generate g_tbls from encode matrix encode_matrix
- g_tbls = talloc(unsigned char, k * m * 32);
- if (g_tbls == NULL) {
- PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode");
- return NULL;
- }
-
- ec_init_tables(k, m, (unsigned char *) &pyeclib_handle->matrix[k * k], (unsigned char *) g_tbls);
- // Encode
- ec_encode_data(blocksize, k, m, g_tbls, (unsigned char **) data_to_encode, (unsigned char **) encoded_parity);
+ pyeclib_c_rs_vand_isa_l_encode(pyeclib_handle, data_to_encode, encoded_parity, blocksize);
break;
- }
case PYECC_RS_CAUCHY_ISA_L:
break;
case PYECC_RS_CAUCHY_ORIG:
@@ -1482,10 +1512,10 @@ pyeclib_c_reconstruct(PyObject *self, PyObject *args)
unsigned long long realloc_bm = 0; // Identifies symbols that had to be allocated for alignment
int orig_data_size = -1;
int missing_size;
- int *decoding_matrix;
+ int *decoding_matrix = NULL;
int *decoding_row;
int *dm_ids;
- int ret;
+ int ret = 0;
int i;
if (!PyArg_ParseTuple(args, "OOOOii", &pyeclib_obj_handle, &data_list, &parity_list, &missing_idx_list, &destination_idx, &fragment_size)) {
@@ -1590,8 +1620,8 @@ pyeclib_c_reconstruct(PyObject *self, PyObject *args)
}
if (destination_idx < pyeclib_handle->k) {
- free(decoding_matrix);
- free(dm_ids);
+ if (decoding_matrix) free(decoding_matrix);
+ if (dm_ids) free(dm_ids);
}
break;
@@ -1659,6 +1689,183 @@ out:
return reconstructed;
}
+#define MATRIX_NOT_INVERTIBLE -2
+// Generate decode matrix from encode matrix
+static int
+rs_vand_isa_l_gen_decode_matrix(
+ int k, int m,
+ unsigned char *encode_matrix,
+ unsigned char *decode_matrix,
+ unsigned char *invert_matrix,
+ int *decode_idxs, // actual indexes used for decode
+ int *missing_idxs_list, // missing data indexes as a list
+ int *missing_idxs_bitmap_as_list, // is this index missing, 1 or 0, size = k + m
+ int nerrs, // total number of missing indexes
+ int nsrcerrs) // number of missing data indexes
+{
+ int i, j, p, r;
+ unsigned char *backup, *b, s;
+ int incr = 0;
+ int err = 0;
+
+ fprintf(stderr, "\nk = %d", k);
+ fprintf(stderr, "\nm = %d", m);
+ fprintf(stderr, "\nmissing_idxs_list = ");
+ for (i = 0; i < k && missing_idxs_list[i] > -1; i++)
+ fprintf(stderr, "%d ", missing_idxs_list[i]);
+
+ fprintf(stderr, "\nmissing_idxs_bitmap_as_list = ");
+ for (i = 0; i < k + m; i++)
+ fprintf(stderr, "%d ", missing_idxs_bitmap_as_list[i]);
+
+ b = malloc(m * k);
+ backup = malloc(m * k);
+
+ if (b == NULL || backup == NULL) {
+ fprintf(stderr, "Test failure! Error with malloc\n");
+ free(b);
+ free(backup);
+ return -1;
+ }
+
+ // Construct matrix b by removing error rows
+ for (i = 0, r = 0; i < k; i++, r++) {
+ while (missing_idxs_bitmap_as_list[r])
+ r++;
+ for (j = 0; j < k; j++) {
+ b[k * i + j] = encode_matrix[k * r + j];
+ backup[k * i + j] = encode_matrix[k * r + j];
+ }
+ decode_idxs[i] = r;
+ fprintf(stderr, "\nnext decode idx = %d", r);
+ }
+ incr = 0;
+ while (gf_invert_matrix(b, invert_matrix, k) < 0) {
+ if (nerrs == m) {
+ err = MATRIX_NOT_INVERTIBLE;
+ goto out;
+ }
+ incr++;
+ memcpy(b, backup, m * k);
+ for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+ if (missing_idxs_list[i] == (decode_idxs[k - 1] + incr)) {
+ // skip the erased parity line
+ incr++;
+ continue;
+ }
+ }
+ if (decode_idxs[k - 1] + incr >= m) {
+ err = MATRIX_NOT_INVERTIBLE;
+ goto out;
+ }
+ decode_idxs[k - 1] += incr;
+ for (j = 0; j < k; j++)
+ b[k * (k - 1) + j] = encode_matrix[k * decode_idxs[k - 1] + j];
+
+ };
+
+ for (i = 0; i < nsrcerrs; i++) {
+ for (j = 0; j < k; j++) {
+ decode_matrix[k * i + j] = invert_matrix[k * missing_idxs_list[i] + j];
+ }
+ }
+ /* missing_idxs_list from encode_matrix * invert of b for parity decoding */
+ for (p = nsrcerrs; p < nerrs; p++) {
+ for (i = 0; i < k; i++) {
+ s = 0;
+ for (j = 0; j < k; j++)
+ s ^= gf_mul(invert_matrix[j * k + i],
+ encode_matrix[k * missing_idxs_list[p] + j]);
+
+ decode_matrix[k * p + i] = s;
+ }
+ }
+
+ fprintf(stderr, "\ndecode_idxs = ");
+ for (i = 0; i < k; i++)
+ fprintf(stderr, "%d ", decode_idxs[i]);
+
+ fprintf(stderr, "\n\n");
+
+out:
+ return err;
+}
+
+void
+pyeclib_c_rs_vand_isa_l_decode(
+ pyeclib_t *pyeclib_handle,
+ int *missing_idxs,
+ char **data_ptrs,
+ char **coding_ptrs,
+ int size)
+{
+ int i = 0, err = 0;
+ int num_missing_idxs = 0;
+ int num_missing_data_idxs = 0;
+
+ char **recov = NULL;
+ unsigned char *g_tbls = NULL;
+ unsigned char *decode_matrix = NULL, *invert_matrix = NULL;
+ int *decode_idxs = NULL, *missing_idxs_as_bitvalues = NULL;
+
+ int k = pyeclib_handle->k;
+ int m = pyeclib_handle->m;
+
+ recov = talloc(char *, k);
+ decode_idxs = talloc(int, (k + m));
+ decode_matrix = talloc(unsigned char, (k * m));
+ invert_matrix = talloc(unsigned char, (k * m));
+ missing_idxs_as_bitvalues = talloc(int, (k + m));
+
+ // Convert missing indexes to a list of values where 1
+ // indicates a missing index at that position, 0 otherwise
+ assert(missing_idxs_as_bitvalues != NULL);
+ num_missing_idxs = convert_idx_list_to_bitvalues(missing_idxs, missing_idxs_as_bitvalues, (k + m));
+
+ // Calculate the number of missing data elements
+ // (as opposed to parity elements)
+ for (i = 0; i < num_missing_idxs; i++)
+ if (missing_idxs[i] < k)
+ num_missing_data_idxs++;
+
+ // FIXME pending optimizations for (num_missing_data_idxs == 0)
+ assert(decode_matrix != NULL);
+ assert(invert_matrix != NULL);
+ assert(decode_idxs != NULL);
+ err = rs_vand_isa_l_gen_decode_matrix(pyeclib_handle->k, pyeclib_handle->m, (unsigned char *) pyeclib_handle->matrix, decode_matrix, invert_matrix, decode_idxs, missing_idxs, missing_idxs_as_bitvalues, num_missing_idxs, num_missing_data_idxs);
+ if (err)
+ PyErr_SetString(PyECLibError, "isa_l decode: gen_decode_matrix returned error");
+
+ // Pack recovery array as list of valid sources
+ for (i = 0; i < k; i++) {
+ assert(recov[i] != NULL);
+ recov[i] = data_ptrs[decode_idxs[i]];
+ }
+
+ // Generate g_tbls from encode matrix encode_matrix
+ g_tbls = talloc(unsigned char, (k * m * 32));
+ if (g_tbls == NULL) {
+ PyErr_SetString(PyECLibError, "Could not allocate memory for g_tbls in pyeclib.encode");
+ return;
+ }
+ ec_init_tables(k, num_missing_idxs, decode_matrix, g_tbls);
+
+ fprintf(stderr, "\nrecov = ");
+ for (i = 0; i < k; i++)
+ fprintf(stderr, "0x%x ", recov[i]);
+ fprintf(stderr, "\n");
+
+ // Perform matrix dot_prod for EC encoding
+ // using g_tbls generated from encode_matrix
+ ec_encode_data(size, k, num_missing_idxs, g_tbls, (unsigned char **) recov, (unsigned char **) coding_ptrs);
+
+ // Clean up
+ free(missing_idxs_as_bitvalues);
+ free(invert_matrix);
+ free(decode_matrix);
+ free(decode_idxs);
+}
+
static PyObject *
pyeclib_c_decode(PyObject *self, PyObject *args)
{
@@ -1732,6 +1939,7 @@ pyeclib_c_decode(PyObject *self, PyObject *args)
switch (pyeclib_handle->type) {
case PYECC_RS_VAND_ISA_L:
+ pyeclib_c_rs_vand_isa_l_decode(pyeclib_handle, missing_idxs, data, parity, blocksize);
break;
case PYECC_RS_CAUCHY_ISA_L:
break;
diff --git a/src/python/pyeclib/core.py b/src/python/pyeclib/core.py
index 2963c76..3ff2ae5 100644
--- a/src/python/pyeclib/core.py
+++ b/src/python/pyeclib/core.py
@@ -43,6 +43,7 @@ class ECPyECLibDriver(object):
def __init__(self, k, m, ec_type, chksum_type="none"):
self.ec_rs_vand = "rs_vand"
+ self.ec_rs_vand = "rs_vand_isa_l"
self.ec_rs_cauchy_orig = "rs_cauchy_orig"
self.ec_flat_xor_3 = "flat_xor_3"
self.ec_flat_xor_4 = "flat_xor_4"
@@ -51,6 +52,7 @@ class ECPyECLibDriver(object):
self.chksum_algsig = "algsig"
self.ec_types = [
self.ec_rs_vand,
+ self.ec_rs_vand_isa_l,
self.ec_rs_cauchy_orig,
self.ec_flat_xor_3,
self.ec_flat_xor_4]
@@ -59,6 +61,8 @@ class ECPyECLibDriver(object):
self.chksum_inline,
self.chksum_algsig]
self.ec_rs_vand_best_w = 16
+ # FIXME I am told the best w for ISA-L is 8
+ self.ec_rs_vand_isa_l_best_w = 8
self.ec_default_w = 32
self.ec_rs_cauchy_best_w = 4
self.k = k
@@ -104,6 +108,9 @@ class ECPyECLibDriver(object):
if self.ec_type == self.ec_rs_vand:
self.w = self.ec_rs_vand_best_w
self.hd = self.m + 1
+ elif self.ec_type == self.ec_rs_vand_isa_l:
+ self.w = self.ec_rs_vand_isa_l_best_w
+ self.hd = self.m + 1
elif self.ec_type == self.ec_rs_cauchy_orig:
self.w = self.ec_rs_cauchy_best_w
self.hd = self.m + 1
diff --git a/src/python/test/pyeclib_test.py b/src/python/test/pyeclib_test.py
index cabf28e..bc69a23 100644
--- a/src/python/test/pyeclib_test.py
+++ b/src/python/test/pyeclib_test.py
@@ -198,6 +198,7 @@ def test_get_fragment_partition(
whole_file_bytes = fp.read()
fragments = pyeclib_c.encode(handle, whole_file_bytes)
+ # print(fragments)
for i in range(iterations):
missing_fragments = random.sample(fragments, 3)
@@ -246,7 +247,8 @@ def test_get_required_fragments(num_data, num_parity, w, ec_type):
#
# MDS codes need any k fragments
#
- if ec_type in ["rs_vand", "rs_cauchy_orig"]:
+ # if ec_type in ["rs_vand_isa_l", "rs_vand", "rs_cauchy_orig"]:
+ if ec_type in ["rs_vand_isa_l"]:
expected_fragments = [i for i in range(num_data + num_parity)]
missing_fragments = []
@@ -282,13 +284,17 @@ def get_throughput(avg_time, size_str):
return (format(throughput, '.10g'))
+#num_datas = [12, 12, 12]
+#num_parities = [2, 3, 4]
num_datas = [12, 12, 12]
-num_parities = [2, 3, 4]
+num_parities = [3, 4, 4]
iterations = 100
-rs_types = [("rs_vand", 16), ("rs_cauchy_orig", 4)]
-xor_types = [("flat_xor_4", 12, 6, 4), (
- "flat_xor_4", 10, 5, 4), ("flat_xor_3", 10, 5, 3)]
+# rs_types = [("rs_vand", 16), ("rs_vand_isa_l", 16), ("rs_cauchy_orig", 4)]
+rs_types = [("rs_vand_isa_l", 8)]
+#xor_types = [("flat_xor_4", 12, 6, 4), (
+# "flat_xor_4", 10, 5, 4), ("flat_xor_3", 10, 5, 3)]
+xor_types = []
sizes = ["101-K", "202-K", "303-K"]
@@ -319,17 +325,21 @@ for (ec_type, w) in rs_types:
for i in range(len(num_datas)):
for size_str in sizes:
+ print("%d encode\n" % i)
test_get_fragment_partition(
num_datas[i], num_parities[i], w, ec_type, size_str, iterations)
+ print("1 encode\n")
for i in range(len(num_datas)):
for size_str in sizes:
test_fragments_to_string(
num_datas[i], num_parities[i], w, ec_type, size_str)
+ print("2 encode\n")
for i in range(len(num_datas)):
test_get_required_fragments(num_datas[i], num_parities[i], w, ec_type)
+ print("3 encode\n")
for i in range(len(num_datas)):
for size_str in sizes:
avg_time = time_encode(
diff --git a/test/run_tests.py b/test/run_tests.py
index b44e284..3e00c73 100644
--- a/test/run_tests.py
+++ b/test/run_tests.py
@@ -77,6 +77,6 @@ def pyeclib_core_valgrind():
os.chdir(cur_dir)
-test_c_stuff()
+# test_c_stuff()
pyeclib_core_test()
# pyeclib_core_valgrind()