diff options
Diffstat (limited to 'ext')
-rw-r--r-- | ext/extractors/csv/Makefile.am | 10 | ||||
-rw-r--r-- | ext/extractors/csv/csv_extractor.c | 166 |
2 files changed, 176 insertions, 0 deletions
diff --git a/ext/extractors/csv/Makefile.am b/ext/extractors/csv/Makefile.am new file mode 100644 index 00000000000..bb2a35bbf8e --- /dev/null +++ b/ext/extractors/csv/Makefile.am @@ -0,0 +1,10 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_LTLIBRARIES = libwiredtiger_csv_extractor.la +libwiredtiger_csv_extractor_la_SOURCES = csv_extractor.c + +# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well +# as installation, it will only build static libraries. As far as I can tell, +# the "approved" libtool way to turn them back on is by adding -rpath. +libwiredtiger_csv_extractor_la_LDFLAGS = \ + -avoid-version -module -rpath /nowhere diff --git a/ext/extractors/csv/csv_extractor.c b/ext/extractors/csv/csv_extractor.c new file mode 100644 index 00000000000..efab4ad2eba --- /dev/null +++ b/ext/extractors/csv/csv_extractor.c @@ -0,0 +1,166 @@ +/*- + * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <stdlib.h> + +#include <wiredtiger_ext.h> + +/* + * A simple WiredTiger extractor that separates a single string field, + * interpreted as column separated values (CSV), into component pieces. + * When an index is configured with this extractor and app_metadata + * set to a number N, the Nth field is returned as a string. + * + * For example, if a value in the primary table is + * "Paris,France,CET,2273305" + * and this extractor is configured with app_metadata=2, then + * the extractor for this value would return "CET". + */ + +/* Local extractor structure. */ +typedef struct { + WT_EXTRACTOR extractor; /* Must come first */ + WT_EXTENSION_API *wt_api; /* Extension API */ + int field_num; /* Field to extract */ +} CSV_EXTRACTOR; + +/* + * csv_extract -- + * WiredTiger CSV extraction. + */ +static int +csv_extract(WT_EXTRACTOR *extractor, WT_SESSION *session, + const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor) +{ + char ch, *p, *pend, *valstr; + const CSV_EXTRACTOR *cvs_extractor; + int i, ret; + + (void)key; /* Unused parameters */ + + cvs_extractor = (const CSV_EXTRACTOR *)extractor; + + /* Unpack the value. */ + if ((ret = wiredtiger_struct_unpack( + session, value->data, value->size, "S", &valstr)) != 0) + return (ret); + + p = valstr; + pend = strchr(p, ','); + for (i = 0; i < cvs_extractor->field_num && pend != NULL; i++) { + p = pend + 1; + pend = strchr(p, ','); + } + if (i == cvs_extractor->field_num) { + if (pend == NULL) + pend = p + strlen(p); + /* + * The key we must return is a null terminated string, but p + * is not NULL-terminated. Make it so, for the duration of + * the insert operation. This is ugly. There are + * alternatives, but they aren't pretty either. + */ + ch = *pend; + *pend = '\0'; + result_cursor->set_key(result_cursor, p); + ret = result_cursor->insert(result_cursor); + *pend = ch; + if (ret != 0) + return (ret); + } + return (0); +} + +/* + * csv_customize -- + * The customize function creates a customized extractor, + * needed to save the field number. + */ +static int +csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session, + const char *uri, WT_CONFIG_ITEM *appcfg, WT_EXTRACTOR **customp) +{ + const CSV_EXTRACTOR *orig; + CSV_EXTRACTOR *csv_extractor; + long field_num; + + (void)session; /* Unused parameters */ + (void)uri; /* Unused parameters */ + + orig = (const CSV_EXTRACTOR *)extractor; + field_num = strtol(appcfg->str, NULL, 10); + if (field_num < 0 || field_num > INT_MAX) + return (EINVAL); + if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL) + return (errno); + + *csv_extractor = *orig; + csv_extractor->field_num = field_num; + *customp = (WT_EXTRACTOR *)csv_extractor; + return (0); +} + +/* + * csv_terminate -- + * Terminate is called to free the CSV and any associated memory. + */ +static int +csv_terminate(WT_EXTRACTOR *extractor, WT_SESSION *session) +{ + (void)session; /* Unused parameters */ + + /* Free the allocated memory. */ + free(extractor); + return (0); +} + +/* + * wiredtiger_extension_init -- + * WiredTiger CSV extraction extension. + */ +int +wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) +{ + CSV_EXTRACTOR *csv_extractor; + + (void)config; /* Unused parameters */ + + if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL) + return (errno); + + csv_extractor->extractor.extract = csv_extract; + csv_extractor->extractor.customize = csv_customize; + csv_extractor->extractor.terminate = csv_terminate; + csv_extractor->wt_api = connection->get_extension_api(connection); + + return (connection->add_extractor( + connection, "csv", (WT_EXTRACTOR *)csv_extractor, NULL)); +} |