summaryrefslogtreecommitdiff
path: root/lang/sql/odbc/csvtable.c
diff options
context:
space:
mode:
Diffstat (limited to 'lang/sql/odbc/csvtable.c')
-rw-r--r--lang/sql/odbc/csvtable.c1616
1 files changed, 1616 insertions, 0 deletions
diff --git a/lang/sql/odbc/csvtable.c b/lang/sql/odbc/csvtable.c
new file mode 100644
index 00000000..112b19a5
--- /dev/null
+++ b/lang/sql/odbc/csvtable.c
@@ -0,0 +1,1616 @@
+/**
+ * @file csvtable.c
+ * SQLite extension module for mapping a CSV file as
+ * a read-only SQLite virtual table plus extension
+ * function to import a CSV file as a real table.
+ *
+ * 2012 July 27
+ *
+ * The author disclaims copyright to this source code. In place of
+ * a legal notice, here is a blessing:
+ *
+ * May you do good and not evil.
+ * May you find forgiveness for yourself and forgive others.
+ * May you share freely, never taking more than you give.
+ *
+ ********************************************************************
+ */
+
+#ifdef STANDALONE
+#include <sqlite3.h>
+#else
+#include <sqlite3ext.h>
+static SQLITE_EXTENSION_INIT1
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#endif
+
+/**
+ * @typedef csv_file
+ * @struct csv_file
+ * Structure to implement CSV file handle.
+ */
+
+typedef struct csv_file {
+ FILE *f; /**< CSV file */
+ char *sep; /**< column separator characters */
+ char *quot; /**< text quoting characters */
+ int isdos; /**< true, when DOS format detected */
+ int maxl; /**< max. capacity of line buffer */
+ char *line; /**< line buffer */
+ long pos0; /**< file position for rewind */
+ int maxc; /**< max. capacity of column buffer */
+ int ncols; /**< number of columns */
+ char **cols; /**< column buffer */
+} csv_file;
+
+/**
+ * @typedef csv_guess_fmt
+ * @struct csv_guess_fmt
+ * Info to guess CSV layout.
+ */
+
+typedef struct csv_guess_fmt {
+ int nlines;
+ int hist[256];
+} csv_guess_fmt;
+
+/**
+ * @typedef csv_vtab
+ * @struct csv_vtab
+ * Structure to describe a CSV virtual table.
+ */
+
+typedef struct csv_vtab {
+ sqlite3_vtab vtab; /**< SQLite virtual table */
+ csv_file *csv; /**< CSV file handle */
+ int convert; /**< convert flags */
+ char coltypes[1]; /**< column types */
+} csv_vtab;
+
+/**
+ * @typedef csv_cursor
+ * @struct csv_cursor
+ * Structure to describe CSV virtual table cursor.
+ */
+
+typedef struct {
+ sqlite3_vtab_cursor cursor; /**< SQLite virtual table cursor */
+ long pos; /**< CSV file position */
+} csv_cursor;
+
+/**
+ * Free dynamically allocated string buffer
+ * @param in input string pointer
+ */
+
+static void
+append_free(char **in)
+{
+ long *p = (long *) *in;
+
+ if (p) {
+ p -= 2;
+ sqlite3_free(p);
+ *in = 0;
+ }
+}
+
+/**
+ * Append a string to dynamically allocated string buffer
+ * with optional quoting
+ * @param in input string pointer
+ * @param append string to append
+ * @param quote quote character or NUL
+ * @result new string to be free'd with append_free()
+ */
+
+static char *
+append(char **in, char const *append, char quote)
+{
+ long *p = (long *) *in;
+ long len, maxlen, actlen;
+ int i;
+ char *pp;
+ int nappend = append ? strlen(append) : 0;
+
+ if (p) {
+ p -= 2;
+ maxlen = p[0];
+ actlen = p[1];
+ } else {
+ maxlen = actlen = 0;
+ }
+ len = nappend + actlen;
+ if (quote) {
+ len += 2;
+ for (i = 0; i < nappend; i++) {
+ if (append[i] == quote) {
+ len++;
+ }
+ }
+ } else if (!nappend) {
+ return *in;
+ }
+ if (len >= maxlen - 1) {
+ long *q;
+
+ maxlen = (len + 0x03ff) & (~0x3ff);
+ q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
+ if (!q) {
+ return 0;
+ }
+ if (!p) {
+ q[1] = 0;
+ }
+ p = q;
+ p[0] = maxlen;
+ *in = (char *) (p + 2);
+ }
+ pp = *in + actlen;
+ if (quote) {
+ *pp++ = quote;
+ for (i = 0; i < nappend; i++) {
+ *pp++ = append[i];
+ if (append[i] == quote) {
+ *pp++ = quote;
+ }
+ }
+ *pp++ = quote;
+ *pp = '\0';
+ } else {
+ if (nappend) {
+ memcpy(pp, append, nappend);
+ pp += nappend;
+ *pp = '\0';
+ }
+ }
+ p[1] = pp - *in;
+ return *in;
+}
+
+/**
+ * Strip off quotes given string.
+ * @param in string to be processed
+ * @result new string to be free'd with sqlite3_free()
+ */
+
+static char *
+unquote(char const *in)
+{
+ char c, *ret;
+ int i;
+
+ ret = sqlite3_malloc(strlen(in) + 1);
+ if (ret) {
+ c = in[0];
+ if ((c == '"') || (c == '\'')) {
+ i = strlen(in + 1);
+ if ((i > 0) && (in[i] == c)) {
+ strcpy(ret, in + 1);
+ ret[i - 1] = '\0';
+ return ret;
+ }
+ }
+ strcpy(ret, in);
+ }
+ return ret;
+}
+
+/**
+ * Map string to SQLite data type.
+ * @param type string to be mapped
+ * @result SQLITE_TEXT et.al.
+ */
+
+static int
+maptype(char const *type)
+{
+ int typelen = type ? strlen(type) : 0;
+
+ if ((typelen >= 3) &&
+ (strncasecmp(type, "integer", 7) == 0)) {
+ return SQLITE_INTEGER;
+ }
+ if ((typelen >= 6) &&
+ (strncasecmp(type, "double", 6) == 0)) {
+ return SQLITE_FLOAT;
+ }
+ if ((typelen >= 5) &&
+ (strncasecmp(type, "float", 5) == 0)) {
+ return SQLITE_FLOAT;
+ }
+ if ((typelen >= 4) &&
+ (strncasecmp(type, "real", 4) == 0)) {
+ return SQLITE_FLOAT;
+ }
+ return SQLITE_TEXT;
+}
+
+/**
+ * Convert and collapse white space in column names to underscore
+ * @param names string vector of column names
+ * @param ncols number of columns
+ */
+
+static void
+conv_names(char **names, int ncols)
+{
+ int i;
+ char *p, *q;
+ static const char ws[] = "\n\t\r\b\v ";
+
+ if (!names || ncols <= 0) {
+ return;
+ }
+ for (i = 0; i < ncols; i++) {
+ p = names[i];
+
+ while (*p) {
+ if (strchr(ws, *p)) {
+ *p++ = '_';
+ q = p;
+ while (*q && strchr(ws, *q)) {
+ ++q;
+ }
+ if (*q && q > p) {
+ strcpy(p, q);
+ }
+ continue;
+ }
+ ++p;
+ }
+ }
+}
+
+/**
+ * Make result data or parameter binding accoring to type
+ * @param ctx SQLite function context or NULL
+ * @param stmt SQLite statement or NULL
+ * @param idx parameter number, 1-based
+ * @param data string data
+ * @param len string length
+ * @param type SQLite type
+ */
+
+static void
+result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
+ char *data, int len, int type)
+{
+ char *endp;
+
+ if (!data) {
+ if (ctx) {
+ sqlite3_result_null(ctx);
+ } else {
+ sqlite3_bind_null(stmt, idx);
+ }
+ return;
+ }
+ if (type == SQLITE_INTEGER) {
+ sqlite_int64 val;
+#if defined(_WIN32) || defined(_WIN64)
+ char endc;
+
+ if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
+ if (ctx) {
+ sqlite3_result_int64(ctx, val);
+ } else {
+ sqlite3_bind_int64(stmt, idx, val);
+ }
+ return;
+ }
+#else
+ endp = 0;
+#ifdef __osf__
+ val = strtol(data, &endp, 0);
+#else
+ val = strtoll(data, &endp, 0);
+#endif
+ if (endp && (endp != data) && !*endp) {
+ if (ctx) {
+ sqlite3_result_int64(ctx, val);
+ } else {
+ sqlite3_bind_int64(stmt, idx, val);
+ }
+ return;
+ }
+#endif
+ } else if (type == SQLITE_FLOAT) {
+ double val;
+
+ endp = 0;
+ val = strtod(data, &endp);
+ if (endp && (endp != data) && !*endp) {
+ if (ctx) {
+ sqlite3_result_double(ctx, val);
+ } else {
+ sqlite3_bind_double(stmt, idx, val);
+ }
+ return;
+ }
+ }
+ if (ctx) {
+ sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
+ } else {
+ sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
+ }
+}
+
+/**
+ * Process one column of the current row
+ * @param ctx SQLite function context or NULL
+ * @param stmt SQLite statement or NULL
+ * @param idx parameter index, 1-based
+ * @param data string data
+ * @param type SQLite type
+ * @param conv conversion flags
+ */
+
+static int
+process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
+ char *data, int type, int conv)
+{
+ char c, *p;
+ const char flchars[] = "Ee+-.,0123456789";
+
+ if (!data) {
+ goto putdata;
+ }
+
+ /*
+ * Floating point number test,
+ * converts single comma to dot.
+ */
+ c = data[0];
+ if ((c != '\0') && strchr(flchars + 2, c)) {
+ p = data + 1;
+ while (*p && strchr(flchars, *p)) {
+ ++p;
+ }
+ if (*p == '\0') {
+ char *first = 0;
+ int n = 0;
+
+ p = data;
+ while (p) {
+ p = strchr(p, ',');
+ if (!p) {
+ break;
+ }
+ if (++n == 1) {
+ first = p;
+ }
+ ++p;
+ }
+ if (first) {
+ *first = '.';
+ goto putdata;
+ }
+ }
+ }
+ if (conv) {
+ char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
+
+ if (utf) {
+ p = utf;
+ while ((c = *data) != '\0') {
+ if (((conv & 10) == 10) && (c == '\\')) {
+ if (data[1] == 'q') {
+ *p++ = '\'';
+ data += 2;
+ continue;
+ }
+ }
+ if ((conv & 2) && (c == '\\')) {
+ char c2 = data[1];
+
+ switch (c2) {
+ case '\0':
+ goto convdone;
+ case 'n':
+ *p = '\n';
+ break;
+ case 't':
+ *p = '\t';
+ break;
+ case 'r':
+ *p = '\r';
+ break;
+ case 'f':
+ *p = '\f';
+ break;
+ case 'v':
+ *p = '\v';
+ break;
+ case 'b':
+ *p = '\b';
+ break;
+ case 'a':
+ *p = '\a';
+ break;
+ case '?':
+ *p = '\?';
+ break;
+ case '\'':
+ *p = '\'';
+ break;
+ case '"':
+ *p = '\"';
+ break;
+ case '\\':
+ *p = '\\';
+ break;
+ default:
+ *p++ = c;
+ *p = c2;
+ break;
+ }
+ p++;
+ data += 2;
+ continue;
+ }
+ if ((conv & 1) && (c & 0x80)) {
+ *p++ = 0xc0 | ((c >> 6) & 0x1f);
+ *p++ = 0x80 | (c & 0x3f);
+ } else {
+ *p++ = c;
+ }
+ data++;
+ }
+convdone:
+ *p = '\0';
+ result_or_bind(ctx, stmt, idx, utf, p - utf, type);
+ sqlite3_free(utf);
+ return SQLITE_OK;
+ } else {
+ if (ctx) {
+ sqlite3_result_error(ctx, "out of memory", -1);
+ }
+ return SQLITE_NOMEM;
+ }
+ }
+putdata:
+ result_or_bind(ctx, stmt, idx, data, -1, type);
+ return SQLITE_OK;
+}
+
+/**
+ * Open CSV file for reading and return handle to it.
+ * @param filename name of CSV file
+ * @param sep column separator characters or NULL
+ * @param quot string quote characters or NULL
+ * @result CSV file handle
+ */
+
+static csv_file *
+csv_open(const char *filename, const char *sep, const char *quot)
+{
+ FILE *f;
+ csv_file *csv;
+
+#ifdef _WIN32
+ f = fopen(filename, "rb");
+#else
+ f = fopen(filename, "r");
+#endif
+ if (!f) {
+ return 0;
+ }
+ csv = sqlite3_malloc(sizeof (csv_file));
+ if (!csv) {
+error0:
+ fclose(f);
+ return 0;
+ }
+ csv->f = f;
+ if (sep && sep[0]) {
+ csv->sep = sqlite3_malloc(strlen(sep) + 1);
+ if (!csv->sep) {
+error1:
+ sqlite3_free(csv);
+ goto error0;
+ }
+ strcpy(csv->sep, sep);
+ } else {
+ csv->sep = 0;
+ }
+ if (quot && quot[0]) {
+ csv->quot = sqlite3_malloc(strlen(quot) + 1);
+ if (!csv->quot) {
+ if (csv->sep) {
+ sqlite3_free(csv->sep);
+ }
+ goto error1;
+ }
+ strcpy(csv->quot, quot);
+ } else {
+ csv->quot = 0;
+ }
+ csv->isdos = 0;
+ csv->maxl = 0;
+ csv->line = 0;
+ csv->pos0 = 0;
+ csv->maxc = 0;
+ csv->ncols = 0;
+ csv->cols = 0;
+ return csv;
+}
+
+/**
+ * Close CSV file handle.
+ * @param csv CSV file handle
+ */
+
+static void
+csv_close(csv_file *csv)
+{
+ if (csv) {
+ if (csv->sep) {
+ sqlite3_free(csv->sep);
+ }
+ if (csv->quot) {
+ sqlite3_free(csv->quot);
+ }
+ if (csv->line) {
+ sqlite3_free(csv->line);
+ }
+ if (csv->cols) {
+ sqlite3_free(csv->cols);
+ }
+ if (csv->f) {
+ fclose(csv->f);
+ }
+ sqlite3_free(csv);
+ }
+}
+
+/**
+ * Test EOF on CSV file handle.
+ * @param csv CSV file handle
+ * @result true when file position is at EOF
+ */
+
+static int
+csv_eof(csv_file *csv)
+{
+ if (csv && csv->f) {
+ return feof(csv->f);
+ }
+ return 1;
+}
+
+/**
+ * Position CSV file handle.
+ * @param csv CSV file handle
+ * @param pos position to seek
+ * @result 0 on success, EOF on error
+ */
+
+static long
+csv_seek(csv_file *csv, long pos)
+{
+ if (csv && csv->f) {
+ return fseek(csv->f, pos, SEEK_SET);
+ }
+ return EOF;
+}
+
+/**
+ * Rewind CSV file handle.
+ * @param csv CSV file handle
+ */
+
+static void
+csv_rewind(csv_file *csv)
+{
+ if (csv && csv->f) {
+ csv_seek(csv, csv->pos0);
+ }
+}
+
+/**
+ * Return current position of CSV file handle.
+ * @param csv CSV file handle
+ * @result current file position
+ */
+
+static long
+csv_tell(csv_file *csv)
+{
+ if (csv && csv->f) {
+ return ftell(csv->f);
+ }
+ return EOF;
+}
+
+/**
+ * Read and process one line of CSV file handle.
+ * @param csv CSV file handle
+ * @param guess NULL or buffer for guessing file format
+ * @result number of columns on success, EOF on error
+ */
+
+static int
+csv_getline(csv_file *csv, csv_guess_fmt *guess)
+{
+ int i, index = 0, inq = 0, c, col;
+ char *p, *sep;
+
+ if (!csv || !csv->f) {
+ return EOF;
+ }
+ while (1) {
+ c = fgetc(csv->f);
+ if (c == EOF) {
+ if (index > 0) {
+ break;
+ }
+ return EOF;
+ }
+ if (c == '\0') {
+ continue;
+ }
+ if (c == '\r') {
+ int c2 = fgetc(csv->f);
+ c = '\n';
+
+ if (c2 == '\n') {
+ csv->isdos = 1;
+ } else if (c2 != EOF) {
+ ungetc(c2, csv->f);
+ }
+ }
+ /* check for DOS EOF (Ctrl-Z) */
+ if (csv->isdos && (c == '\032')) {
+ int c2 = fgetc(csv->f);
+
+ if (c2 == EOF) {
+ if (index > 0) {
+ break;
+ }
+ return EOF;
+ }
+ ungetc(c2, csv->f);
+ }
+ if (index >= csv->maxl - 1) {
+ int n = csv->maxl * 2;
+ char *line;
+
+ if (n <= 0) {
+ n = 4096;
+ }
+ line = sqlite3_malloc(n);
+ if (!line) {
+ return EOF;
+ }
+ if (csv->line) {
+ memcpy(line, csv->line, index);
+ sqlite3_free(csv->line);
+ }
+ csv->maxl = n;
+ csv->line = line;
+ }
+ csv->line[index++] = c;
+ if (csv->quot && (p = strchr(csv->quot, c))) {
+ if (inq) {
+ if (*p == inq) {
+ inq = 0;
+ }
+ } else {
+ inq = *p;
+ }
+ }
+ if (!inq && (c == '\n')) {
+ break;
+ }
+ }
+ if (guess) {
+ for (i = 0; i < index; i++) {
+ guess->hist[csv->line[i] & 0xFF] += 1;
+ }
+ guess->nlines += 1;
+ csv->ncols = 0;
+ return 0;
+ }
+
+ for (i = index - 1; i >= 0; i--) {
+ if (csv->line[i] != '\n') {
+ break;
+ }
+ }
+ index = i + 1;
+ csv->line[index] = '\0';
+ i = inq = col = 0;
+ sep = csv->sep ? csv->sep : ";";
+ if (!csv->cols) {
+ int n = 128;
+
+ csv->cols = sqlite3_malloc(sizeof (char *) * n);
+ if (!csv->cols) {
+ return EOF;
+ }
+ csv->maxc = n;
+ }
+ csv->cols[col++] = csv->line;
+ while (i < index) {
+ if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
+ if (inq) {
+ if (*p == inq) {
+ inq = 0;
+ }
+ } else {
+ inq = *p;
+ }
+ }
+ if (!inq && (p = strchr(sep, csv->line[i]))) {
+ p = csv->line + i;
+ *p = '\0';
+ if (col >= csv->maxc) {
+ int n = csv->maxc * 2;
+ char **cols;
+
+ cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
+ if (!cols) {
+ return EOF;
+ }
+ csv->cols = cols;
+ csv->maxc = n;
+ }
+ csv->cols[col++] = p + 1;
+ }
+ ++i;
+ }
+ csv->ncols = col;
+
+ /* strip off quotes */
+ if (csv->quot) {
+ for (i = 0; i < col; i++) {
+ if (*csv->cols[i]) {
+ p = strchr(csv->quot, *csv->cols[i]);
+ if (p) {
+ char *src, *dst;
+
+ c = *p;
+ csv->cols[i] += 1;
+ sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
+ if ((sep >= csv->cols[i]) && (*sep == c)) {
+ *sep = '\0';
+ }
+ /* collapse quote escape sequences */
+ src = csv->cols[i];
+ dst = 0;
+ while (*src) {
+ if ((*src == c) && (src[1] == c)) {
+ if (!dst) {
+ dst = src;
+ }
+ src++;
+ while (*src) {
+ *dst++ = *src++;
+ if (*src == c) {
+ --src;
+ break;
+ }
+ }
+ }
+ ++src;
+ }
+ if (dst) {
+ *dst++ = '\0';
+ }
+ }
+ }
+ }
+ }
+ return col;
+}
+
+/**
+ * Return number of columns of current row in CSV file.
+ * @param csv CSV file handle
+ * @result number of columns of current row
+ */
+
+static int
+csv_ncols(csv_file *csv)
+{
+ if (csv && csv->cols) {
+ return csv->ncols;
+ }
+ return 0;
+}
+
+/**
+ * Return nth column of current row in CSV file.
+ * @param csv CSV file handle
+ * @param n column number
+ * @result string pointer or NULL
+ */
+
+static char *
+csv_coldata(csv_file *csv, int n)
+{
+ if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
+ return csv->cols[n];
+ }
+ return 0;
+}
+
+/**
+ * Guess CSV layout of CSV file handle.
+ * @param csv CSV file handle
+ * @result 0 on succes, EOF on error
+ */
+
+static int
+csv_guess(csv_file *csv)
+{
+ csv_guess_fmt guess;
+ int i, n;
+ char *p, sep[32], quot[4];
+ const struct {
+ int c;
+ int min;
+ } sep_test[] = {
+ { ',', 2 },
+ { ';', 2 },
+ { '\t', 2 },
+ { ' ', 4 },
+ { '|', 2 }
+ };
+
+ if (!csv) {
+ return EOF;
+ }
+ memset(&guess, 0, sizeof (guess));
+ csv->pos0 = 0;
+ csv_rewind(csv);
+ for (i = n = 0; i < 10; i++) {
+ n = csv_getline(csv, &guess);
+ if (n == EOF) {
+ break;
+ }
+ }
+ csv_rewind(csv);
+ if (n && !i) {
+ return EOF;
+ }
+ p = quot;
+ n = '"';
+ if (guess.hist[n] > 1) {
+ *p++ = n;
+ }
+ n = '\'';
+ if (guess.hist[n] > 1) {
+ *p++ = n;
+ }
+ *p = '\0';
+ p = sep;
+ for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
+ if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
+ *p++ = sep_test[i].c;
+ }
+ }
+ *p = '\0';
+ if (quot[0]) {
+ p = sqlite3_malloc(strlen(quot) + 1);
+ if (p) {
+ strcpy(p, quot);
+ if (csv->quot) {
+ sqlite3_free(csv->quot);
+ }
+ csv->quot = p;
+ } else {
+ return EOF;
+ }
+ }
+ if (sep[0]) {
+ p = sqlite3_malloc(strlen(sep) + 1);
+ if (p) {
+ strcpy(p, sep);
+ if (csv->sep) {
+ sqlite3_free(csv->sep);
+ }
+ csv->sep = p;
+ } else {
+ return EOF;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Connect to virtual table
+ * @param db SQLite database pointer
+ * @param aux user specific pointer (unused)
+ * @param argc argument count
+ * @param argv argument vector
+ * @param vtabp pointer receiving virtual table pointer
+ * @param errp pointer receiving error messag
+ * @result SQLite error code
+ *
+ * Argument vector contains:
+ *
+ * argv[0] - module name<br>
+ * argv[1] - database name<br>
+ * argv[2] - table name (virtual table)<br>
+ * argv[3] - filename (required)<br>
+ * argv[4] - number, when non-zero use first line as column names,
+ * when negative use given type names (optional)<br>
+ * argv[5] - number, when non-zero, translate data (optional, see below)<br>
+ * argv[6] - column separator characters (optional)<br>
+ * argv[7] - string quoting characters (optional)<br>
+ * argv[8] - column/type name for first column (optional)<br>
+ * ..<br>
+ * argv[X] - column/type name for last column (optional)<br><br>
+ *
+ * Translation flags:
+ *
+ * 1 - convert ISO-8859-1 to UTF-8<br>
+ * 2 - perform backslash substitution<br>
+ * 4 - convert and collapse white-space in column names to underscore<br>
+ * 10 - convert \q to single quote, in addition to backslash substitution<br>
+ */
+
+static int
+csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
+ sqlite3_vtab **vtabp, char **errp)
+{
+ csv_file *csv;
+ int rc = SQLITE_ERROR, i, ncnames, row1;
+ char **cnames, *schema = 0, **nargv;
+ csv_vtab *vtab;
+
+ if (argc < 4) {
+ *errp = sqlite3_mprintf("input file name missing");
+ return SQLITE_ERROR;
+ }
+ nargv = sqlite3_malloc(sizeof (char *) * argc);
+ memset(nargv, 0, sizeof (char *) * argc);
+ for (i = 3; i < argc; i++) {
+ nargv[i] = unquote(argv[i]);
+ }
+ csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
+ (argc > 7) ? nargv[7] : 0);
+ if (!csv) {
+ *errp = sqlite3_mprintf("unable to open input file");
+cleanup:
+ append_free(&schema);
+ for (i = 3; i < argc; i++) {
+ if (nargv[i]) {
+ sqlite3_free(nargv[i]);
+ }
+ }
+ return rc;
+ }
+ if (!csv->sep && !csv->quot) {
+ csv_guess(csv);
+ }
+ csv->pos0 = 0;
+ row1 = 0;
+ if (argc > 4) {
+ row1 = strtol(nargv[4], 0, 10);
+ }
+ if (row1) {
+ /* use column names from 1st row */
+ csv_getline(csv, 0);
+ if (csv->ncols < 1) {
+ csv_close(csv);
+ *errp = sqlite3_mprintf("unable to get column names");
+ goto cleanup;
+ }
+ csv->pos0 = csv_tell(csv);
+ csv_rewind(csv);
+ ncnames = csv_ncols(csv);
+ cnames = csv->cols;
+ } else if (argc > 8) {
+ ncnames = argc - 8;
+ cnames = (char **) nargv + 8;
+ } else {
+ /* use number of columns from 1st row */
+ csv_getline(csv, 0);
+ if (csv->ncols < 1) {
+ csv_close(csv);
+ *errp = sqlite3_mprintf("unable to get column names");
+ goto cleanup;
+ }
+ csv_rewind(csv);
+ ncnames = csv_ncols(csv);
+ cnames = 0;
+ }
+ vtab = sqlite3_malloc(sizeof(csv_vtab) + ncnames);
+ if (!vtab) {
+ csv_close(csv);
+ *errp = sqlite3_mprintf("out of memory");
+ goto cleanup;
+ }
+ memset(vtab, 0, sizeof (*vtab));
+ vtab->convert = 0;
+ if (argc > 5) {
+ vtab->convert = strtol(nargv[5], 0, 10);
+ if (row1 && (vtab->convert & 4)) {
+ conv_names(cnames, ncnames);
+ }
+ }
+ vtab->csv = csv;
+ append(&schema, "CREATE TABLE x(", 0);
+ for (i = 0; i < ncnames; i++) {
+ vtab->coltypes[i] = SQLITE_TEXT;
+ if (!cnames || !cnames[i]) {
+ char colname[64];
+
+ sprintf(colname, "column_%d", i + 1);
+ append(&schema, colname, '"');
+ } else if (row1 > 0) {
+ append(&schema, cnames[i], '"');
+ } else if (row1 < 0) {
+ append(&schema, cnames[i], '"');
+ if (i + 8 < argc) {
+ char *type = nargv[i + 8];
+
+ append(&schema, " ", 0);
+ append(&schema, type, 0);
+ vtab->coltypes[i] = maptype(type);
+ }
+ } else {
+ char *type = cnames[i];
+
+ append(&schema, cnames[i], 0);
+ while (*type && !strchr(" \t", *type)) {
+ type++;
+ }
+ while (*type && strchr(" \t", *type)) {
+ type++;
+ }
+ vtab->coltypes[i] = maptype(type);
+ }
+ if (i < ncnames - 1) {
+ append(&schema, ",", 0);
+ }
+ }
+ append(&schema, ")", 0);
+ rc = sqlite3_declare_vtab(db, schema);
+ if (rc != SQLITE_OK) {
+ csv_close(csv);
+ sqlite3_free(vtab);
+ *errp = sqlite3_mprintf("table definition failed, error %d, "
+ "schema '%s'", rc, schema);
+ goto cleanup;
+ }
+ *vtabp = &vtab->vtab;
+ *errp = 0;
+ goto cleanup;
+}
+
+/**
+ * Create virtual table
+ * @param db SQLite database pointer
+ * @param aux user specific pointer (unused)
+ * @param argc argument count
+ * @param argv argument vector
+ * @param vtabp pointer receiving virtual table pointer
+ * @param errp pointer receiving error messag
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_create(sqlite3* db, void *aux, int argc,
+ const char *const *argv,
+ sqlite3_vtab **vtabp, char **errp)
+{
+ return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
+}
+
+/**
+ * Disconnect virtual table.
+ * @param vtab virtual table pointer
+ * @result always SQLITE_OK
+ */
+
+static int
+csv_vtab_disconnect(sqlite3_vtab *vtab)
+{
+ csv_vtab *tab = (csv_vtab *) vtab;
+
+ csv_close(tab->csv);
+ sqlite3_free(tab);
+ return SQLITE_OK;
+}
+
+/**
+ * Destroy virtual table.
+ * @param vtab virtual table pointer
+ * @result always SQLITE_OK
+ */
+
+static int
+csv_vtab_destroy(sqlite3_vtab *vtab)
+{
+ return csv_vtab_disconnect(vtab);
+}
+
+/**
+ * Determines information for filter function according to constraints.
+ * @param vtab virtual table
+ * @param info index/constraint iinformation
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
+{
+ return SQLITE_OK;
+}
+
+/**
+ * Open virtual table and return cursor.
+ * @param vtab virtual table pointer
+ * @param cursorp pointer receiving cursor pointer
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
+{
+ csv_cursor *cur = sqlite3_malloc(sizeof(*cur));
+ csv_vtab *tab = (csv_vtab *) vtab;
+
+ if (!cur) {
+ return SQLITE_ERROR;
+ }
+ cur->cursor.pVtab = vtab;
+ csv_rewind(tab->csv);
+ cur->pos = csv_tell(tab->csv);
+ *cursorp = &cur->cursor;
+ return SQLITE_OK;
+}
+
+/**
+ * Close virtual table cursor.
+ * @param cursor cursor pointer
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_close(sqlite3_vtab_cursor *cursor)
+{
+ sqlite3_free(cursor);
+ return SQLITE_OK;
+}
+
+/**
+ * Retrieve next row from virtual table cursor
+ * @param cursor virtual table cursor
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_next(sqlite3_vtab_cursor *cursor)
+{
+ csv_cursor *cur = (csv_cursor *) cursor;
+ csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
+
+ cur->pos = csv_tell(tab->csv);
+ csv_getline(tab->csv, 0);
+ return SQLITE_OK;
+}
+
+/**
+ * Filter function for virtual table.
+ * @param cursor virtual table cursor
+ * @param idxNum unused (always 0)
+ * @param idxStr unused
+ * @param argc number arguments (unused, 0)
+ * @param argv argument (nothing)
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
+ const char *idxStr, int argc, sqlite3_value **argv)
+{
+ csv_cursor *cur = (csv_cursor *) cursor;
+ csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
+
+ csv_rewind(tab->csv);
+ return csv_vtab_next(cursor);
+}
+
+/**
+ * Return end of table state of virtual table cursor
+ * @param cursor virtual table cursor
+ * @result true/false
+ */
+
+static int
+csv_vtab_eof(sqlite3_vtab_cursor *cursor)
+{
+ csv_cursor *cur = (csv_cursor *) cursor;
+ csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
+
+ return csv_eof(tab->csv);
+}
+
+/**
+ * Return column data of virtual table.
+ * @param cursor virtual table cursor
+ * @param ctx SQLite function context
+ * @param n column index
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
+{
+ csv_cursor *cur = (csv_cursor *) cursor;
+ csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
+ char *data = csv_coldata(tab->csv, n);
+
+ return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
+}
+
+/**
+ * Return current rowid of virtual table cursor
+ * @param cursor virtual table cursor
+ * @param rowidp value buffer to receive current rowid
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
+{
+ csv_cursor *cur = (csv_cursor *) cursor;
+
+ *rowidp = cur->pos;
+ return SQLITE_OK;
+}
+
+#if (SQLITE_VERSION_NUMBER > 3004000)
+
+/**
+ * Rename virtual table.
+ * @param newname new name for table
+ * @result SQLite error code
+ */
+
+static int
+csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
+{
+ return SQLITE_OK;
+}
+
+#endif
+
+/**
+ * SQLite module descriptor.
+ */
+
+static const sqlite3_module csv_vtab_mod = {
+ 1, /* iVersion */
+ csv_vtab_create, /* xCreate */
+ csv_vtab_connect, /* xConnect */
+ csv_vtab_bestindex, /* xBestIndex */
+ csv_vtab_disconnect, /* xDisconnect */
+ csv_vtab_destroy, /* xDestroy */
+ csv_vtab_open, /* xOpen */
+ csv_vtab_close, /* xClose */
+ csv_vtab_filter, /* xFilter */
+ csv_vtab_next, /* xNext */
+ csv_vtab_eof, /* xEof */
+ csv_vtab_column, /* xColumn */
+ csv_vtab_rowid, /* xRowid */
+ 0, /* xUpdate */
+ 0, /* xBegin */
+ 0, /* xSync */
+ 0, /* xCommit */
+ 0, /* xRollback */
+ 0, /* xFindFunction */
+#if (SQLITE_VERSION_NUMBER > 3004000)
+ csv_vtab_rename, /* xRename */
+#endif
+};
+
+/**
+ * Import CSV file as table into database
+ * @param ctx SQLite function context
+ * @param argc number of arguments
+ * @param argv argument vector
+ *
+ * Argument vector contains:
+ *
+ * argv[0] - name of table to create (required)<br>
+ * argv[1] - filename (required)<br>
+ * argv[2] - number, when non-zero use first line as column names,
+ * when negative use given type names (optional)<br>
+ * argv[3] - number, when non-zero, translate data (optional, see below)<br>
+ * argv[4] - column separator characters (optional)<br>
+ * argv[5] - string quoting characters (optional)<br>
+ * argv[6] - column/type name for first column (optional)<br>
+ * ..<br>
+ * argv[X] - column/type name for last column (optional)<br><br>
+ *
+ * Translation flags:
+ *
+ * 1 - convert ISO-8859-1 to UTF-8<br>
+ * 2 - perform backslash substitution<br>
+ * 4 - convert and collapse white-space in column names to underscore<br>
+ * 10 - convert \q to single quote, in addition to backslash substitution<br>
+ */
+
+static void
+csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
+{
+ csv_file *csv;
+ int rc, i, ncnames, row1, convert = 0, useargs = 0;
+ char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
+ sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
+ sqlite3_stmt *stmt = 0;
+
+ if (argc < 2) {
+ sqlite3_result_error(ctx, "need at least 2 arguments", -1);
+ return;
+ }
+ tname = (char *) sqlite3_value_text(argv[0]);
+ if (!tname) {
+ sqlite3_result_error(ctx, "table name is NULL", -1);
+ return;
+ }
+ fname = (char *) sqlite3_value_text(argv[1]);
+ if (!fname) {
+ sqlite3_result_error(ctx, "file name is NULL", -1);
+ return;
+ }
+ csv = csv_open(fname,
+ (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
+ (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
+ if (!csv) {
+ sqlite3_result_error(ctx, "unable to open input file", -1);
+cleanup:
+ if (stmt) {
+ sqlite3_finalize(stmt);
+ }
+ append_free(&sql);
+ if (coltypes) {
+ sqlite3_free(coltypes);
+ }
+ if (csv) {
+ csv_close(csv);
+ }
+ return;
+ }
+ if (!csv->sep && !csv->quot) {
+ csv_guess(csv);
+ }
+ csv->pos0 = 0;
+ row1 = 0;
+ if (argc > 2) {
+ row1 = sqlite3_value_int(argv[2]);
+ }
+ if (row1) {
+ /* use column names from 1st row */
+ csv_getline(csv, 0);
+ if (csv->ncols < 1) {
+ sqlite3_result_error(ctx, "unable to get column names", -1);
+ goto cleanup;
+ }
+ csv->pos0 = csv_tell(csv);
+ csv_rewind(csv);
+ ncnames = csv_ncols(csv);
+ cnames = csv->cols;
+ } else if (argc > 6) {
+ ncnames = argc - 6;
+ cnames = 0;
+ useargs = 1;
+ } else {
+ /* use number of columns from 1st row */
+ csv_getline(csv, 0);
+ if (csv->ncols < 1) {
+ sqlite3_result_error(ctx, "unable to get column names", -1);
+ goto cleanup;
+ }
+ csv_rewind(csv);
+ ncnames = csv_ncols(csv);
+ cnames = 0;
+ }
+ convert = 0;
+ if (argc > 3) {
+ convert = sqlite3_value_int(argv[3]);
+ if (row1 && (convert & 4)) {
+ conv_names(cnames, ncnames);
+ }
+ }
+ /* test if table exists */
+ append(&sql, "PRAGMA table_info(", 0);
+ append(&sql, tname, '"');
+ append(&sql, ")", 0);
+ if (!sql) {
+oom:
+ sqlite3_result_error(ctx, "out of memory", -1);
+ goto cleanup;
+ }
+ rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
+ append_free(&sql);
+ if (rc != SQLITE_OK) {
+prepfail:
+ sqlite3_result_error(ctx, "prepare failed", -1);
+ goto cleanup;
+ }
+ /* find number of colums */
+ i = 0;
+ rc = sqlite3_step(stmt);
+ while (rc == SQLITE_ROW) {
+ i++;
+ rc = sqlite3_step(stmt);
+ }
+ if (rc != SQLITE_DONE) {
+selfail:
+ sqlite3_result_error(ctx, "select failed", -1);
+ goto cleanup;
+ }
+ if (i > 0) {
+ /* get column types */
+ sqlite3_reset(stmt);
+ ncnames = i;
+ coltypes = sqlite3_malloc(ncnames);
+ if (!coltypes) {
+ goto oom;
+ }
+ rc = sqlite3_step(stmt);
+ i = 0;
+ while (rc == SQLITE_ROW) {
+ coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
+ rc = sqlite3_step(stmt);
+ }
+ if (rc != SQLITE_DONE) {
+ goto selfail;
+ }
+ } else {
+ /* create new table */
+ sqlite3_finalize(stmt);
+ stmt = 0;
+ coltypes = sqlite3_malloc(ncnames);
+ if (!coltypes) {
+ goto oom;
+ }
+ append(&sql, "CREATE TABLE ", 0);
+ append(&sql, tname, '"');
+ append(&sql, "(", 0);
+ for (i = 0; i < ncnames; i++) {
+ char colname[64];
+
+ coltypes[i] = SQLITE_TEXT;
+ if (useargs) {
+ char *type = (char *) sqlite3_value_text(argv[i + 6]);
+
+ if (!type) {
+ goto defcol;
+ }
+ append(&sql, type, 0);
+ while (*type && !strchr(" \t", *type)) {
+ type++;
+ }
+ while (*type && strchr(" \t", *type)) {
+ type++;
+ }
+ coltypes[i] = maptype(type);
+ } else if (!cnames || !cnames[i]) {
+defcol:
+ sprintf(colname, "column_%d", i + 1);
+ append(&sql, colname, '"');
+ } else if (row1 > 0) {
+ append(&sql, cnames[i], '"');
+ } else if (row1 < 0) {
+ append(&sql, cnames[i], '"');
+ if (i + 6 < argc) {
+ char *type = (char *) sqlite3_value_text(argv[i + 6]);
+
+ if (type) {
+ append(&sql, " ", 0);
+ append(&sql, type, 0);
+ coltypes[i] = maptype(type);
+ }
+ }
+ }
+ if (i < ncnames - 1) {
+ append(&sql, ",", 0);
+ }
+ }
+ append(&sql, ")", 0);
+ rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
+ if (rc != SQLITE_OK) {
+ goto prepfail;
+ }
+ rc = sqlite3_step(stmt);
+ if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
+ sqlite3_result_error(ctx, "create table failed", -1);
+ goto cleanup;
+ }
+ append_free(&sql);
+ }
+ sqlite3_finalize(stmt);
+ stmt = 0;
+ /* make INSERT statement */
+ append(&sql, "INSERT INTO ", 0);
+ append(&sql, tname, '"');
+ append(&sql, " VALUES(", 0);
+ for (i = 0; i < ncnames; i++) {
+ append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
+ }
+ rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
+ if (rc != SQLITE_OK) {
+ goto prepfail;
+ }
+ append_free(&sql);
+ /* import the CSV file */
+ row1 = 0;
+ while (csv_getline(csv, 0) != EOF) {
+ for (i = 0; i < ncnames; i++) {
+ char *data = csv_coldata(csv, i);
+
+ rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
+ if (rc != SQLITE_OK) {
+ goto inserr;
+ }
+ }
+ rc = sqlite3_step(stmt);
+ if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
+ if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
+inserr:
+ sqlite3_result_error(ctx, "insert failed", -1);
+ goto cleanup;
+ }
+ } else {
+ row1++;
+ }
+ sqlite3_reset(stmt);
+ }
+ sqlite3_result_int(ctx, row1);
+ goto cleanup;
+}
+
+/**
+ * Module initializer creating SQLite functions and modules
+ * @param db database pointer
+ * @result SQLite error code
+ */
+
+#ifndef STANDALONE
+static
+#endif
+int
+csv_vtab_init(sqlite3 *db)
+{
+ sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
+ (void *) db, csv_import_func, 0, 0);
+ return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
+}
+
+#ifndef STANDALONE
+
+/**
+ * Initializer for SQLite extension load mechanism.
+ * @param db SQLite database pointer
+ * @param errmsg pointer receiving error message
+ * @param api SQLite API routines
+ * @result SQLite error code
+ */
+
+int
+sqlite3_extension_init(sqlite3 *db, char **errmsg,
+ const sqlite3_api_routines *api)
+{
+ SQLITE_EXTENSION_INIT2(api);
+ return csv_vtab_init(db);
+}
+
+#endif