1 files changed, 372 insertions, 9 deletions
diff --git a/navit/util.c b/navit/util.c
index 51bb4db8d..9398e316c 100644
--- a/navit/util.c
+++ b/navit/util.c
@@ -93,6 +93,245 @@ int navit_utf8_strcasecmp(const char *s1, const char *s2) {
     return cmpres;
 }
 
+/**
+ * @brief Trims all leading and trailing whitespace characters from a string.
+ *
+ * Whitespace characters are all up to and including 0x20.
+ *
+ * This function operates in-place, i.e. `s` will be modified.
+ *
+ * @param s The string to trim
+ */
+static void strtrim(char *s) {
+    char *tmp = g_strdup(s);
+    char *in = tmp;
+    while (strlen(in) && (in[0] <= 0x20))
+        in++;
+    while (strlen(in) && (in[strlen(in) - 1] <= 0x20))
+        in[strlen(in) - 1] = 0;
+    strcpy(s, in);
+    g_free(tmp);
+}
+
+/**
+ * @brief Parser states for `parse_for_systematic_comparison()`.
+ */
+enum parse_state {
+    parse_state_whitespace,
+    parse_state_numeric,
+    parse_state_alpha,
+};
+
+/**
+ * @brief Parses a string for systematic comparison.
+ *
+ * This is a helper function for `compare_name_systematic()`.
+ *
+ * The string is broken down into numeric and non-numeric parts. Whitespace characters are discarded
+ * unless they are surrounded by string characters, in which case the whole unit is treated as one
+ * string part. All strings are converted to lowercase and leading zeroes stripped from numbers.
+ *
+ * @param s The string to parse
+ *
+ * @return A buffer containing the parsed string, parts delimited by a null character, the last part
+ * followed by a double null character.
+ */
+static char * parse_for_systematic_comparison(const char *s) {
+    char *ret = g_malloc0(strlen(s) * 2 + 1);
+    const char *in = s;
+    char *out = ret;
+    char *part;
+    enum parse_state state = parse_state_whitespace;
+    int i = 0;
+    char c;
+
+    dbg(lvl_debug, "enter\n");
+
+    while (i < strlen(in)) {
+        c = in[i];
+        if ((c <= 0x20) || (c == ',') || (c == '-') || (c == '.') || (c == '/')) {
+            /* whitespace */
+            if (state == parse_state_numeric) {
+                part = g_malloc0(i + 1);
+                strncpy(part, in, i);
+                sprintf(part, "%d", atoi(part));
+                strcpy(out, part);
+                out += strlen(part) + 1;
+                dbg(lvl_debug, "part='%s'\n", part);
+                g_free(part);
+                in += i;
+                i = 1;
+                state = parse_state_whitespace;
+            } else
+                i++;
+        } else if ((c >= '0') && (c <= '9')) {
+            /* numeric */
+            if (state == parse_state_alpha) {
+                part = g_malloc0(i + 1);
+                strncpy(part, in, i);
+                strtrim(part);
+                strcpy(out, part);
+                out += strlen(part) + 1;
+                dbg(lvl_debug, "part='%s'\n", part);
+                g_free(part);
+                in += i;
+                i = 1;
+            } else
+                i++;
+            state = parse_state_numeric;
+        } else {
+            /* alpha */
+            if (state == parse_state_numeric) {
+                part = g_malloc0(i + 1);
+                strncpy(part, in, i);
+                sprintf(part, "%d", atoi(part));
+                strcpy(out, part);
+                out += strlen(part) + 1;
+                dbg(lvl_debug, "part='%s'\n", part);
+                g_free(part);
+                in += i;
+                i = 1;
+            } else
+                i++;
+            state = parse_state_alpha;
+        }
+    }
+
+    if (strlen(in) > 0) {
+        if (state == parse_state_numeric) {
+            part = g_malloc0(strlen(in) + 1);
+            strcpy(part, in);
+            sprintf(part, "%d", atoi(part));
+            strcpy(out, part);
+            dbg(lvl_debug, "part='%s'\n", part);
+            g_free(part);
+        } else if (state == parse_state_alpha) {
+            part = g_malloc0(strlen(in) + 1);
+            strcpy(part, in);
+            strtrim(part);
+            strcpy(out, part);
+            dbg(lvl_debug, "part='%s'\n", part);
+            g_free(part);
+        }
+    }
+
+    return ret;
+}
+
+/**
+ * @brief Compares two name_systematic strings.
+ *
+ * A name_systematic string is typically used for road reference numbers (A 4, I-51, SP526). This
+ * function performs a fuzzy comparison: Each string is broken down into numeric and non-numeric parts.
+ * Then both strings are compared part by part. The following rules apply:
+ *
+ * \li Semicolons denote sequences of strings, and the best match between any pair of strings from `s1` and `s2` is
+ * returned.
+ * \li Whitespace bordering on a number is discarded.
+ * \li Whitespace surrounded by string characters is treated as one string with the surrounding characters.
+ * \li If one string has more parts than the other, the shorter string is padded with null parts.
+ * \li null equals null.
+ * \li null does not equal non-null.
+ * \li Numeric parts are compared as integers, hence `'042'` equals `'42'`.
+ * \li Comparison of string parts is case-insensitive.
+ *
+ * Partial matches are currently determined by determining each part of one string with each part of the other. Each
+ * part of one string that is matched by at least one part of the other increases the score. Order is currently not
+ * taken into account, i.e. `'42A'` and `'A-42A'` are both considered full (not partial) matches for `'A42'`. Future
+ * versions may change this.
+ *
+ * @param s1 The first string
+ * @param s2 The second string
+ *
+ * @return 0 if both strings match, nonzero if they do not. `MAX_MISMATCH` indicates a complete mismatch; values in
+ * between indicate partial matches (lower values correspond to better matches).
+ */
+int compare_name_systematic(const char *s1, const char *s2) {
+    int ret = MAX_MISMATCH;
+    int tmp;
+    int elements = 0, matches = 0;
+    char *l = NULL, *r = NULL, *l0, *r0;
+
+    if (!s1 || !s1[0]) {
+        if (!s2 || !s2[0])
+            return 0;
+        else
+            return MAX_MISMATCH;
+    } else if (!s2 || !s2[0])
+        return MAX_MISMATCH;
+
+    /* break up strings at semicolons and parse each separately, return 0 if any two match */
+    if (strchr(s1, ';')) {
+        l = g_strdup(s1);
+        for (l0 = strtok(l, ";"); l0; l0 = strtok(NULL, ";")) {
+            tmp = compare_name_systematic(l0, s2);
+            if (tmp < ret)
+                ret = tmp;
+            if (!ret)
+                break;
+        }
+        g_free(l);
+        return ret;
+    } else if (strchr(s2, ';')) {
+        r = g_strdup(s2);
+        for (r0 = strtok(r, ";"); r0; r0 = strtok(NULL, ";")) {
+            tmp = compare_name_systematic(s1, r0);
+            if (tmp < ret)
+                ret = tmp;
+            if (!ret)
+                break;
+        }
+        g_free(r);
+        return ret;
+    }
+
+    /* s1 and s2 are single strings (no semicolons) */
+    l0 = parse_for_systematic_comparison(s1);
+    r0 = parse_for_systematic_comparison(s2);
+
+    /* count left-hand elements and all left-hand elements matched by a right-hand element */
+    for (l = l0; l[0]; l += strlen(l) + 1) {
+        elements++;
+        for (r = r0; r[0]; r += strlen(r) + 1) {
+            if (atoi(l) || (l[0] == '0')) {
+                if ((atoi(r) || (r[0] == '0')) && (atoi(l) == atoi(r))) {
+                    matches++;
+                    break;
+                }
+            } else if (!strcasecmp(l, r)) {
+                matches++;
+                break;
+            }
+        }
+    }
+
+    /* same in the opposite direction */
+    for (r = r0; r[0]; r += strlen(r) + 1) {
+        elements++;
+        for (l = l0; l[0]; l += strlen(l) + 1) {
+            if (atoi(l) || (l[0] == '0')) {
+                if ((atoi(r) || (r[0] == '0')) && (atoi(l) == atoi(r))) {
+                    matches++;
+                    break;
+                }
+            } else if (!strcasecmp(l, r)) {
+                matches++;
+                break;
+            }
+        }
+    }
+
+    g_free(l0);
+    g_free(r0);
+
+    ret = ((elements - matches) * MAX_MISMATCH) / elements;
+
+    dbg(lvl_debug, "'%s' %s '%s', ret=%d",
+        s1, ret ? (ret == MAX_MISMATCH ? "does NOT match" : "PARTIALLY matches") : "matches", s2, ret);
+
+    return ret;
+}
+
 static void hash_callback(gpointer key, gpointer value, gpointer user_data) {
     GList **l=user_data;
     *l=g_list_prepend(*l, value);
@@ -338,28 +577,152 @@ unsigned int iso8601_to_secs(char *iso8601) {
 }
 
 /**
+ * @brief Converts a `tm` structure to `time_t`
+ *
+ * Returns the value of type `time_t` that represents the UTC time described by the `tm` structure
+ * pointed to by `pt` (which may be modified).
+ *
+ * This function performs the reverse translation that `gmtime()` does. As this functionality is absent
+ * in the standard library, it is emulated by calling `mktime()`, converting its output into both GMT
+ * and local time, comparing the results and calling `mktime()` again with an input adjusted for the
+ * offset in the opposite direction. This ensures maximum portability.
+ *
+ * The values of the `tm_wday` and `tm_yday` members of `pt` are ignored, and the values of the other
+ * members are interpreted even if out of their valid ranges (see `struct tm`). For example, `tm_mday`
+ * may contain values above 31, which are interpreted accordingly as the days that follow the last day
+ * of the selected month.
+ *
+ * A call to this function automatically adjusts the values of the members of `pt` if they are off-range
+ * or—in the case of `tm_wday` and `tm_yday`—if their values are inconsistent with the other members.
+ *
+ */
+time_t mkgmtime(struct tm * pt) {
+    time_t ret;
+
+    /* Input, GMT and local time */
+    struct tm * pti, * pgt, * plt;
+
+    pti = g_memdup(pt, sizeof(struct tm));
+
+    ret = mktime(pti);
+
+    pgt = g_memdup(gmtime(&ret), sizeof(struct tm));
+    plt = g_memdup(localtime(&ret), sizeof(struct tm));
+
+    pti->tm_year = pt->tm_year - pgt->tm_year + plt->tm_year;
+    pti->tm_mon = pt->tm_mon - pgt->tm_mon + plt->tm_mon;
+    pti->tm_mday = pt->tm_mday - pgt->tm_mday + plt->tm_mday;
+    pti->tm_hour = pt->tm_hour - pgt->tm_hour + plt->tm_hour;
+    pti->tm_min = pt->tm_min - pgt->tm_min + plt->tm_min;
+    pti->tm_sec = pt->tm_sec - pgt->tm_sec + plt->tm_sec;
+
+    ret = mktime(pti);
+
+    dbg(lvl_debug, "time %ld (%02d-%02d-%02d %02d:%02d:%02d)\n", ret, pti->tm_year, pti->tm_mon, pti->tm_mday,
+        pti->tm_hour, pti->tm_min, pti->tm_sec);
+
+    g_free(pti);
+    g_free(pgt);
+    g_free(plt);
+
+    return ret;
+}
+
+/**
+ * @brief Converts an ISO 8601-style time string into `time_t`.
+ */
+time_t iso8601_to_time(char * iso8601) {
+    /* Date/time fields (YYYY-MM-DD-hh-mm-ss) */
+    int val[8];
+
+    int i = 0;
+
+    /* Start of next integer portion and current position */
+    char *start = iso8601, *pos = iso8601;
+
+    /* Time struct */
+    struct tm tm;
+
+    memset(&tm, 0, sizeof(struct tm));
+
+    while (*pos && i < 6) {
+        if (*pos < '0' || *pos > '9') {
+            val[i++] = atoi(start);
+            if (i == 6)
+                break;
+            pos++;
+            start = pos;
+        }
+        if (*pos)
+            pos++;
+    }
+    val[6] = 0;
+    val[7] = 0;
+    if (*pos && i == 6) {
+        if (pos[1] && pos[2] && (!pos[3] || pos[3] == ':')) {
+            val[6] = atoi(pos);
+            if (pos[3] == ':') {
+                pos += 3;
+                val[7] = (val[6] < 0) ? -atoi(pos) : atoi(pos);
+            }
+        } else if (pos[1] && pos[2] && pos[3] && pos[4]) {
+            val[6] = atoi(pos) / 100;
+            val[7] = atoi(pos) % 100;
+        }
+    }
+
+    tm.tm_year = val[0] - 1900;
+    tm.tm_mon = val[1] - 1;
+    tm.tm_mday = val[2];
+    tm.tm_hour = val[3] - val[6];
+    tm.tm_min = val[4] - val[7];
+    tm.tm_sec = val[5];
+
+    dbg(lvl_debug, "time %s (%02d-%02d-%02d %02d:%02d:%02d)\n", iso8601, tm.tm_year, tm.tm_mon, tm.tm_mday,
+        tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+    return mkgmtime(&tm);
+}
+
+/**
+ * @brief Converts time to ISO8601 format.
+ *
+ * The caller is responsible for freeing the return value of this function when it is no longer needed.
+ *
+ * @param time The time, as returned by `time()` and related functions
+ *
+ * @return Time in ISO8601 format
+ */
+char * time_to_iso8601(time_t time) {
+    char *timep=NULL;
+    char buffer[32];
+    struct tm *tm;
+
+    tm = gmtime(&time);
+    if (tm) {
+        strftime(buffer, sizeof(buffer), "%Y-%m-%dT%TZ", tm);
+        timep=g_strdup(buffer);
+    }
+    return timep;
+}
+
+/**
  * @brief Outputs local system time in ISO 8601 format.
  *
  * @return Time in ISO 8601 format
  */
 char *current_to_iso8601(void) {
-    char *timep=NULL;
 #ifdef HAVE_API_WIN32_BASE
+    char *timep=NULL;
     SYSTEMTIME ST;
     GetSystemTime(&ST);
     timep=g_strdup_printf("%d-%02d-%02dT%02d:%02d:%02dZ",ST.wYear,ST.wMonth,ST.wDay,ST.wHour,ST.wMinute,ST.wSecond);
+    return timep;
 #else
-    char buffer[32];
     time_t tnow;
-    struct tm *tm;
     tnow = time(0);
-    tm = gmtime(&tnow);
-    if (tm) {
-        strftime(buffer, sizeof(buffer), "%Y-%m-%dT%TZ", tm);
-        timep=g_strdup(buffer);
-    }
+    return time_to_iso8601(tnow);
 #endif
-    return timep;
 }