summaryrefslogtreecommitdiff
path: root/navit/util.c
diff options
context:
space:
mode:
Diffstat (limited to 'navit/util.c')
-rw-r--r--navit/util.c381
1 files changed, 372 insertions, 9 deletions
diff --git a/navit/util.c b/navit/util.c
index 51bb4db8d..9398e316c 100644
--- a/navit/util.c
+++ b/navit/util.c
@@ -93,6 +93,245 @@ int navit_utf8_strcasecmp(const char *s1, const char *s2) {
return cmpres;
}
+/**
+ * @brief Trims all leading and trailing whitespace characters from a string.
+ *
+ * Whitespace characters are all up to and including 0x20.
+ *
+ * This function operates in-place, i.e. `s` will be modified.
+ *
+ * @param s The string to trim
+ */
+static void strtrim(char *s) {
+ char *tmp = g_strdup(s);
+ char *in = tmp;
+ while (strlen(in) && (in[0] <= 0x20))
+ in++;
+ while (strlen(in) && (in[strlen(in) - 1] <= 0x20))
+ in[strlen(in) - 1] = 0;
+ strcpy(s, in);
+ g_free(tmp);
+}
+
+/**
+ * @brief Parser states for `parse_for_systematic_comparison()`.
+ */
+enum parse_state {
+ parse_state_whitespace,
+ parse_state_numeric,
+ parse_state_alpha,
+};
+
+/**
+ * @brief Parses a string for systematic comparison.
+ *
+ * This is a helper function for `compare_name_systematic()`.
+ *
+ * The string is broken down into numeric and non-numeric parts. Whitespace characters are discarded
+ * unless they are surrounded by string characters, in which case the whole unit is treated as one
+ * string part. All strings are converted to lowercase and leading zeroes stripped from numbers.
+ *
+ * @param s The string to parse
+ *
+ * @return A buffer containing the parsed string, parts delimited by a null character, the last part
+ * followed by a double null character.
+ */
+static char * parse_for_systematic_comparison(const char *s) {
+ char *ret = g_malloc0(strlen(s) * 2 + 1);
+ const char *in = s;
+ char *out = ret;
+ char *part;
+ enum parse_state state = parse_state_whitespace;
+ int i = 0;
+ char c;
+
+ dbg(lvl_debug, "enter\n");
+
+ while (i < strlen(in)) {
+ c = in[i];
+ if ((c <= 0x20) || (c == ',') || (c == '-') || (c == '.') || (c == '/')) {
+ /* whitespace */
+ if (state == parse_state_numeric) {
+ part = g_malloc0(i + 1);
+ strncpy(part, in, i);
+ sprintf(part, "%d", atoi(part));
+ strcpy(out, part);
+ out += strlen(part) + 1;
+ dbg(lvl_debug, "part='%s'\n", part);
+ g_free(part);
+ in += i;
+ i = 1;
+ state = parse_state_whitespace;
+ } else
+ i++;
+ } else if ((c >= '0') && (c <= '9')) {
+ /* numeric */
+ if (state == parse_state_alpha) {
+ part = g_malloc0(i + 1);
+ strncpy(part, in, i);
+ strtrim(part);
+ strcpy(out, part);
+ out += strlen(part) + 1;
+ dbg(lvl_debug, "part='%s'\n", part);
+ g_free(part);
+ in += i;
+ i = 1;
+ } else
+ i++;
+ state = parse_state_numeric;
+ } else {
+ /* alpha */
+ if (state == parse_state_numeric) {
+ part = g_malloc0(i + 1);
+ strncpy(part, in, i);
+ sprintf(part, "%d", atoi(part));
+ strcpy(out, part);
+ out += strlen(part) + 1;
+ dbg(lvl_debug, "part='%s'\n", part);
+ g_free(part);
+ in += i;
+ i = 1;
+ } else
+ i++;
+ state = parse_state_alpha;
+ }
+ }
+
+ if (strlen(in) > 0) {
+ if (state == parse_state_numeric) {
+ part = g_malloc0(strlen(in) + 1);
+ strcpy(part, in);
+ sprintf(part, "%d", atoi(part));
+ strcpy(out, part);
+ dbg(lvl_debug, "part='%s'\n", part);
+ g_free(part);
+ } else if (state == parse_state_alpha) {
+ part = g_malloc0(strlen(in) + 1);
+ strcpy(part, in);
+ strtrim(part);
+ strcpy(out, part);
+ dbg(lvl_debug, "part='%s'\n", part);
+ g_free(part);
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * @brief Compares two name_systematic strings.
+ *
+ * A name_systematic string is typically used for road reference numbers (A 4, I-51, SP526). This
+ * function performs a fuzzy comparison: Each string is broken down into numeric and non-numeric parts.
+ * Then both strings are compared part by part. The following rules apply:
+ *
+ * \li Semicolons denote sequences of strings, and the best match between any pair of strings from `s1` and `s2` is
+ * returned.
+ * \li Whitespace bordering on a number is discarded.
+ * \li Whitespace surrounded by string characters is treated as one string with the surrounding characters.
+ * \li If one string has more parts than the other, the shorter string is padded with null parts.
+ * \li null equals null.
+ * \li null does not equal non-null.
+ * \li Numeric parts are compared as integers, hence `'042'` equals `'42'`.
+ * \li Comparison of string parts is case-insensitive.
+ *
+ * Partial matches are currently determined by determining each part of one string with each part of the other. Each
+ * part of one string that is matched by at least one part of the other increases the score. Order is currently not
+ * taken into account, i.e. `'42A'` and `'A-42A'` are both considered full (not partial) matches for `'A42'`. Future
+ * versions may change this.
+ *
+ * @param s1 The first string
+ * @param s2 The second string
+ *
+ * @return 0 if both strings match, nonzero if they do not. `MAX_MISMATCH` indicates a complete mismatch; values in
+ * between indicate partial matches (lower values correspond to better matches).
+ */
+int compare_name_systematic(const char *s1, const char *s2) {
+ int ret = MAX_MISMATCH;
+ int tmp;
+ int elements = 0, matches = 0;
+ char *l = NULL, *r = NULL, *l0, *r0;
+
+ if (!s1 || !s1[0]) {
+ if (!s2 || !s2[0])
+ return 0;
+ else
+ return MAX_MISMATCH;
+ } else if (!s2 || !s2[0])
+ return MAX_MISMATCH;
+
+ /* break up strings at semicolons and parse each separately, return 0 if any two match */
+ if (strchr(s1, ';')) {
+ l = g_strdup(s1);
+ for (l0 = strtok(l, ";"); l0; l0 = strtok(NULL, ";")) {
+ tmp = compare_name_systematic(l0, s2);
+ if (tmp < ret)
+ ret = tmp;
+ if (!ret)
+ break;
+ }
+ g_free(l);
+ return ret;
+ } else if (strchr(s2, ';')) {
+ r = g_strdup(s2);
+ for (r0 = strtok(r, ";"); r0; r0 = strtok(NULL, ";")) {
+ tmp = compare_name_systematic(s1, r0);
+ if (tmp < ret)
+ ret = tmp;
+ if (!ret)
+ break;
+ }
+ g_free(r);
+ return ret;
+ }
+
+ /* s1 and s2 are single strings (no semicolons) */
+ l0 = parse_for_systematic_comparison(s1);
+ r0 = parse_for_systematic_comparison(s2);
+
+ /* count left-hand elements and all left-hand elements matched by a right-hand element */
+ for (l = l0; l[0]; l += strlen(l) + 1) {
+ elements++;
+ for (r = r0; r[0]; r += strlen(r) + 1) {
+ if (atoi(l) || (l[0] == '0')) {
+ if ((atoi(r) || (r[0] == '0')) && (atoi(l) == atoi(r))) {
+ matches++;
+ break;
+ }
+ } else if (!strcasecmp(l, r)) {
+ matches++;
+ break;
+ }
+ }
+ }
+
+ /* same in the opposite direction */
+ for (r = r0; r[0]; r += strlen(r) + 1) {
+ elements++;
+ for (l = l0; l[0]; l += strlen(l) + 1) {
+ if (atoi(l) || (l[0] == '0')) {
+ if ((atoi(r) || (r[0] == '0')) && (atoi(l) == atoi(r))) {
+ matches++;
+ break;
+ }
+ } else if (!strcasecmp(l, r)) {
+ matches++;
+ break;
+ }
+ }
+ }
+
+ g_free(l0);
+ g_free(r0);
+
+ ret = ((elements - matches) * MAX_MISMATCH) / elements;
+
+ dbg(lvl_debug, "'%s' %s '%s', ret=%d",
+ s1, ret ? (ret == MAX_MISMATCH ? "does NOT match" : "PARTIALLY matches") : "matches", s2, ret);
+
+ return ret;
+}
+
static void hash_callback(gpointer key, gpointer value, gpointer user_data) {
GList **l=user_data;
*l=g_list_prepend(*l, value);
@@ -338,28 +577,152 @@ unsigned int iso8601_to_secs(char *iso8601) {
}
/**
+ * @brief Converts a `tm` structure to `time_t`
+ *
+ * Returns the value of type `time_t` that represents the UTC time described by the `tm` structure
+ * pointed to by `pt` (which may be modified).
+ *
+ * This function performs the reverse translation that `gmtime()` does. As this functionality is absent
+ * in the standard library, it is emulated by calling `mktime()`, converting its output into both GMT
+ * and local time, comparing the results and calling `mktime()` again with an input adjusted for the
+ * offset in the opposite direction. This ensures maximum portability.
+ *
+ * The values of the `tm_wday` and `tm_yday` members of `pt` are ignored, and the values of the other
+ * members are interpreted even if out of their valid ranges (see `struct tm`). For example, `tm_mday`
+ * may contain values above 31, which are interpreted accordingly as the days that follow the last day
+ * of the selected month.
+ *
+ * A call to this function automatically adjusts the values of the members of `pt` if they are off-range
+ * or—in the case of `tm_wday` and `tm_yday`—if their values are inconsistent with the other members.
+ *
+ */
+time_t mkgmtime(struct tm * pt) {
+ time_t ret;
+
+ /* Input, GMT and local time */
+ struct tm * pti, * pgt, * plt;
+
+ pti = g_memdup(pt, sizeof(struct tm));
+
+ ret = mktime(pti);
+
+ pgt = g_memdup(gmtime(&ret), sizeof(struct tm));
+ plt = g_memdup(localtime(&ret), sizeof(struct tm));
+
+ pti->tm_year = pt->tm_year - pgt->tm_year + plt->tm_year;
+ pti->tm_mon = pt->tm_mon - pgt->tm_mon + plt->tm_mon;
+ pti->tm_mday = pt->tm_mday - pgt->tm_mday + plt->tm_mday;
+ pti->tm_hour = pt->tm_hour - pgt->tm_hour + plt->tm_hour;
+ pti->tm_min = pt->tm_min - pgt->tm_min + plt->tm_min;
+ pti->tm_sec = pt->tm_sec - pgt->tm_sec + plt->tm_sec;
+
+ ret = mktime(pti);
+
+ dbg(lvl_debug, "time %ld (%02d-%02d-%02d %02d:%02d:%02d)\n", ret, pti->tm_year, pti->tm_mon, pti->tm_mday,
+ pti->tm_hour, pti->tm_min, pti->tm_sec);
+
+ g_free(pti);
+ g_free(pgt);
+ g_free(plt);
+
+ return ret;
+}
+
+/**
+ * @brief Converts an ISO 8601-style time string into `time_t`.
+ */
+time_t iso8601_to_time(char * iso8601) {
+ /* Date/time fields (YYYY-MM-DD-hh-mm-ss) */
+ int val[8];
+
+ int i = 0;
+
+ /* Start of next integer portion and current position */
+ char *start = iso8601, *pos = iso8601;
+
+ /* Time struct */
+ struct tm tm;
+
+ memset(&tm, 0, sizeof(struct tm));
+
+ while (*pos && i < 6) {
+ if (*pos < '0' || *pos > '9') {
+ val[i++] = atoi(start);
+ if (i == 6)
+ break;
+ pos++;
+ start = pos;
+ }
+ if (*pos)
+ pos++;
+ }
+ val[6] = 0;
+ val[7] = 0;
+ if (*pos && i == 6) {
+ if (pos[1] && pos[2] && (!pos[3] || pos[3] == ':')) {
+ val[6] = atoi(pos);
+ if (pos[3] == ':') {
+ pos += 3;
+ val[7] = (val[6] < 0) ? -atoi(pos) : atoi(pos);
+ }
+ } else if (pos[1] && pos[2] && pos[3] && pos[4]) {
+ val[6] = atoi(pos) / 100;
+ val[7] = atoi(pos) % 100;
+ }
+ }
+
+ tm.tm_year = val[0] - 1900;
+ tm.tm_mon = val[1] - 1;
+ tm.tm_mday = val[2];
+ tm.tm_hour = val[3] - val[6];
+ tm.tm_min = val[4] - val[7];
+ tm.tm_sec = val[5];
+
+ dbg(lvl_debug, "time %s (%02d-%02d-%02d %02d:%02d:%02d)\n", iso8601, tm.tm_year, tm.tm_mon, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ return mkgmtime(&tm);
+}
+
+/**
+ * @brief Converts time to ISO8601 format.
+ *
+ * The caller is responsible for freeing the return value of this function when it is no longer needed.
+ *
+ * @param time The time, as returned by `time()` and related functions
+ *
+ * @return Time in ISO8601 format
+ */
+char * time_to_iso8601(time_t time) {
+ char *timep=NULL;
+ char buffer[32];
+ struct tm *tm;
+
+ tm = gmtime(&time);
+ if (tm) {
+ strftime(buffer, sizeof(buffer), "%Y-%m-%dT%TZ", tm);
+ timep=g_strdup(buffer);
+ }
+ return timep;
+}
+
+/**
* @brief Outputs local system time in ISO 8601 format.
*
* @return Time in ISO 8601 format
*/
char *current_to_iso8601(void) {
- char *timep=NULL;
#ifdef HAVE_API_WIN32_BASE
+ char *timep=NULL;
SYSTEMTIME ST;
GetSystemTime(&ST);
timep=g_strdup_printf("%d-%02d-%02dT%02d:%02d:%02dZ",ST.wYear,ST.wMonth,ST.wDay,ST.wHour,ST.wMinute,ST.wSecond);
+ return timep;
#else
- char buffer[32];
time_t tnow;
- struct tm *tm;
tnow = time(0);
- tm = gmtime(&tnow);
- if (tm) {
- strftime(buffer, sizeof(buffer), "%Y-%m-%dT%TZ", tm);
- timep=g_strdup(buffer);
- }
+ return time_to_iso8601(tnow);
#endif
- return timep;
}