diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2019-06-09 18:51:27 +0200 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2019-09-10 00:22:57 +0200 |
commit | 2a013628112417f61be8d27d4c7796da9e3d12e4 (patch) | |
tree | f5198c8ccd0165a69d9309ff8b736cf31501e444 | |
parent | 370e3b17221caaa35a97202e9a88e74da8d15200 (diff) | |
download | tracker-2a013628112417f61be8d27d4c7796da9e3d12e4.tar.gz |
libtracker-data: Implement BASE
It is meant to affect all relative IRIs being parsed.
-rw-r--r-- | src/libtracker-common/tracker-utils.c | 146 | ||||
-rw-r--r-- | src/libtracker-common/tracker-utils.h | 2 | ||||
-rw-r--r-- | src/libtracker-data/tracker-sparql.c | 33 |
3 files changed, 177 insertions, 4 deletions
diff --git a/src/libtracker-common/tracker-utils.c b/src/libtracker-common/tracker-utils.c
index f6fb89348..530bb4a70 100644
--- a/src/libtracker-common/tracker-utils.c
+++ b/src/libtracker-common/tracker-utils.c
@@ -323,3 +323,149 @@ tracker_unescape_unichars (const gchar *str,
 
 	return g_string_free (copy, FALSE);
 }
+
+/* Splits the absolute URI @uri into a newly allocated @base (scheme plus
+ * authority, if any) and @rel_path, which points into @uri at the path
+ * that follows. Returns FALSE, leaving both outputs untouched, if @uri
+ * does not start with a valid scheme followed by ':'.
+ */
+gboolean
+parse_abs_uri (const gchar  *uri,
+               gchar       **base,
+               const gchar **rel_path)
+{
+	const gchar *loc, *end;
+
+	end = &uri[strlen (uri)];
+	loc = uri;
+
+	if (!g_ascii_isalpha (loc[0]))
+		return FALSE;
+
+	/* RFC3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+	while (loc != end && loc[0] != ':') {
+		if (!g_ascii_isalnum (loc[0]) &&
+		    loc[0] != '+' && loc[0] != '-' && loc[0] != '.')
+			return FALSE;
+		loc++;
+	}
+
+	/* Without a ':' there is no scheme, so this is no absolute URI */
+	if (loc == end)
+		return FALSE;
+
+	if (strncmp (loc, "://", 3) == 0) {
+		/* Include authority in base */
+		loc += 3;
+		loc = strchr (loc, '/');
+		if (!loc)
+			loc = end;
+	} else {
+		/* No authority, the base is the scheme and its ':' */
+		loc++;
+	}
+
+	*base = g_strndup (uri, loc - uri);
+
+	/* Skip the '/' leading the path, but never step past the
+	 * terminating NUL when the URI ends right after the base.
+	 */
+	*rel_path = (loc[0] == '/') ? loc + 1 : loc;
+
+	return TRUE;
+}
+
+/* Appends the segments in the NULL-terminated @uri_elems to @array,
+ * dropping empty and "." segments and letting each ".." remove the
+ * segment preceding it, if any. The strings are borrowed, not copied.
+ */
+static void
+append_uri_segments (GPtrArray  *array,
+                     gchar     **uri_elems)
+{
+	gint i;
+
+	for (i = 0; uri_elems[i] != NULL; i++) {
+		if (g_strcmp0 (uri_elems[i], ".") == 0) {
+			continue;
+		} else if (g_strcmp0 (uri_elems[i], "..") == 0) {
+			if (array->len > 0)
+				g_ptr_array_remove_index (array, array->len - 1);
+		} else if (*uri_elems[i] != '\0') {
+			/* NB: Not a copy */
+			g_ptr_array_add (array, uri_elems[i]);
+		}
+	}
+}
+
+/* Returns a new array with the normalized segments of @uri_elems, see
+ * append_uri_segments(). The array does not own its strings, so it
+ * must be released with g_ptr_array_unref() before @uri_elems is freed.
+ */
+GPtrArray *
+remove_dot_segments (gchar **uri_elems)
+{
+	GPtrArray *array;
+
+	array = g_ptr_array_new ();
+	append_uri_segments (array, uri_elems);
+
+	return array;
+}
+
+/* Resolves @rel_uri against the absolute URI @base. If the first segment
+ * of @rel_uri contains a ':', or @base cannot be parsed as an absolute
+ * URI, a copy of @rel_uri is returned. The result is newly allocated,
+ * free it with g_free().
+ */
+gchar *
+tracker_resolve_relative_uri (const gchar *base,
+                              const gchar *rel_uri)
+{
+	gchar **base_split, **rel_split, *host;
+	const gchar *base_path;
+	GPtrArray *segments;
+	GString *str;
+	guint i;
+
+	/* Relative IRIs are combined with base IRIs with a simplified version
+	 * of the algorithm described at RFC3986, Section 5.2. We don't care
+	 * about query and fragment parts of an URI, and some simplifications
+	 * are taken on base uri parsing and relative uri validation.
+	 */
+	rel_split = g_strsplit (rel_uri, "/", -1);
+
+	/* Rel uri is a full uri? Splitting an empty string yields an
+	 * empty vector, so the first element may be missing.
+	 */
+	if (rel_split[0] && strchr (rel_split[0], ':')) {
+		g_strfreev (rel_split);
+		return g_strdup (rel_uri);
+	}
+
+	if (!parse_abs_uri (base, &host, &base_path)) {
+		g_strfreev (rel_split);
+		return g_strdup (rel_uri);
+	}
+
+	base_split = g_strsplit (base_path, "/", -1);
+
+	/* Normalize base and relative segments as a single path, so
+	 * that ".." segments in the relative part climb up the base.
+	 */
+	segments = remove_dot_segments (base_split);
+	append_uri_segments (segments, rel_split);
+
+	str = g_string_new (host);
+	for (i = 0; i < segments->len; i++) {
+		g_string_append_c (str, '/');
+		g_string_append (str, g_ptr_array_index (segments, i));
+	}
+
+	g_ptr_array_unref (segments);
+	g_strfreev (base_split);
+	g_strfreev (rel_split);
+	g_free (host);
+
+	return g_string_free (str, FALSE);
+}
diff --git a/src/libtracker-common/tracker-utils.h b/src/libtracker-common/tracker-utils.h
index 2cb78e5ba..c12c9ccae 100644
--- a/src/libtracker-common/tracker-utils.h
+++ b/src/libtracker-common/tracker-utils.h
@@ -47,6 +47,8 @@ gchar * tracker_utf8_truncate (const gchar *str,
                                 gsize max_size);
 gchar * tracker_unescape_unichars (const gchar *str,
                                     gssize len);
+gchar * tracker_resolve_relative_uri (const gchar *base,
+                                       const gchar *rel_uri);
 
 G_END_DECLS
 
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index a516cb946..c2c336740 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -30,6 +30,7 @@
 #include "tracker-collation.h"
 #include "tracker-db-interface-sqlite.h"
 #include "tracker-sparql-query.h"
+#include "tracker-utils.h"
 
 #define TRACKER_NS "http://www.tracker-project.org/ontologies/tracker#"
 #define RDF_NS "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
@@ -132,6 +133,7 @@ struct _TrackerSparql
 
 	GPtrArray *anon_graphs;
 	GPtrArray *named_graphs;
+	gchar *base;
 
 	struct {
 		TrackerContext *context;
@@ -190,6 +192,7 @@ tracker_sparql_finalize (GObject *object)
 	g_ptr_array_unref (sparql->anon_graphs);
 	g_ptr_array_unref (sparql->var_names);
 	g_array_unref (sparql->var_types);
+	g_free (sparql->base);
 
 	if (sparql->blank_nodes)
 		g_variant_builder_unref (sparql->blank_nodes);
@@ -249,6 +252,16 @@ tracker_sparql_swap_current_expression_list_separator (TrackerSparql *sparql,
 }
 
 static inline gchar *
+tracker_sparql_expand_base (TrackerSparql *sparql,
+                            const gchar   *term)
+{
+	if (sparql->base)
+		return tracker_resolve_relative_uri (sparql->base, term);
+	else
+		return g_strdup (term);
+}
+
+static inline gchar *
 tracker_sparql_expand_prefix (TrackerSparql *sparql,
                               const gchar   *term)
 {
@@ -678,9 +691,17 @@ _extract_node_string (TrackerParserNode *node,
 			add_start = subtract_end = 3;
 			compress = TRUE;
 			break;
-		case TERMINAL_TYPE_IRIREF:
+		case TERMINAL_TYPE_IRIREF: {
+			gchar *unexpanded;
+
 			add_start = subtract_end = 1;
+			unexpanded = g_strndup (terminal_start + add_start,
+			                        terminal_end - terminal_start -
+			                        add_start - subtract_end);
+			str = tracker_sparql_expand_base (sparql, unexpanded);
+			g_free (unexpanded);
 			break;
+		}
 		case TERMINAL_TYPE_BLANK_NODE_LABEL:
 			add_start = 2;
 			break;
@@ -1812,11 +1833,15 @@ translate_BaseDecl (TrackerSparql *sparql,
 	/* BaseDecl ::= 'BASE' IRIREF
 	 */
 	_expect (sparql, RULE_TYPE_LITERAL, LITERAL_BASE);
-
-	/* FIXME: BASE is unimplemented, and we never raised an error */
-
 	_expect (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_IRIREF);
 
+	/* Sparql syntax allows for multiple BaseDecl, but it only makes
+	 * sense to keep one. Given that the sparql1.1-query recommendation
+	 * does not define the behavior, just pick the first one.
+	 */
+	if (!sparql->base)
+		sparql->base = _dup_last_string (sparql);
+
 	return TRUE;
 }
 