summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Winship <danw@gnome.org>2013-09-21 17:15:00 -0400
committerDan Winship <danw@gnome.org>2014-11-22 17:32:28 -0500
commit7ed12e6d4dc4c037b109ea772ea92d7452fb289a (patch)
tree724877bbd99399e3b8b35d8e0d255748613cdca1
parent7f5c862e166a6f288d8d0ec2622685316f64b87c (diff)
downloadglib-7ed12e6d4dc4c037b109ea772ea92d7452fb289a.tar.gz
guri: new URI parsing and generating functions
Add a set of new URI parsing and generating functions, including a new parsed-URI type GUri. Move all the code from gurifuncs.c into guri.c, reimplementing some of those functions (and g_string_append_uri_escaped()) in terms of the new code.
-rw-r--r--docs/reference/glib/glib-docs.xml2
-rw-r--r--docs/reference/glib/glib-sections.txt45
-rw-r--r--docs/reference/gobject/gobject-sections.txt1
-rw-r--r--glib/Makefile.am4
-rw-r--r--glib/glib.h2
-rw-r--r--glib/gstring.c84
-rw-r--r--glib/guri-notes.txt220
-rw-r--r--glib/guri.c2323
-rw-r--r--glib/guri.h286
-rw-r--r--glib/gurifuncs.c252
-rw-r--r--glib/gurifuncs.h83
-rw-r--r--glib/tests/uri.c443
-rw-r--r--gobject/gboxed.c1
-rw-r--r--gobject/glib-types.h2
14 files changed, 3291 insertions, 457 deletions
diff --git a/docs/reference/glib/glib-docs.xml b/docs/reference/glib/glib-docs.xml
index 7132c5c9d..309c035ab 100644
--- a/docs/reference/glib/glib-docs.xml
+++ b/docs/reference/glib/glib-docs.xml
@@ -80,7 +80,7 @@
<xi:include href="xml/timers.xml" />
<xi:include href="xml/spawn.xml" />
<xi:include href="xml/fileutils.xml" />
- <xi:include href="xml/gurifuncs.xml" />
+ <xi:include href="xml/guri.xml" />
<xi:include href="xml/ghostutils.xml" />
<xi:include href="xml/shell.xml" />
<xi:include href="xml/option.xml" />
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index 19bae6f35..3af87542d 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -2856,19 +2856,60 @@ g_base64_decode_inplace
<SECTION>
<TITLE>URI Functions</TITLE>
-<FILE>gurifuncs</FILE>
+<FILE>guri</FILE>
+GUri
+g_uri_ref
+g_uri_unref
+<SUBSECTION>
+GUriFlags
+g_uri_split
+g_uri_split_with_user
+g_uri_split_network
+g_uri_is_valid
+g_uri_join
+g_uri_join_with_user
+g_uri_parse
+g_uri_parse_relative
+g_uri_resolve_relative
+g_uri_build
+g_uri_build_with_user
+g_uri_peek_scheme
+g_uri_parse_scheme
+<SUBSECTION>
+GUriHideFlags
+g_uri_to_string
+g_uri_to_string_partial
+<SUBSECTION>
+g_uri_get_scheme
+g_uri_get_userinfo
+g_uri_get_user
+g_uri_get_password
+g_uri_get_auth_params
+g_uri_get_host
+g_uri_get_port
+g_uri_get_path
+g_uri_get_query
+g_uri_get_fragment
+<SUBSECTION>
+g_uri_parse_params
+<SUBSECTION>
G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT
G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
G_URI_RESERVED_CHARS_GENERIC_DELIMITERS
G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
-g_uri_parse_scheme
g_uri_escape_string
g_uri_unescape_string
g_uri_unescape_segment
+<SUBSECTION>
g_uri_list_extract_uris
g_filename_from_uri
g_filename_to_uri
+<SUBSECTION>
+G_URI_ERROR
+GUriError
+<SUBSECTION Private>
+g_uri_error_quark
</SECTION>
<SECTION>
diff --git a/docs/reference/gobject/gobject-sections.txt b/docs/reference/gobject/gobject-sections.txt
index 50f1a4c19..218a982de 100644
--- a/docs/reference/gobject/gobject-sections.txt
+++ b/docs/reference/gobject/gobject-sections.txt
@@ -390,6 +390,7 @@ G_TYPE_MARKUP_PARSE_CONTEXT
G_TYPE_SOURCE
G_TYPE_POLLFD
G_TYPE_THREAD
+G_TYPE_URI
GStrv
<SUBSECTION Standard>
diff --git a/glib/Makefile.am b/glib/Makefile.am
index 1b9e08134..3f6f74d4f 100644
--- a/glib/Makefile.am
+++ b/glib/Makefile.am
@@ -184,7 +184,7 @@ libglib_2_0_la_SOURCES = \
gunidecomp.h \
gunidecomp.c \
gunicodeprivate.h \
- gurifuncs.c \
+ guri.c \
gutils.c \
gvariant.h \
gvariant.c \
@@ -304,7 +304,7 @@ glibsubinclude_HEADERS = \
gtree.h \
gtypes.h \
gunicode.h \
- gurifuncs.h \
+ guri.h \
gutils.h \
gvarianttype.h \
gvariant.h \
diff --git a/glib/glib.h b/glib/glib.h
index c7fc999b2..113bc08ec 100644
--- a/glib/glib.h
+++ b/glib/glib.h
@@ -88,7 +88,7 @@
#include <glib/gtree.h>
#include <glib/gtypes.h>
#include <glib/gunicode.h>
-#include <glib/gurifuncs.h>
+#include <glib/guri.h>
#include <glib/gutils.h>
#include <glib/gvarianttype.h>
#include <glib/gvariant.h>
diff --git a/glib/gstring.c b/glib/gstring.c
index f5890bf64..2f69ad60f 100644
--- a/glib/gstring.c
+++ b/glib/gstring.c
@@ -495,90 +495,6 @@ g_string_insert_len (GString *string,
return string;
}
-#define SUB_DELIM_CHARS "!$&'()*+,;="
-
-static gboolean
-is_valid (char c,
- const char *reserved_chars_allowed)
-{
- if (g_ascii_isalnum (c) ||
- c == '-' ||
- c == '.' ||
- c == '_' ||
- c == '~')
- return TRUE;
-
- if (reserved_chars_allowed &&
- strchr (reserved_chars_allowed, c) != NULL)
- return TRUE;
-
- return FALSE;
-}
-
-static gboolean
-gunichar_ok (gunichar c)
-{
- return
- (c != (gunichar) -2) &&
- (c != (gunichar) -1);
-}
-
-/**
- * g_string_append_uri_escaped:
- * @string: a #GString
- * @unescaped: a string
- * @reserved_chars_allowed: a string of reserved characters allowed
- * to be used, or %NULL
- * @allow_utf8: set %TRUE if the escaped string may include UTF8 characters
- *
- * Appends @unescaped to @string, escaped any characters that
- * are reserved in URIs using URI-style escape sequences.
- *
- * Returns: @string
- *
- * Since: 2.16
- */
-GString *
-g_string_append_uri_escaped (GString *string,
- const gchar *unescaped,
- const gchar *reserved_chars_allowed,
- gboolean allow_utf8)
-{
- unsigned char c;
- const gchar *end;
- static const gchar hex[16] = "0123456789ABCDEF";
-
- g_return_val_if_fail (string != NULL, NULL);
- g_return_val_if_fail (unescaped != NULL, NULL);
-
- end = unescaped + strlen (unescaped);
-
- while ((c = *unescaped) != 0)
- {
- if (c >= 0x80 && allow_utf8 &&
- gunichar_ok (g_utf8_get_char_validated (unescaped, end - unescaped)))
- {
- int len = g_utf8_skip [c];
- g_string_append_len (string, unescaped, len);
- unescaped += len;
- }
- else if (is_valid (c, reserved_chars_allowed))
- {
- g_string_append_c (string, c);
- unescaped++;
- }
- else
- {
- g_string_append_c (string, '%');
- g_string_append_c (string, hex[((guchar)c) >> 4]);
- g_string_append_c (string, hex[((guchar)c) & 0xf]);
- unescaped++;
- }
- }
-
- return string;
-}
-
/**
* g_string_append:
* @string: a #GString
diff --git a/glib/guri-notes.txt b/glib/guri-notes.txt
new file mode 100644
index 000000000..6a807ae7a
--- /dev/null
+++ b/glib/guri-notes.txt
@@ -0,0 +1,220 @@
+aaa/aaas (rfc3588):
+aaa://host[:port][;transport=tcp][;protocol=diameter]
+ technically violates 3986, since ";" could appear in authority
+
+acap (rfc2244):
+acap://[user[;AUTH=mech]@]host[:port]/data
+
+cap (rfc4324):
+cap://host[:port][/data]
+
+cid/mid (rfc2392):
+cid:content-id-data
+mid:message-id-data[/content-id-data]
+
+crid (rfc4078):
+crid://host/data
+
+data (rfc2397):
+data:[type/subtype][;attr=value]*[;base64],data
+
+dav (rfc4918):
+dav:data
+opaquelocktoken:uuid[path]
+
+dict (rfc2229):
+dict://[user[;authmech]@]host[:port]/d:word[:database[:nth]]
+dict://[user[;authmech]@]host[:port]/m:word[:database[:strategy[:nth]]]
+
+dns (rfc4501):
+dns:[//host[:port]/]name[?[attr=value[;attr=value]*]]
+
+file (rfc1738):
+file://[host]/path
+
+ftp (rfc1738):
+ftp://[user[:pass]@]host[:port][/path[;type=type]]
+
+geo (draft...):
+geo:data
+
+go (rfc3368):
+go:[//host[:port]?]data[;attr=[type,]value]*
+
+gopher (rfc4266):
+gopher://host[:port]/path
+
+h323 (rfc3508):
+h323:[user@]host[:port][;params]*
+
+http (rfc 2616):
+http://host[:port][/path[?query]]
+
+https (rfc 2818):
+https://host[:port][/path[?query]]
+
+iax (rfc 5456):
+iax:[user@]host[:port][/number[?context]]
+
+icap (rfc3507):
+icap://[userinfo@]host[:port]/path[?query]
+
+im (rfc3860):
+im:mailbox[?[header=value[;header=value]*]]
+
+imap (rfc5092):
+imap://[user[;AUTH=mech]@]host[:port][/[mailbox[validity]]]
+imap://[user[;AUTH=mech]@]host[:port]/mailbox[validity]?search
+imap://[user[;AUTH=mech]@]host[:port]/mailbox[validity]uid[sect][part][auth]
+
+info (rfc4452):
+info:namespace/identifier[#fragment]
+
+ipp (rfc3510):
+ipp://host[:port][/path[?query]]
+
+iris (rfc3981, 3983, 4992, 4993):
+iris[.transport]:urn/[method]/[userinfo@]host[:port][/class/name]
+
+ldap (rfc4516):
+ldap://[host[:port]][/dn[?[attrs][?[scope][?[filter][?exts]]]]]
+
+mailto (rfc2368):
+mailto:mailbox[?[header=value[;header=value]*]]
+
+msrp (rfc4975):
+msrp://authority[/id];tcp[;attr=value]*
+msrps://authority[/id];tcp[;attr=value]*
+ technically violates 3986, since ";" could appear in authority
+
+mtqp (rfc3887):
+mtqp://authority/track/id/secret
+
+mupdate (rfc3656):
+based on imap
+
+news/nntp (rfc5538):
+news:[//authority/]article-or-groups
+nntp://authority/group[/article]
+
+nfs (rfc2224):
+nfs://host[:port][path]
+
+pop (rfc2384):
+pop://[[user][;auth=mech]@]host[:port]
+
+pres (rfc3859):
+pres:mailbox[?[header=value[;header=value]*]]
+
+rtsp (rfc2326):
+rtsp://host[:port][path]
+rtspu://host[:port][path]
+
+service (rfc2609):
+service:... (possibly including authority)
+
+shttp (rfc2660):
+http-ish
+
+sieve (draft...):
+sieve://authority[path]
+
+sip (3261)
+sip:[user[:pass]@]host[:port][;name=val]*[?hname=hval[&hname=hval]*]
+sips:[user[:pass]@]host[:port][;name=val]*[?hname=hval[&hname=hval]*]
+
+sms (5724)
+sms:phone[,phone]*[?name=val[&name=val]*]
+
+snmp (4088)
+snmp://[userinfo@]host[:port][/context...]
+
+soap.beep (4227)
+standard
+
+tag (4151)
+tag:host-or-email,8601date:data[#fragment]
+
+tel (3966)
+tel:number[;attr=value]*
+
+telnet (4248)
+telnet://[user[:pass]@]host[:port][/]
+
+tftp (3617)
+tftp://host/file[;mode=type]
+
+tip (2371)
+tip://host[:port]/path?trans
+
+tv (2838)
+tv:dnsid
+
+urn (2141)
+urn:data
+
+vemmi (2122)
+vemmi://host[:port]/service[;attr=val]*
+
+xmlrpc.beep (3529)
+standard
+
+xmpp (5122)
+xmpp:[//node@host[/]][[node@]host[/resource]][?query[;attr=val]*][#fragment]
+
+z39.50 (2056)
+z39.50r://host[:port][/[database[?docid]][;esn=data][;rs=data]]
+z39.50s://host[:port][/[database[?docid]][;esn=data][;rs=data]]
+
+
+
+HTML5 rules:
+ - strip leading and trailing spaces
+ - chars less than U+0020 or greater than U+007F in unreserved
+ - U+0022, U+003C, U+003E, U+005B..E, U+0060, U+007B..D in unreserved
+ ( " < > [ \ ] ^ ` { | } ~ )
+ - Allow single % except in hostname
+ - Allow # in fragment
+ - if host has non-UTF8 %-encoded, fail
+ - if host fails IDNA ToASCII, fail
+ - forcibly %-encode all reserved/non-ASCII path chars
+ - re-encode query to the URL's encoding (from enclosing doc) and
+ %-encode. replace unencodable chars with '?'
+ - if fully-resolved URI uses authority, replace all \ with /
+
+
+gvfs and EUri both allow '@' in username
+EUri does "parameters", but seems unused
+hide_password in to_string
+CamelURL does hide_auth too
+CamelURL does params and uses them
+CamelURL special-cases file URLs on win32 (via g_filename_to/from_uri)
+
+
+http://www.freedesktop.org/wiki/Specifications/file-uri-spec says:
+ non-ASCII chars are %-encoded
+ URIs are in filesystem encoding, NOT UTF-8
+ accept "file:/path" for compat
+http://blogs.msdn.com/ie/archive/2006/12/06/file-uris-in-windows.aspx:
+ IE allows "file://" + windows path (eg, "file://D:\blah blah\bl%h")
+ file://// == file://
+ don't use %-encoding for non-ASCII, just use non-ASCII.
+ (http://en.wikipedia.org/wiki/File_URI_scheme contradicts this?)
+ IE allows "|" instead of ":" for drive separator, but very deprecated
+ MUST NOT use %-encoding where not required
+
+Bug 489862 - Basic URI operations
+https://bugzilla.gnome.org/show_bug.cgi?id=489862
+
+Bug 620417 - g_uri_unescape_string cannot unescape %00
+https://bugzilla.gnome.org/show_bug.cgi?id=620417
+
+Bug 611687 - gconvert g_filename_to_uri doesn't do what the documentation says
+https://bugzilla.gnome.org/show_bug.cgi?id=611687
+
+Bug 550110 - RFC: g_uri_is_valid (from GStreamer gst_uri_is_valid)
+https://bugzilla.gnome.org/show_bug.cgi?id=550110
+
+Add fireball URI matcher
+(make sure it does email addresses too)
+
diff --git a/glib/guri.c b/glib/guri.c
new file mode 100644
index 000000000..6cc3ac10f
--- /dev/null
+++ b/glib/guri.c
@@ -0,0 +1,2323 @@
+/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
+
+/* GLIB - Library of useful routines for C programming
+ * Copyright 2010-2014 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include "glib.h"
+#include "glibintl.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * SECTION:guri
+ * @short_description: URI-handling utilities
+ * @include: glib.h
+ *
+ * The #GUri type and related functions can be used to parse URIs into
+ * their components, and build valid URIs from individual components.
+ *
+ * ## Parsing URIs
+ *
+ * The most minimalist APIs for parsing URIs are g_uri_split() and
+ * g_uri_split_with_user(). These split a URI into its component
+ * parts, and return the parts; the difference between the two is that
+ * g_uri_split() treats the "userinfo" component of the URI as a
+ * single element, while g_uri_split_with_user() can (depending on the
+ * #GUriFlags you pass) treat it as containing a username, password,
+ * and authentication parameters. Alternatively, g_uri_split_network()
+ * can be used when you are only interested in the components that are
+ * needed to initiate a network connection to the service (scheme,
+ * host, and port).
+ *
+ * g_uri_parse() is similar to g_uri_split(), but instead of returning
+ * individual strings, it returns a #GUri structure (and it requires
+ * that the URI be an absolute URI).
+ *
+ * g_uri_resolve_relative() and g_uri_parse_relative() allow you to
+ * resolve a relative URI relative to a base URI.
+ * g_uri_resolve_relative() takes two strings and returns a string,
+ * and g_uri_parse_relative() takes a #GUri and a string and returns a
+ * #GUri.
+ *
+ * All of the parsing functions take a #GUriFlags argument describing
+ * exactly how to parse the URI; see the documentation for that type
+ * for more details on the specific flags that you can pass. If you
+ * need to choose different flags based on the type of URI, you can
+ * use g_uri_peek_scheme() on the URI string to check the scheme
+ * first, and use that to decide what flags to parse it with.
+ *
+ * ## Building URIs
+ *
+ * g_uri_join() and g_uri_join_with_user() can be used to construct
+ * valid URI strings from a set of component strings; they are the
+ * inverse of g_uri_split() and g_uri_split_with_user().
+ *
+ * Similarly, g_uri_build() and g_uri_build_with_user() can be used to
+ * construct a #GUri from a set of component strings.
+ *
+ * As with the parsing functions, the building functions take a
+ * #GUriFlags argument; in particular, it is important to keep in mind
+ * whether the URI components you are using have `%`-encoded
+ * characters in them or not, and pass the appropriate flags
+ * accordingly.
+ *
+ * ## `file://` URIs
+ *
+ * Note that Windows and Unix both define special rules for parsing
+ * `file://` URIs (involving non-UTF-8 character sets on Unix, and the
+ * interpretation of path separators on Windows). #GUri does not
+ * implement these rules. Use g_filename_from_uri() and
+ * g_filename_to_uri() if you want to properly convert between
+ * `file://` URIs and local filenames.
+ *
+ * ## URI Equality
+ *
+ * Note that there is no `g_uri_equal ()` function, because comparing
+ * URIs usefully requires scheme-specific knowledge that #GUri does
+ * not have. For example, "`http://example.com/`" and
+ * "`http://EXAMPLE.COM:80`" have exactly the same meaning according
+ * to the HTTP specification, and "`data:,foo`" and
+ * "`data:;base64,Zm9v`" resolve to the same thing according to the
+ * `data:` URI specification.
+ */
+
+/**
+ * GUri:
+ *
+ * A parsed absolute URI.
+ *
+ * Since #GUri only represents absolute URIs, all #GUris will have a
+ * URI scheme, so g_uri_get_scheme() will always return a non-%NULL
+ * answer. Likewise, by definition, all URIs have a path component, so
+ * g_uri_get_path() will always return non-%NULL (though it may return
+ * the empty string).
+ *
+ * If the URI string has an "authority" component (that is, if the
+ * scheme is followed by "`://`" rather than just "`:`"), then the
+ * #GUri will contain a hostname, and possibly a port and "userinfo".
+ * Additionally, depending on how the #GUri was constructed/parsed,
+ * the userinfo may be split out into a username, password, and
+ * additional authorization-related parameters.
+ *
+ * Normally, the components of a #GUri will have all `%`-encoded
+ * characters decoded. However, if you construct/parse a #GUri with
+ * %G_URI_ENCODED, then the `%`-encoding will be preserved instead in
+ * the userinfo, path, and query fields (and in the host field if also
+ * created with %G_URI_NON_DNS). In particular, this is necessary if
+ * the URI may contain binary data or non-UTF-8 text, or if decoding
+ * the components might change the interpretation of the URI.
+ *
+ * Since: 2.44
+ */
+struct _GUri {
+ /* String components; each one is released with g_free() in g_uri_unref(). */
+ gchar *scheme;
+ gchar *userinfo;
+ gchar *host;
+ /* NOTE(review): presumably -1 when the URI has no port, matching the
+ * default that g_uri_split_internal() stores -- confirm in the constructors.
+ */
+ gint port;
+ gchar *path;
+ gchar *query;
+ gchar *fragment;
+
+ /* Pieces of the userinfo; also released with g_free() in g_uri_unref(). */
+ gchar *user;
+ gchar *password;
+ gchar *auth_params;
+
+ GUriFlags flags;
+ /* Incremented and decremented atomically by g_uri_ref()/g_uri_unref(). */
+ gint ref_count;
+};
+
+/**
+ * GUriFlags:
+ * @G_URI_PARSE_STRICT: Parse the URI strictly according to the RFC
+ * 3986 grammar, rather than fixing up or ignoring common mistakes.
+ * @G_URI_HAS_PASSWORD: The userinfo field may contain a password,
+ * which will be separated from the username by ':'.
+ * @G_URI_HAS_AUTH_PARAMS: The userinfo may contain additional
+ * authentication-related parameters, which will be separated from
+ * the username and/or password by ';'.
+ * @G_URI_NON_DNS: The host component should not be assumed to be a
+ * DNS hostname or IP address. (Eg, for `smb` URIs with NetBIOS
+ * hostnames).
+ * @G_URI_ENCODED: When parsing a URI, this indicates that `%`-encoded
+ * characters in the userinfo, path, query, and fragment fields
+ * should not be decoded. (And likewise the host field if
+ * %G_URI_NON_DNS is also set.) When building a URI, it indicates
+ * that you have already `%`-encoded the components, and so #GUri
+ * should not do any encoding itself.
+ *
+ * Flags that describe a URI.
+ *
+ * When parsing a URI, if you need to choose different flags based on
+ * the type of URI, you can use g_uri_peek_scheme() on the URI string
+ * to check the scheme first, and use that to decide what flags to
+ * parse it with.
+ *
+ * Since: 2.44
+ */
+
+/**
+ * g_uri_ref: (skip)
+ * @uri: a #GUri
+ *
+ * Increments the reference count of @uri by one.
+ *
+ * Returns: @uri
+ *
+ * Since: 2.44
+ */
+GUri *
+g_uri_ref (GUri *uri)
+{
+ /* Refuse NULL and instances whose reference count is not positive. */
+ g_return_val_if_fail (uri != NULL, NULL);
+ g_return_val_if_fail (uri->ref_count > 0, NULL);
+
+ g_atomic_int_inc (&uri->ref_count);
+
+ /* The same pointer is handed back to the caller. */
+ return uri;
+}
+
+/**
+ * g_uri_unref: (skip)
+ * @uri: a #GUri
+ *
+ * Atomically decrements the reference count of @uri by one.
+ *
+ * When the reference count reaches zero, the resources allocated by
+ * @uri are freed.
+ *
+ * Since: 2.44
+ */
+void
+g_uri_unref (GUri *uri)
+{
+  g_return_if_fail (uri != NULL);
+  g_return_if_fail (uri->ref_count > 0);
+
+  /* Only the call that drops the count to zero tears the object down. */
+  if (g_atomic_int_dec_and_test (&uri->ref_count))
+    {
+      g_free (uri->scheme);
+      g_free (uri->userinfo);
+      g_free (uri->host);
+      g_free (uri->path);
+      g_free (uri->query);
+      g_free (uri->fragment);
+
+      g_free (uri->user);
+      g_free (uri->password);
+      g_free (uri->auth_params);
+
+      g_slice_free (GUri, uri);
+    }
+}
+
+/* Returns TRUE if @ch is an ASCII letter, an ASCII digit, or one of
+ * '-', '.', '_', '~'; FALSE otherwise.
+ */
+static gboolean
+g_uri_char_is_unreserved (gchar ch)
+{
+  switch (ch)
+    {
+    case '-':
+    case '.':
+    case '_':
+    case '~':
+      return TRUE;
+    default:
+      return g_ascii_isalnum (ch) ? TRUE : FALSE;
+    }
+}
+
+/* XDIGIT: numeric value of the hex digit character c. Characters up to '9'
+ * map through c - '0'; for letters, "& 0x4F" clears the lowercase bit so
+ * that 'a'..'f' and 'A'..'F' both give 10..15. The result is only
+ * meaningful when c is a hex digit, and c is evaluated more than once.
+ */
+#define XDIGIT(c) ((c) <= '9' ? (c) - '0' : ((c) & 0x4F) - 'A' + 10)
+/* HEXCHAR: byte value encoded by the two hex digits at s[1] and s[2]
+ * (uri_decoder() uses it with s pointing at the '%' of a "%XX" escape).
+ */
+#define HEXCHAR(s) ((XDIGIT (s[1]) << 4) + XDIGIT (s[2]))
+
+/* uri_decoder:
+ * @out: location for the newly-allocated, nul-terminated result (free with
+ *   g_free()), or NULL to validate only
+ * @start: first byte of the segment to process
+ * @length: number of bytes in the segment; bytes past it are never read
+ * @just_normalize: if TRUE and @flags contains G_URI_ENCODED, only escapes
+ *   of unreserved characters are decoded and all others are left as-is;
+ *   otherwise every valid "%XX" escape is decoded
+ * @flags: parsing flags (G_URI_ENCODED and G_URI_PARSE_STRICT are consulted)
+ * @parse_error: error code to report on failure
+ * @error: return location for a GError, or NULL
+ *
+ * A '%' that is not followed by two hex digits inside the segment is an
+ * error in strict mode and is copied through unchanged otherwise. The
+ * result must be valid UTF-8 in every mode.
+ *
+ * Returns: the number of bytes in the result (excluding the terminator),
+ *   or -1 on error.
+ */
+static int
+uri_decoder (gchar       **out,
+             const gchar  *start,
+             gsize         length,
+             gboolean      just_normalize,
+             GUriFlags     flags,
+             GUriError     parse_error,
+             GError      **error)
+{
+  gchar *decoded;
+  const guchar *s, *end;
+  guchar *d, c;
+
+  if (!(flags & G_URI_ENCODED))
+    just_normalize = FALSE;
+
+  /* Decoding never grows the string, so length + 1 bytes always suffice. */
+  decoded = g_malloc (length + 1);
+  for (s = (const guchar *)start, end = s + length, d = (guchar *)decoded; s < end; s++)
+    {
+      if (*s == '%')
+        {
+          /* An escape needs two more bytes *inside* the segment; checking
+           * the remaining length first keeps us from reading (or decoding)
+           * bytes that belong to whatever follows the segment.
+           */
+          if (end - s < 3 ||
+              !g_ascii_isxdigit (s[1]) ||
+              !g_ascii_isxdigit (s[2]))
+            {
+              /* % followed by non-hex; this is an error */
+              if (flags & G_URI_PARSE_STRICT)
+                {
+                  g_set_error_literal (error, G_URI_ERROR, parse_error,
+                                       _("Invalid %-encoding in URI"));
+                  g_free (decoded);
+                  return -1;
+                }
+
+              /* In non-strict mode, just let it through; we *don't*
+               * fix it to "%25", since that might change the way that
+               * the URI's owner would interpret it.
+               */
+              *d++ = *s;
+              continue;
+            }
+
+          c = HEXCHAR (s);
+          if (just_normalize && !g_uri_char_is_unreserved (c))
+            {
+              /* Leave the % sequence there; the two hex digits are copied
+               * verbatim by the following iterations.
+               */
+              *d++ = *s;
+            }
+          else
+            {
+              *d++ = c;
+              s += 2;
+            }
+        }
+      else
+        *d++ = *s;
+    }
+  *d = '\0';
+
+  if (!g_utf8_validate (decoded, (gchar *)d - decoded, NULL))
+    {
+      g_set_error_literal (error, G_URI_ERROR, parse_error,
+                           _("Non-UTF-8 characters in URI"));
+      g_free (decoded);
+      return -1;
+    }
+
+  if (out)
+    *out = decoded;
+  else
+    g_free (decoded);
+  return d - (guchar *)decoded;
+}
+
+/* Fully decodes the @length bytes at @start: uri_decoder() is invoked with
+ * just_normalize = FALSE, so every valid "%XX" escape is replaced by its
+ * byte. Returns TRUE on success, FALSE (with @error set) on failure.
+ */
+static gboolean
+uri_decode (gchar       **out,
+            const gchar  *start,
+            gsize         length,
+            GUriFlags     flags,
+            GUriError     parse_error,
+            GError      **error)
+{
+  int decoded_length;
+
+  decoded_length = uri_decoder (out, start, length, FALSE, flags,
+                                parse_error, error);
+  return decoded_length != -1;
+}
+
+/* Processes the @length bytes at @start through uri_decoder() with
+ * just_normalize = TRUE: when @flags contains G_URI_ENCODED only escapes of
+ * unreserved characters are decoded; without that flag the segment is
+ * decoded completely. Returns TRUE on success, FALSE (with @error set) on
+ * failure.
+ */
+static gboolean
+uri_normalize (gchar       **out,
+               const gchar  *start,
+               gsize         length,
+               GUriFlags     flags,
+               GUriError     parse_error,
+               GError      **error)
+{
+  int normalized_length;
+
+  normalized_length = uri_decoder (out, start, length, TRUE, flags,
+                                   parse_error, error);
+  return normalized_length != -1;
+}
+
+/* Returns TRUE if @c may be emitted without %-encoding: it is either an
+ * unreserved character or is found by strchr() in @reserved_chars_allowed
+ * (which may be NULL).
+ */
+static gboolean
+is_valid (guchar       c,
+          const gchar *reserved_chars_allowed)
+{
+  return g_uri_char_is_unreserved (c) ||
+         (reserved_chars_allowed != NULL &&
+          strchr (reserved_chars_allowed, c) != NULL);
+}
+
+/* uri_encoder:
+ * @out: string to append the encoded output to
+ * @start: first byte of the data to encode
+ * @length: number of bytes to encode
+ * @reserved_chars_allowed: reserved characters to pass through unescaped,
+ *   or NULL
+ * @allow_utf8: if TRUE, well-formed multi-byte UTF-8 sequences are copied
+ *   through verbatim instead of being %-encoded
+ *
+ * Appends @start to @out, replacing every byte that is neither unreserved
+ * nor explicitly allowed with an uppercase "%XX" escape.
+ */
+static void
+uri_encoder (GString      *out,
+             const guchar *start,
+             gsize         length,
+             const gchar  *reserved_chars_allowed,
+             gboolean      allow_utf8)
+{
+  static const gchar hex[16] = "0123456789ABCDEF";
+  const guchar *p = start;
+  const guchar *end = p + length;
+
+  while (p < end)
+    {
+      gunichar wc = 0;
+
+      if (allow_utf8 && *p >= 0x80)
+        wc = g_utf8_get_char_validated ((const gchar *)p, end - p);
+
+      /* gunichar is unsigned, so the (gunichar)-1 / (gunichar)-2 failure
+       * values must be tested for explicitly; a plain "> 0" would accept
+       * them and copy invalid or truncated sequences through (possibly
+       * stepping past @end).
+       */
+      if (wc != 0 && wc != (gunichar) -1 && wc != (gunichar) -2)
+        {
+          gint len = g_utf8_skip [*p];
+          g_string_append_len (out, (const gchar *)p, len);
+          p += len;
+        }
+      else if (is_valid (*p, reserved_chars_allowed))
+        {
+          g_string_append_c (out, *p);
+          p++;
+        }
+      else
+        {
+          g_string_append_c (out, '%');
+          g_string_append_c (out, hex[*p >> 4]);
+          g_string_append_c (out, hex[*p & 0xf]);
+          p++;
+        }
+    }
+}
+
+/* parse_host:
+ * @start: first byte of the host component
+ * @length: number of bytes in the host component
+ * @flags: parsing flags
+ * @out: location for the newly-allocated host (free with g_free()), or NULL
+ * @error: return location for a GError, or NULL
+ *
+ * Handles, in order: bracketed IPv6 literals (returned without the
+ * brackets, with an optional scope id), literals starting with a digit that
+ * are IP addresses, opaque hosts when G_URI_NON_DNS is set, and finally
+ * hostnames, which are fully decoded and converted to ASCII if they contain
+ * non-ASCII characters.
+ *
+ * Returns: TRUE on success, FALSE with @error set to G_URI_ERROR_BAD_HOST
+ *   otherwise.
+ */
+static gboolean
+parse_host (const gchar  *start,
+            gsize         length,
+            GUriFlags     flags,
+            gchar       **out,
+            GError      **error)
+{
+  gchar *decoded, *host, *pct;
+  gchar *addr = NULL;
+
+  if (*start == '[')
+    {
+      if (start[length - 1] != ']')
+        {
+        bad_ipv6_literal:
+          g_free (addr);
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
+                       _("Invalid IPv6 address '%.*s' in URI"),
+                       (gint)length, start);
+          return FALSE;
+        }
+
+      addr = g_strndup (start + 1, length - 2);
+
+      /* If there's an IPv6 scope id, ignore it for the moment. */
+      pct = strchr (addr, '%');
+      if (pct)
+        *pct = '\0';
+
+      /* addr must be an IPv6 address */
+      if (!g_hostname_is_ip_address (addr) || !strchr (addr, ':'))
+        goto bad_ipv6_literal;
+
+      if (pct)
+        {
+          *pct = '%';
+          if (strchr (pct + 1, '%'))
+            goto bad_ipv6_literal;
+          /* If the '%' is encoded as '%25' (which it should be), decode it */
+          if (pct[1] == '2' && pct[2] == '5' && pct[3])
+            memmove (pct + 1, pct + 3, strlen (pct + 3) + 1);
+        }
+
+      host = addr;
+      goto ok;
+    }
+
+  if (g_ascii_isdigit (*start))
+    {
+      addr = g_strndup (start, length);
+      if (g_hostname_is_ip_address (addr))
+        {
+          host = addr;
+          goto ok;
+        }
+      g_free (addr);
+    }
+
+  if (flags & G_URI_NON_DNS)
+    {
+      if (!uri_normalize (&decoded, start, length, flags,
+                          G_URI_ERROR_BAD_HOST, error))
+        return FALSE;
+      host = decoded;
+      goto ok;
+    }
+
+  /* DNS hostnames are always fully decoded, whatever the caller asked for. */
+  flags &= ~G_URI_ENCODED;
+  if (!uri_decode (&decoded, start, length, flags,
+                   G_URI_ERROR_BAD_HOST, error))
+    return FALSE;
+
+  /* You're not allowed to %-encode an IP address, so if it wasn't
+   * one before, it better not be one now.
+   */
+  if (g_hostname_is_ip_address (decoded))
+    {
+      g_free (decoded);
+      g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
+                   _("Illegal encoded IP address '%.*s' in URI"),
+                   (gint)length, start);
+      return FALSE;
+    }
+
+  if (g_hostname_is_non_ascii (decoded))
+    {
+      host = g_hostname_to_ascii (decoded);
+      g_free (decoded);
+
+      /* The conversion can fail; report that rather than returning
+       * success with a NULL host.
+       */
+      if (host == NULL)
+        {
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
+                       _("Illegal internationalized hostname '%.*s' in URI"),
+                       (gint)length, start);
+          return FALSE;
+        }
+    }
+  else
+    host = decoded;
+
+ ok:
+  if (out)
+    *out = host;
+  else
+    g_free (host);
+  return TRUE;
+}
+
+/* parse_port:
+ * @start: first byte of the port component
+ * @length: number of bytes in the port component
+ * @out: location for the parsed port, or NULL
+ * @error: return location for a GError, or NULL
+ *
+ * Accepts a run of decimal digits spanning exactly @length bytes whose
+ * value does not exceed 65535.
+ *
+ * Returns: TRUE on success, FALSE with @error set to G_URI_ERROR_BAD_PORT
+ *   otherwise.
+ */
+static gboolean
+parse_port (const gchar  *start,
+            gsize         length,
+            gint         *out,
+            GError      **error)
+{
+  gchar *stop;
+  gulong value;
+
+  /* strtoul() allows leading + or -, so insist on a digit up front. */
+  if (g_ascii_isdigit (*start))
+    {
+      /* We know that *(start + length) is either '\0' or a non-numeric
+       * character, so strtoul() won't scan beyond it.
+       */
+      value = strtoul (start, &stop, 10);
+      if (stop == (gchar *)start + length)
+        {
+          if (value > 65535)
+            {
+              g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
+                           _("Port '%.*s' in URI is out of range"),
+                           (gint)length, start);
+              return FALSE;
+            }
+
+          if (out)
+            *out = value;
+          return TRUE;
+        }
+    }
+
+  /* Either no leading digit, or trailing junk inside the component. */
+  g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_PORT,
+               _("Could not parse port '%.*s' in URI"),
+               (gint)length, start);
+  return FALSE;
+}
+
+/* parse_userinfo:
+ * @start: first byte of the userinfo component
+ * @length: number of bytes in the userinfo component
+ * @flags: parsing flags; G_URI_HAS_AUTH_PARAMS enables splitting at the
+ *   first ';' and G_URI_HAS_PASSWORD enables splitting at the first ':'
+ *   before that
+ * @user: location for the user name, or NULL
+ * @password: location for the password, or NULL; set to NULL if absent
+ * @auth_params: location for the auth params, or NULL; set to NULL if absent
+ * @error: return location for a GError, or NULL
+ *
+ * Splits the userinfo into its parts and normalizes each one. Any of the
+ * output locations may be NULL. On failure, parts that were already stored
+ * are freed and reset to NULL.
+ *
+ * Returns: TRUE on success, FALSE with @error set otherwise.
+ */
+static gboolean
+parse_userinfo (const gchar  *start,
+                gsize         length,
+                GUriFlags     flags,
+                gchar       **user,
+                gchar       **password,
+                gchar       **auth_params,
+                GError      **error)
+{
+  const gchar *user_end = NULL, *password_end = NULL, *auth_params_end;
+
+  auth_params_end = start + length;
+  if (flags & G_URI_HAS_AUTH_PARAMS)
+    password_end = memchr (start, ';', auth_params_end - start);
+  if (!password_end)
+    password_end = auth_params_end;
+  if (flags & G_URI_HAS_PASSWORD)
+    user_end = memchr (start, ':', password_end - start);
+  if (!user_end)
+    user_end = password_end;
+
+  if (!uri_normalize (user, start, user_end - start, flags,
+                      G_URI_ERROR_BAD_USER, error))
+    return FALSE;
+
+  if (*user_end == ':')
+    {
+      start = user_end + 1;
+      if (!uri_normalize (password, start, password_end - start, flags,
+                          G_URI_ERROR_BAD_PASSWORD, error))
+        {
+          /* The output locations are optional; g_clear_pointer() would
+           * dereference a NULL location, so guard each cleanup.
+           */
+          if (user)
+            g_clear_pointer (user, g_free);
+          return FALSE;
+        }
+    }
+  else if (password)
+    *password = NULL;
+
+  if (*password_end == ';')
+    {
+      start = password_end + 1;
+      if (!uri_normalize (auth_params, start, auth_params_end - start, flags,
+                          G_URI_ERROR_BAD_AUTH_PARAMS, error))
+        {
+          if (user)
+            g_clear_pointer (user, g_free);
+          if (password)
+            g_clear_pointer (password, g_free);
+          return FALSE;
+        }
+    }
+  else if (auth_params)
+    *auth_params = NULL;
+
+  return TRUE;
+}
+
+/* Returns a newly-allocated copy of @uri_string with leading and trailing
+ * ASCII whitespace removed, interior spaces replaced by "%20", and any
+ * other interior ASCII whitespace dropped.
+ */
+static gchar *
+uri_cleanup (const gchar *uri_string)
+{
+  const gchar *first, *last, *cursor;
+  GString *cleaned;
+
+  /* Trim whitespace from the front... */
+  first = uri_string;
+  while (g_ascii_isspace (*first))
+    first++;
+
+  /* ...and from the back. */
+  last = first + strlen (first);
+  while (last > first && g_ascii_isspace (last[-1]))
+    last--;
+
+  cleaned = g_string_sized_new (last - first);
+  for (cursor = first; cursor < last; cursor++)
+    {
+      if (*cursor == ' ')
+        {
+          g_string_append (cleaned, "%20");
+          continue;
+        }
+
+      /* Tabs, newlines and the like are silently discarded. */
+      if (g_ascii_isspace (*cursor))
+        continue;
+
+      g_string_append_c (cleaned, *cursor);
+    }
+
+  return g_string_free (cleaned, FALSE);
+}
+
+/* g_uri_split_internal:
+ * @uri_string: the URI (absolute or relative) to split
+ * @flags: parsing flags
+ * @scheme, @userinfo, @user, @password, @auth_params, @host, @path, @query,
+ * @fragment: optional locations for newly-allocated component strings;
+ *   each is set to NULL when the component is absent
+ * @port: optional location for the port; set to -1 when absent
+ * @error: return location for a GError, or NULL
+ *
+ * Splits @uri_string into its components. Unless G_URI_PARSE_STRICT is
+ * set, whitespace is cleaned up first, extra '@'s are attributed to the
+ * userinfo, and a ';' in the authority is treated as the start of the path.
+ *
+ * Every output location is initialised before parsing begins and, on
+ * failure, everything that was stored is freed and reset, so callers never
+ * see uninitialised or half-populated results.
+ *
+ * Returns: TRUE on success, FALSE with @error set otherwise.
+ */
+static gboolean
+g_uri_split_internal (const gchar  *uri_string,
+                      GUriFlags     flags,
+                      gchar       **scheme,
+                      gchar       **userinfo,
+                      gchar       **user,
+                      gchar       **password,
+                      gchar       **auth_params,
+                      gchar       **host,
+                      gint         *port,
+                      gchar       **path,
+                      gchar       **query,
+                      gchar       **fragment,
+                      GError      **error)
+{
+  const gchar *end, *colon, *at, *path_start, *semi, *question;
+  const gchar *p, *bracket, *hostend;
+  gchar *cleaned_uri_string = NULL;
+
+  if (scheme)
+    *scheme = NULL;
+  if (userinfo)
+    *userinfo = NULL;
+  if (user)
+    *user = NULL;
+  if (password)
+    *password = NULL;
+  if (auth_params)
+    *auth_params = NULL;
+  if (host)
+    *host = NULL;
+  if (port)
+    *port = -1;
+  if (path)
+    *path = NULL;
+  if (query)
+    *query = NULL;
+  if (fragment)
+    *fragment = NULL;
+
+  if (!(flags & G_URI_PARSE_STRICT) && strpbrk (uri_string, " \t\n\r"))
+    {
+      cleaned_uri_string = uri_cleanup (uri_string);
+      uri_string = cleaned_uri_string;
+    }
+
+  /* Find scheme: initial [A-Za-z0-9+.-]* substring until ":" */
+  p = uri_string;
+  while (*p && (g_ascii_isalnum (*p) ||
+                *p == '.' || *p == '+' || *p == '-'))
+    p++;
+
+  if (p > uri_string && *p == ':')
+    {
+      if (scheme)
+        *scheme = g_ascii_strdown (uri_string, p - uri_string);
+      p++;
+    }
+  else
+    {
+      if (scheme)
+        *scheme = NULL;
+      p = uri_string;
+    }
+
+  /* Check for authority */
+  if (strncmp (p, "//", 2) == 0)
+    {
+      p += 2;
+
+      path_start = p + strcspn (p, "/?#");
+      at = memchr (p, '@', path_start - p);
+      if (at)
+        {
+          if (!(flags & G_URI_PARSE_STRICT))
+            {
+              const gchar *next_at;
+
+              /* Any "@"s in the userinfo must be %-encoded, but
+               * people get this wrong sometimes. Since "@"s in the
+               * hostname are unlikely (and also wrong anyway), assume
+               * that if there are extra "@"s, they belong in the
+               * userinfo.
+               */
+              do
+                {
+                  next_at = memchr (at + 1, '@', path_start - (at + 1));
+                  if (next_at)
+                    at = next_at;
+                }
+              while (next_at);
+            }
+
+          if (!uri_normalize (userinfo, p, at - p, flags,
+                              G_URI_ERROR_BAD_USER, error))
+            goto fail;
+
+          if (user || password || auth_params)
+            {
+              if (!parse_userinfo (p, at - p, flags,
+                                   user, password, auth_params,
+                                   error))
+                goto fail;
+            }
+          p = at + 1;
+        }
+
+      if (!(flags & G_URI_PARSE_STRICT))
+        {
+          semi = strchr (p, ';');
+          if (semi && semi < path_start)
+            {
+              /* Technically, semicolons are allowed in the "host"
+               * production, but no one ever does this, and some
+               * schemes mistakenly use semicolon as a delimiter
+               * marking the start of the path. We have to check this
+               * after checking for userinfo though, because a
+               * semicolon before the "@" must be part of the
+               * userinfo.
+               */
+              path_start = semi;
+            }
+        }
+
+      /* Find host and port. The host may be a bracket-delimited IPv6
+       * address, in which case the colon delimiting the port must come
+       * (immediately) after the close bracket.
+       */
+      if (*p == '[')
+        {
+          bracket = memchr (p, ']', path_start - p);
+          if (bracket && *(bracket + 1) == ':')
+            colon = bracket + 1;
+          else
+            colon = NULL;
+        }
+      else
+        colon = memchr (p, ':', path_start - p);
+
+      hostend = colon ? colon : path_start;
+      if (!parse_host (p, hostend - p, flags, host, error))
+        goto fail;
+
+      /* A colon directly before the path is an empty port; leave -1. */
+      if (colon && colon != path_start - 1)
+        {
+          p = colon + 1;
+          if (!parse_port (p, path_start - p, port, error))
+            goto fail;
+        }
+
+      p = path_start;
+    }
+
+  /* Find fragment. */
+  end = p + strcspn (p, "#");
+  if (*end == '#')
+    {
+      if (!uri_decode (fragment, end + 1, strlen (end + 1), flags,
+                       G_URI_ERROR_BAD_FRAGMENT, error))
+        goto fail;
+    }
+
+  /* Find query */
+  question = memchr (p, '?', end - p);
+  if (question)
+    {
+      if (!uri_normalize (query, question + 1, end - (question + 1), flags,
+                          G_URI_ERROR_BAD_QUERY, error))
+        goto fail;
+      end = question;
+    }
+
+  if (!uri_normalize (path, p, end - p, flags,
+                      G_URI_ERROR_BAD_PATH, error))
+    goto fail;
+
+  g_free (cleaned_uri_string);
+  return TRUE;
+
+ fail:
+  /* Each location holds either NULL or a string we allocated, so all of
+   * them (including the userinfo sub-parts) can be released here.
+   */
+  if (scheme)
+    g_clear_pointer (scheme, g_free);
+  if (userinfo)
+    g_clear_pointer (userinfo, g_free);
+  if (user)
+    g_clear_pointer (user, g_free);
+  if (password)
+    g_clear_pointer (password, g_free);
+  if (auth_params)
+    g_clear_pointer (auth_params, g_free);
+  if (host)
+    g_clear_pointer (host, g_free);
+  if (port)
+    *port = -1;
+  if (path)
+    g_clear_pointer (path, g_free);
+  if (query)
+    g_clear_pointer (query, g_free);
+  if (fragment)
+    g_clear_pointer (fragment, g_free);
+
+  g_free (cleaned_uri_string);
+  return FALSE;
+}
+
+/**
+ * g_uri_split:
+ * @uri_string: a string containing a relative or absolute URI
+ * @flags: flags for parsing @uri_string
+ * @scheme: (out) (allow-none) (transfer full): on return, contains
+ *   the scheme (converted to lowercase), or %NULL
+ * @userinfo: (out) (allow-none) (transfer full): on return, contains
+ *   the userinfo, or %NULL
+ * @host: (out) (allow-none) (transfer full): on return, contains the
+ *   host, or %NULL
+ * @port: (out) (allow-none): on return, contains the port, or -1
+ * @path: (out) (allow-none) (transfer full): on return, contains the
+ *   path
+ * @query: (out) (allow-none) (transfer full): on return, contains the
+ *   query, or %NULL
+ * @fragment: (out) (allow-none) (transfer full): on return, contains
+ *   the fragment, or %NULL
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string (which can be an absolute or relative URI)
+ * according to @flags, and returns the pieces. A component that does
+ * not appear in @uri_string is returned as %NULL; the path is the
+ * exception, since every URI has a path component (possibly the empty
+ * string).
+ *
+ * If @flags contains %G_URI_ENCODED, then `%`-encoded characters in
+ * @uri_string remain encoded in the output strings; otherwise they
+ * are decoded. Decoding only works if the URI components are ASCII or
+ * UTF-8, so %G_URI_ENCODED is needed if they are not.
+ *
+ * The %G_URI_HAS_PASSWORD and %G_URI_HAS_AUTH_PARAMS @flags are
+ * ignored by g_uri_split(), since it always returns only the full
+ * userinfo; use g_uri_split_with_user() to get it split up.
+ *
+ * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE
+ *   on error.
+ *
+ * Since: 2.44
+ */
+gboolean
+g_uri_split (const gchar  *uri_string,
+             GUriFlags     flags,
+             gchar       **scheme,
+             gchar       **userinfo,
+             gchar       **host,
+             gint         *port,
+             gchar       **path,
+             gchar       **query,
+             gchar       **fragment,
+             GError      **error)
+{
+  gboolean parsed_ok;
+
+  /* The user, password and auth_params slots are passed as NULL:
+   * only the combined userinfo is requested here.
+   */
+  parsed_ok = g_uri_split_internal (uri_string, flags,
+                                    scheme,
+                                    userinfo, NULL, NULL, NULL,
+                                    host, port,
+                                    path, query, fragment,
+                                    error);
+
+  return parsed_ok;
+}
+
+/**
+ * g_uri_split_with_user:
+ * @uri_string: a string containing a relative or absolute URI
+ * @flags: flags for parsing @uri_string
+ * @scheme: (out) (allow-none) (transfer full): on return, contains
+ *   the scheme (converted to lowercase), or %NULL
+ * @user: (out) (allow-none) (transfer full): on return, contains
+ *   the user, or %NULL
+ * @password: (out) (allow-none) (transfer full): on return, contains
+ *   the password, or %NULL
+ * @auth_params: (out) (allow-none) (transfer full): on return,
+ *   contains the auth_params, or %NULL
+ * @host: (out) (allow-none) (transfer full): on return, contains the
+ *   host, or %NULL
+ * @port: (out) (allow-none): on return, contains the port, or -1
+ * @path: (out) (allow-none) (transfer full): on return, contains the
+ *   path
+ * @query: (out) (allow-none) (transfer full): on return, contains the
+ *   query, or %NULL
+ * @fragment: (out) (allow-none) (transfer full): on return, contains
+ *   the fragment, or %NULL
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string (which can be an absolute or relative URI)
+ * according to @flags, and returns the pieces. A component that does
+ * not appear in @uri_string is returned as %NULL; the path is the
+ * exception, since every URI has a path component (possibly the empty
+ * string).
+ *
+ * See g_uri_split(), and the definition of #GUriFlags, for more
+ * information on the effect of @flags. Note that @password is only
+ * parsed out if @flags contains %G_URI_HAS_PASSWORD, and @auth_params
+ * is only parsed out if @flags contains %G_URI_HAS_AUTH_PARAMS.
+ *
+ * Returns: (skip): %TRUE if @uri_string parsed successfully, %FALSE
+ *   on error.
+ *
+ * Since: 2.44
+ */
+gboolean
+g_uri_split_with_user (const gchar  *uri_string,
+                       GUriFlags     flags,
+                       gchar       **scheme,
+                       gchar       **user,
+                       gchar       **password,
+                       gchar       **auth_params,
+                       gchar       **host,
+                       gint         *port,
+                       gchar       **path,
+                       gchar       **query,
+                       gchar       **fragment,
+                       GError      **error)
+{
+  gboolean parsed_ok;
+
+  /* The combined-userinfo slot is passed as NULL: the caller gets the
+   * split-out user, password and auth_params instead.
+   */
+  parsed_ok = g_uri_split_internal (uri_string, flags,
+                                    scheme,
+                                    NULL, user, password, auth_params,
+                                    host, port,
+                                    path, query, fragment,
+                                    error);
+
+  return parsed_ok;
+}
+
+
+/**
+ * g_uri_split_network:
+ * @uri_string: a string containing a relative or absolute URI
+ * @flags: flags for parsing @uri_string
+ * @scheme: (out) (allow-none) (transfer full): on return, contains
+ *   the scheme (converted to lowercase), or %NULL
+ * @host: (out) (allow-none) (transfer full): on return, contains the
+ *   host, or %NULL
+ * @port: (out) (allow-none): on return, contains the port, or -1
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string (which must be an absolute URI) according to
+ * @flags, and returns the pieces relevant to connecting to a host.
+ * See the documentation for g_uri_split() for more details; this is
+ * mostly a wrapper around that function with simpler arguments.
+ * However, it will return an error if @uri_string is a relative URI,
+ * or does not contain a hostname component.
+ *
+ * On failure @scheme and @host are left untouched and @port (if
+ * given) is set to -1.
+ *
+ * Returns: (skip): %TRUE if @uri_string parsed successfully,
+ *   %FALSE on error.
+ *
+ * Since: 2.44
+ */
+gboolean
+g_uri_split_network (const gchar  *uri_string,
+                     GUriFlags     flags,
+                     gchar       **scheme,
+                     gchar       **host,
+                     gint         *port,
+                     GError      **error)
+{
+  /* Initialised so that the NULL tests and g_free() calls below are
+   * well-defined even if the parser leaves a slot unwritten.
+   */
+  gchar *my_scheme = NULL, *my_host = NULL;
+
+  if (!g_uri_split_internal (uri_string, flags,
+                             &my_scheme, NULL, NULL, NULL, NULL,
+                             &my_host, port, NULL, NULL, NULL,
+                             error))
+    return FALSE;
+
+  if (!my_scheme || !my_host)
+    {
+      if (!my_scheme)
+        {
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_SCHEME,
+                       _("URI '%s' is not an absolute URI"),
+                       uri_string);
+        }
+      else
+        {
+          g_set_error (error, G_URI_ERROR, G_URI_ERROR_BAD_HOST,
+                       _("URI '%s' has no host component"),
+                       uri_string);
+        }
+      g_free (my_scheme);
+      g_free (my_host);
+
+      /* The parse itself succeeded and may have stored a port; reset
+       * it so a failed call does not hand back a half-result.
+       */
+      if (port)
+        *port = -1;
+
+      return FALSE;
+    }
+
+  /* Transfer ownership to the caller, or drop what was not asked for. */
+  if (scheme)
+    *scheme = my_scheme;
+  else
+    g_free (my_scheme);
+  if (host)
+    *host = my_host;
+  else
+    g_free (my_host);
+  return TRUE;
+}
+
+/**
+ * g_uri_is_valid:
+ * @uri_string: a string containing a relative or absolute URI
+ * @flags: flags for parsing @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string (which can be an absolute or relative URI)
+ * according to @flags, to determine whether it is valid. None of the
+ * parsed components are returned.
+ *
+ * See g_uri_split(), and the definition of #GUriFlags, for more
+ * information on the effect of @flags.
+ *
+ * Returns: %TRUE if @uri_string parsed successfully, %FALSE on error.
+ *
+ * Since: 2.44
+ */
+gboolean
+g_uri_is_valid (const gchar  *uri_string,
+                GUriFlags     flags,
+                GError      **error)
+{
+  gboolean valid;
+
+  /* Every output slot is NULL: only the verdict (and @error) matter. */
+  valid = g_uri_split_internal (uri_string, flags,
+                                NULL,
+                                NULL, NULL, NULL, NULL,
+                                NULL, NULL,
+                                NULL, NULL, NULL,
+                                error);
+
+  return valid;
+}
+
+
+/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of
+ * RFC 3986, except that @path is modified in place.
+ *
+ * @path must be a writable, NUL-terminated string; it only ever gets
+ * shorter. An empty @path is left untouched.
+ */
+static void
+remove_dot_segments (gchar *path)
+{
+  gchar *p, *q;
+
+  /* Every scan below starts at path + 1, which for an empty string
+   * is already past the terminating NUL.
+   */
+  if (!*path)
+    return;
+
+  /* Remove "./" where "." is a complete segment. */
+  for (p = path + 1; *p; )
+    {
+      if (*(p - 1) == '/' &&
+          *p == '.' && *(p + 1) == '/')
+        memmove (p, p + 2, strlen (p + 2) + 1);
+      else
+        p++;
+    }
+  /* Remove "." at end. p now points at the terminating NUL; ">="
+   * (rather than ">") so that a path of exactly "/." becomes "/".
+   */
+  if (p >= path + 2 &&
+      *(p - 1) == '.' && *(p - 2) == '/')
+    *(p - 1) = '\0';
+
+  /* Remove "<segment>/../" where <segment> != ".." */
+  for (p = path + 1; *p; )
+    {
+      if (!strncmp (p, "../", 3))
+        {
+          p += 3;
+          continue;
+        }
+      q = strchr (p + 1, '/');
+      if (!q)
+        break;
+      if (strncmp (q, "/../", 4) != 0)
+        {
+          p = q + 1;
+          continue;
+        }
+      memmove (p, q + 4, strlen (q + 4) + 1);
+      /* Restart: the removal may have exposed a new "<segment>/../". */
+      p = path + 1;
+    }
+  /* Remove "<segment>/.." at end where <segment> != "..". When the
+   * whole path is "/.." (q == path) there is no preceding segment and
+   * q - 1 would point before the buffer; that case is handled by the
+   * "extraneous initial" step below.
+   */
+  q = strrchr (path, '/');
+  if (q && q != path && !strcmp (q, "/.."))
+    {
+      p = q - 1;
+      while (p > path && *p != '/')
+        p--;
+      if (strncmp (p, "/../", 4) != 0)
+        *(p + 1) = 0;
+    }
+
+  /* Remove extraneous initial "/.."s */
+  while (!strncmp (path, "/../", 4))
+    memmove (path, path + 3, strlen (path) - 2);
+  if (!strcmp (path, "/.."))
+    path[1] = '\0';
+}
+
+/**
+ * g_uri_parse:
+ * @uri_string: a string representing an absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags. If the result is not a
+ * valid absolute URI, it will be discarded, and an error returned.
+ *
+ * This is g_uri_parse_relative() with no base URI.
+ *
+ * Return value: (transfer full): a new #GUri.
+ *
+ * Since: 2.44
+ */
+GUri *
+g_uri_parse (const gchar  *uri_string,
+             GUriFlags     flags,
+             GError      **error)
+{
+  GUri *parsed;
+
+  parsed = g_uri_parse_relative (NULL, uri_string, flags, error);
+
+  return parsed;
+}
+
+/**
+ * g_uri_parse_relative:
+ * @base_uri: (allow-none): a base URI
+ * @uri_string: a string representing a relative or absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags and, if it is a relative
+ * URI, resolves it relative to @base_uri. If the result is not a
+ * valid absolute URI, it will be discarded, and an error returned.
+ *
+ * Return value: (transfer full): a new #GUri.
+ *
+ * Since: 2.44
+ */
+GUri *
+g_uri_parse_relative (GUri         *base_uri,
+                      const gchar  *uri_string,
+                      GUriFlags     flags,
+                      GError      **error)
+{
+  GUri *result;
+
+  if (base_uri != NULL && base_uri->scheme == NULL)
+    {
+      g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
+                           _("Base URI is not absolute"));
+      return NULL;
+    }
+
+  result = g_slice_new0 (GUri);
+  result->ref_count = 1;
+  result->flags = flags;
+
+  if (!g_uri_split_internal (uri_string, flags,
+                             &result->scheme, &result->userinfo,
+                             &result->user, &result->password,
+                             &result->auth_params,
+                             &result->host, &result->port,
+                             &result->path, &result->query,
+                             &result->fragment,
+                             error))
+    {
+      g_uri_unref (result);
+      return NULL;
+    }
+
+  /* Without a base there is nothing to resolve against: the parsed
+   * URI is returned as-is, provided it is absolute.
+   */
+  if (base_uri == NULL)
+    {
+      if (result->scheme == NULL)
+        {
+          g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
+                               _("URI is not absolute, and no base URI was provided"));
+          g_uri_unref (result);
+          return NULL;
+        }
+      return result;
+    }
+
+  /* From here on this is section 5.2.2 of RFC 3986, done in place in
+   * @result rather than copying from R to T.
+   */
+
+  /* Reference has its own scheme: only the path is normalized. */
+  if (result->scheme != NULL)
+    {
+      remove_dot_segments (result->path);
+      return result;
+    }
+
+  result->scheme = g_strdup (base_uri->scheme);
+
+  /* Reference has its own authority: keep it, normalize the path. */
+  if (result->host != NULL)
+    {
+      remove_dot_segments (result->path);
+      return result;
+    }
+
+  if (*result->path == '\0')
+    {
+      /* Empty path: take the base's path, and its query too unless
+       * the reference supplied one.
+       */
+      g_free (result->path);
+      result->path = g_strdup (base_uri->path);
+      if (result->query == NULL)
+        result->query = g_strdup (base_uri->query);
+    }
+  else if (*result->path == '/')
+    {
+      remove_dot_segments (result->path);
+    }
+  else
+    {
+      /* Relative path: replace everything after the base path's last
+       * '/' with the reference path, then normalize.
+       */
+      const gchar *last_slash = strrchr (base_uri->path, '/');
+      gchar *merged;
+
+      if (last_slash != NULL)
+        {
+          merged = g_strdup_printf ("%.*s/%s",
+                                    (gint)(last_slash - base_uri->path),
+                                    base_uri->path,
+                                    result->path);
+        }
+      else
+        merged = g_strdup_printf ("/%s", result->path);
+
+      g_free (result->path);
+      result->path = merged;
+
+      remove_dot_segments (result->path);
+    }
+
+  /* The authority is inherited wholesale from the base. */
+  result->userinfo = g_strdup (base_uri->userinfo);
+  result->user = g_strdup (base_uri->user);
+  result->password = g_strdup (base_uri->password);
+  result->auth_params = g_strdup (base_uri->auth_params);
+  result->host = g_strdup (base_uri->host);
+  result->port = base_uri->port;
+
+  return result;
+}
+
+/**
+ * g_uri_resolve_relative:
+ * @base_uri_string: (allow-none): a string representing a base URI
+ * @uri_string: a string representing a relative or absolute URI
+ * @flags: flags describing how to parse @uri_string
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Parses @uri_string according to @flags and, if it is a relative
+ * URI, resolves it relative to @base_uri_string. If the result is not
+ * a valid absolute URI, it will be discarded, and an error returned.
+ *
+ * %G_URI_ENCODED is always added to @flags, for both the base and the
+ * reference.
+ *
+ * (If @base_uri_string is %NULL, this just returns @uri_string, or
+ * %NULL if @uri_string is invalid or not absolute.)
+ *
+ * Return value: the resolved URI string.
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_resolve_relative (const gchar  *base_uri_string,
+                        const gchar  *uri_string,
+                        GUriFlags     flags,
+                        GError      **error)
+{
+  GUri *base = NULL;
+  GUri *resolved;
+  gchar *result = NULL;
+
+  flags |= G_URI_ENCODED;
+
+  if (base_uri_string != NULL)
+    {
+      base = g_uri_parse (base_uri_string, flags, error);
+      if (base == NULL)
+        return NULL;
+    }
+
+  resolved = g_uri_parse_relative (base, uri_string, flags, error);
+
+  /* The base is only needed for the resolution step itself. */
+  if (base != NULL)
+    g_uri_unref (base);
+
+  if (resolved != NULL)
+    {
+      result = g_uri_to_string (resolved);
+      g_uri_unref (resolved);
+    }
+
+  return result;
+}
+
+/* userinfo as a whole can contain sub-delims + ":", but split-out
+ * user can't contain ":" or ";", and split-out password can't contain
+ * ";".
+ */
+#define USERINFO_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO
+#define USER_ALLOWED_CHARS "!$&'()*+,="
+#define PASSWORD_ALLOWED_CHARS "!$&'()*+,=:"
+#define AUTH_PARAMS_ALLOWED_CHARS USERINFO_ALLOWED_CHARS
+#define IP_ADDR_ALLOWED_CHARS ":"
+#define HOST_ALLOWED_CHARS G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS
+#define PATH_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
+#define QUERY_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
+#define FRAGMENT_ALLOWED_CHARS G_URI_RESERVED_CHARS_ALLOWED_IN_PATH "?"
+
+/* Shared implementation of g_uri_join() and g_uri_join_with_user().
+ *
+ * @userinfo says how to treat @user: if %TRUE, @user is a complete
+ * userinfo component (so ':' and ';' inside it are delimiters and
+ * must be left alone when escaping); if %FALSE, @user is only the
+ * user name, and @password / @auth_params are appended after it.
+ *
+ * With %G_URI_ENCODED in @flags every component is appended verbatim;
+ * otherwise each one is `%`-escaped with the allowed-character set
+ * for its position. The authority (userinfo, host, port) is only
+ * written when @host is non-%NULL.
+ *
+ * Returns a newly-allocated string.
+ */
+static gchar *
+g_uri_join_internal (GUriFlags    flags,
+                     const gchar *scheme,
+                     gboolean     userinfo,
+                     const gchar *user,
+                     const gchar *password,
+                     const gchar *auth_params,
+                     const gchar *host,
+                     gint         port,
+                     const gchar *path,
+                     const gchar *query,
+                     const gchar *fragment)
+{
+  gboolean encoded = (flags & G_URI_ENCODED);
+  GString *str;
+
+  str = g_string_new (scheme);
+  g_string_append_c (str, ':');
+
+  if (host)
+    {
+      g_string_append (str, "//");
+
+      if (user)
+        {
+          if (encoded)
+            g_string_append (str, user);
+          else if (userinfo)
+            {
+              /* A full userinfo string: its ':' and ';' separate
+               * the sub-components and must not be escaped.
+               */
+              g_string_append_uri_escaped (str, user, USERINFO_ALLOWED_CHARS, TRUE);
+            }
+          else
+            {
+              /* Encode ':' and ';' regardless of whether we have a
+               * password or auth params, since it may be parsed later
+               * under the assumption that it does.
+               */
+              g_string_append_uri_escaped (str, user, USER_ALLOWED_CHARS, TRUE);
+            }
+
+          if (password)
+            {
+              g_string_append_c (str, ':');
+              if (encoded)
+                g_string_append (str, password);
+              else
+                g_string_append_uri_escaped (str, password, PASSWORD_ALLOWED_CHARS, TRUE);
+            }
+
+          if (auth_params)
+            {
+              g_string_append_c (str, ';');
+              if (encoded)
+                g_string_append (str, auth_params);
+              else
+                g_string_append_uri_escaped (str, auth_params, AUTH_PARAMS_ALLOWED_CHARS, TRUE);
+            }
+
+          g_string_append_c (str, '@');
+        }
+
+      /* A ':' in the host means an IP literal, which needs brackets. */
+      if (strchr (host, ':'))
+        {
+          g_string_append_c (str, '[');
+          if (encoded)
+            g_string_append (str, host);
+          else
+            g_string_append_uri_escaped (str, host, IP_ADDR_ALLOWED_CHARS, TRUE);
+          g_string_append_c (str, ']');
+        }
+      else
+        {
+          if (encoded)
+            g_string_append (str, host);
+          else
+            g_string_append_uri_escaped (str, host, HOST_ALLOWED_CHARS, TRUE);
+        }
+
+      if (port != -1)
+        g_string_append_printf (str, ":%d", port);
+    }
+
+  if (encoded)
+    g_string_append (str, path);
+  else
+    g_string_append_uri_escaped (str, path, PATH_ALLOWED_CHARS, TRUE);
+
+  if (query)
+    {
+      g_string_append_c (str, '?');
+      if (encoded)
+        g_string_append (str, query);
+      else
+        g_string_append_uri_escaped (str, query, QUERY_ALLOWED_CHARS, TRUE);
+    }
+  if (fragment)
+    {
+      g_string_append_c (str, '#');
+      if (encoded)
+        g_string_append (str, fragment);
+      else
+        g_string_append_uri_escaped (str, fragment, FRAGMENT_ALLOWED_CHARS, TRUE);
+    }
+
+  return g_string_free (str, FALSE);
+}
+
+/**
+ * g_uri_join:
+ * @flags: flags describing how to build the URI string
+ * @scheme: the URI scheme
+ * @userinfo: (allow-none): the userinfo component, or %NULL
+ * @host: (allow-none): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (allow-none): the query component, or %NULL
+ * @fragment: (allow-none): the fragment, or %NULL
+ *
+ * Joins the given components together according to @flags to create
+ * a complete URI string. At least @scheme must be specified, and
+ * @path may not be %NULL (though it may be "").
+ *
+ * See also g_uri_join_with_user(), which allows specifying the
+ * components of the "userinfo" separately.
+ *
+ * Return value: a URI string
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_join (GUriFlags    flags,
+            const gchar *scheme,
+            const gchar *userinfo,
+            const gchar *host,
+            gint         port,
+            const gchar *path,
+            const gchar *query,
+            const gchar *fragment)
+{
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  return g_uri_join_internal (flags,
+                              scheme,
+                              TRUE, userinfo, NULL, NULL,
+                              host,
+                              port,
+                              path,
+                              query,
+                              fragment);
+}
+
+/**
+ * g_uri_join_with_user:
+ * @flags: flags describing how to build the URI string
+ * @scheme: the URI scheme
+ * @user: (allow-none): the user component of the userinfo, or %NULL
+ * @password: (allow-none): the password component of the userinfo, or
+ *   %NULL
+ * @auth_params: (allow-none): the auth params of the userinfo, or
+ *   %NULL
+ * @host: (allow-none): the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: (allow-none): the query component, or %NULL
+ * @fragment: (allow-none): the fragment, or %NULL
+ *
+ * Joins the given components together according to @flags to create
+ * a complete URI string. At least @scheme must be specified, and
+ * @path may not be %NULL (though it may be "").
+ *
+ * In contrast to g_uri_join(), this allows specifying the components
+ * of the "userinfo" separately. @password and @auth_params are only
+ * written out when @user is non-%NULL.
+ *
+ * Return value: a URI string
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_join_with_user (GUriFlags    flags,
+                      const gchar *scheme,
+                      const gchar *user,
+                      const gchar *password,
+                      const gchar *auth_params,
+                      const gchar *host,
+                      gint         port,
+                      const gchar *path,
+                      const gchar *query,
+                      const gchar *fragment)
+{
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  return g_uri_join_internal (flags,
+                              scheme,
+                              FALSE, user, password, auth_params,
+                              host,
+                              port,
+                              path,
+                              query,
+                              fragment);
+}
+
+/**
+ * g_uri_build:
+ * @flags: flags describing how to build the #GUri
+ * @scheme: the URI scheme
+ * @userinfo: the userinfo component, or %NULL
+ * @host: the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: the query component, or %NULL
+ * @fragment: the fragment, or %NULL
+ *
+ * Creates a new #GUri from the given components according to @flags.
+ * @scheme is stored lowercased (ASCII); the other strings are copied
+ * as given.
+ *
+ * See also g_uri_build_with_user(), which allows specifying the
+ * components of the "userinfo" separately.
+ *
+ * Return value: (transfer full): a new #GUri
+ */
+GUri *
+g_uri_build (GUriFlags    flags,
+             const gchar *scheme,
+             const gchar *userinfo,
+             const gchar *host,
+             gint         port,
+             const gchar *path,
+             const gchar *query,
+             const gchar *fragment)
+{
+  GUri *built;
+
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  /* Zero-filled, so the fields not assigned below stay NULL. */
+  built = g_slice_new0 (GUri);
+  built->ref_count = 1;
+  built->flags = flags;
+  built->port = port;
+
+  built->scheme = g_ascii_strdown (scheme, -1);
+  built->userinfo = g_strdup (userinfo);
+  built->host = g_strdup (host);
+  built->path = g_strdup (path);
+  built->query = g_strdup (query);
+  built->fragment = g_strdup (fragment);
+
+  return built;
+}
+
+/**
+ * g_uri_build_with_user:
+ * @flags: flags describing how to build the #GUri
+ * @scheme: the URI scheme
+ * @user: the user component of the userinfo, or %NULL
+ * @password: the password component of the userinfo, or %NULL
+ * @auth_params: the auth params of the userinfo, or %NULL
+ * @host: the host component, or %NULL
+ * @port: the port, or -1
+ * @path: the path component
+ * @query: the query component, or %NULL
+ * @fragment: the fragment, or %NULL
+ *
+ * Creates a new #GUri from the given components according to @flags.
+ *
+ * In contrast to g_uri_build(), this allows specifying the components
+ * of the "userinfo" field separately. Note that @user must be non-%NULL
+ * if either @password or @auth_params is non-%NULL.
+ *
+ * The combined userinfo is assembled as "user[:password][;auth_params]";
+ * unless @flags contains %G_URI_ENCODED, each part is `%`-escaped as
+ * it is added.
+ *
+ * Return value: (transfer full): a new #GUri
+ *
+ * Since: 2.44
+ */
+GUri *
+g_uri_build_with_user (GUriFlags    flags,
+                       const gchar *scheme,
+                       const gchar *user,
+                       const gchar *password,
+                       const gchar *auth_params,
+                       const gchar *host,
+                       gint         port,
+                       const gchar *path,
+                       const gchar *query,
+                       const gchar *fragment)
+{
+  GUri *built;
+  GString *combined;
+  gboolean keep_encoded;
+
+  g_return_val_if_fail (scheme != NULL, NULL);
+  g_return_val_if_fail (password == NULL || user != NULL, NULL);
+  g_return_val_if_fail (auth_params == NULL || user != NULL, NULL);
+  g_return_val_if_fail (port >= -1 && port <= 65535, NULL);
+  g_return_val_if_fail (path != NULL, NULL);
+
+  built = g_slice_new0 (GUri);
+  built->ref_count = 1;
+  built->flags = flags;
+  built->port = port;
+
+  built->scheme = g_ascii_strdown (scheme, -1);
+  built->user = g_strdup (user);
+  built->password = g_strdup (password);
+  built->auth_params = g_strdup (auth_params);
+  built->host = g_strdup (host);
+  built->path = g_strdup (path);
+  built->query = g_strdup (query);
+  built->fragment = g_strdup (fragment);
+
+  if (user == NULL)
+    {
+      built->userinfo = NULL;
+      return built;
+    }
+
+  keep_encoded = (flags & G_URI_ENCODED) != 0;
+  combined = g_string_new (NULL);
+
+  if (keep_encoded)
+    g_string_append (combined, built->user);
+  else
+    g_string_append_uri_escaped (combined, built->user, USER_ALLOWED_CHARS, TRUE);
+
+  if (password != NULL)
+    {
+      g_string_append_c (combined, ':');
+      if (keep_encoded)
+        g_string_append (combined, built->password);
+      else
+        g_string_append_uri_escaped (combined, built->password, PASSWORD_ALLOWED_CHARS, TRUE);
+    }
+
+  if (auth_params != NULL)
+    {
+      g_string_append_c (combined, ';');
+      if (keep_encoded)
+        g_string_append (combined, built->auth_params);
+      else
+        g_string_append_uri_escaped (combined, built->auth_params, AUTH_PARAMS_ALLOWED_CHARS, TRUE);
+    }
+
+  /* Keep the character data, drop only the GString wrapper. */
+  built->userinfo = g_string_free (combined, FALSE);
+
+  return built;
+}
+
+/**
+ * GUriHideFlags:
+ * @G_URI_HIDE_USERINFO: Hide the userinfo
+ * @G_URI_HIDE_PASSWORD: Hide the password
+ * @G_URI_HIDE_AUTH_PARAMS: Hide the auth_params
+ * @G_URI_HIDE_FRAGMENT: Hide the fragment
+ *
+ * Flags describing what parts of the URI to hide in
+ * g_uri_to_string_partial(). Note that %G_URI_HIDE_PASSWORD and
+ * %G_URI_HIDE_AUTH_PARAMS will only work if the #GUri was parsed with
+ * the corresponding flags.
+ *
+ * Since: 2.44
+ */
+
+/**
+ * g_uri_to_string:
+ * @uri: a #GUri
+ *
+ * Returns a string representing @uri, with nothing hidden; this is
+ * g_uri_to_string_partial() with no hide flags.
+ *
+ * This is not guaranteed to return a string which is identical to the
+ * string that @uri was parsed from. However, if the source URI was
+ * syntactically correct (according to RFC 3986), and it was parsed
+ * with %G_URI_ENCODED, then g_uri_to_string() is guaranteed to return
+ * a string which is at least semantically equivalent to the source
+ * URI (according to RFC 3986).
+ *
+ * Return value: a string representing @uri, which the caller must
+ *   free.
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_to_string (GUri *uri)
+{
+  gchar *text;
+
+  text = g_uri_to_string_partial (uri, 0);
+
+  return text;
+}
+
+/**
+ * g_uri_to_string_partial:
+ * @uri: a #GUri
+ * @flags: flags describing what parts of @uri to hide
+ *
+ * Returns a string representing @uri, subject to the options in
+ * @flags. See g_uri_to_string() and #GUriHideFlags for more details.
+ *
+ * %G_URI_HIDE_USERINFO also hides the password and the auth params.
+ *
+ * Return value: a string representing @uri, which the caller must
+ *   free.
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_to_string_partial (GUri          *uri,
+                         GUriHideFlags  flags)
+{
+  const gchar *shown_fragment;
+
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  shown_fragment = (flags & G_URI_HIDE_FRAGMENT) ? NULL : uri->fragment;
+
+  /* Without either split-userinfo flag, write the userinfo as one
+   * opaque component.
+   */
+  if (!(uri->flags & (G_URI_HAS_PASSWORD | G_URI_HAS_AUTH_PARAMS)))
+    {
+      const gchar *shown_userinfo;
+
+      shown_userinfo = (flags & G_URI_HIDE_USERINFO) ? NULL : uri->userinfo;
+
+      return g_uri_join (uri->flags,
+                         uri->scheme,
+                         shown_userinfo,
+                         uri->host,
+                         uri->port,
+                         uri->path,
+                         uri->query,
+                         shown_fragment);
+    }
+  else
+    {
+      const gchar *shown_user, *shown_password, *shown_auth_params;
+
+      shown_user = (flags & G_URI_HIDE_USERINFO) ? NULL : uri->user;
+      shown_password =
+        (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_PASSWORD)) ? NULL : uri->password;
+      shown_auth_params =
+        (flags & (G_URI_HIDE_USERINFO | G_URI_HIDE_AUTH_PARAMS)) ? NULL : uri->auth_params;
+
+      return g_uri_join_with_user (uri->flags,
+                                   uri->scheme,
+                                   shown_user,
+                                   shown_password,
+                                   shown_auth_params,
+                                   uri->host,
+                                   uri->port,
+                                   uri->path,
+                                   uri->query,
+                                   shown_fragment);
+    }
+}
+
+/* GHashFunc for NUL-terminated strings that ignores ASCII case: the
+ * same recurrence as g_str_hash() (seed 5381, h = h * 33 + c), but
+ * with each character passed through g_ascii_toupper() first.
+ */
+static guint
+str_ascii_case_hash (gconstpointer v)
+{
+  const signed char *cursor = v;
+  guint32 hash = 5381;
+
+  while (*cursor != '\0')
+    {
+      hash = (hash << 5) + hash + g_ascii_toupper (*cursor);
+      cursor++;
+    }
+
+  return hash;
+}
+
+/* GEqualFunc counterpart of str_ascii_case_hash(): %TRUE when the two
+ * NUL-terminated strings match under g_ascii_strcasecmp().
+ */
+static gboolean
+str_ascii_case_equal (gconstpointer v1,
+                      gconstpointer v2)
+{
+  const gchar *lhs = v1;
+  const gchar *rhs = v2;
+
+  if (g_ascii_strcasecmp (lhs, rhs) == 0)
+    return TRUE;
+
+  return FALSE;
+}
+
+/**
+ * g_uri_parse_params:
+ * @params: a `%`-encoded string containing "attribute=value"
+ *   parameters
+ * @length: the length of @params, or -1 if it is NUL-terminated
+ * @separator: the separator character between parameters.
+ *   (usually ';', but sometimes '&')
+ * @case_insensitive: whether parameter names are case insensitive
+ *
+ * Many URI schemes include one or more attribute/value pairs
+ * as part of the URI value. This method can be used to parse them
+ * into a hash table.
+ *
+ * The @params string is assumed to still be `%`-encoded, but the
+ * returned values will be fully decoded. (Thus it is possible that
+ * the returned values may contain '=' or @separator, if the value was
+ * encoded in the input.) Invalid `%`-encoding is treated as with the
+ * non-%G_URI_PARSE_STRICT rules for g_uri_parse(). (However, if @params
+ * is the path or query string from a #GUri that was parsed with
+ * %G_URI_PARSE_STRICT and %G_URI_ENCODED, then you already know that
+ * it does not contain any invalid encoding.)
+ *
+ * @params may only be %NULL if @length is 0, and @length must not be
+ * less than -1.
+ *
+ * Return value: (transfer full) (element-type utf8 utf8): a hash
+ *   table of attribute/value pairs. Both names and values will be
+ *   fully-decoded. If @params cannot be parsed (eg, it contains two
+ *   @separator characters in a row, or a parameter with no '='),
+ *   then %NULL is returned.
+ *
+ * Since: 2.44
+ */
+GHashTable *
+g_uri_parse_params (const gchar *params,
+                    gssize       length,
+                    gchar        separator,
+                    gboolean     case_insensitive)
+{
+  GHashTable *hash;
+  const gchar *end, *attr, *attr_end, *value, *value_end;
+  gchar *decoded_attr, *decoded_value;
+
+  /* Checked before the table is allocated so a rejected call does
+   * not leak it.
+   */
+  g_return_val_if_fail (length == 0 || params != NULL, NULL);
+  g_return_val_if_fail (length >= -1, NULL);
+
+  if (case_insensitive)
+    {
+      hash = g_hash_table_new_full (str_ascii_case_hash,
+                                    str_ascii_case_equal,
+                                    g_free, g_free);
+    }
+  else
+    {
+      hash = g_hash_table_new_full (g_str_hash, g_str_equal,
+                                    g_free, g_free);
+    }
+
+  /* Nothing to parse; also avoids pointer arithmetic on a NULL
+   * @params.
+   */
+  if (length == 0)
+    return hash;
+
+  if (length == -1)
+    end = params + strlen (params);
+  else
+    end = params + length;
+
+  attr = params;
+  while (attr < end)
+    {
+      /* One parameter runs up to the next separator, or to the end. */
+      value_end = memchr (attr, separator, end - attr);
+      if (!value_end)
+        value_end = end;
+
+      /* A parameter without '=' (including an empty one, as produced
+       * by two separators in a row) makes the whole string invalid.
+       */
+      attr_end = memchr (attr, '=', value_end - attr);
+      if (!attr_end)
+        {
+          g_hash_table_destroy (hash);
+          return NULL;
+        }
+      if (!uri_decode (&decoded_attr, attr, attr_end - attr,
+                       0, G_URI_ERROR_MISC, NULL))
+        {
+          g_hash_table_destroy (hash);
+          return NULL;
+        }
+
+      value = attr_end + 1;
+      if (!uri_decode (&decoded_value, value, value_end - value,
+                       0, G_URI_ERROR_MISC, NULL))
+        {
+          g_free (decoded_attr);
+          g_hash_table_destroy (hash);
+          return NULL;
+        }
+
+      /* The table takes ownership of both decoded strings. */
+      g_hash_table_insert (hash, decoded_attr, decoded_value);
+      attr = value_end + 1;
+    }
+
+  return hash;
+}
+
+/**
+ * g_uri_get_scheme:
+ * @uri: a #GUri
+ *
+ * Gets @uri's scheme. Note that this will always be all-lowercase,
+ * regardless of the string or strings that @uri was created from.
+ *
+ * Return value: @uri's scheme.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_scheme (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->scheme;
+}
+
+/**
+ * g_uri_get_userinfo:
+ * @uri: a #GUri
+ *
+ * Gets @uri's userinfo, which may contain `%`-encoding, depending on
+ * the flags with which @uri was created.
+ *
+ * Return value: @uri's userinfo.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_userinfo (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->userinfo;
+}
+
+/**
+ * g_uri_get_user:
+ * @uri: a #GUri
+ *
+ * Gets the "username" component of @uri's userinfo, which may contain
+ * `%`-encoding, depending on the flags with which @uri was created.
+ * If @uri was not created with %G_URI_HAS_PASSWORD or
+ * %G_URI_HAS_AUTH_PARAMS, this is the same as g_uri_get_userinfo().
+ *
+ * Return value: @uri's user.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_user (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  /* NOTE(review): g_uri_build() never assigns the user field, so for
+   * a #GUri created that way this is NULL even when a userinfo was
+   * given - confirm whether that is intended.
+   */
+  return uri->user;
+}
+
+/**
+ * g_uri_get_password:
+ * @uri: a #GUri
+ *
+ * Gets @uri's password, which may contain `%`-encoding, depending on
+ * the flags with which @uri was created. (If @uri was not created
+ * with %G_URI_HAS_PASSWORD then this will be %NULL.)
+ *
+ * Return value: @uri's password.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_password (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->password;
+}
+
+/**
+ * g_uri_get_auth_params:
+ * @uri: a #GUri
+ *
+ * Gets @uri's authentication parameters, which may contain
+ * `%`-encoding, depending on the flags with which @uri was created.
+ * (If @uri was not created with %G_URI_HAS_AUTH_PARAMS then this will
+ * be %NULL.)
+ *
+ * Depending on the URI scheme, g_uri_parse_params() may be useful for
+ * further parsing this information.
+ *
+ * Return value: @uri's authentication parameters.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_auth_params (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->auth_params;
+}
+
+/**
+ * g_uri_get_host:
+ * @uri: a #GUri
+ *
+ * Gets @uri's host. This will never have `%`-encoded characters,
+ * unless it is non-UTF-8 (which can only be the case if @uri was
+ * created with %G_URI_NON_DNS).
+ *
+ * If @uri contained an IPv6 address literal, this value will be just
+ * that address, without the brackets around it that are necessary in
+ * the string form of the URI. Note that in this case there may also
+ * be a scope ID attached to the address. Eg, "`fe80::1234%``em1`" (or
+ * "`fe80::1234%``25em1`" if the string is still encoded).
+ *
+ * Return value: @uri's host.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_host (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->host;
+}
+
+/**
+ * g_uri_get_port:
+ * @uri: a #GUri
+ *
+ * Gets @uri's port.
+ *
+ * Return value: @uri's port, or -1 if no port was specified.
+ *
+ * Since: 2.44
+ */
+gint
+g_uri_get_port (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL; -1
+   * is the "no port" value.
+   */
+  g_return_val_if_fail (uri != NULL, -1);
+
+  return uri->port;
+}
+
+/**
+ * g_uri_get_path:
+ * @uri: a #GUri
+ *
+ * Gets @uri's path, which may contain `%`-encoding, depending on the
+ * flags with which @uri was created.
+ *
+ * Return value: @uri's path.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_path (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->path;
+}
+
+/**
+ * g_uri_get_query:
+ * @uri: a #GUri
+ *
+ * Gets @uri's query, which may contain `%`-encoding, depending on the
+ * flags with which @uri was created.
+ *
+ * For queries consisting of a series of "`name=value`" parameters,
+ * g_uri_parse_params() may be useful.
+ *
+ * Return value: @uri's query.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_query (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->query;
+}
+
+/**
+ * g_uri_get_fragment:
+ * @uri: a #GUri
+ *
+ * Gets @uri's fragment, which may contain `%`-encoding, depending on
+ * the flags with which @uri was created.
+ *
+ * Return value: @uri's fragment.
+ *
+ * Since: 2.44
+ */
+const gchar *
+g_uri_get_fragment (GUri *uri)
+{
+  /* Fail with a critical warning rather than dereferencing NULL. */
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  return uri->fragment;
+}
+
+
+/**
+ * g_uri_unescape_segment:
+ * @escaped_string: (allow-none): A string, may be %NULL
+ * @escaped_string_end: (allow-none): Pointer to end of @escaped_string,
+ * may be %NULL
+ * @illegal_characters: (allow-none): An optional string of illegal
+ * characters not to be allowed, may be %NULL
+ *
+ * Unescapes a segment of an escaped string: the bytes from
+ * @escaped_string up to (but not including) @escaped_string_end, or
+ * up to the terminating NUL if @escaped_string_end is %NULL.
+ *
+ * If any of the characters in @illegal_characters or the NUL
+ * character appears as an escaped character in @escaped_string, then
+ * that is an error and %NULL will be returned. This is useful if you
+ * want to avoid for instance having a slash being expanded in an
+ * escaped path element, which might confuse pathname handling.
+ *
+ * Returns: an unescaped version of @escaped_string or %NULL on error.
+ * The returned string should be freed when no longer needed. As a
+ * special case if %NULL is given for @escaped_string, this function
+ * will return %NULL.
+ *
+ * Since: 2.16
+ **/
+gchar *
+g_uri_unescape_segment (const gchar *escaped_string,
+ const gchar *escaped_string_end,
+ const gchar *illegal_characters)
+{
+ gchar *unescaped, *p;
+ gsize length;
+
+ if (!escaped_string)
+ return NULL;
+
+ /* Work out how many bytes of input to decode. */
+ if (escaped_string_end)
+ length = escaped_string_end - escaped_string;
+ else
+ length = strlen (escaped_string);
+
+ /* Decode with strict parsing. No GError is requested, so the error
+ * code argument (0) is only a placeholder.
+ */
+ if (!uri_decode (&unescaped,
+ escaped_string, length,
+ G_URI_PARSE_STRICT,
+ 0, NULL))
+ return NULL;
+
+ /* NOTE(review): this scan runs over the already-decoded string, so it
+ * rejects an illegal character whether it was `%`-escaped or written
+ * literally in the input, which is broader than the "appears as an
+ * escaped character" wording above - confirm which is intended.
+ * It also stops at the first NUL byte, so rejection of an escaped
+ * NUL presumably has to come from uri_decode(); verify there.
+ */
+ if (illegal_characters)
+ {
+ for (p = unescaped; *p; p++)
+ {
+ if (strchr (illegal_characters, *p))
+ {
+ g_free (unescaped);
+ return NULL;
+ }
+ }
+ }
+
+ return unescaped;
+}
+
+/**
+ * g_uri_unescape_string:
+ * @escaped_string: an escaped string to be unescaped.
+ * @illegal_characters: (allow-none): a string of illegal characters
+ *   not to be allowed, or %NULL.
+ *
+ * Unescapes a whole escaped string.
+ *
+ * This is g_uri_unescape_segment() applied to the entire
+ * NUL-terminated @escaped_string.
+ *
+ * If any of the characters in @illegal_characters or the NUL
+ * character appears as an escaped character in @escaped_string, then
+ * that is an error and %NULL will be returned. This is useful if you
+ * want to avoid for instance having a slash being expanded in an
+ * escaped path element, which might confuse pathname handling.
+ *
+ * Returns: an unescaped version of @escaped_string, or %NULL on error
+ * or if @escaped_string is %NULL. The returned string should be freed
+ * when no longer needed.
+ *
+ * Since: 2.16
+ **/
+gchar *
+g_uri_unescape_string (const gchar *escaped_string,
+ const gchar *illegal_characters)
+{
+ return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
+}
+
+/**
+ * g_string_append_uri_escaped:
+ * @string: a #GString
+ * @unescaped: a string
+ * @reserved_chars_allowed: (allow-none): a string of reserved
+ *   characters allowed to be used, or %NULL
+ * @allow_utf8: set %TRUE if the escaped string may include UTF8 characters
+ *
+ * Appends @unescaped to @string, escaping any characters that
+ * are reserved in URIs using URI-style escape sequences.
+ *
+ * Returns: @string, or %NULL (after a critical warning) if @string or
+ *   @unescaped is %NULL
+ *
+ * Since: 2.16
+ */
+GString *
+g_string_append_uri_escaped (GString     *string,
+                             const gchar *unescaped,
+                             const gchar *reserved_chars_allowed,
+                             gboolean     allow_utf8)
+{
+  /* Both are required: @unescaped is handed to strlen() below, which
+   * has undefined behaviour on NULL. */
+  g_return_val_if_fail (string != NULL, NULL);
+  g_return_val_if_fail (unescaped != NULL, NULL);
+
+  uri_encoder (string, (const guchar *) unescaped, strlen (unescaped),
+               reserved_chars_allowed, allow_utf8);
+  return string;
+}
+
+/**
+ * g_uri_escape_string:
+ * @unescaped: the unescaped input string.
+ * @reserved_chars_allowed: (allow-none): a string of reserved
+ *   characters that are allowed to be used, or %NULL.
+ * @allow_utf8: %TRUE if the result can include UTF-8 characters.
+ *
+ * Escapes a string for use in a URI.
+ *
+ * By default every character outside the "unreserved" set (ASCII
+ * letters and digits, plus dash, dot, underscore and tilde) is
+ * escaped. Characters listed in @reserved_chars_allowed are exempt
+ * and are left as they are, which is what you want for the
+ * "reserved" characters of the URI specification that may legally
+ * appear unescaped in particular portions of a URI.
+ *
+ * Returns: an escaped version of @unescaped. The returned string
+ * should be freed when no longer needed.
+ *
+ * Since: 2.16
+ **/
+gchar *
+g_uri_escape_string (const gchar *unescaped,
+                     const gchar *reserved_chars_allowed,
+                     gboolean     allow_utf8)
+{
+  GString *escaped;
+  gsize initial_size;
+
+  g_return_val_if_fail (unescaped != NULL, NULL);
+
+  /* Start with 25% headroom over the input length; the GString grows
+   * further on demand. */
+  initial_size = strlen (unescaped) * 1.25;
+  escaped = g_string_sized_new (initial_size);
+
+  g_string_append_uri_escaped (escaped, unescaped,
+                               reserved_chars_allowed, allow_utf8);
+
+  /* FALSE: hand the character data to the caller, free only the wrapper. */
+  return g_string_free (escaped, FALSE);
+}
+
+/**
+ * g_uri_unescape_bytes:
+ * @escaped_string: A URI-escaped string
+ * @length: the length of @escaped_string to unescape, or -1 if it
+ *   is NUL-terminated.
+ * @illegal_characters: (allow-none): a string of illegal characters
+ *   not to be allowed, or %NULL.
+ *
+ * Unescapes a segment of an escaped string as binary data.
+ *
+ * Note that in contrast to g_uri_unescape_string(), this does allow
+ * `NUL` bytes to appear in the output.
+ *
+ * If any of the characters in @illegal_characters appears as an
+ * escaped character in @escaped_string, then that is an error and
+ * %NULL will be returned. Literal (unescaped) occurrences are not an
+ * error.
+ *
+ * Returns: (transfer full): an unescaped version of @escaped_string
+ * or %NULL on error. The returned #GBytes should be unreffed when no
+ * longer needed.
+ *
+ * Since: 2.44
+ **/
+GBytes *
+g_uri_unescape_bytes (const gchar *escaped_string,
+                      gssize       length,
+                      const gchar *illegal_characters)
+{
+  gchar *buf;
+  gssize unescaped_length;
+
+  g_return_val_if_fail (escaped_string != NULL, NULL);
+
+  if (length == -1)
+    length = strlen (escaped_string);
+
+  if (illegal_characters != NULL)
+    {
+      const gchar *end = escaped_string + length;
+      const gchar *in;
+
+      /* Inspect the escapes before decoding: afterwards an escaped
+       * character can no longer be told apart from a literal one. */
+      for (in = escaped_string; in < end; in++)
+        {
+          gint high, low, decoded;
+
+          if (*in != '%' || end - in < 3)
+            continue;
+
+          high = g_ascii_xdigit_value (in[1]);
+          low = g_ascii_xdigit_value (in[2]);
+          if (high < 0 || low < 0)
+            continue; /* malformed escape: left for uri_decoder() to judge */
+
+          decoded = (high << 4) | low;
+
+          /* NUL is permitted here, and must be excluded explicitly
+           * because strchr() matches the terminator when given 0. */
+          if (decoded != 0 &&
+              strchr (illegal_characters, (gchar) decoded) != NULL)
+            return NULL;
+
+          in += 2; /* skip the two hex digits just examined */
+        }
+    }
+
+  unescaped_length = uri_decoder (&buf,
+                                  escaped_string, length,
+                                  FALSE, G_URI_PARSE_STRICT,
+                                  0, NULL);
+  if (unescaped_length == -1)
+    return NULL;
+  else
+    return g_bytes_new_take (buf, unescaped_length);
+}
+
+/**
+ * g_uri_escape_bytes:
+ * @unescaped: (array length=length): the unescaped input data.
+ * @length: the length of @unescaped
+ * @reserved_chars_allowed: (allow-none): a string of reserved
+ *   characters that are allowed to be used, or %NULL.
+ *
+ * Escapes arbitrary data for use in a URI.
+ *
+ * By default every byte outside the "unreserved" set (ASCII letters
+ * and digits, plus dash, dot, underscore and tilde) is escaped.
+ * Characters listed in @reserved_chars_allowed are exempt and are
+ * left as they are, which is what you want for the "reserved"
+ * characters of the URI specification that may legally appear
+ * unescaped in particular portions of a URI.
+ *
+ * Though technically incorrect, this will also allow escaping "0"
+ * bytes as "`%``00`".
+ *
+ * Returns: an escaped version of @unescaped. The returned string
+ * should be freed when no longer needed.
+ *
+ * Since: 2.44
+ */
+gchar *
+g_uri_escape_bytes (const guchar *unescaped,
+                    gsize         length,
+                    const gchar  *reserved_chars_allowed)
+{
+  GString *escaped;
+  gsize initial_size;
+
+  g_return_val_if_fail (unescaped != NULL, NULL);
+
+  /* Start with 25% headroom over the input length; the GString grows
+   * further on demand. */
+  initial_size = length * 1.25;
+  escaped = g_string_sized_new (initial_size);
+
+  /* The final FALSE is the encoder's allow_utf8 argument. */
+  uri_encoder (escaped, unescaped, length, reserved_chars_allowed, FALSE);
+
+  /* FALSE: hand the character data to the caller, free only the wrapper. */
+  return g_string_free (escaped, FALSE);
+}
+
+/*
+ * g_uri_scheme_length:
+ * @uri: a NUL-terminated string
+ *
+ * Measures the scheme at the start of @uri: one ASCII letter followed
+ * by any run of ASCII letters, digits, '.', '+' or '-', which must be
+ * terminated by ':'.
+ *
+ * Returns: the number of characters before the ':', or -1 if @uri
+ *   does not start with such a scheme.
+ */
+static gint
+g_uri_scheme_length (const gchar *uri)
+{
+  const gchar *end;
+
+  if (!g_ascii_isalpha (uri[0]))
+    return -1;
+
+  /* Advance past every further scheme character; the terminating NUL
+   * is not one, so the scan cannot run off the string. */
+  for (end = uri + 1; ; end++)
+    {
+      if (g_ascii_isalnum (*end))
+        continue;
+      if (*end == '.' || *end == '+' || *end == '-')
+        continue;
+      break;
+    }
+
+  if (*end != ':')
+    return -1;
+
+  return end - uri;
+}
+
+/**
+ * g_uri_parse_scheme:
+ * @uri: a valid URI.
+ *
+ * Extracts the scheme portion of a URI string. RFC 3986 decodes the
+ * scheme as:
+ * |[
+ * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ * ]|
+ * Common schemes include "file", "http", "svn+ssh", etc.
+ *
+ * Returns: The "scheme" component of the URI, or %NULL on error.
+ * The returned string should be freed when no longer needed.
+ *
+ * Since: 2.16
+ *
+ * Deprecated: Use g_uri_peek_scheme().
+ **/
+gchar *
+g_uri_parse_scheme (const gchar *uri)
+{
+  gint scheme_len;
+
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  scheme_len = g_uri_scheme_length (uri);
+  if (scheme_len == -1)
+    return NULL;
+
+  /* Copy just the scheme, without the ':' that follows it. */
+  return g_strndup (uri, scheme_len);
+}
+
+/**
+ * g_uri_peek_scheme:
+ * @uri: a valid URI.
+ *
+ * Extracts the scheme portion of a URI string. RFC 3986 decodes the
+ * scheme as:
+ * |[
+ * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ * ]|
+ * Common schemes include "file", "http", "svn+ssh", etc.
+ *
+ * Returns: The "scheme" component of the URI, or %NULL on error. The
+ * returned string is normalized to all-lowercase, and interned via
+ * g_intern_string(), so it does not need to be freed.
+ *
+ * Since: 2.44
+ **/
+const gchar *
+g_uri_peek_scheme (const gchar *uri)
+{
+  const gchar *interned = NULL;
+  gint scheme_len;
+
+  g_return_val_if_fail (uri != NULL, NULL);
+
+  scheme_len = g_uri_scheme_length (uri);
+  if (scheme_len != -1)
+    {
+      /* Lowercase a temporary copy, intern it, then drop the copy:
+       * the interned string is what the caller gets back. */
+      gchar *lowered = g_ascii_strdown (uri, scheme_len);
+
+      interned = g_intern_string (lowered);
+      g_free (lowered);
+    }
+
+  return interned;
+}
+
+/* Defines g_uri_error_quark(), the quark that the G_URI_ERROR macro
+ * in guri.h expands to. */
+G_DEFINE_QUARK (g-uri-quark, g_uri_error)
diff --git a/glib/guri.h b/glib/guri.h
new file mode 100644
index 000000000..85d9eb991
--- /dev/null
+++ b/glib/guri.h
@@ -0,0 +1,286 @@
+/* GLIB - Library of useful routines for C programming
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010-2013 Red Hat, Inc.
+ */
+
+#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
+#error "Only <glib.h> can be included directly."
+#endif
+
+#ifndef __G_URI_H__
+#define __G_URI_H__
+
+#include <glib/gtypes.h>
+
+G_BEGIN_DECLS
+
+typedef struct _GUri GUri;
+
+GLIB_AVAILABLE_IN_2_44
+GUri * g_uri_ref (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+void g_uri_unref (GUri *uri);
+
+/*
+ * GUriFlags:
+ *
+ * Bit flags taken by the URI parsing, splitting, joining and building
+ * functions declared in this header. Every value is a distinct bit,
+ * so flags may be OR-ed together.
+ *
+ * NOTE(review): the per-flag semantics are not spelled out in this
+ * header; confirm them against the implementation before relying on
+ * them.
+ */
+typedef enum {
+  G_URI_PARSE_STRICT    = 1 << 0,
+  G_URI_HAS_PASSWORD    = 1 << 1,
+  G_URI_HAS_AUTH_PARAMS = 1 << 2,
+  G_URI_ENCODED         = 1 << 3,
+  G_URI_NON_DNS         = 1 << 4
+} GUriFlags;
+
+GLIB_AVAILABLE_IN_2_44
+gboolean g_uri_split (const gchar *uri_string,
+ GUriFlags flags,
+ gchar **scheme,
+ gchar **userinfo,
+ gchar **host,
+ gint *port,
+ gchar **path,
+ gchar **query,
+ gchar **fragment,
+ GError **error);
+GLIB_AVAILABLE_IN_2_44
+gboolean g_uri_split_with_user (const gchar *uri_string,
+ GUriFlags flags,
+ gchar **scheme,
+ gchar **user,
+ gchar **password,
+ gchar **auth_params,
+ gchar **host,
+ gint *port,
+ gchar **path,
+ gchar **query,
+ gchar **fragment,
+ GError **error);
+GLIB_AVAILABLE_IN_2_44
+gboolean g_uri_split_network (const gchar *uri_string,
+ GUriFlags flags,
+ gchar **scheme,
+ gchar **host,
+ gint *port,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_44
+gboolean g_uri_is_valid (const gchar *uri_string,
+ GUriFlags flags,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_44
+gchar * g_uri_join (GUriFlags flags,
+ const gchar *scheme,
+ const gchar *userinfo,
+ const gchar *host,
+ gint port,
+ const gchar *path,
+ const gchar *query,
+ const gchar *fragment);
+GLIB_AVAILABLE_IN_2_44
+gchar * g_uri_join_with_user (GUriFlags flags,
+ const gchar *scheme,
+ const gchar *user,
+ const gchar *password,
+ const gchar *auth_params,
+ const gchar *host,
+ gint port,
+ const gchar *path,
+ const gchar *query,
+ const gchar *fragment);
+
+GLIB_AVAILABLE_IN_2_44
+GUri * g_uri_parse (const gchar *uri_string,
+ GUriFlags flags,
+ GError **error);
+GLIB_AVAILABLE_IN_2_44
+GUri * g_uri_parse_relative (GUri *base_uri,
+ const gchar *uri_string,
+ GUriFlags flags,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_44
+gchar * g_uri_resolve_relative (const gchar *base_uri_string,
+ const gchar *uri_string,
+ GUriFlags flags,
+ GError **error);
+
+GLIB_AVAILABLE_IN_2_44
+GUri * g_uri_build (GUriFlags flags,
+ const gchar *scheme,
+ const gchar *userinfo,
+ const gchar *host,
+ gint port,
+ const gchar *path,
+ const gchar *query,
+ const gchar *fragment);
+GLIB_AVAILABLE_IN_2_44
+GUri * g_uri_build_with_user (GUriFlags flags,
+ const gchar *scheme,
+ const gchar *user,
+ const gchar *password,
+ const gchar *auth_params,
+ const gchar *host,
+ gint port,
+ const gchar *path,
+ const gchar *query,
+ const gchar *fragment);
+
+/*
+ * GUriHideFlags:
+ *
+ * Bit flags taken by g_uri_to_string_partial(). Every value is a
+ * distinct bit, so flags may be OR-ed together.
+ *
+ * NOTE(review): presumably each flag omits the named component from
+ * the generated string; confirm against the implementation.
+ */
+typedef enum {
+  G_URI_HIDE_USERINFO    = 1 << 0,
+  G_URI_HIDE_PASSWORD    = 1 << 1,
+  G_URI_HIDE_AUTH_PARAMS = 1 << 2,
+  G_URI_HIDE_FRAGMENT    = 1 << 3
+} GUriHideFlags;
+
+GLIB_AVAILABLE_IN_2_44
+char * g_uri_to_string (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+char * g_uri_to_string_partial (GUri *uri,
+ GUriHideFlags flags);
+
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_scheme (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_userinfo (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_user (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_password (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_auth_params (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_host (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+gint g_uri_get_port (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_path (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_query (GUri *uri);
+GLIB_AVAILABLE_IN_2_44
+const gchar *g_uri_get_fragment (GUri *uri);
+
+GLIB_AVAILABLE_IN_2_44
+GHashTable * g_uri_parse_params (const gchar *params,
+ gssize length,
+ gchar separator,
+ gboolean case_insensitive);
+
+/**
+ * G_URI_ERROR:
+ *
+ * Error domain for URI methods. Errors in this domain will be from
+ * the #GUriError enumeration. See #GError for information on error
+ * domains.
+ */
+#define G_URI_ERROR (g_uri_error_quark ())
+GLIB_AVAILABLE_IN_2_44
+GQuark g_uri_error_quark (void);
+
+/**
+ * GUriError:
+ * @G_URI_ERROR_MISC: miscellaneous error
+ * @G_URI_ERROR_BAD_SCHEME: the scheme of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_USER: the user/userinfo of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_PASSWORD: the password of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_AUTH_PARAMS: the authentication parameters of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_HOST: the host of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_PORT: the port of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_PATH: the path of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_QUERY: the query of a URI could not be parsed.
+ * @G_URI_ERROR_BAD_FRAGMENT: the fragment of a URI could not be parsed.
+ *
+ * Error codes returned by #GUri methods.
+ */
+typedef enum
+{
+ G_URI_ERROR_MISC,
+ G_URI_ERROR_BAD_SCHEME,
+ G_URI_ERROR_BAD_USER,
+ G_URI_ERROR_BAD_PASSWORD,
+ G_URI_ERROR_BAD_AUTH_PARAMS,
+ G_URI_ERROR_BAD_HOST,
+ G_URI_ERROR_BAD_PORT,
+ G_URI_ERROR_BAD_PATH,
+ G_URI_ERROR_BAD_QUERY,
+ G_URI_ERROR_BAD_FRAGMENT
+} GUriError;
+
+/**
+ * G_URI_RESERVED_CHARS_GENERIC_DELIMITERS:
+ *
+ * Generic delimiters characters as defined in RFC 3986. Includes ":/?#[]@".
+ **/
+#define G_URI_RESERVED_CHARS_GENERIC_DELIMITERS ":/?#[]@"
+
+/**
+ * G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS:
+ *
+ * Subcomponent delimiter characters as defined in RFC 3986. Includes "!$&'()*+,;=".
+ **/
+#define G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS "!$&'()*+,;="
+
+/**
+ * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT:
+ *
+ * Allowed characters in path elements. Includes "!$&'()*+,;=:@".
+ **/
+#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":@"
+
+/**
+ * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH:
+ *
+ * Allowed characters in a path. Includes "!$&'()*+,;=:@/".
+ **/
+#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT "/"
+
+/**
+ * G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO:
+ *
+ * Allowed characters in userinfo as defined in RFC 3986. Includes "!$&'()*+,;=:".
+ **/
+#define G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":"
+
+GLIB_AVAILABLE_IN_ALL
+char * g_uri_unescape_string (const char *escaped_string,
+ const char *illegal_characters);
+GLIB_AVAILABLE_IN_ALL
+char * g_uri_unescape_segment (const char *escaped_string,
+ const char *escaped_string_end,
+ const char *illegal_characters);
+
+GLIB_DEPRECATED_IN_2_44_FOR(g_uri_peek_scheme)
+char * g_uri_parse_scheme (const char *uri);
+GLIB_AVAILABLE_IN_2_44
+const char *g_uri_peek_scheme (const char *uri);
+
+GLIB_AVAILABLE_IN_ALL
+char * g_uri_escape_string (const char *unescaped,
+ const char *reserved_chars_allowed,
+ gboolean allow_utf8);
+
+GLIB_AVAILABLE_IN_2_44
+GBytes * g_uri_unescape_bytes (const char *escaped_string,
+ gssize length,
+ const char *illegal_characters);
+GLIB_AVAILABLE_IN_2_44
+char * g_uri_escape_bytes (const guchar *unescaped,
+ gsize length,
+ const char *reserved_chars_allowed);
+
+G_END_DECLS
+
+#endif /* __G_URI_H__ */
diff --git a/glib/gurifuncs.c b/glib/gurifuncs.c
deleted file mode 100644
index 44cf82a07..000000000
--- a/glib/gurifuncs.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/* GIO - GLib Input, Output and Streaming Library
- *
- * Copyright (C) 2006-2007 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General
- * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Alexander Larsson <alexl@redhat.com>
- */
-
-#include "config.h"
-
-#include "gurifuncs.h"
-
-#include <glib/gstrfuncs.h>
-#include <glib/gmessages.h>
-#include <glib/gstring.h>
-#include <glib/gmem.h>
-
-#include <string.h>
-
-#include "config.h"
-
-/**
- * SECTION:gurifuncs
- * @title: URI Functions
- * @short_description: manipulating URIs
- *
- * Functions for manipulating Universal Resource Identifiers (URIs) as
- * defined by
- * [RFC 3986](http://www.ietf.org/rfc/rfc3986.txt).
- * It is highly recommended that you have read and
- * understand RFC 3986 for understanding this API.
- */
-
-static int
-unescape_character (const char *scanner)
-{
- int first_digit;
- int second_digit;
-
- first_digit = g_ascii_xdigit_value (*scanner++);
- if (first_digit < 0)
- return -1;
-
- second_digit = g_ascii_xdigit_value (*scanner++);
- if (second_digit < 0)
- return -1;
-
- return (first_digit << 4) | second_digit;
-}
-
-/**
- * g_uri_unescape_segment:
- * @escaped_string: (allow-none): A string, may be %NULL
- * @escaped_string_end: (allow-none): Pointer to end of @escaped_string, may be %NULL
- * @illegal_characters: (allow-none): An optional string of illegal characters not to be allowed, may be %NULL
- *
- * Unescapes a segment of an escaped string.
- *
- * If any of the characters in @illegal_characters or the character zero appears
- * as an escaped character in @escaped_string then that is an error and %NULL
- * will be returned. This is useful it you want to avoid for instance having a
- * slash being expanded in an escaped path element, which might confuse pathname
- * handling.
- *
- * Returns: an unescaped version of @escaped_string or %NULL on error.
- * The returned string should be freed when no longer needed. As a
- * special case if %NULL is given for @escaped_string, this function
- * will return %NULL.
- *
- * Since: 2.16
- **/
-char *
-g_uri_unescape_segment (const char *escaped_string,
- const char *escaped_string_end,
- const char *illegal_characters)
-{
- const char *in;
- char *out, *result;
- gint character;
-
- if (escaped_string == NULL)
- return NULL;
-
- if (escaped_string_end == NULL)
- escaped_string_end = escaped_string + strlen (escaped_string);
-
- result = g_malloc (escaped_string_end - escaped_string + 1);
-
- out = result;
- for (in = escaped_string; in < escaped_string_end; in++)
- {
- character = *in;
-
- if (*in == '%')
- {
- in++;
-
- if (escaped_string_end - in < 2)
- {
- /* Invalid escaped char (to short) */
- g_free (result);
- return NULL;
- }
-
- character = unescape_character (in);
-
- /* Check for an illegal character. We consider '\0' illegal here. */
- if (character <= 0 ||
- (illegal_characters != NULL &&
- strchr (illegal_characters, (char)character) != NULL))
- {
- g_free (result);
- return NULL;
- }
-
- in++; /* The other char will be eaten in the loop header */
- }
- *out++ = (char)character;
- }
-
- *out = '\0';
-
- return result;
-}
-
-/**
- * g_uri_unescape_string:
- * @escaped_string: an escaped string to be unescaped.
- * @illegal_characters: (allow-none): a string of illegal characters not to be
- * allowed, or %NULL.
- *
- * Unescapes a whole escaped string.
- *
- * If any of the characters in @illegal_characters or the character zero appears
- * as an escaped character in @escaped_string then that is an error and %NULL
- * will be returned. This is useful it you want to avoid for instance having a
- * slash being expanded in an escaped path element, which might confuse pathname
- * handling.
- *
- * Returns: an unescaped version of @escaped_string. The returned string
- * should be freed when no longer needed.
- *
- * Since: 2.16
- **/
-char *
-g_uri_unescape_string (const char *escaped_string,
- const char *illegal_characters)
-{
- return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
-}
-
-/**
- * g_uri_parse_scheme:
- * @uri: a valid URI.
- *
- * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
- * |[
- * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- * ]|
- * Common schemes include "file", "http", "svn+ssh", etc.
- *
- * Returns: The "Scheme" component of the URI, or %NULL on error.
- * The returned string should be freed when no longer needed.
- *
- * Since: 2.16
- **/
-char *
-g_uri_parse_scheme (const char *uri)
-{
- const char *p;
- char c;
-
- g_return_val_if_fail (uri != NULL, NULL);
-
- /* From RFC 3986 Decodes:
- * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- */
-
- p = uri;
-
- /* Decode scheme:
- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- */
-
- if (!g_ascii_isalpha (*p))
- return NULL;
-
- while (1)
- {
- c = *p++;
-
- if (c == ':')
- break;
-
- if (!(g_ascii_isalnum(c) ||
- c == '+' ||
- c == '-' ||
- c == '.'))
- return NULL;
- }
-
- return g_strndup (uri, p - uri - 1);
-}
-
-/**
- * g_uri_escape_string:
- * @unescaped: the unescaped input string.
- * @reserved_chars_allowed: (allow-none): a string of reserved characters that
- * are allowed to be used, or %NULL.
- * @allow_utf8: %TRUE if the result can include UTF-8 characters.
- *
- * Escapes a string for use in a URI.
- *
- * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
- * characters plus dash, dot, underscore and tilde) are escaped.
- * But if you specify characters in @reserved_chars_allowed they are not
- * escaped. This is useful for the "reserved" characters in the URI
- * specification, since those are allowed unescaped in some portions of
- * a URI.
- *
- * Returns: an escaped version of @unescaped. The returned string should be
- * freed when no longer needed.
- *
- * Since: 2.16
- **/
-char *
-g_uri_escape_string (const char *unescaped,
- const char *reserved_chars_allowed,
- gboolean allow_utf8)
-{
- GString *s;
-
- g_return_val_if_fail (unescaped != NULL, NULL);
-
- s = g_string_sized_new (strlen (unescaped) + 10);
-
- g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
-
- return g_string_free (s, FALSE);
-}
diff --git a/glib/gurifuncs.h b/glib/gurifuncs.h
deleted file mode 100644
index e36ac0deb..000000000
--- a/glib/gurifuncs.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* GIO - GLib Input, Output and Streaming Library
- *
- * Copyright (C) 2006-2007 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General
- * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- * Author: Alexander Larsson <alexl@redhat.com>
- */
-
-#ifndef __G_URI_FUNCS_H__
-#define __G_URI_FUNCS_H__
-
-#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
-#error "Only <glib.h> can be included directly."
-#endif
-
-#include <glib/gtypes.h>
-
-G_BEGIN_DECLS
-
-/**
- * G_URI_RESERVED_CHARS_GENERIC_DELIMITERS:
- *
- * Generic delimiters characters as defined in RFC 3986. Includes ":/?#[]@".
- **/
-#define G_URI_RESERVED_CHARS_GENERIC_DELIMITERS ":/?#[]@"
-
-/**
- * G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS:
- *
- * Subcomponent delimiter characters as defined in RFC 3986. Includes "!$&'()*+,;=".
- **/
-#define G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS "!$&'()*+,;="
-
-/**
- * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT:
- *
- * Allowed characters in path elements. Includes "!$&'()*+,;=:@".
- **/
-#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":@"
-
-/**
- * G_URI_RESERVED_CHARS_ALLOWED_IN_PATH:
- *
- * Allowed characters in a path. Includes "!$&'()*+,;=:@/".
- **/
-#define G_URI_RESERVED_CHARS_ALLOWED_IN_PATH G_URI_RESERVED_CHARS_ALLOWED_IN_PATH_ELEMENT "/"
-
-/**
- * G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO:
- *
- * Allowed characters in userinfo as defined in RFC 3986. Includes "!$&'()*+,;=:".
- **/
-#define G_URI_RESERVED_CHARS_ALLOWED_IN_USERINFO G_URI_RESERVED_CHARS_SUBCOMPONENT_DELIMITERS ":"
-
-GLIB_AVAILABLE_IN_ALL
-char * g_uri_unescape_string (const char *escaped_string,
- const char *illegal_characters);
-GLIB_AVAILABLE_IN_ALL
-char * g_uri_unescape_segment (const char *escaped_string,
- const char *escaped_string_end,
- const char *illegal_characters);
-GLIB_AVAILABLE_IN_ALL
-char * g_uri_parse_scheme (const char *uri);
-GLIB_AVAILABLE_IN_ALL
-char * g_uri_escape_string (const char *unescaped,
- const char *reserved_chars_allowed,
- gboolean allow_utf8);
-
-G_END_DECLS
-
-#endif /* __G_URI_FUNCS_H__ */
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 83a2f10e6..1d034d08f 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -33,10 +33,10 @@ typedef struct
char *hostname;
char *expected_result;
GConvertError expected_error; /* If failed */
-} ToUriTest;
+} FileToUriTest;
-ToUriTest
-to_uri_tests[] = {
+FileToUriTest
+file_to_uri_tests[] = {
{ "/etc", NULL, "file:///etc"},
{ "/etc", "", "file:///etc"},
{ "/etc", "otherhost", "file://otherhost/etc"},
@@ -101,10 +101,10 @@ typedef struct
char *expected_filename;
char *expected_hostname;
GConvertError expected_error; /* If failed */
-} FromUriTest;
+} FileFromUriTest;
-FromUriTest
-from_uri_tests[] = {
+FileFromUriTest
+file_from_uri_tests[] = {
{ "file:///etc", "/etc"},
{ "file:/etc", "/etc"},
#ifdef G_OS_WIN32
@@ -163,23 +163,23 @@ from_uri_tests[] = {
};
static void
-run_to_uri_tests (void)
+run_file_to_uri_tests (void)
{
int i;
gchar *res;
GError *error;
- for (i = 0; i < G_N_ELEMENTS (to_uri_tests); i++)
+ for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
{
error = NULL;
- res = g_filename_to_uri (to_uri_tests[i].filename,
- to_uri_tests[i].hostname,
+ res = g_filename_to_uri (file_to_uri_tests[i].filename,
+ file_to_uri_tests[i].hostname,
&error);
if (res)
- g_assert_cmpstr (res, ==, to_uri_tests[i].expected_result);
+ g_assert_cmpstr (res, ==, file_to_uri_tests[i].expected_result);
else
- g_assert_error (error, G_CONVERT_ERROR, to_uri_tests[i].expected_error);
+ g_assert_error (error, G_CONVERT_ERROR, file_to_uri_tests[i].expected_error);
g_free (res);
g_clear_error (&error);
@@ -187,25 +187,26 @@ run_to_uri_tests (void)
}
static void
-run_from_uri_tests (void)
+run_file_from_uri_tests (void)
{
int i;
gchar *res;
gchar *hostname;
GError *error;
- for (i = 0; i < G_N_ELEMENTS (from_uri_tests); i++)
+ for (i = 0; i < G_N_ELEMENTS (file_from_uri_tests); i++)
{
error = NULL;
- res = g_filename_from_uri (from_uri_tests[i].uri,
+ res = g_filename_from_uri (file_from_uri_tests[i].uri,
&hostname,
&error);
#ifdef G_OS_WIN32
- if (from_uri_tests[i].expected_filename)
+ if (file_from_uri_tests[i].expected_filename)
{
gchar *p, *slash;
- p = from_uri_tests[i].expected_filename = g_strdup (from_uri_tests[i].expected_filename);
+ p = file_from_uri_tests[i].expected_filename =
+ g_strdup (file_from_uri_tests[i].expected_filename);
while ((slash = strchr (p, '/')) != NULL)
{
*slash = '\\';
@@ -214,10 +215,10 @@ run_from_uri_tests (void)
}
#endif
if (res)
- g_assert_cmpstr (res, ==, from_uri_tests[i].expected_filename);
+ g_assert_cmpstr (res, ==, file_from_uri_tests[i].expected_filename);
else
- g_assert_error (error, G_CONVERT_ERROR, from_uri_tests[i].expected_error);
- g_assert_cmpstr (hostname, ==, from_uri_tests[i].expected_hostname);
+ g_assert_error (error, G_CONVERT_ERROR, file_from_uri_tests[i].expected_error);
+ g_assert_cmpstr (hostname, ==, file_from_uri_tests[i].expected_hostname);
g_free (res);
g_free (hostname);
@@ -266,20 +267,20 @@ safe_strcmp_hostname (const gchar *a, const gchar *b)
}
static void
-run_roundtrip_tests (void)
+run_file_roundtrip_tests (void)
{
int i;
gchar *uri, *hostname, *res;
GError *error;
- for (i = 0; i < G_N_ELEMENTS (to_uri_tests); i++)
+ for (i = 0; i < G_N_ELEMENTS (file_to_uri_tests); i++)
{
- if (to_uri_tests[i].expected_error != 0)
+ if (file_to_uri_tests[i].expected_error != 0)
continue;
error = NULL;
- uri = g_filename_to_uri (to_uri_tests[i].filename,
- to_uri_tests[i].hostname,
+ uri = g_filename_to_uri (file_to_uri_tests[i].filename,
+ file_to_uri_tests[i].hostname,
&error);
g_assert_no_error (error);
@@ -287,8 +288,8 @@ run_roundtrip_tests (void)
res = g_filename_from_uri (uri, &hostname, &error);
g_assert_no_error (error);
- g_assert (safe_strcmp_filename (to_uri_tests[i].filename, res) == 0);
- g_assert (safe_strcmp_hostname (to_uri_tests[i].hostname, hostname) == 0);
+ g_assert (safe_strcmp_filename (file_to_uri_tests[i].filename, res) == 0);
+ g_assert (safe_strcmp_hostname (file_to_uri_tests[i].hostname, hostname) == 0);
g_free (res);
g_free (uri);
g_free (hostname);
@@ -364,15 +365,391 @@ test_uri_escape (void)
+/* Checks scheme extraction through both the deprecated g_uri_parse_scheme()
+ * and the new g_uri_peek_scheme(), for valid and invalid inputs. */
static void
test_uri_scheme (void)
{
+ const gchar *s1, *s2; /* g_uri_peek_scheme() results; not freed by this test */
gchar *s;
+ G_GNUC_BEGIN_IGNORE_DEPRECATIONS;
s = g_uri_parse_scheme ("ftp://ftp.gtk.org");
g_assert_cmpstr (s, ==, "ftp");
g_free (s);
s = g_uri_parse_scheme ("1bad:");
- g_assert (s == NULL);
+ g_assert_cmpstr (s, ==, NULL);
s = g_uri_parse_scheme ("bad");
- g_assert (s == NULL);
+ g_assert_cmpstr (s, ==, NULL);
+ G_GNUC_END_IGNORE_DEPRECATIONS;
+
+ s1 = g_uri_peek_scheme ("ftp://ftp.gtk.org");
+ g_assert_cmpstr (s1, ==, "ftp");
+ s2 = g_uri_peek_scheme ("FTP://ftp.gtk.org");
+ g_assert_cmpstr (s2, ==, "ftp");
+ g_assert_true (s1 == s2); /* both spellings must yield the very same pointer */
+ s1 = g_uri_peek_scheme ("1bad:");
+ g_assert_cmpstr (s1, ==, NULL); /* check the peeked value, not the stale s */
+ s1 = g_uri_peek_scheme ("bad");
+ g_assert_cmpstr (s1, ==, NULL); /* check the peeked value, not the stale s */
+}
+
+typedef struct { /* expected value of each component of a parsed URI */
+ const gchar *scheme;
+ const gchar *userinfo;
+ const gchar *host;
+ gint port; /* the tables in this file use -1 when the URI names no port */
+ const gchar *path;
+ const gchar *query; /* the tables use NULL for "absent" and "" for "present but empty" */
+ const gchar *fragment; /* same NULL vs. "" distinction as query */
+} UriParts;
+
+typedef struct {
+ const gchar *orig; /* URI string handed to g_uri_parse() */
+ const UriParts parts; /* components the parsed URI must report */
+} UriAbsoluteTest;
+
+static const UriAbsoluteTest absolute_tests[] = { /* { URI to parse, { scheme, userinfo, host, port, path, query, fragment } } */
+ { "foo:",
+ { "foo", NULL, NULL, -1, "", NULL, NULL }
+ },
+ { "file:/dev/null",
+ { "file", NULL, NULL, -1, "/dev/null", NULL, NULL }
+ },
+ { "file:///dev/null",
+ { "file", NULL, "", -1, "/dev/null", NULL, NULL }
+ },
+ { "ftp://user@host/path",
+ { "ftp", "user", "host", -1, "/path", NULL, NULL }
+ },
+ { "ftp://user@host:9999/path",
+ { "ftp", "user", "host", 9999, "/path", NULL, NULL }
+ },
+ { "ftp://user:password@host/path",
+ { "ftp", "user:password", "host", -1, "/path", NULL, NULL }
+ },
+ { "ftp://user:password@host:9999/path",
+ { "ftp", "user:password", "host", 9999, "/path", NULL, NULL }
+ },
+ { "ftp://user:password@host",
+ { "ftp", "user:password", "host", -1, "", NULL, NULL }
+ },
+ { "http://us%65r@host",
+ { "http", "user", "host", -1, "", NULL, NULL }
+ },
+ { "http://us%40r@host",
+ { "http", "us@r", "host", -1, "", NULL, NULL }
+ },
+ { "http://us%3ar@host",
+ { "http", "us:r", "host", -1, "", NULL, NULL }
+ },
+ { "http://us%2fr@host",
+ { "http", "us/r", "host", -1, "", NULL, NULL }
+ },
+ { "http://us%3fr@host",
+ { "http", "us?r", "host", -1, "", NULL, NULL }
+ },
+ { "http://host?query",
+ { "http", NULL, "host", -1, "", "query", NULL }
+ },
+ { "http://host/path?query=http%3A%2F%2Fhost%2Fpath%3Fchildparam%3Dchildvalue&param=value",
+ { "http", NULL, "host", -1, "/path", "query=http://host/path?childparam=childvalue&param=value", NULL }
+ },
+ { "http://control-chars/%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%7F",
+ { "http", NULL, "control-chars", -1, "/\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F", NULL, NULL }
+ },
+ { "http://space/%20",
+ { "http", NULL, "space", -1, "/ ", NULL, NULL }
+ },
+ { "http://delims/%3C%3E%23%25%22",
+ { "http", NULL, "delims", -1, "/<>#%\"", NULL, NULL }
+ },
+ { "http://unwise-chars/%7B%7D%7C%5C%5E%5B%5D%60",
+ { "http", NULL, "unwise-chars", -1, "/{}|\\^[]`", NULL, NULL }
+ },
+
+ /* From RFC 2732 */
+ { "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:80/index.html",
+ { "http", NULL, "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", 80, "/index.html", NULL, NULL }
+ },
+ { "http://[1080:0:0:0:8:800:200C:417A]/index.html",
+ { "http", NULL, "1080:0:0:0:8:800:200C:417A", -1, "/index.html", NULL, NULL }
+ },
+ { "http://[3ffe:2a00:100:7031::1]",
+ { "http", NULL, "3ffe:2a00:100:7031::1", -1, "", NULL, NULL }
+ },
+ { "http://[1080::8:800:200C:417A]/foo",
+ { "http", NULL, "1080::8:800:200C:417A", -1, "/foo", NULL, NULL }
+ },
+ { "http://[::192.9.5.5]/ipng",
+ { "http", NULL, "::192.9.5.5", -1, "/ipng", NULL, NULL }
+ },
+ { "http://[::FFFF:129.144.52.38]:80/index.html",
+ { "http", NULL, "::FFFF:129.144.52.38", 80, "/index.html", NULL, NULL }
+ },
+ { "http://[2010:836B:4179::836B:4179]",
+ { "http", NULL, "2010:836B:4179::836B:4179", -1, "", NULL, NULL }
+ },
+
+#if 0 /* disabled: these entries carry a third string, which UriAbsoluteTest has no field for */
+ /* Try to recover certain kinds of invalid URIs */
+ { "http://host/path with spaces",
+ { "http", NULL, "host", -1, "/path%20with%20spaces", NULL, NULL }
+ "http://host/path%20with%20spaces",
+ },
+ { " http://host/path",
+ { "http", NULL, "host", -1, "/path", NULL, NULL }
+ "http://host/path"
+ },
+ { "http://host/path ",
+ { "http", NULL, "host", -1, "/path", NULL, NULL }
+ "http://host/path"
+ },
+ { "http://host ",
+ { "http", NULL, "host", -1, "", NULL, NULL }
+ "http://host"
+ },
+ { "http://host:999 ",
+ { "http", NULL, "host", 999, "", NULL, NULL }
+ "http://host:999"
+ },
+ { "http://host/pa\nth",
+ { "http", NULL, "host", -1, "/path", NULL, NULL }
+ "http://host/path"
+ },
+ { "http:\r\n//host/path",
+ { "http", NULL, "host", -1, "/path", NULL, NULL }
+ "http://host/path"
+ },
+ { "http://\thost/path",
+ { "http", NULL, "host", -1, "/path", NULL, NULL }
+ "http://host/path"
+ },
+#endif
+
+ /* Bug 594405; 0-length is different from not-present */
+ { "http://host/path?",
+ { "http", NULL, "host", -1, "/path", "", NULL }
+ },
+ { "http://host/path#",
+ { "http", NULL, "host", -1, "/path", NULL, "" },
+ },
+
+ /* Bug 590524; ignore bad %-encoding */
+ { "http://host/path%",
+ { "http", NULL, "host", -1, "/path%", NULL, NULL }
+ },
+ { "http://h%ost/path",
+ { "http", NULL, "h%ost", -1, "/path", NULL, NULL }
+ },
+ { "http://host/path%%",
+ { "http", NULL, "host", -1, "/path%%", NULL, NULL }
+ },
+ { "http://host/path%%%",
+ { "http", NULL, "host", -1, "/path%%%", NULL, NULL }
+ },
+ { "http://host/path%/x/",
+ { "http", NULL, "host", -1, "/path%/x/", NULL, NULL }
+ },
+ { "http://host/path%0x/",
+ { "http", NULL, "host", -1, "/path%0x/", NULL, NULL }
+ },
+ { "http://host/path%ax",
+ { "http", NULL, "host", -1, "/path%ax", NULL, NULL }
+ },
+
+#if 0 /* disabled: the entry below repeats its parts initializer */
+ /* Bug 662806; %-encode non-ASCII characters */
+ { "http://host/p\xc3\xa4th/",
+ { "http", NULL, "host", -1, "/p%C3%A4th/", NULL, NULL }
+ { "http", NULL, "host", -1, "/p%C3%A4th/", NULL, NULL }
+ },
+#endif
+
+ { "HTTP:////////////////",
+ { "http", NULL, "", -1, "//////////////", NULL, NULL }
+ },
+
+ { "http://@host",
+ { "http", "", "host", -1, "", NULL, NULL }
+ },
+ { "http://:@host",
+ { "http", ":", "host", -1, "", NULL, NULL }
+ },
+
+#if 0
+ { "http://host/keep%00nuls",
+ { "http", NULL, "host", -1, "/keep%00nuls", NULL, NULL }
+ },
+#endif
+
+ /* IPv6 scope ID parsing (both correct and incorrect) */
+ { "http://[fe80::dead:beef%em1]/",
+ { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL }
+ },
+ { "http://[fe80::dead:beef%25em1]/",
+ { "http", NULL, "fe80::dead:beef%em1", -1, "/", NULL, NULL }
+ },
+ { "http://[fe80::dead:beef%10]/",
+ { "http", NULL, "fe80::dead:beef%10", -1, "/", NULL, NULL }
+ }
+};
+static int num_absolute_tests = G_N_ELEMENTS (absolute_tests); /* loop bound used by test_uri_parsing_absolute() */
+
+/* Parses each absolute_tests entry with g_uri_parse() and compares every
+ * component getter against the entry's expected UriParts. */
+static void
+test_uri_parsing_absolute (void)
+{
+ const UriAbsoluteTest *test;
+ const UriAbsoluteTest *const end = absolute_tests + num_absolute_tests;
+
+ for (test = absolute_tests; test != end; test++)
+ {
+ GError *error = NULL;
+ GUri *uri = g_uri_parse (test->orig, 0, &error);
+
+ /* Every table entry is expected to parse without error. */
+ g_assert_no_error (error);
+
+ g_assert_cmpstr (g_uri_get_scheme (uri), ==, test->parts.scheme);
+ g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->parts.userinfo);
+ g_assert_cmpstr (g_uri_get_host (uri), ==, test->parts.host);
+ g_assert_cmpint (g_uri_get_port (uri), ==, test->parts.port);
+ g_assert_cmpstr (g_uri_get_path (uri), ==, test->parts.path);
+ g_assert_cmpstr (g_uri_get_query (uri), ==, test->parts.query);
+ g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->parts.fragment);
+
+ g_uri_unref (uri);
+ }
+}
+
+typedef struct {
+ const gchar *orig, *resolved; /* reference to resolve; expected string form of the result */
+ UriParts parts; /* expected components of the resolved URI */
+} UriRelativeTest;
+
+/* This all comes from RFC 3986 */
+static const char *relative_test_base = "http://a/b/c/d;p?q"; /* base URI every entry below is resolved against */
+static const UriRelativeTest relative_tests[] = { /* { reference, expected resolved URI string, { expected components } } */
+ { "g:h", "g:h",
+ { "g", NULL, NULL, -1, "h", NULL, NULL } },
+ { "g", "http://a/b/c/g",
+ { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
+ { "./g", "http://a/b/c/g",
+ { "http", NULL, "a", -1, "/b/c/g", NULL, NULL } },
+ { "g/", "http://a/b/c/g/",
+ { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
+ { "/g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "//g", "http://g",
+ { "http", NULL, "g", -1, "", NULL, NULL } },
+ { "?y", "http://a/b/c/d;p?y",
+ { "http", NULL, "a", -1, "/b/c/d;p", "y", NULL } },
+ { "g?y", "http://a/b/c/g?y",
+ { "http", NULL, "a", -1, "/b/c/g", "y", NULL } },
+ { "#s", "http://a/b/c/d;p?q#s",
+ { "http", NULL, "a", -1, "/b/c/d;p", "q", "s" } },
+ { "g#s", "http://a/b/c/g#s",
+ { "http", NULL, "a", -1, "/b/c/g", NULL, "s" } },
+ { "g?y#s", "http://a/b/c/g?y#s",
+ { "http", NULL, "a", -1, "/b/c/g", "y", "s" } },
+ { ";x", "http://a/b/c/;x",
+ { "http", NULL, "a", -1, "/b/c/;x", NULL, NULL } },
+ { "g;x", "http://a/b/c/g;x",
+ { "http", NULL, "a", -1, "/b/c/g;x", NULL, NULL } },
+ { "g;x?y#s", "http://a/b/c/g;x?y#s",
+ { "http", NULL, "a", -1, "/b/c/g;x", "y", "s" } },
+ { ".", "http://a/b/c/",
+ { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
+ { "./", "http://a/b/c/",
+ { "http", NULL, "a", -1, "/b/c/", NULL, NULL } },
+ { "..", "http://a/b/",
+ { "http", NULL, "a", -1, "/b/", NULL, NULL } },
+ { "../", "http://a/b/",
+ { "http", NULL, "a", -1, "/b/", NULL, NULL } },
+ { "../g", "http://a/b/g",
+ { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
+ { "../..", "http://a/",
+ { "http", NULL, "a", -1, "/", NULL, NULL } },
+ { "../../", "http://a/",
+ { "http", NULL, "a", -1, "/", NULL, NULL } },
+ { "../../g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "", "http://a/b/c/d;p?q",
+ { "http", NULL, "a", -1, "/b/c/d;p", "q", NULL } },
+ { "../../../g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "../../../../g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "/./g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "/../g", "http://a/g",
+ { "http", NULL, "a", -1, "/g", NULL, NULL } },
+ { "g.", "http://a/b/c/g.",
+ { "http", NULL, "a", -1, "/b/c/g.", NULL, NULL } },
+ { ".g", "http://a/b/c/.g",
+ { "http", NULL, "a", -1, "/b/c/.g", NULL, NULL } },
+ { "g..", "http://a/b/c/g..",
+ { "http", NULL, "a", -1, "/b/c/g..", NULL, NULL } },
+ { "..g", "http://a/b/c/..g",
+ { "http", NULL, "a", -1, "/b/c/..g", NULL, NULL } },
+ { "./../g", "http://a/b/g",
+ { "http", NULL, "a", -1, "/b/g", NULL, NULL } },
+ { "./g/.", "http://a/b/c/g/",
+ { "http", NULL, "a", -1, "/b/c/g/", NULL, NULL } },
+ { "g/./h", "http://a/b/c/g/h",
+ { "http", NULL, "a", -1, "/b/c/g/h", NULL, NULL } },
+ { "g/../h", "http://a/b/c/h",
+ { "http", NULL, "a", -1, "/b/c/h", NULL, NULL } },
+ { "g;x=1/./y", "http://a/b/c/g;x=1/y",
+ { "http", NULL, "a", -1, "/b/c/g;x=1/y", NULL, NULL } },
+ { "g;x=1/../y", "http://a/b/c/y",
+ { "http", NULL, "a", -1, "/b/c/y", NULL, NULL } },
+ { "g?y/./x", "http://a/b/c/g?y/./x",
+ { "http", NULL, "a", -1, "/b/c/g", "y/./x", NULL } },
+ { "g?y/../x", "http://a/b/c/g?y/../x",
+ { "http", NULL, "a", -1, "/b/c/g", "y/../x", NULL } },
+ { "g#s/./x", "http://a/b/c/g#s/./x",
+ { "http", NULL, "a", -1, "/b/c/g", NULL, "s/./x" } },
+ { "g#s/../x", "http://a/b/c/g#s/../x",
+ { "http", NULL, "a", -1, "/b/c/g", NULL, "s/../x" } },
+ { "http:g", "http:g",
+ { "http", NULL, NULL, -1, "g", NULL, NULL } }
+};
+static int num_relative_tests = G_N_ELEMENTS (relative_tests); /* loop bound used by test_uri_parsing_relative() */
+
+/* Resolves each relative_tests reference against relative_test_base, both
+ * through g_uri_parse_relative() (checking every component and the
+ * g_uri_to_string() form) and through g_uri_resolve_relative(). */
+static void
+test_uri_parsing_relative (void)
+{
+ int i;
+ GUri *base; /* parsed relative_test_base; released at the end of this function */
+ GError *error = NULL;
+
+ base = g_uri_parse (relative_test_base, 0, &error);
+ g_assert_no_error (error);
+
+ for (i = 0; i < num_relative_tests; i++)
+ {
+ const UriRelativeTest *test = &relative_tests[i];
+ gchar *tostring, *resolved;
+ GUri *uri;
+
+ uri = g_uri_parse_relative (base, test->orig, 0, &error);
+ g_assert_no_error (error);
+
+ g_assert_cmpstr (g_uri_get_scheme (uri), ==, test->parts.scheme);
+ g_assert_cmpstr (g_uri_get_userinfo (uri), ==, test->parts.userinfo);
+ g_assert_cmpstr (g_uri_get_host (uri), ==, test->parts.host);
+ g_assert_cmpint (g_uri_get_port (uri), ==, test->parts.port);
+ g_assert_cmpstr (g_uri_get_path (uri), ==, test->parts.path);
+ g_assert_cmpstr (g_uri_get_query (uri), ==, test->parts.query);
+ g_assert_cmpstr (g_uri_get_fragment (uri), ==, test->parts.fragment);
+
+ tostring = g_uri_to_string (uri);
+ g_assert_cmpstr (tostring, ==, test->resolved);
+ g_free (tostring);
+
+ g_uri_unref (uri);
+
+ resolved = g_uri_resolve_relative (relative_test_base, test->orig, 0, &error);
+ g_assert_no_error (error);
+ g_assert_cmpstr (resolved, ==, test->resolved);
+ g_free (resolved);
+ }
+
+ g_uri_unref (base); /* previously leaked: the base URI was never released */
}
int
@@ -381,13 +758,15 @@ main (int argc,
{
g_test_init (&argc, &argv, NULL);
- g_test_add_func ("/uri/to-uri", run_to_uri_tests);
- g_test_add_func ("/uri/from-uri", run_from_uri_tests);
- g_test_add_func ("/uri/roundtrip", run_roundtrip_tests);
+ g_test_add_func ("/uri/file-to-uri", run_file_to_uri_tests);
+ g_test_add_func ("/uri/file-from-uri", run_file_from_uri_tests);
+ g_test_add_func ("/uri/file-roundtrip", run_file_roundtrip_tests);
g_test_add_func ("/uri/list", run_uri_list_tests);
g_test_add_func ("/uri/unescape", test_uri_unescape);
g_test_add_func ("/uri/escape", test_uri_escape);
g_test_add_func ("/uri/scheme", test_uri_scheme);
+ g_test_add_func ("/uri/parsing/absolute", test_uri_parsing_absolute);
+ g_test_add_func ("/uri/parsing/relative", test_uri_parsing_relative);
return g_test_run ();
}
diff --git a/gobject/gboxed.c b/gobject/gboxed.c
index ab419516d..52070735a 100644
--- a/gobject/gboxed.c
+++ b/gobject/gboxed.c
@@ -164,6 +164,7 @@ G_DEFINE_BOXED_TYPE (GMarkupParseContext, g_markup_parse_context, g_markup_parse
G_DEFINE_BOXED_TYPE (GThread, g_thread, g_thread_ref, g_thread_unref)
G_DEFINE_BOXED_TYPE (GChecksum, g_checksum, g_checksum_copy, g_checksum_free)
+G_DEFINE_BOXED_TYPE (GUri, g_uri, g_uri_ref, g_uri_unref)
/* This one can't use G_DEFINE_BOXED_TYPE (GStrv, g_strv, g_strdupv, g_strfreev) */
GType
diff --git a/gobject/glib-types.h b/gobject/glib-types.h
index e9f6472ab..309ccd122 100644
--- a/gobject/glib-types.h
+++ b/gobject/glib-types.h
@@ -342,6 +342,8 @@ GLIB_AVAILABLE_IN_2_36
GType g_markup_parse_context_get_type (void) G_GNUC_CONST;
GLIB_AVAILABLE_IN_2_40
GType g_mapped_file_get_type (void) G_GNUC_CONST;
+GLIB_AVAILABLE_IN_2_40
+GType g_uri_get_type (void) G_GNUC_CONST;
GLIB_DEPRECATED_FOR('G_TYPE_VARIANT')
GType g_variant_get_gtype (void) G_GNUC_CONST;