summaryrefslogtreecommitdiff
path: root/libgo/go/net/url/url.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/net/url/url.go')
-rw-r--r--libgo/go/net/url/url.go250
1 files changed, 193 insertions, 57 deletions
diff --git a/libgo/go/net/url/url.go b/libgo/go/net/url/url.go
index 1a93e3496e..42a514bbc1 100644
--- a/libgo/go/net/url/url.go
+++ b/libgo/go/net/url/url.go
@@ -3,9 +3,13 @@
// license that can be found in the LICENSE file.
// Package url parses URLs and implements query escaping.
-// See RFC 3986.
package url
+// See RFC 3986. This package generally follows RFC 3986, except where
+// it deviates for compatibility reasons. When sending changes, first
+// search old issues for history on decisions. Unit tests should also
+// contain references to issue numbers with details.
+
import (
"bytes"
"errors"
@@ -70,6 +74,7 @@ type encoding int
const (
encodePath encoding = 1 + iota
+ encodePathSegment
encodeHost
encodeZone
encodeUserPassword
@@ -128,9 +133,14 @@ func shouldEscape(c byte, mode encoding) bool {
// The RFC allows : @ & = + $ but saves / ; , for assigning
// meaning to individual path segments. This package
// only manipulates the path as a whole, so we allow those
- // last two as well. That leaves only ? to escape.
+ // last three as well. That leaves only ? to escape.
return c == '?'
+ case encodePathSegment: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments.
+ return c == '/' || c == ';' || c == ',' || c == '?'
+
case encodeUserPassword: // §3.2.1
// The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
// userinfo, so we must escape only '@', '/', and '?'.
@@ -160,6 +170,15 @@ func QueryUnescape(s string) (string, error) {
return unescape(s, encodeQueryComponent)
}
+// PathUnescape does the inverse transformation of PathEscape, converting
+// %AB into the byte 0xAB. It returns an error if any % is not followed by
+// two hexadecimal digits.
+//
+// PathUnescape is identical to QueryUnescape except that it does not unescape '+' to ' ' (space).
+func PathUnescape(s string) (string, error) {
+ return unescape(s, encodePathSegment)
+}
+
// unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped.
func unescape(s string, mode encoding) (string, error) {
@@ -246,6 +265,12 @@ func QueryEscape(s string) string {
return escape(s, encodeQueryComponent)
}
+// PathEscape escapes the string so it can be safely placed
+// inside a URL path segment.
+func PathEscape(s string) string {
+ return escape(s, encodePathSegment)
+}
+
func escape(s string, mode encoding) string {
spaceCount, hexCount := 0, 0
for i := 0; i < len(s); i++ {
@@ -307,14 +332,15 @@ func escape(s string, mode encoding) string {
// construct a URL struct directly and set the Opaque field instead of Path.
// These still work as well.
type URL struct {
- Scheme string
- Opaque string // encoded opaque data
- User *Userinfo // username and password information
- Host string // host or host:port
- Path string
- RawPath string // encoded path hint (Go 1.5 and later only; see EscapedPath method)
- RawQuery string // encoded query values, without '?'
- Fragment string // fragment for references, without '#'
+ Scheme string
+ Opaque string // encoded opaque data
+ User *Userinfo // username and password information
+ Host string // host or host:port
+ Path string
+ RawPath string // encoded path hint (Go 1.5 and later only; see EscapedPath method)
+ ForceQuery bool // append a query ('?') even if RawQuery is empty
+ RawQuery string // encoded query values, without '?'
+ Fragment string // fragment for references, without '#'
}
// User returns a Userinfo containing the provided username
@@ -351,10 +377,7 @@ func (u *Userinfo) Username() string {
// Password returns the password in case it is set, and whether it is set.
func (u *Userinfo) Password() (string, bool) {
- if u.passwordSet {
- return u.password, true
- }
- return "", false
+ return u.password, u.passwordSet
}
// String returns the encoded userinfo information in the standard form
@@ -410,11 +433,12 @@ func split(s string, c string, cutc bool) (string, string) {
// Parse parses rawurl into a URL structure.
// The rawurl may be relative or absolute.
-func Parse(rawurl string) (url *URL, err error) {
+func Parse(rawurl string) (*URL, error) {
// Cut off #frag
u, frag := split(rawurl, "#", true)
- if url, err = parse(u, false); err != nil {
- return nil, err
+ url, err := parse(u, false)
+ if err != nil {
+ return nil, &Error{"parse", u, err}
}
if frag == "" {
return url, nil
@@ -425,41 +449,50 @@ func Parse(rawurl string) (url *URL, err error) {
return url, nil
}
-// ParseRequestURI parses rawurl into a URL structure. It assumes that
+// ParseRequestURI parses rawurl into a URL structure. It assumes that
// rawurl was received in an HTTP request, so the rawurl is interpreted
// only as an absolute URI or an absolute path.
// The string rawurl is assumed not to have a #fragment suffix.
// (Web browsers strip #fragment before sending the URL to a web server.)
-func ParseRequestURI(rawurl string) (url *URL, err error) {
- return parse(rawurl, true)
+func ParseRequestURI(rawurl string) (*URL, error) {
+ url, err := parse(rawurl, true)
+ if err != nil {
+ return nil, &Error{"parse", rawurl, err}
+ }
+ return url, nil
}
-// parse parses a URL from a string in one of two contexts. If
+// parse parses a URL from a string in one of two contexts. If
// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
// in which case only absolute URLs or path-absolute relative URLs are allowed.
// If viaRequest is false, all forms of relative URLs are allowed.
-func parse(rawurl string, viaRequest bool) (url *URL, err error) {
+func parse(rawurl string, viaRequest bool) (*URL, error) {
var rest string
+ var err error
if rawurl == "" && viaRequest {
- err = errors.New("empty url")
- goto Error
+ return nil, errors.New("empty url")
}
- url = new(URL)
+ url := new(URL)
if rawurl == "*" {
url.Path = "*"
- return
+ return url, nil
}
// Split off possible leading "http:", "mailto:", etc.
// Cannot contain escaped characters.
if url.Scheme, rest, err = getscheme(rawurl); err != nil {
- goto Error
+ return nil, err
}
url.Scheme = strings.ToLower(url.Scheme)
- rest, url.RawQuery = split(rest, "?", true)
+ if strings.HasSuffix(rest, "?") && strings.Count(rest, "?") == 1 {
+ url.ForceQuery = true
+ rest = rest[:len(rest)-1]
+ } else {
+ rest, url.RawQuery = split(rest, "?", true)
+ }
if !strings.HasPrefix(rest, "/") {
if url.Scheme != "" {
@@ -468,8 +501,20 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) {
return url, nil
}
if viaRequest {
- err = errors.New("invalid URI for request")
- goto Error
+ return nil, errors.New("invalid URI for request")
+ }
+
+ // Avoid confusion with malformed schemes, like cache_object:foo/bar.
+ // See golang.org/issue/16822.
+ //
+ // RFC 3986, §3.3:
+ // In addition, a URI reference (Section 4.1) may be a relative-path reference,
+ // in which case the first path segment cannot contain a colon (":") character.
+ colon := strings.Index(rest, ":")
+ slash := strings.Index(rest, "/")
+ if colon >= 0 && (slash < 0 || colon < slash) {
+ // First path segment has colon. Not allowed in relative URL.
+ return nil, errors.New("first path segment in URL cannot contain colon")
}
}
@@ -478,23 +523,17 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) {
authority, rest = split(rest[2:], "/", false)
url.User, url.Host, err = parseAuthority(authority)
if err != nil {
- goto Error
+ return nil, err
}
}
- if url.Path, err = unescape(rest, encodePath); err != nil {
- goto Error
- }
- // RawPath is a hint as to the encoding of Path to use
- // in url.EscapedPath. If that method already gets the
- // right answer without RawPath, leave it empty.
- // This will help make sure that people don't rely on it in general.
- if url.EscapedPath() != rest && validEncodedPath(rest) {
- url.RawPath = rest
+ // Set Path and, optionally, RawPath.
+ // RawPath is a hint of the encoding of Path. We don't want to set it if
+ // the default escaping of Path is equivalent, to help make sure that people
+ // don't rely on it in general.
+ if err := url.setPath(rest); err != nil {
+ return nil, err
}
return url, nil
-
-Error:
- return nil, &Error{"parse", rawurl, err}
}
func parseAuthority(authority string) (user *Userinfo, host string, err error) {
@@ -511,7 +550,7 @@ func parseAuthority(authority string) (user *Userinfo, host string, err error) {
return nil, host, nil
}
userinfo := authority[:i]
- if strings.Index(userinfo, ":") < 0 {
+ if !strings.Contains(userinfo, ":") {
if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
return nil, "", err
}
@@ -575,6 +614,29 @@ func parseHost(host string) (string, error) {
return host, nil
}
+// setPath sets the Path and RawPath fields of the URL based on the provided
+// escaped path p. It maintains the invariant that RawPath is only specified
+// when it differs from the default encoding of the path.
+// For example:
+// - setPath("/foo/bar") will set Path="/foo/bar" and RawPath=""
+// - setPath("/foo%2fbar") will set Path="/foo/bar" and RawPath="/foo%2fbar"
+// setPath will return an error only if the provided path contains an invalid
+// escaping.
+func (u *URL) setPath(p string) error {
+ path, err := unescape(p, encodePath)
+ if err != nil {
+ return err
+ }
+ u.Path = path
+ if escp := escape(path, encodePath); p == escp {
+ // Default encoding is fine.
+ u.RawPath = ""
+ } else {
+ u.RawPath = p
+ }
+ return nil
+}
+
// EscapedPath returns the escaped form of u.Path.
// In general there are multiple possible escaped forms of any path.
// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
@@ -682,9 +744,20 @@ func (u *URL) String() string {
if path != "" && path[0] != '/' && u.Host != "" {
buf.WriteByte('/')
}
+ if buf.Len() == 0 {
+ // RFC 3986 §4.2
+ // A path segment that contains a colon character (e.g., "this:that")
+ // cannot be used as the first segment of a relative-path reference, as
+ // it would be mistaken for a scheme name. Such a segment must be
+ // preceded by a dot-segment (e.g., "./this:that") to make a relative-
+ // path reference.
+ if i := strings.IndexByte(path, ':'); i > -1 && strings.IndexByte(path[:i], '/') == -1 {
+ buf.WriteString("./")
+ }
+ }
buf.WriteString(path)
}
- if u.RawQuery != "" {
+ if u.ForceQuery || u.RawQuery != "" {
buf.WriteByte('?')
buf.WriteString(u.RawQuery)
}
@@ -709,8 +782,8 @@ func (v Values) Get(key string) string {
if v == nil {
return ""
}
- vs, ok := v[key]
- if !ok || len(vs) == 0 {
+ vs := v[key]
+ if len(vs) == 0 {
return ""
}
return vs[0]
@@ -738,10 +811,14 @@ func (v Values) Del(key string) {
// ParseQuery always returns a non-nil map containing all the
// valid query parameters found; err describes the first decoding error
// encountered, if any.
-func ParseQuery(query string) (m Values, err error) {
- m = make(Values)
- err = parseQuery(m, query)
- return
+//
+// Query is expected to be a list of key=value settings separated by
+// ampersands or semicolons. A setting without an equals sign is
+// interpreted as a key set to an empty value.
+func ParseQuery(query string) (Values, error) {
+ m := make(Values)
+ err := parseQuery(m, query)
+ return m, err
}
func parseQuery(m Values, query string) (err error) {
@@ -841,12 +918,13 @@ func resolvePath(base, ref string) string {
}
// IsAbs reports whether the URL is absolute.
+// Absolute means that it has a non-empty scheme.
func (u *URL) IsAbs() bool {
return u.Scheme != ""
}
-// Parse parses a URL in the context of the receiver. The provided URL
-// may be relative or absolute. Parse returns nil, err on parse
+// Parse parses a URL in the context of the receiver. The provided URL
+// may be relative or absolute. Parse returns nil, err on parse
// failure, otherwise its return value is the same as ResolveReference.
func (u *URL) Parse(ref string) (*URL, error) {
refurl, err := Parse(ref)
@@ -858,7 +936,7 @@ func (u *URL) Parse(ref string) (*URL, error) {
// ResolveReference resolves a URI reference to an absolute URI from
// an absolute base URI, per RFC 3986 Section 5.2. The URI reference
-// may be relative or absolute. ResolveReference always returns a new
+// may be relative or absolute. ResolveReference always returns a new
// URL instance, even if the returned URL is identical to either the
// base or reference. If ref is an absolute URL, then ResolveReference
// ignores base and returns a copy of ref.
@@ -869,7 +947,9 @@ func (u *URL) ResolveReference(ref *URL) *URL {
}
if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
// The "absoluteURI" or "net_path" cases.
- url.Path = resolvePath(ref.Path, "")
+ // We can ignore the error from setPath since we know we provided a
+ // validly-escaped path.
+ url.setPath(resolvePath(ref.EscapedPath(), ""))
return &url
}
if ref.Opaque != "" {
@@ -889,7 +969,7 @@ func (u *URL) ResolveReference(ref *URL) *URL {
// The "abs_path" or "rel_path" cases.
url.Host = u.Host
url.User = u.User
- url.Path = resolvePath(u.Path, ref.Path)
+ url.setPath(resolvePath(u.EscapedPath(), ref.EscapedPath()))
return &url
}
@@ -913,8 +993,64 @@ func (u *URL) RequestURI() string {
result = u.Scheme + ":" + result
}
}
- if u.RawQuery != "" {
+ if u.ForceQuery || u.RawQuery != "" {
result += "?" + u.RawQuery
}
return result
}
+
+// Hostname returns u.Host, without any port number.
+//
+// If Host is an IPv6 literal with a port number, Hostname returns the
+// IPv6 literal without the square brackets. IPv6 literals may include
+// a zone identifier.
+func (u *URL) Hostname() string {
+ return stripPort(u.Host)
+}
+
+// Port returns the port part of u.Host, without the leading colon.
+// If u.Host doesn't contain a port, Port returns an empty string.
+func (u *URL) Port() string {
+ return portOnly(u.Host)
+}
+
+func stripPort(hostport string) string {
+ colon := strings.IndexByte(hostport, ':')
+ if colon == -1 {
+ return hostport
+ }
+ if i := strings.IndexByte(hostport, ']'); i != -1 {
+ return strings.TrimPrefix(hostport[:i], "[")
+ }
+ return hostport[:colon]
+}
+
+func portOnly(hostport string) string {
+ colon := strings.IndexByte(hostport, ':')
+ if colon == -1 {
+ return ""
+ }
+ if i := strings.Index(hostport, "]:"); i != -1 {
+ return hostport[i+len("]:"):]
+ }
+ if strings.Contains(hostport, "]") {
+ return ""
+ }
+ return hostport[colon+len(":"):]
+}
+
+// Marshaling interface implementations.
+// Would like to implement MarshalText/UnmarshalText but that will change the JSON representation of URLs.
+
+func (u *URL) MarshalBinary() (text []byte, err error) {
+ return []byte(u.String()), nil
+}
+
+func (u *URL) UnmarshalBinary(text []byte) error {
+ u1, err := Parse(string(text))
+ if err != nil {
+ return err
+ }
+ *u = *u1
+ return nil
+}