1 files changed, 120 insertions, 82 deletions
diff --git a/libgo/go/net/mail/message.go b/libgo/go/net/mail/message.go
index 923630c49c..702b765c34 100644
--- a/libgo/go/net/mail/message.go
+++ b/libgo/go/net/mail/message.go
@@ -5,13 +5,15 @@
 /*
 Package mail implements parsing of mail messages.
 
-For the most part, this package follows the syntax as specified by RFC 5322.
+For the most part, this package follows the syntax as specified by RFC 5322 and
+extended by RFC 6532.
 Notable divergences:
 	* Obsolete address formats are not parsed, including addresses with
 	  embedded route information.
 	* Group addresses are not parsed.
 	* The full range of spacing (the CFWS syntax element) is not supported,
 	  such as breaking addresses across lines.
+	* No Unicode normalization is performed.
 */
 package mail
 
@@ -26,6 +28,7 @@ import (
 	"net/textproto"
 	"strings"
 	"time"
+	"unicode/utf8"
 )
 
 var debug = debugT(false)
@@ -89,7 +92,8 @@ func init() {
 	}
 }
 
-func parseDate(date string) (time.Time, error) {
+// ParseDate parses an RFC 5322 date string.
+func ParseDate(date string) (time.Time, error) {
 	for _, layout := range dateLayouts {
 		t, err := time.Parse(layout, date)
 		if err == nil {
@@ -103,7 +107,11 @@ func parseDate(date string) (time.Time, error) {
 type Header map[string][]string
 
 // Get gets the first value associated with the given key.
+// It is case insensitive; CanonicalMIMEHeaderKey is used
+// to canonicalize the provided key.
 // If there are no values associated with the key, Get returns "".
+// To access multiple values of a key, or to use non-canonical keys,
+// access the map directly.
 func (h Header) Get(key string) string {
 	return textproto.MIMEHeader(h).Get(key)
 }
@@ -116,7 +124,7 @@ func (h Header) Date() (time.Time, error) {
 	if hdr == "" {
 		return time.Time{}, ErrHeaderNotPresent
 	}
-	return parseDate(hdr)
+	return ParseDate(hdr)
 }
 
 // AddressList parses the named header field as a list of addresses.
@@ -138,7 +146,7 @@ type Address struct {
 
 // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>"
 func ParseAddress(address string) (*Address, error) {
-	return (&addrParser{s: address}).parseAddress()
+	return (&addrParser{s: address}).parseSingleAddress()
 }
 
 // ParseAddressList parses the given string as a list of addresses.
@@ -155,7 +163,7 @@ type AddressParser struct {
 // Parse parses a single RFC 5322 address of the
 // form "Gogh Fir <gf@example.com>" or "foo@example.com".
 func (p *AddressParser) Parse(address string) (*Address, error) {
-	return (&addrParser{s: address, dec: p.WordDecoder}).parseAddress()
+	return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress()
 }
 
 // ParseList parses the given string as a list of comma-separated addresses
@@ -168,7 +176,6 @@ func (p *AddressParser) ParseList(list string) ([]*Address, error) {
 // If the address's name contains non-ASCII characters
 // the name will be rendered according to RFC 2047.
 func (a *Address) String() string {
-
 	// Format address local@domain
 	at := strings.LastIndex(a.Address, "@")
 	var local, domain string
@@ -181,15 +188,12 @@ func (a *Address) String() string {
 	}
 
 	// Add quotes if needed
-	// TODO: rendering quoted local part and rendering printable name
-	//       should be merged in helper function.
 	quoteLocal := false
-	for i := 0; i < len(local); i++ {
-		ch := local[i]
-		if isAtext(ch, false) {
+	for i, r := range local {
+		if isAtext(r, false) {
 			continue
 		}
-		if ch == '.' {
+		if r == '.' {
 			// Dots are okay if they are surrounded by atext.
 			// We only need to check that the previous byte is
 			// not a dot, and this isn't the end of the string.
@@ -213,25 +217,16 @@ func (a *Address) String() string {
 
 	// If every character is printable ASCII, quoting is simple.
 	allPrintable := true
-	for i := 0; i < len(a.Name); i++ {
+	for _, r := range a.Name {
 		// isWSP here should actually be isFWS,
 		// but we don't support folding yet.
-		if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
+		if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
 			allPrintable = false
 			break
 		}
 	}
 	if allPrintable {
-		b := bytes.NewBufferString(`"`)
-		for i := 0; i < len(a.Name); i++ {
-			if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
-				b.WriteByte('\\')
-			}
-			b.WriteByte(a.Name[i])
-		}
-		b.WriteString(`" `)
-		b.WriteString(s)
-		return b.String()
+		return quoteString(a.Name) + " " + s
 	}
 
 	// Text in an encoded-word in a display-name must not contain certain
@@ -269,6 +264,18 @@ func (p *addrParser) parseAddressList() ([]*Address, error) {
 	return list, nil
 }
 
+func (p *addrParser) parseSingleAddress() (*Address, error) {
+	addr, err := p.parseAddress()
+	if err != nil {
+		return nil, err
+	}
+	p.skipSpace()
+	if !p.empty() {
+		return nil, fmt.Errorf("mail: expected single address, got %q", p.s)
+	}
+	return addr, nil
+}
+
 // parseAddress parses a single RFC 5322 address at the start of p.
 func (p *addrParser) parseAddress() (addr *Address, err error) {
 	debug.Printf("parseAddress: %q", p.s)
@@ -343,6 +350,9 @@ func (p *addrParser) consumeAddrSpec() (spec string, err error) {
 		// quoted-string
 		debug.Printf("consumeAddrSpec: parsing quoted-string")
 		localPart, err = p.consumeQuotedString()
+		if localPart == "" {
+			err = errors.New("mail: empty quoted string in addr-spec")
+		}
 	} else {
 		// dot-atom
 		debug.Printf("consumeAddrSpec: parsing dot-atom")
@@ -416,57 +426,81 @@ func (p *addrParser) consumePhrase() (phrase string, err error) {
 func (p *addrParser) consumeQuotedString() (qs string, err error) {
 	// Assume first byte is '"'.
 	i := 1
-	qsb := make([]byte, 0, 10)
+	qsb := make([]rune, 0, 10)
+
+	escaped := false
+
 Loop:
 	for {
-		if i >= p.len() {
+		r, size := utf8.DecodeRuneInString(p.s[i:])
+
+		switch {
+		case size == 0:
 			return "", errors.New("mail: unclosed quoted-string")
-		}
-		switch c := p.s[i]; {
-		case c == '"':
-			break Loop
-		case c == '\\':
-			if i+1 == p.len() {
-				return "", errors.New("mail: unclosed quoted-string")
+
+		case size == 1 && r == utf8.RuneError:
+			return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
+
+		case escaped:
+			// quoted-pair = ("\" (VCHAR / WSP))
+
+			if !isVchar(r) && !isWSP(r) {
+				return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
 			}
-			qsb = append(qsb, p.s[i+1])
-			i += 2
-		case isQtext(c), c == ' ':
+
+			qsb = append(qsb, r)
+			escaped = false
+
+		case isQtext(r) || isWSP(r):
 			// qtext (printable US-ASCII excluding " and \), or
 			// FWS (almost; we're ignoring CRLF)
-			qsb = append(qsb, c)
-			i++
+			qsb = append(qsb, r)
+
+		case r == '"':
+			break Loop
+
+		case r == '\\':
+			escaped = true
+
 		default:
-			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
+			return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
+
 		}
+
+		i += size
 	}
 	p.s = p.s[i+1:]
-	if len(qsb) == 0 {
-		return "", errors.New("mail: empty quoted-string")
-	}
 	return string(qsb), nil
 }
 
-var errNonASCII = errors.New("mail: unencoded non-ASCII text in address")
-
 // consumeAtom parses an RFC 5322 atom at the start of p.
 // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
 // If permissive is true, consumeAtom will not fail on
 // leading/trailing/double dots in the atom (see golang.org/issue/4938).
 func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
-	if c := p.peek(); !isAtext(c, false) {
-		if c > 127 {
-			return "", errNonASCII
+	i := 0
+
+Loop:
+	for {
+		r, size := utf8.DecodeRuneInString(p.s[i:])
+
+		switch {
+		case size == 1 && r == utf8.RuneError:
+			return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
+
+		case size == 0 || !isAtext(r, dot):
+			break Loop
+
+		default:
+			i += size
+
 		}
-		return "", errors.New("mail: invalid string")
 	}
-	i := 1
-	for ; i < p.len() && isAtext(p.s[i], dot); i++ {
-	}
-	if i < p.len() && p.s[i] > 127 {
-		return "", errNonASCII
+
+	if i == 0 {
+		return "", errors.New("mail: invalid string")
 	}
-	atom, p.s = string(p.s[:i]), p.s[i:]
+	atom, p.s = p.s[:i], p.s[i:]
 	if !permissive {
 		if strings.HasPrefix(atom, ".") {
 			return "", errors.New("mail: leading dot in atom")
@@ -536,54 +570,58 @@ func (e charsetError) Error() string {
 	return fmt.Sprintf("charset not supported: %q", string(e))
 }
 
-var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
-	"abcdefghijklmnopqrstuvwxyz" +
-	"0123456789" +
-	"!#$%&'*+-/=?^_`{|}~")
-
-// isAtext reports whether c is an RFC 5322 atext character.
+// isAtext reports whether r is an RFC 5322 atext character.
 // If dot is true, period is included.
-func isAtext(c byte, dot bool) bool {
-	if dot && c == '.' {
-		return true
+func isAtext(r rune, dot bool) bool {
+	switch r {
+	case '.':
+		return dot
+
+	case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
+		return false
 	}
-	return bytes.IndexByte(atextChars, c) >= 0
+	return isVchar(r)
 }
 
-// isQtext reports whether c is an RFC 5322 qtext character.
-func isQtext(c byte) bool {
+// isQtext reports whether r is an RFC 5322 qtext character.
+func isQtext(r rune) bool {
 	// Printable US-ASCII, excluding backslash or quote.
-	if c == '\\' || c == '"' {
+	if r == '\\' || r == '"' {
 		return false
 	}
-	return '!' <= c && c <= '~'
+	return isVchar(r)
 }
 
-// quoteString renders a string as a RFC5322 quoted-string.
+// quoteString renders a string as an RFC 5322 quoted-string.
 func quoteString(s string) string {
 	var buf bytes.Buffer
 	buf.WriteByte('"')
-	for _, c := range s {
-		ch := byte(c)
-		if isQtext(ch) || isWSP(ch) {
-			buf.WriteByte(ch)
-		} else if isVchar(ch) {
+	for _, r := range s {
+		if isQtext(r) || isWSP(r) {
+			buf.WriteRune(r)
+		} else if isVchar(r) {
 			buf.WriteByte('\\')
-			buf.WriteByte(ch)
+			buf.WriteRune(r)
 		}
 	}
 	buf.WriteByte('"')
 	return buf.String()
 }
 
-// isVchar reports whether c is an RFC 5322 VCHAR character.
-func isVchar(c byte) bool {
+// isVchar reports whether r is an RFC 5322 VCHAR character.
+func isVchar(r rune) bool {
 	// Visible (printing) characters.
-	return '!' <= c && c <= '~'
+	return '!' <= r && r <= '~' || isMultibyte(r)
+}
+
+// isMultibyte reports whether r is a multi-byte UTF-8 character
+// as supported by RFC 6532.
+func isMultibyte(r rune) bool {
+	return r >= utf8.RuneSelf
 }
 
-// isWSP reports whether c is a WSP (white space).
-// WSP is a space or horizontal tab (RFC5234 Appendix B).
-func isWSP(c byte) bool {
-	return c == ' ' || c == '\t'
+// isWSP reports whether r is a WSP (white space).
+// WSP is a space or horizontal tab (RFC 5234 Appendix B).
+func isWSP(r rune) bool {
+	return r == ' ' || r == '\t'
 }