diff options
Diffstat (limited to 'libgo/go/net/mail')
-rw-r--r-- | libgo/go/net/mail/message.go | 187 | ||||
-rw-r--r-- | libgo/go/net/mail/message_test.go | 74 |
2 files changed, 172 insertions, 89 deletions
diff --git a/libgo/go/net/mail/message.go b/libgo/go/net/mail/message.go index 923630c49ce..0c000697f7b 100644 --- a/libgo/go/net/mail/message.go +++ b/libgo/go/net/mail/message.go @@ -5,13 +5,15 @@ /* Package mail implements parsing of mail messages. -For the most part, this package follows the syntax as specified by RFC 5322. +For the most part, this package follows the syntax as specified by RFC 5322 and +extended by RFC 6532. Notable divergences: * Obsolete address formats are not parsed, including addresses with embedded route information. * Group addresses are not parsed. * The full range of spacing (the CFWS syntax element) is not supported, such as breaking addresses across lines. + * No unicode normalization is performed. */ package mail @@ -26,6 +28,7 @@ import ( "net/textproto" "strings" "time" + "unicode/utf8" ) var debug = debugT(false) @@ -138,7 +141,7 @@ type Address struct { // Parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>" func ParseAddress(address string) (*Address, error) { - return (&addrParser{s: address}).parseAddress() + return (&addrParser{s: address}).parseSingleAddress() } // ParseAddressList parses the given string as a list of addresses. @@ -155,7 +158,7 @@ type AddressParser struct { // Parse parses a single RFC 5322 address of the // form "Gogh Fir <gf@example.com>" or "foo@example.com". func (p *AddressParser) Parse(address string) (*Address, error) { - return (&addrParser{s: address, dec: p.WordDecoder}).parseAddress() + return (&addrParser{s: address, dec: p.WordDecoder}).parseSingleAddress() } // ParseList parses the given string as a list of comma-separated addresses @@ -168,7 +171,6 @@ func (p *AddressParser) ParseList(list string) ([]*Address, error) { // If the address's name contains non-ASCII characters // the name will be rendered according to RFC 2047. func (a *Address) String() string { - // Format address local@domain at := strings.LastIndex(a.Address, "@") var local, domain string @@ -181,15 +183,12 @@ func (a *Address) String() string { } // Add quotes if needed - // TODO: rendering quoted local part and rendering printable name - // should be merged in helper function. quoteLocal := false - for i := 0; i < len(local); i++ { - ch := local[i] - if isAtext(ch, false) { + for i, r := range local { + if isAtext(r, false) { continue } - if ch == '.' { + if r == '.' { // Dots are okay if they are surrounded by atext. // We only need to check that the previous byte is // not a dot, and this isn't the end of the string. @@ -213,25 +212,16 @@ func (a *Address) String() string { // If every character is printable ASCII, quoting is simple. allPrintable := true - for i := 0; i < len(a.Name); i++ { + for _, r := range a.Name { // isWSP here should actually be isFWS, // but we don't support folding yet. - if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { + if !isVchar(r) && !isWSP(r) || isMultibyte(r) { allPrintable = false break } } if allPrintable { - b := bytes.NewBufferString(`"`) - for i := 0; i < len(a.Name); i++ { - if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) { - b.WriteByte('\\') - } - b.WriteByte(a.Name[i]) - } - b.WriteString(`" `) - b.WriteString(s) - return b.String() + return quoteString(a.Name) + " " + s } // Text in an encoded-word in a display-name must not contain certain @@ -269,6 +259,18 @@ func (p *addrParser) parseAddressList() ([]*Address, error) { return list, nil } +func (p *addrParser) parseSingleAddress() (*Address, error) { + addr, err := p.parseAddress() + if err != nil { + return nil, err + } + p.skipSpace() + if !p.empty() { + return nil, fmt.Errorf("mail: expected single address, got %q", p.s) + } + return addr, nil +} + // parseAddress parses a single RFC 5322 address at the start of p. func (p *addrParser) parseAddress() (addr *Address, err error) { debug.Printf("parseAddress: %q", p.s) @@ -416,29 +418,48 @@ func (p *addrParser) consumePhrase() (phrase string, err error) { func (p *addrParser) consumeQuotedString() (qs string, err error) { // Assume first byte is '"'. i := 1 - qsb := make([]byte, 0, 10) + qsb := make([]rune, 0, 10) + + escaped := false + Loop: for { - if i >= p.len() { + r, size := utf8.DecodeRuneInString(p.s[i:]) + + switch { + case size == 0: return "", errors.New("mail: unclosed quoted-string") - } - switch c := p.s[i]; { - case c == '"': - break Loop - case c == '\\': - if i+1 == p.len() { - return "", errors.New("mail: unclosed quoted-string") + + case size == 1 && r == utf8.RuneError: + return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s) + + case escaped: + // quoted-pair = ("\" (VCHAR / WSP)) + + if !isVchar(r) && !isWSP(r) { + return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) } - qsb = append(qsb, p.s[i+1]) - i += 2 - case isQtext(c), c == ' ': + + qsb = append(qsb, r) + escaped = false + + case isQtext(r) || isWSP(r): // qtext (printable US-ASCII excluding " and \), or // FWS (almost; we're ignoring CRLF) - qsb = append(qsb, c) - i++ + qsb = append(qsb, r) + + case r == '"': + break Loop + + case r == '\\': + escaped = true + default: - return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) + return "", fmt.Errorf("mail: bad character in quoted-string: %q", r) + } + + i += size } p.s = p.s[i+1:] if len(qsb) == 0 { @@ -447,26 +468,34 @@ Loop: return string(qsb), nil } -var errNonASCII = errors.New("mail: unencoded non-ASCII text in address") - // consumeAtom parses an RFC 5322 atom at the start of p. // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. // If permissive is true, consumeAtom will not fail on // leading/trailing/double dots in the atom (see golang.org/issue/4938). func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { - if c := p.peek(); !isAtext(c, false) { - if c > 127 { - return "", errNonASCII + i := 0 + +Loop: + for { + r, size := utf8.DecodeRuneInString(p.s[i:]) + + switch { + case size == 1 && r == utf8.RuneError: + return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s) + + case size == 0 || !isAtext(r, dot): + break Loop + + default: + i += size + } - return "", errors.New("mail: invalid string") - } - i := 1 - for ; i < p.len() && isAtext(p.s[i], dot); i++ { } - if i < p.len() && p.s[i] > 127 { - return "", errNonASCII + + if i == 0 { + return "", errors.New("mail: invalid string") } - atom, p.s = string(p.s[:i]), p.s[i:] + atom, p.s = p.s[:i], p.s[i:] if !permissive { if strings.HasPrefix(atom, ".") { return "", errors.New("mail: leading dot in atom") @@ -536,54 +565,58 @@ func (e charsetError) Error() string { return fmt.Sprintf("charset not supported: %q", string(e)) } -var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + - "abcdefghijklmnopqrstuvwxyz" + - "0123456789" + - "!#$%&'*+-/=?^_`{|}~") - -// isAtext reports whether c is an RFC 5322 atext character. +// isAtext reports whether r is an RFC 5322 atext character. // If dot is true, period is included. -func isAtext(c byte, dot bool) bool { - if dot && c == '.' { - return true +func isAtext(r rune, dot bool) bool { + switch r { + case '.': + return dot + + case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials + return false } - return bytes.IndexByte(atextChars, c) >= 0 + return isVchar(r) } -// isQtext reports whether c is an RFC 5322 qtext character. -func isQtext(c byte) bool { +// isQtext reports whether r is an RFC 5322 qtext character. +func isQtext(r rune) bool { // Printable US-ASCII, excluding backslash or quote. - if c == '\\' || c == '"' { + if r == '\\' || r == '"' { return false } - return '!' <= c && c <= '~' + return isVchar(r) } -// quoteString renders a string as a RFC5322 quoted-string. +// quoteString renders a string as an RFC 5322 quoted-string. func quoteString(s string) string { var buf bytes.Buffer buf.WriteByte('"') - for _, c := range s { - ch := byte(c) - if isQtext(ch) || isWSP(ch) { - buf.WriteByte(ch) - } else if isVchar(ch) { + for _, r := range s { + if isQtext(r) || isWSP(r) { + buf.WriteRune(r) + } else if isVchar(r) { buf.WriteByte('\\') - buf.WriteByte(ch) + buf.WriteRune(r) } } buf.WriteByte('"') return buf.String() } -// isVchar reports whether c is an RFC 5322 VCHAR character. -func isVchar(c byte) bool { +// isVchar reports whether r is an RFC 5322 VCHAR character. +func isVchar(r rune) bool { // Visible (printing) characters. - return '!' <= c && c <= '~' + return '!' <= r && r <= '~' || isMultibyte(r) +} + +// isMultibyte reports whether r is a multi-byte UTF-8 character +// as supported by RFC 6532 +func isMultibyte(r rune) bool { + return r >= utf8.RuneSelf } -// isWSP reports whether c is a WSP (white space). -// WSP is a space or horizontal tab (RFC5234 Appendix B). -func isWSP(c byte) bool { - return c == ' ' || c == '\t' +// isWSP reports whether r is a WSP (white space). +// WSP is a space or horizontal tab (RFC 5234 Appendix B). +func isWSP(r rune) bool { + return r == ' ' || r == '\t' } diff --git a/libgo/go/net/mail/message_test.go b/libgo/go/net/mail/message_test.go index 4e718e26367..bbbba6b584a 100644 --- a/libgo/go/net/mail/message_test.go +++ b/libgo/go/net/mail/message_test.go @@ -92,7 +92,7 @@ func TestDateParsing(t *testing.T) { "Fri, 21 Nov 1997 09:55:06 -0600", time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), }, - // RFC5322, Appendix A.6.2 + // RFC 5322, Appendix A.6.2 // Obsolete date. { "21 Nov 97 09:55:06 GMT", @@ -120,18 +120,24 @@ func TestDateParsing(t *testing.T) { } func TestAddressParsingError(t *testing.T) { - const txt = "=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>" - _, err := ParseAddress(txt) - if err == nil || !strings.Contains(err.Error(), "charset not supported") { - t.Errorf(`mail.ParseAddress(%q) err: %q, want ".*charset not supported.*"`, txt, err) + mustErrTestCases := [...]struct { + text string + wantErrText string + }{ + 0: {"=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>", "charset not supported"}, + 1: {"a@gmail.com b@gmail.com", "expected single address"}, + 2: {string([]byte{0xed, 0xa0, 0x80}) + " <micro@example.net>", "invalid utf-8 in address"}, + 3: {"\"" + string([]byte{0xed, 0xa0, 0x80}) + "\" <half-surrogate@example.com>", "invalid utf-8 in quoted-string"}, + 4: {"\"\\" + string([]byte{0x80}) + "\" <escaped-invalid-unicode@example.net>", "invalid utf-8 in quoted-string"}, + 5: {"\"\x00\" <null@example.net>", "bad character in quoted-string"}, + 6: {"\"\\\x00\" <escaped-null@example.net>", "bad character in quoted-string"}, } -} -func TestAddressParsingErrorUnquotedNonASCII(t *testing.T) { - const txt = "µ <micro@example.net>" - _, err := ParseAddress(txt) - if err == nil || !strings.Contains(err.Error(), "unencoded non-ASCII text in address") { - t.Errorf(`mail.ParseAddress(%q) err: %q, want ".*unencoded non-ASCII text in address.*"`, txt, err) + for i, tc := range mustErrTestCases { + _, err := ParseAddress(tc.text) + if err == nil || !strings.Contains(err.Error(), tc.wantErrText) { + t.Errorf(`mail.ParseAddress(%q) #%d want %q, got %v`, tc.text, i, tc.wantErrText, err) + } } } @@ -264,6 +270,46 @@ func TestAddressParsing(t *testing.T) { }, }, }, + // RFC 6532 3.2.3, qtext /= UTF8-non-ascii + { + `"Gø Pher" <gopher@example.com>`, + []*Address{ + { + Name: `Gø Pher`, + Address: "gopher@example.com", + }, + }, + }, + // RFC 6532 3.2, atext /= UTF8-non-ascii + { + `µ <micro@example.com>`, + []*Address{ + { + Name: `µ`, + Address: "micro@example.com", + }, + }, + }, + // RFC 6532 3.2.2, local address parts allow UTF-8 + { + `Micro <µ@example.com>`, + []*Address{ + { + Name: `Micro`, + Address: "µ@example.com", + }, + }, + }, + // RFC 6532 3.2.4, domains parts allow UTF-8 + { + `Micro <micro@µ.example.com>`, + []*Address{ + { + Name: `Micro`, + Address: "micro@µ.example.com", + }, + }, + }, } for _, test := range tests { if len(test.exp) == 1 { @@ -515,6 +561,11 @@ func TestAddressString(t *testing.T) { &Address{Name: "world?=", Address: "hello@world.com"}, `"world?=" <hello@world.com>`, }, + { + // should q-encode even for invalid utf-8. + &Address{Name: string([]byte{0xed, 0xa0, 0x80}), Address: "invalid-utf8@example.net"}, + "=?utf-8?q?=ED=A0=80?= <invalid-utf8@example.net>", + }, } for _, test := range tests { s := test.addr.String() @@ -610,7 +661,6 @@ func TestAddressParsingAndFormatting(t *testing.T) { `< @example.com>`, `<""test""blah""@example.com>`, `<""@0>`, - "<\"\t0\"@0>", } for _, test := range badTests { |