// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// Package mail implements parsing of mail messages according to RFC 5322.
+/*
+Package mail implements parsing of mail messages.
+
+For the most part, this package follows the syntax as specified by RFC 5322.
+Notable divergences:
+ * Obsolete address formats are not parsed, including addresses with
+ embedded route information.
+ * Group addresses are not parsed.
+ * The full range of spacing (the CFWS syntax element) is not supported,
+ such as breaking addresses across lines.
+*/
package mail
import (
"bufio"
+ "bytes"
+ "fmt"
"io"
+ "log"
"net/textproto"
"os"
+ "strconv"
+ "strings"
"time"
)
+var debug = debugT(false)
+
+type debugT bool
+
+func (d debugT) Printf(format string, args ...interface{}) {
+ if d {
+ log.Printf(format, args...)
+ }
+}
+
// A Message represents a parsed mail message.
type Message struct {
Header Header
}
return parseDate(hdr)
}
+
+// AddressList parses the named header field as a list of addresses.
+func (h Header) AddressList(key string) ([]*Address, os.Error) {
+ hdr := h.Get(key)
+ if hdr == "" {
+ return nil, ErrHeaderNotPresent
+ }
+ return newAddrParser(hdr).parseAddressList()
+}
+
+// Address represents a single mail address.
+// An address such as "Barry Gibbs <bg@example.com>" is represented
+// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
+type Address struct {
+ Name string // Proper name; may be empty.
+ Address string // user@domain
+}
+
+func (a *Address) String() string {
+ s := "<" + a.Address + ">"
+ if a.Name == "" {
+ return s
+ }
+ return "\"" + strconv.Quote(a.Name) + "\" " + s
+}
+
+type addrParser []byte
+
+func newAddrParser(s string) *addrParser {
+ p := addrParser([]byte(s))
+ return &p
+}
+
+func (p *addrParser) parseAddressList() ([]*Address, os.Error) {
+ var list []*Address
+ for {
+ p.skipSpace()
+ addr, err := p.parseAddress()
+ if err != nil {
+ return nil, err
+ }
+ list = append(list, addr)
+
+ p.skipSpace()
+ if p.empty() {
+ break
+ }
+ if !p.consume(',') {
+ return nil, os.ErrorString("mail: expected comma")
+ }
+ }
+ return list, nil
+}
+
+// parseAddress parses a single RFC 5322 address at the start of p.
+func (p *addrParser) parseAddress() (addr *Address, err os.Error) {
+ debug.Printf("parseAddress: %q", *p)
+ p.skipSpace()
+ if p.empty() {
+ return nil, os.ErrorString("mail: no address")
+ }
+
+ // address = name-addr / addr-spec
+ // TODO(dsymonds): Support parsing group address.
+
+ // addr-spec has a more restricted grammar than name-addr,
+ // so try parsing it first, and fallback to name-addr.
+ // TODO(dsymonds): Is this really correct?
+ spec, err := p.consumeAddrSpec()
+ if err == nil {
+ return &Address{
+ Address: spec,
+ }, err
+ }
+ debug.Printf("parseAddress: not an addr-spec: %v", err)
+ debug.Printf("parseAddress: state is now %q", *p)
+
+ // display-name
+ var displayName string
+ if p.peek() != '<' {
+ displayName, err = p.consumePhrase()
+ if err != nil {
+ return nil, err
+ }
+ }
+ debug.Printf("parseAddress: displayName=%q", displayName)
+
+ // angle-addr = "<" addr-spec ">"
+ p.skipSpace()
+ if !p.consume('<') {
+ return nil, os.ErrorString("mail: no angle-addr")
+ }
+ spec, err = p.consumeAddrSpec()
+ if err != nil {
+ return nil, err
+ }
+ if !p.consume('>') {
+ return nil, os.ErrorString("mail: unclosed angle-addr")
+ }
+ debug.Printf("parseAddress: spec=%q", spec)
+
+ return &Address{
+ Name: displayName,
+ Address: spec,
+ }, nil
+}
+
+// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
+func (p *addrParser) consumeAddrSpec() (spec string, err os.Error) {
+ debug.Printf("consumeAddrSpec: %q", *p)
+
+ orig := *p
+ defer func() {
+ if err != nil {
+ *p = orig
+ }
+ }()
+
+ // local-part = dot-atom / quoted-string
+ var localPart string
+ p.skipSpace()
+ if p.empty() {
+ return "", os.ErrorString("mail: no addr-spec")
+ }
+ if p.peek() == '"' {
+ // quoted-string
+ debug.Printf("consumeAddrSpec: parsing quoted-string")
+ localPart, err = p.consumeQuotedString()
+ } else {
+ // dot-atom
+ debug.Printf("consumeAddrSpec: parsing dot-atom")
+ localPart, err = p.consumeAtom(true)
+ }
+ if err != nil {
+ debug.Printf("consumeAddrSpec: failed: %v", err)
+ return "", err
+ }
+
+ if !p.consume('@') {
+ return "", os.ErrorString("mail: missing @ in addr-spec")
+ }
+
+ // domain = dot-atom / domain-literal
+ var domain string
+ p.skipSpace()
+ if p.empty() {
+ return "", os.ErrorString("mail: no domain in addr-spec")
+ }
+ // TODO(dsymonds): Handle domain-literal
+ domain, err = p.consumeAtom(true)
+ if err != nil {
+ return "", err
+ }
+
+ return localPart + "@" + domain, nil
+}
+
+// consumePhrase parses the RFC 5322 phrase at the start of p.
+func (p *addrParser) consumePhrase() (phrase string, err os.Error) {
+ debug.Printf("consumePhrase: [%s]", *p)
+ // phrase = 1*word
+ var words []string
+ for {
+ // word = atom / quoted-string
+ var word string
+ p.skipSpace()
+ if p.empty() {
+ return "", os.ErrorString("mail: missing phrase")
+ }
+ if p.peek() == '"' {
+ // quoted-string
+ word, err = p.consumeQuotedString()
+ } else {
+ // atom
+ word, err = p.consumeAtom(false)
+ }
+ if err != nil {
+ break
+ }
+ debug.Printf("consumePhrase: consumed %q", word)
+ words = append(words, word)
+ }
+ // Ignore any error if we got at least one word.
+ if err != nil && len(words) == 0 {
+ debug.Printf("consumePhrase: hit err: %v", err)
+ return "", os.ErrorString("mail: missing word in phrase")
+ }
+ return strings.Join(words, " "), nil
+}
+
+// consumeQuotedString parses the quoted string at the start of p.
+func (p *addrParser) consumeQuotedString() (qs string, err os.Error) {
+ // Assume first byte is '"'.
+ i := 1
+ qsb := make([]byte, 0, 10)
+Loop:
+ for {
+ if i >= p.len() {
+ return "", os.ErrorString("mail: unclosed quoted-string")
+ }
+ switch c := (*p)[i]; {
+ case c == '"':
+ break Loop
+ case c == '\\':
+ if i+1 == p.len() {
+ return "", os.ErrorString("mail: unclosed quoted-string")
+ }
+ qsb = append(qsb, (*p)[i+1])
+ i += 2
+ case '!' <= c && c <= '~', c == ' ' || c == '\t':
+ // qtext (printable US-ASCII excluding " and \), or
+ // FWS (almost; we're ignoring CRLF)
+ qsb = append(qsb, c)
+ i++
+ default:
+ return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
+ }
+ }
+ *p = (*p)[i+1:]
+ return string(qsb), nil
+}
+
+// consumeAtom parses an RFC 5322 atom at the start of p.
+// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
+func (p *addrParser) consumeAtom(dot bool) (atom string, err os.Error) {
+ if !isAtext(p.peek(), false) {
+ return "", os.ErrorString("mail: invalid string")
+ }
+ i := 1
+ for ; i < p.len() && isAtext((*p)[i], dot); i++ {
+ }
+ // TODO(dsymonds): Remove the []byte() conversion here when 6g doesn't need it.
+ atom, *p = string([]byte((*p)[:i])), (*p)[i:]
+ return atom, nil
+}
+
+func (p *addrParser) consume(c byte) bool {
+ if p.empty() || p.peek() != c {
+ return false
+ }
+ *p = (*p)[1:]
+ return true
+}
+
+// skipSpace skips the leading space and tab characters.
+func (p *addrParser) skipSpace() {
+ *p = bytes.TrimLeft(*p, " \t")
+}
+
+func (p *addrParser) peek() byte {
+ return (*p)[0]
+}
+
+func (p *addrParser) empty() bool {
+ return p.len() == 0
+}
+
+func (p *addrParser) len() int {
+ return len(*p)
+}
+
+var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
+ "abcdefghijklmnopqrstuvwxyz" +
+ "0123456789" +
+ "!#$%&'*+-/=?^_`{|}~")
+
+// isAtext returns true if c is an RFC 5322 atext character.
+// If dot is true, period is included.
+func isAtext(c byte, dot bool) bool {
+ if dot && c == '.' {
+ return true
+ }
+ return bytes.IndexByte(atextChars, c) >= 0
+}
}
}
}
+
+func TestAddressParsing(t *testing.T) {
+ tests := []struct {
+ addrsStr string
+ exp []*Address
+ }{
+ // Bare address
+ {
+ `jdoe@machine.example`,
+ []*Address{&Address{
+ Address: "jdoe@machine.example",
+ }},
+ },
+ // RFC 5322, Appendix A.1.1
+ {
+ `John Doe <jdoe@machine.example>`,
+ []*Address{&Address{
+ Name: "John Doe",
+ Address: "jdoe@machine.example",
+ }},
+ },
+ // RFC 5322, Appendix A.1.2
+ {
+ `"Joe Q. Public" <john.q.public@example.com>`,
+ []*Address{&Address{
+ Name: "Joe Q. Public",
+ Address: "john.q.public@example.com",
+ }},
+ },
+ {
+ `Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`,
+ []*Address{
+ &Address{
+ Name: "Mary Smith",
+ Address: "mary@x.test",
+ },
+ &Address{
+ Address: "jdoe@example.org",
+ },
+ &Address{
+ Name: "Who?",
+ Address: "one@y.test",
+ },
+ },
+ },
+ {
+ `<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`,
+ []*Address{
+ &Address{
+ Address: "boss@nil.test",
+ },
+ &Address{
+ Name: `Giant; "Big" Box`,
+ Address: "sysservices@example.net",
+ },
+ },
+ },
+ // RFC 5322, Appendix A.1.3
+ // TODO(dsymonds): Group addresses.
+ }
+ for _, test := range tests {
+ addrs, err := newAddrParser(test.addrsStr).parseAddressList()
+ if err != nil {
+ t.Errorf("Failed parsing %q: %v", test.addrsStr, err)
+ continue
+ }
+ if !reflect.DeepEqual(addrs, test.exp) {
+ t.Errorf("Parse of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp)
+ }
+ }
+}