Cypherpunks repositories - gostls13.git/commitdiff
mail: address list parsing.
author David Symonds <dsymonds@golang.org>
Mon, 6 Jun 2011 06:46:14 +0000 (16:46 +1000)
committer David Symonds <dsymonds@golang.org>
Mon, 6 Jun 2011 06:46:14 +0000 (16:46 +1000)
R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/4547084

src/pkg/mail/message.go
src/pkg/mail/message_test.go

index 9723863fee1a183d7eab4addb1f4ce13c3542c81..50d89d35748efd8a6820ab383c44f481319b40cd 100644 (file)
@@ -2,17 +2,42 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package mail implements parsing of mail messages according to RFC 5322.
+/*
+Package mail implements parsing of mail messages.
+
+For the most part, this package follows the syntax as specified by RFC 5322.
+Notable divergences:
+       * Obsolete address formats are not parsed, including addresses with
+         embedded route information.
+       * Group addresses are not parsed.
+       * The full range of spacing (the CFWS syntax element) is not supported,
+         such as breaking addresses across lines.
+*/
 package mail
 
 import (
        "bufio"
+       "bytes"
+       "fmt"
        "io"
+       "log"
        "net/textproto"
        "os"
+       "strconv"
+       "strings"
        "time"
 )
 
+// debug gates the parser's trace output. It is off by default.
+var debug debugT = false
+
+// debugT is a boolean switch whose Printf method logs only when the
+// switch is on.
+type debugT bool
+
+// Printf passes format and args to log.Printf when d is true and does
+// nothing otherwise.
+func (d debugT) Printf(format string, args ...interface{}) {
+       if !d {
+               return
+       }
+       log.Printf(format, args...)
+}
+
 // A Message represents a parsed mail message.
 type Message struct {
        Header Header
@@ -93,3 +118,277 @@ func (h Header) Date() (*time.Time, os.Error) {
        }
        return parseDate(hdr)
 }
+
+// AddressList parses the named header field as a list of addresses.
+func (h Header) AddressList(key string) ([]*Address, os.Error) {
+       hdr := h.Get(key)
+       if hdr == "" {
+               return nil, ErrHeaderNotPresent
+       }
+       return newAddrParser(hdr).parseAddressList()
+}
+
+// Address represents a single mail address.
+// An address such as "Barry Gibbs <bg@example.com>" is represented
+// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
+type Address struct {
+       Name    string // Proper name; may be empty.
+       Address string // user@domain
+}
+
+// String formats the address as "<user@domain>". When Name is non-empty
+// the result is prefixed with the double-quoted name and a space, for
+// example `"Barry Gibbs" <bg@example.com>`.
+func (a *Address) String() string {
+       s := "<" + a.Address + ">"
+       if a.Name == "" {
+               return s
+       }
+       // strconv.Quote already supplies the surrounding double quotes and
+       // backslash-escapes any embedded '"' or '\'. Wrapping its result in a
+       // second pair of quotes would produce ""Name"" <addr>.
+       return strconv.Quote(a.Name) + " " + s
+}
+
+// addrParser holds the bytes of an address list that have not been
+// parsed yet; the consume methods shrink it from the front.
+type addrParser []byte
+
+// newAddrParser returns a parser positioned at the start of s.
+func newAddrParser(s string) *addrParser {
+       p := new(addrParser)
+       *p = addrParser(s)
+       return p
+}
+
+// parseAddressList parses one or more comma-separated addresses from p
+// and returns them in input order. Any parse failure discards the
+// addresses collected so far and returns the error.
+func (p *addrParser) parseAddressList() ([]*Address, os.Error) {
+       var addrs []*Address
+       for {
+               p.skipSpace()
+               a, err := p.parseAddress()
+               if err != nil {
+                       return nil, err
+               }
+               addrs = append(addrs, a)
+
+               // After an address, only end of input or a comma may follow.
+               p.skipSpace()
+               if p.empty() {
+                       break
+               }
+               if p.consume(',') {
+                       continue
+               }
+               return nil, os.ErrorString("mail: expected comma")
+       }
+       return addrs, nil
+}
+
+// parseAddress parses a single address at the start of p. It accepts
+// either a bare addr-spec ("user@domain") or a name-addr, which is an
+// optional display name followed by "<user@domain>".
+func (p *addrParser) parseAddress() (addr *Address, err os.Error) {
+       debug.Printf("parseAddress: %q", *p)
+       p.skipSpace()
+       if p.empty() {
+               return nil, os.ErrorString("mail: no address")
+       }
+
+       // address = name-addr / addr-spec
+       // TODO(dsymonds): Support parsing group address.
+
+       // Try the more restricted addr-spec grammar first. consumeAddrSpec
+       // restores p when it fails, so the name-addr attempt below starts
+       // from the same position.
+       // TODO(dsymonds): Is this really correct?
+       spec, err := p.consumeAddrSpec()
+       if err == nil {
+               addr = &Address{Address: spec}
+               return addr, nil
+       }
+       debug.Printf("parseAddress: not an addr-spec: %v", err)
+       debug.Printf("parseAddress: state is now %q", *p)
+
+       // display-name; absent when the address starts directly with '<'.
+       var name string
+       if p.peek() != '<' {
+               if name, err = p.consumePhrase(); err != nil {
+                       return nil, err
+               }
+       }
+       debug.Printf("parseAddress: displayName=%q", name)
+
+       // angle-addr = "<" addr-spec ">"
+       p.skipSpace()
+       if !p.consume('<') {
+               return nil, os.ErrorString("mail: no angle-addr")
+       }
+       if spec, err = p.consumeAddrSpec(); err != nil {
+               return nil, err
+       }
+       if !p.consume('>') {
+               return nil, os.ErrorString("mail: unclosed angle-addr")
+       }
+       debug.Printf("parseAddress: spec=%q", spec)
+
+       addr = &Address{Name: name, Address: spec}
+       return addr, nil
+}
+
+// consumeAddrSpec parses a single RFC 5322 addr-spec ("local-part@domain")
+// at the start of p and returns it as one string. If parsing fails, p is
+// restored to the state it had on entry.
+func (p *addrParser) consumeAddrSpec() (spec string, err os.Error) {
+       debug.Printf("consumeAddrSpec: %q", *p)
+
+       // Roll back anything consumed if this call ends up returning an error.
+       saved := *p
+       defer func() {
+               if err != nil {
+                       *p = saved
+               }
+       }()
+
+       // local-part = dot-atom / quoted-string
+       p.skipSpace()
+       if p.empty() {
+               return "", os.ErrorString("mail: no addr-spec")
+       }
+       var local string
+       switch p.peek() {
+       case '"':
+               debug.Printf("consumeAddrSpec: parsing quoted-string")
+               local, err = p.consumeQuotedString()
+       default:
+               debug.Printf("consumeAddrSpec: parsing dot-atom")
+               local, err = p.consumeAtom(true)
+       }
+       if err != nil {
+               debug.Printf("consumeAddrSpec: failed: %v", err)
+               return "", err
+       }
+
+       if !p.consume('@') {
+               return "", os.ErrorString("mail: missing @ in addr-spec")
+       }
+
+       // domain = dot-atom / domain-literal
+       p.skipSpace()
+       if p.empty() {
+               return "", os.ErrorString("mail: no domain in addr-spec")
+       }
+       // TODO(dsymonds): Handle domain-literal
+       var host string
+       host, err = p.consumeAtom(true)
+       if err != nil {
+               return "", err
+       }
+
+       return local + "@" + host, nil
+}
+
+// consumePhrase parses the RFC 5322 phrase at the start of p and returns
+// its words joined by single spaces. Each word is an atom or a
+// quoted-string; parsing stops at the first thing that is neither.
+func (p *addrParser) consumePhrase() (phrase string, err os.Error) {
+       debug.Printf("consumePhrase: [%s]", *p)
+       // phrase = 1*word
+       var words []string
+       for {
+               p.skipSpace()
+               if p.empty() {
+                       return "", os.ErrorString("mail: missing phrase")
+               }
+               // word = atom / quoted-string
+               var w string
+               if p.peek() != '"' {
+                       w, err = p.consumeAtom(false)
+               } else {
+                       w, err = p.consumeQuotedString()
+               }
+               if err != nil {
+                       break
+               }
+               debug.Printf("consumePhrase: consumed %q", w)
+               words = append(words, w)
+       }
+       // The loop is only left with err set. That error is ignored as long
+       // as at least one word was collected before it.
+       if len(words) == 0 {
+               debug.Printf("consumePhrase: hit err: %v", err)
+               return "", os.ErrorString("mail: missing word in phrase")
+       }
+       return strings.Join(words, " "), nil
+}
+
+// consumeQuotedString parses the quoted string at the start of p.
+func (p *addrParser) consumeQuotedString() (qs string, err os.Error) {
+       // Assume first byte is '"'.
+       i := 1
+       qsb := make([]byte, 0, 10)
+Loop:
+       for {
+               if i >= p.len() {
+                       return "", os.ErrorString("mail: unclosed quoted-string")
+               }
+               switch c := (*p)[i]; {
+               case c == '"':
+                       break Loop
+               case c == '\\':
+                       if i+1 == p.len() {
+                               return "", os.ErrorString("mail: unclosed quoted-string")
+                       }
+                       qsb = append(qsb, (*p)[i+1])
+                       i += 2
+               case '!' <= c && c <= '~', c == ' ' || c == '\t':
+                       // qtext (printable US-ASCII excluding " and \), or
+                       // FWS (almost; we're ignoring CRLF)
+                       qsb = append(qsb, c)
+                       i++
+               default:
+                       return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
+               }
+       }
+       *p = (*p)[i+1:]
+       return string(qsb), nil
+}
+
+// consumeAtom parses an RFC 5322 atom at the start of p.
+// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
+func (p *addrParser) consumeAtom(dot bool) (atom string, err os.Error) {
+       if !isAtext(p.peek(), false) {
+               return "", os.ErrorString("mail: invalid string")
+       }
+       i := 1
+       for ; i < p.len() && isAtext((*p)[i], dot); i++ {
+       }
+       // TODO(dsymonds): Remove the []byte() conversion here when 6g doesn't need it.
+       atom, *p = string([]byte((*p)[:i])), (*p)[i:]
+       return atom, nil
+}
+
+func (p *addrParser) consume(c byte) bool {
+       if p.empty() || p.peek() != c {
+               return false
+       }
+       *p = (*p)[1:]
+       return true
+}
+
+// skipSpace skips the leading space and tab characters.
+func (p *addrParser) skipSpace() {
+       *p = bytes.TrimLeft(*p, " \t")
+}
+
+func (p *addrParser) peek() byte {
+       return (*p)[0]
+}
+
+func (p *addrParser) empty() bool {
+       return p.len() == 0
+}
+
+func (p *addrParser) len() int {
+       return len(*p)
+}
+
+// atextChars lists the bytes accepted as atext: ASCII letters, digits,
+// and the punctuation characters below. The period is not among them.
+var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
+       "abcdefghijklmnopqrstuvwxyz" +
+       "0123456789" +
+       "!#$%&'*+-/=?^_`{|}~")
+
+// isAtext returns true if c is an RFC 5322 atext character.
+// A period counts as atext only when dot is true.
+func isAtext(c byte, dot bool) bool {
+       if c == '.' {
+               // '.' is not in atextChars, so dot alone decides.
+               return dot
+       }
+       return bytes.IndexByte(atextChars, c) >= 0
+}
index 1d1c6352ea294a52490c7598126b83604030aa68..c3ec236816a5589b971e389956498704af687ba5 100644 (file)
@@ -127,3 +127,74 @@ func TestDateParsing(t *testing.T) {
                }
        }
 }
+
+func TestAddressParsing(t *testing.T) {
+       tests := []struct {
+               addrsStr string
+               exp      []*Address
+       }{
+               // Bare address
+               {
+                       `jdoe@machine.example`,
+                       []*Address{&Address{
+                               Address: "jdoe@machine.example",
+                       }},
+               },
+               // RFC 5322, Appendix A.1.1
+               {
+                       `John Doe <jdoe@machine.example>`,
+                       []*Address{&Address{
+                               Name:    "John Doe",
+                               Address: "jdoe@machine.example",
+                       }},
+               },
+               // RFC 5322, Appendix A.1.2
+               {
+                       `"Joe Q. Public" <john.q.public@example.com>`,
+                       []*Address{&Address{
+                               Name:    "Joe Q. Public",
+                               Address: "john.q.public@example.com",
+                       }},
+               },
+               {
+                       `Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`,
+                       []*Address{
+                               &Address{
+                                       Name:    "Mary Smith",
+                                       Address: "mary@x.test",
+                               },
+                               &Address{
+                                       Address: "jdoe@example.org",
+                               },
+                               &Address{
+                                       Name:    "Who?",
+                                       Address: "one@y.test",
+                               },
+                       },
+               },
+               {
+                       `<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`,
+                       []*Address{
+                               &Address{
+                                       Address: "boss@nil.test",
+                               },
+                               &Address{
+                                       Name:    `Giant; "Big" Box`,
+                                       Address: "sysservices@example.net",
+                               },
+                       },
+               },
+               // RFC 5322, Appendix A.1.3
+               // TODO(dsymonds): Group addresses.
+       }
+       for _, test := range tests {
+               addrs, err := newAddrParser(test.addrsStr).parseAddressList()
+               if err != nil {
+                       t.Errorf("Failed parsing %q: %v", test.addrsStr, err)
+                       continue
+               }
+               if !reflect.DeepEqual(addrs, test.exp) {
+                       t.Errorf("Parse of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp)
+               }
+       }
+}