]> Cypherpunks repositories - gostls13.git/commitdiff
path: add Match
authorRuss Cox <rsc@golang.org>
Thu, 25 Feb 2010 00:11:14 +0000 (16:11 -0800)
committerRuss Cox <rsc@golang.org>
Thu, 25 Feb 2010 00:11:14 +0000 (16:11 -0800)
R=eridius, r, rog
CC=golang-dev
https://golang.org/cl/217088

src/pkg/path/Makefile
src/pkg/path/match.go [new file with mode: 0644]
src/pkg/path/match_test.go [new file with mode: 0644]

index 199b6800848b4daacc4bf34fd0cc5fba84490710..9372cdf371f252811540208e632bc658c1f78d1b 100644 (file)
@@ -6,6 +6,7 @@ include ../../Make.$(GOARCH)
 
 TARG=path
 GOFILES=\
+       match.go\
        path.go\
 
 include ../../Make.pkg
diff --git a/src/pkg/path/match.go b/src/pkg/path/match.go
new file mode 100644 (file)
index 0000000..4e42b6a
--- /dev/null
@@ -0,0 +1,197 @@
+package path
+
+import (
+       "os"
+       "strings"
+       "utf8"
+)
+
+var ErrBadPattern = os.NewError("syntax error in pattern")
+
+// Match returns true if name matches the shell file name pattern.
+// The syntax used by pattern is:
+//
+//     pattern:
+//             { term }
+//     term:
+//             '*'         matches any sequence of non-/ characters
+//             '?'         matches any single non-/ character
+//             '[' [ '^' ] { character-range } ']'
+//                         character class (must be non-empty)
+//             c           matches character c (c != '*', '?', '\\', '[')
+//             '\\' c      matches character c
+//
+//     character-range:
+//             c           matches character c (c != '\\', '-', ']')
+//             '\\' c      matches character c
+//             lo '-' hi   matches character c for lo <= c <= hi
+//
+// Match requires pattern to match all of name, not just a substring.
+// The only possible error return is when pattern is malformed.
+//
+func Match(pattern, name string) (matched bool, err os.Error) {
+Pattern:
+       for len(pattern) > 0 {
+               var star bool
+               var chunk string
+               star, chunk, pattern = scanChunk(pattern)
+               if star && chunk == "" {
+                       // Trailing * matches rest of string unless it has a /.
+                       return strings.Index(name, "/") < 0, nil
+               }
+               // Look for match at current position.
+               t, ok, err := matchChunk(chunk, name)
+               if ok {
+                       name = t
+                       continue
+               }
+               if err != nil {
+                       return false, err
+               }
+               if star {
+                       // Look for match skipping i+1 bytes.
+                       // Cannot skip /.
+                       for i := 0; i < len(name) && name[i] != '/'; i++ {
+                               t, ok, err := matchChunk(chunk, name[i+1:])
+                               if ok {
+                                       name = t
+                                       continue Pattern
+                               }
+                               if err != nil {
+                                       return false, err
+                               }
+                       }
+               }
+               return false, nil
+       }
+       return len(name) == 0, nil
+}
+
+// scanChunk gets the next section of pattern, which is a non-star string
+// possibly preceded by a star.
+func scanChunk(pattern string) (star bool, chunk, rest string) {
+       for len(pattern) > 0 && pattern[0] == '*' {
+               pattern = pattern[1:]
+               star = true
+       }
+       inrange := false
+       var i int
+Scan:
+       for i = 0; i < len(pattern); i++ {
+               switch pattern[i] {
+               case '\\':
+                       // error check handled in matchChunk: bad pattern.
+                       if i+1 < len(pattern) {
+                               i++
+                       }
+                       continue
+               case '[':
+                       inrange = true
+               case ']':
+                       inrange = false
+               case '*':
+                       if !inrange {
+                               break Scan
+                       }
+               }
+       }
+       return star, pattern[0:i], pattern[i:]
+}
+
+// matchChunk checks whether chunk matches the beginning of s.
+// If so, it returns the remainder of s (after the match).
+// Chunk is all single-character operators: literals, char classes, and ?.
+func matchChunk(chunk, s string) (rest string, ok bool, err os.Error) {
+       for len(chunk) > 0 {
+               if len(s) == 0 {
+                       return
+               }
+               switch chunk[0] {
+               case '[':
+                       // character class
+                       r, n := utf8.DecodeRuneInString(s)
+                       s = s[n:]
+                       chunk = chunk[1:]
+                       // possibly negated
+                       notNegated := true
+                       if len(chunk) > 0 && chunk[0] == '^' {
+                               notNegated = false
+                               chunk = chunk[1:]
+                       }
+                       // parse all ranges
+                       match := false
+                       nrange := 0
+                       for {
+                               if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
+                                       chunk = chunk[1:]
+                                       break
+                               }
+                               var lo, hi int
+                               if lo, chunk, err = getEsc(chunk); err != nil {
+                                       return
+                               }
+                               hi = lo
+                               if chunk[0] == '-' {
+                                       if hi, chunk, err = getEsc(chunk[1:]); err != nil {
+                                               return
+                                       }
+                               }
+                               if lo <= r && r <= hi {
+                                       match = true
+                               }
+                               nrange++
+                       }
+                       if match != notNegated {
+                               return
+                       }
+
+               case '?':
+                       if s[0] == '/' {
+                               return
+                       }
+                       _, n := utf8.DecodeRuneInString(s)
+                       s = s[n:]
+                       chunk = chunk[1:]
+
+               case '\\':
+                       chunk = chunk[1:]
+                       if len(chunk) == 0 {
+                               err = ErrBadPattern
+                               return
+                       }
+                       fallthrough
+
+               default:
+                       if chunk[0] != s[0] {
+                               return
+                       }
+                       s = s[1:]
+                       chunk = chunk[1:]
+               }
+       }
+       return s, true, nil
+}
+
+// getEsc gets a possibly-escaped character from chunk, for a character class.
+func getEsc(chunk string) (r int, nchunk string, err os.Error) {
+       if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
+               err = ErrBadPattern
+               return
+       }
+       if chunk[0] == '\\' {
+               chunk = chunk[1:]
+               if len(chunk) == 0 {
+                       err = ErrBadPattern
+                       return
+               }
+       }
+       r, n := utf8.DecodeRuneInString(chunk)
+       if r == utf8.RuneError && n == 1 {
+               err = ErrBadPattern
+       }
+       nchunk = chunk[n:]
+       if len(nchunk) == 0 {
+               err = ErrBadPattern
+       }
+       return
+}
diff --git a/src/pkg/path/match_test.go b/src/pkg/path/match_test.go
new file mode 100644 (file)
index 0000000..d3cd088
--- /dev/null
@@ -0,0 +1,76 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package path
+
+import (
+       "os"
+       "testing"
+)
+
+type MatchTest struct {
+       pattern, s string
+       match      bool
+       err        os.Error
+}
+
+var matchTests = []MatchTest{
+       MatchTest{"abc", "abc", true, nil},
+       MatchTest{"*", "abc", true, nil},
+       MatchTest{"*c", "abc", true, nil},
+       MatchTest{"a*", "a", true, nil},
+       MatchTest{"a*", "abc", true, nil},
+       MatchTest{"a*", "ab/c", false, nil},
+       MatchTest{"a*/b", "abc/b", true, nil},
+       MatchTest{"a*/b", "a/c/b", false, nil},
+       MatchTest{"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil},
+       MatchTest{"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil},
+       MatchTest{"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil},
+       MatchTest{"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil},
+       MatchTest{"a*b?c*x", "abxbbxdbxebxczzx", true, nil},
+       MatchTest{"a*b?c*x", "abxbbxdbxebxczzy", false, nil},
+       MatchTest{"ab[c]", "abc", true, nil},
+       MatchTest{"ab[b-d]", "abc", true, nil},
+       MatchTest{"ab[e-g]", "abc", false, nil},
+       MatchTest{"ab[^c]", "abc", false, nil},
+       MatchTest{"ab[^b-d]", "abc", false, nil},
+       MatchTest{"ab[^e-g]", "abc", true, nil},
+       MatchTest{"a\\*b", "a*b", true, nil},
+       MatchTest{"a\\*b", "ab", false, nil},
+       MatchTest{"a?b", "a☺b", true, nil},
+       MatchTest{"a[^a]b", "a☺b", true, nil},
+       MatchTest{"a???b", "a☺b", false, nil},
+       MatchTest{"a[^a][^a][^a]b", "a☺b", false, nil},
+       MatchTest{"[a-ζ]*", "α", true, nil},
+       MatchTest{"*[a-ζ]", "A", false, nil},
+       MatchTest{"a?b", "a/b", false, nil},
+       MatchTest{"a*b", "a/b", false, nil},
+       MatchTest{"[\\]a]", "]", true, nil},
+       MatchTest{"[\\-]", "-", true, nil},
+       MatchTest{"[x\\-]", "x", true, nil},
+       MatchTest{"[x\\-]", "-", true, nil},
+       MatchTest{"[x\\-]", "z", false, nil},
+       MatchTest{"[\\-x]", "x", true, nil},
+       MatchTest{"[\\-x]", "-", true, nil},
+       MatchTest{"[\\-x]", "a", false, nil},
+       MatchTest{"[]a]", "]", false, ErrBadPattern},
+       MatchTest{"[-]", "-", false, ErrBadPattern},
+       MatchTest{"[x-]", "x", false, ErrBadPattern},
+       MatchTest{"[x-]", "-", false, ErrBadPattern},
+       MatchTest{"[x-]", "z", false, ErrBadPattern},
+       MatchTest{"[-x]", "x", false, ErrBadPattern},
+       MatchTest{"[-x]", "-", false, ErrBadPattern},
+       MatchTest{"[-x]", "a", false, ErrBadPattern},
+       MatchTest{"\\", "a", false, ErrBadPattern},
+       MatchTest{"[a-b-c]", "a", false, ErrBadPattern},
+}
+
+func TestMatch(t *testing.T) {
+       for _, tt := range matchTests {
+               ok, err := Match(tt.pattern, tt.s)
+               if ok != tt.match || err != tt.err {
+                       t.Errorf("Match(%#q, %#q) = %v, %v want %v, nil\n", tt.pattern, tt.s, ok, err, tt.match)
+               }
+       }
+}