From 597e57f4d5b6aad29717a41c683e821ad7ee3d8d Mon Sep 17 00:00:00 2001 From: Ivan Krasin Date: Wed, 6 Jan 2010 07:47:40 -0800 Subject: [PATCH] Add http.CanonicalPath and tests for it. Remove BUG(rsc) from url.go. R=rsc, imkrasin CC=golang-dev https://golang.org/cl/179126 --- src/pkg/http/url.go | 66 ++++++++++++++++++++++++++++++++++------ src/pkg/http/url_test.go | 38 +++++++++++++++++++++++ 2 files changed, 95 insertions(+), 9 deletions(-) diff --git a/src/pkg/http/url.go b/src/pkg/http/url.go index ca8c344be1..f879f8d8fe 100644 --- a/src/pkg/http/url.go +++ b/src/pkg/http/url.go @@ -67,6 +67,63 @@ func shouldEscape(c byte) bool { return false } +// CanonicalPath applies the algorithm specified in RFC 2396 to +// simplify the path, removing unnecessary . and .. elements. +func CanonicalPath(path string) string { + buf := strings.Bytes(path) + a := buf[0:0] + // state helps to find /.. ^.. ^. and /. patterns. + // state == 1 - prev char is '/' or beginning of the string. + // state > 1 - prev state > 0 and prev char was '.' + // state == 0 - otherwise + state := 1 + cnt := 0 + for _, v := range buf { + switch v { + case '/': + s := state + state = 1 + switch s { + case 2: + a = a[0 : len(a)-1] + continue + case 3: + if cnt > 0 { + i := len(a) - 4 + for ; i >= 0 && a[i] != '/'; i-- { + } + a = a[0 : i+1] + cnt-- + continue + } + default: + if len(a) > 0 { + cnt++ + } + } + case '.': + if state > 0 { + state++ + } + default: + state = 0 + } + l := len(a) + a = a[0 : l+1] + a[l] = v + } + switch { + case state == 2: + a = a[0 : len(a)-1] + case state == 3 && cnt > 0: + i := len(a) - 4 + for ; i >= 0 && a[i] != '/'; i-- { + } + a = a[0 : i+1] + } + return string(a) +} + // URLUnescape unescapes a URL-encoded string, // converting %AB into the byte 0xAB and '+' into ' ' (space). // It returns an error if any % is not followed @@ -221,13 +278,6 @@ func split(s string, c byte, cutc bool) (string, string) { return s, "" } -// TODO(rsc): The BUG comment is supposed to appear in the godoc output -// in a BUGS section, but that got lost in the transition to godoc. - -// BUG(rsc): ParseURL should canonicalize the path, -// removing unnecessary . and .. elements. - - // ParseURL parses rawurl into a URL structure. // The string rawurl is assumed not to have a #fragment suffix. // (Web browsers strip #fragment before sending the URL to a web server.) @@ -264,8 +314,6 @@ func ParseURL(rawurl string) (url *URL, err os.Error) { url.Userinfo, url.Host = split(url.Authority, '@', true) } - // What's left is the path. - // TODO: Canonicalize (remove . and ..)? if url.Path, err = URLUnescape(path); err != nil { goto Error } diff --git a/src/pkg/http/url_test.go b/src/pkg/http/url_test.go index 0e3fa2d6ec..542ad0a38b 100644 --- a/src/pkg/http/url_test.go +++ b/src/pkg/http/url_test.go @@ -355,3 +355,41 @@ func TestURLEscape(t *testing.T) { } } } + +type CanonicalPathTest struct { + in string + out string +} + +var canonicalTests = []CanonicalPathTest{ + CanonicalPathTest{"", ""}, + CanonicalPathTest{"/", "/"}, + CanonicalPathTest{".", ""}, + CanonicalPathTest{"./", ""}, + CanonicalPathTest{"/a/", "/a/"}, + CanonicalPathTest{"a/", "a/"}, + CanonicalPathTest{"a/./", "a/"}, + CanonicalPathTest{"./a", "a"}, + CanonicalPathTest{"/a/../b", "/b"}, + CanonicalPathTest{"a/../b", "b"}, + CanonicalPathTest{"a/../../b", "../b"}, + CanonicalPathTest{"a/.", "a/"}, + CanonicalPathTest{"../.././a", "../../a"}, + CanonicalPathTest{"/../.././a", "/../../a"}, + CanonicalPathTest{"a/b/g/../..", "a/"}, + CanonicalPathTest{"a/b/..", "a/"}, + CanonicalPathTest{"a/b/.", "a/b/"}, + CanonicalPathTest{"a/b/../../../..", "../.."}, + CanonicalPathTest{"a./", "a./"}, + CanonicalPathTest{"/../a/b/../../../", "/../../"}, + CanonicalPathTest{"../a/b/../../../", "../../"}, +} + +func TestCanonicalPath(t *testing.T) { + for _, tt := range canonicalTests { + actual := CanonicalPath(tt.in) + if tt.out != actual { + t.Errorf("CanonicalPath(%q) = %q, want %q", tt.in, actual, tt.out) + } + } +} -- 2.50.0