]> Cypherpunks repositories - gostls13.git/commitdiff
bytes, strings: allow -1 in Map to mean "drop this character".
authorKei Son <hey.calmdown@gmail.com>
Fri, 11 Dec 2009 18:37:48 +0000 (10:37 -0800)
committerRuss Cox <rsc@golang.org>
Fri, 11 Dec 2009 18:37:48 +0000 (10:37 -0800)
xml: drop invalid characters in attribute names
    when constructing struct field names.

R=rsc
CC=r
https://golang.org/cl/157104

src/pkg/bytes/bytes.go
src/pkg/bytes/bytes_test.go
src/pkg/strings/strings.go
src/pkg/strings/strings_test.go
src/pkg/xml/read.go
src/pkg/xml/read_test.go

index 9ab199ceb6bd153666ee9596c86dc3b5299e62b3..85d4f9fd760409c4a6d026298c5c0074bdf4b745 100644 (file)
@@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool {
 }
 
 // Map returns a copy of the byte array s with all its characters modified
-// according to the mapping function.
+// according to the mapping function. If mapping returns a negative value, the character is
+// dropped from the string with no replacement.
 func Map(mapping func(rune int) int, s []byte) []byte {
        // In the worst case, the array can grow when mapped, making
        // things unpleasant.  But it's so rare we barge in assuming it's
@@ -222,16 +223,18 @@ func Map(mapping func(rune int) int, s []byte) []byte {
                        rune, wid = utf8.DecodeRune(s[i:])
                }
                rune = mapping(rune);
-               if nbytes+utf8.RuneLen(rune) > maxbytes {
-                       // Grow the buffer.
-                       maxbytes = maxbytes*2 + utf8.UTFMax;
-                       nb := make([]byte, maxbytes);
-                       for i, c := range b[0:nbytes] {
-                               nb[i] = c
+               if rune >= 0 {
+                       if nbytes+utf8.RuneLen(rune) > maxbytes {
+                               // Grow the buffer.
+                               maxbytes = maxbytes*2 + utf8.UTFMax;
+                               nb := make([]byte, maxbytes);
+                               for i, c := range b[0:nbytes] {
+                                       nb[i] = c
+                               }
+                               b = nb;
                        }
-                       b = nb;
+                       nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
                }
-               nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
                i += wid;
        }
        return b[0:nbytes];
index 553ceb7c5ac3647efc5c784c94fc63c16c93f294..3f77e6e9ff3409fa29e9cd776c18652bd5cc0d7a 100644 (file)
@@ -365,6 +365,19 @@ func TestMap(t *testing.T) {
        if string(m) != expect {
                t.Errorf("rot13: expected %q got %q", expect, m)
        }
+
+       // 5. Drop
+       dropNotLatin := func(rune int) int {
+               if unicode.Is(unicode.Latin, rune) {
+                       return rune
+               }
+               return -1;
+       };
+       m = Map(dropNotLatin, Bytes("Hello, 세계"));
+       expect = "Hello";
+       if string(m) != expect {
+               t.Errorf("drop: expected %q got %q", expect, m)
+       }
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
index 7be98e6c1024d0b6f159e7963b345ef95afcd93c..4e375b4d5bb66aa76908524ce337d5472128451a 100644 (file)
@@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool {
 }
 
 // Map returns a copy of the string s with all its characters modified
-// according to the mapping function.
+// according to the mapping function. If mapping returns a negative value, the character is
+// dropped from the string with no replacement.
 func Map(mapping func(rune int) int, s string) string {
        // In the worst case, the string can grow when mapped, making
        // things unpleasant.  But it's so rare we barge in assuming it's
@@ -188,20 +189,22 @@ func Map(mapping func(rune int) int, s string) string {
        b := make([]byte, maxbytes);
        for _, c := range s {
                rune := mapping(c);
-               wid := 1;
-               if rune >= utf8.RuneSelf {
-                       wid = utf8.RuneLen(rune)
-               }
-               if nbytes+wid > maxbytes {
-                       // Grow the buffer.
-                       maxbytes = maxbytes*2 + utf8.UTFMax;
-                       nb := make([]byte, maxbytes);
-                       for i, c := range b[0:nbytes] {
-                               nb[i] = c
+               if rune >= 0 {
+                       wid := 1;
+                       if rune >= utf8.RuneSelf {
+                               wid = utf8.RuneLen(rune)
+                       }
+                       if nbytes+wid > maxbytes {
+                               // Grow the buffer.
+                               maxbytes = maxbytes*2 + utf8.UTFMax;
+                               nb := make([]byte, maxbytes);
+                               for i, c := range b[0:nbytes] {
+                                       nb[i] = c
+                               }
+                               b = nb;
                        }
-                       b = nb;
+                       nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
                }
-               nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
        }
        return string(b[0:nbytes]);
 }
index ce77c5c2f2406d7bb8452ecaabf9efec458e4d6d..e3e7f38aeddaf4dbae307426e1ce686babd30da7 100644 (file)
@@ -279,6 +279,19 @@ func TestMap(t *testing.T) {
        if m != expect {
                t.Errorf("rot13: expected %q got %q", expect, m)
        }
+
+       // 5. Drop
+       dropNotLatin := func(rune int) int {
+               if unicode.Is(unicode.Latin, rune) {
+                       return rune
+               }
+               return -1;
+       };
+       m = Map(dropNotLatin, "Hello, 세계");
+       expect = "Hello";
+       if m != expect {
+               t.Errorf("drop: expected %q got %q", expect, m)
+       }
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
index 5685787230039ba609035e633875f17ae45b304b..d8ee78a123f079d4af0508ac53e18d7144c3aa1d 100644 (file)
@@ -10,6 +10,7 @@ import (
        "os";
        "reflect";
        "strings";
+       "unicode";
 )
 
 // BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
@@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error {
        return p.unmarshal(v.Elem(), start);
 }
 
+// fieldName strips invalid characters from an XML name
+// to create a valid Go struct name.  It also converts the
+// name to lower case letters.
+func fieldName(original string) string {
+       return strings.Map(
+               func(x int) int {
+                       if unicode.IsDigit(x) || unicode.IsLetter(x) {
+                               return unicode.ToLower(x)
+                       }
+                       return -1;
+               },
+               original)
+}
+
 // Unmarshal a single XML element into val.
 func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
        // Find start element if we need it.
@@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
                                val := "";
                                k := strings.ToLower(f.Name);
                                for _, a := range start.Attr {
-                                       if strings.ToLower(a.Name.Local) == k {
+                                       if fieldName(a.Name.Local) == k {
                                                val = a.Value;
                                                break;
                                        }
@@ -303,7 +318,7 @@ Loop:
                        // Look up by tag name.
                        // If that fails, fall back to mop-up field named "Any".
                        if sv != nil {
-                               k := strings.ToLower(t.Name.Local);
+                               k := fieldName(t.Name.Local);
                                any := -1;
                                for i, n := 0, styp.NumField(); i < n; i++ {
                                        f := styp.Field(i);
index 14ad11a31804bcf6963660284abbaf645528df00..ca9b30d9b75af236f5082e3210335bbe3bd6ff85 100644 (file)
@@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) {
 // hget http://codereview.appspot.com/rss/mine/rsc
 const rssFeedString = `
 <?xml version="1.0" encoding="utf-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
-</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
+<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
+</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
   An attempt at adding pubsubhubbub support to Rietveld.
 http://code.google.com/p/pubsubhubbub
 http://code.google.com/p/rietveld/issues/detail?id=155
@@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py.
                },
        },
 }
+
+type FieldNameTest struct {
+       in, out string;
+}
+
+var FieldNameTests = []FieldNameTest{
+       FieldNameTest{"Profile-Image", "profileimage"},
+       FieldNameTest{"_score", "score"},
+}
+
+func TestFieldName(t *testing.T) {
+       for _, tt := range FieldNameTests {
+               a := fieldName(tt.in);
+               if a != tt.out {
+                       t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out)
+               }
+       }
+}