encoding/xml: add InputOffset method to Decoder

author Russ Cox <rsc@golang.org>

Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)

committer Russ Cox <rsc@golang.org>

Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)
author Russ Cox <rsc@golang.org>
Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)
committer Russ Cox <rsc@golang.org>
Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)
diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go

index b473cb845847447de8f3947197751a31c700ef20..a4cd4e29e01300b7ecb9f10bebd82ffbba59671b 100644 (file)
--- a/src/pkg/encoding/xml/xml.go
+++ b/src/pkg/encoding/xml/xml.go
@@ -29,6 +29,7 @@ import (
  type SyntaxError struct {
         Msg  string
         Line int
+       Byte int64 // byte offset from start of stream
  }
  
  func (e *SyntaxError) Error() string {
@@ -196,6 +197,7 @@ type Decoder struct {
         ns             map[string]string
         err            error
         line           int
+       offset         int64
         unmarshalDepth int
  }
  
@@ -859,9 +861,17 @@ func (d *Decoder) getc() (b byte, ok bool) {
         if b == '\n' {
                 d.line++
         }
+       d.offset++
         return b, true
  }
  
+// InputOffset returns the input stream byte offset of the current decoder position.
+// The offset gives the location of the end of the most recently returned token
+// and the beginning of the next token.
+func (d *Decoder) InputOffset() int64 {
+       return d.offset
+}
+
  // Return saved offset.
  // If we did ungetc (nextByte >= 0), have to back up one.
  func (d *Decoder) savedOffset() int {
@@ -891,6 +901,7 @@ func (d *Decoder) ungetc(b byte) {
                 d.line--
         }
         d.nextByte = int(b)
+       d.offset--
  }
  
  var entity = map[string]int{
diff --git a/src/pkg/encoding/xml/xml_test.go b/src/pkg/encoding/xml/xml_test.go

index 7723ab1c9f09a033e43e7b56c98eb29c53b11801..be995c0d52c035fbc8c6f93a7a7a66b8127c79be 100644 (file)
--- a/src/pkg/encoding/xml/xml_test.go
+++ b/src/pkg/encoding/xml/xml_test.go
@@ -170,7 +170,7 @@ var xmlInput = []string{
  func TestRawToken(t *testing.T) {
         d := NewDecoder(strings.NewReader(testInput))
         d.Entity = testEntity
-       testRawToken(t, d, rawTokens)
+       testRawToken(t, d, testInput, rawTokens)
  }
  
  const nonStrictInput = `
@@ -225,7 +225,7 @@ var nonStrictTokens = []Token{
  func TestNonStrictRawToken(t *testing.T) {
         d := NewDecoder(strings.NewReader(nonStrictInput))
         d.Strict = false
-       testRawToken(t, d, nonStrictTokens)
+       testRawToken(t, d, nonStrictInput, nonStrictTokens)
  }
  
  type downCaser struct {
@@ -254,7 +254,7 @@ func TestRawTokenAltEncoding(t *testing.T) {
                 }
                 return &downCaser{t, input.(io.ByteReader)}, nil
         }
-       testRawToken(t, d, rawTokensAltEncoding)
+       testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
  }
  
  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
@@ -280,9 +280,12 @@ func TestRawTokenAltEncodingNoConverter(t *testing.T) {
         }
  }
  
-func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
+func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
+       lastEnd := int64(0)
         for i, want := range rawTokens {
+               start := d.InputOffset()
                 have, err := d.RawToken()
+               end := d.InputOffset()
                 if err != nil {
                         t.Fatalf("token %d: unexpected error: %s", i, err)
                 }
@@ -300,6 +303,26 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
                         }
                         t.Errorf("token %d = %s, want %s", i, shave, swant)
                 }
+
+               // Check that InputOffset returned actual token.
+               switch {
+               case start < lastEnd:
+                       t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
+               case start >= end:
+                       // Special case: EndElement can be synthesized.
+                       if start == end && end == lastEnd {
+                               break
+                       }
+                       t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
+               case end > int64(len(raw)):
+                       t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
+               default:
+                       text := raw[start:end]
+                       if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
+                               t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
+                       }
+               }
+               lastEnd = end
         }
  }
author	Russ Cox <rsc@golang.org>
	Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)
committer	Russ Cox <rsc@golang.org>
	Wed, 6 Aug 2014 22:00:06 +0000 (18:00 -0400)
src/pkg/encoding/xml/xml.go		patch | blob | history
src/pkg/encoding/xml/xml_test.go		patch | blob | history