go/printer, gofmt: smarter handling of multi-line raw strings

author Robert Griesemer <gri@golang.org>

Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)

committer Robert Griesemer <gri@golang.org>

Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)
author Robert Griesemer <gri@golang.org>
Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)
committer Robert Griesemer <gri@golang.org>
Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)
diff --git a/src/pkg/ebnf/ebnf_test.go b/src/pkg/ebnf/ebnf_test.go

index bbe530c278f4e2e6c536a31ab058d7c6a0879219..69ad5fed1cf6aa05c4c2bcc79fcf69ddc4326e30 100644 (file)
--- a/src/pkg/ebnf/ebnf_test.go
+++ b/src/pkg/ebnf/ebnf_test.go
@@ -15,31 +15,31 @@ var fset = token.NewFileSet()
  
  
  var grammars = []string{
-       `Program = .
-       `,
-
-       `Program = foo .
-       foo = "foo" .
-       `,
-
-       `Program = "a" | "b" "c" .
-       `,
-
-       `Program = "a" ... "z" .
-       `,
-
-       `Program = Song .
-        Song = { Note } .
-        Note = Do | (Re | Mi | Fa | So | La) | Ti .
-        Do = "c" .
-        Re = "d" .
-        Mi = "e" .
-        Fa = "f" .
-        So = "g" .
-        La = "a" .
-        Ti = ti .
-        ti = "b" .
-       `,
+`Program = .
+`,
+
+`Program = foo .
+foo = "foo" .
+`,
+
+`Program = "a" | "b" "c" .
+`,
+
+`Program = "a" ... "z" .
+`,
+
+`Program = Song .
+ Song = { Note } .
+ Note = Do | (Re | Mi | Fa | So | La) | Ti .
+ Do = "c" .
+ Re = "d" .
+ Mi = "e" .
+ Fa = "f" .
+ So = "g" .
+ La = "a" .
+ Ti = ti .
+ ti = "b" .
+`,
  }
  
  
diff --git a/src/pkg/go/printer/printer.go b/src/pkg/go/printer/printer.go

index 34b0c4e2dc4c764e723d66f083aa09b322d9979f..2790a5c34c5531386b7c45fd8246363fe5c38ddf 100644 (file)
--- a/src/pkg/go/printer/printer.go
+++ b/src/pkg/go/printer/printer.go
@@ -34,6 +34,12 @@ const (
  )
  
  
+const (
+       esc2 = '\xfe'                        // an escape byte that cannot occur in regular UTF-8
+       _    = 1 / (esc2 - tabwriter.Escape) // cause compiler error if esc2 == tabwriter.Escape
+)
+
+
  var (
         esc       = []byte{tabwriter.Escape}
         htab      = []byte{'\t'}
@@ -843,6 +849,19 @@ func (p *printer) print(args ...interface{}) {
                         escData = append(escData, tabwriter.Escape)
                         data = escData
                         tok = x.Kind
+                       // If we have a raw string that spans multiple lines and
+                       // the opening quote (`) is on a line preceded only by
+                       // indentation, we don't want to write that indentation
+                       // because the following lines of the raw string are not
+                       // indented. It's easiest to correct the output at the end
+                       // via the trimmer (because of the complex handling of
+                       // white space).
+                       // Mark multi-line raw strings by replacing the opening
+                       // quote with esc2 and have the trimmer take care of fixing
+                       // it up. (Do this _after_ making a copy of data!)
+                       if data[1] == '`' && bytes.IndexByte(data, '\n') > 0 {
+                               data[1] = esc2
+                       }
                 case token.Token:
                         s := x.String()
                         if mayCombine(p.lastTok, s[0]) {
@@ -927,21 +946,26 @@ func (p *printer) flush(next token.Position, tok token.Token) (droppedFF bool) {
  // through unchanged.
  //
  type trimmer struct {
-       output io.Writer
-       space  bytes.Buffer
-       state  int
+       output  io.Writer
+       state   int
+       space   bytes.Buffer
+       hasText bool
  }
  
  
  // trimmer is implemented as a state machine.
  // It can be in one of the following states:
  const (
-       inSpace = iota
-       inEscape
-       inText
+       inSpace  = iota // inside space
+       atEscape        // inside space and the last char was an opening tabwriter.Escape
+       inEscape        // inside text bracketed by tabwriter.Escapes
+       inText          // inside text
  )
  
  
+var backquote = []byte{'`'}
+
+
  // Design note: It is tempting to eliminate extra blanks occurring in
  //              whitespace in this function as it could simplify some
  //              of the blanks logic in the node printing functions.
@@ -949,7 +973,13 @@ const (
  //              the tabwriter.
  
  func (p *trimmer) Write(data []byte) (n int, err os.Error) {
-       m := 0 // if p.state != inSpace, data[m:n] is unwritten
+       // invariants:
+       // p.state == inSpace, atEscape:
+       //      p.space is unwritten
+       //      p.hasText indicates if there is any text on this line
+       // p.state == inEscape, inText:
+       //      data[m:n] is unwritten
+       m := 0
         var b byte
         for n, b = range data {
                 if b == '\v' {
@@ -960,37 +990,55 @@ func (p *trimmer) Write(data []byte) (n int, err os.Error) {
                         switch b {
                         case '\t', ' ':
                                 p.space.WriteByte(b) // WriteByte returns no errors
-                       case '\f', '\n':
+                       case '\n', '\f':
                                 p.space.Reset()                        // discard trailing space
                                 _, err = p.output.Write(newlines[0:1]) // write newline
+                               p.hasText = false
                         case tabwriter.Escape:
-                               _, err = p.output.Write(p.space.Bytes())
-                               p.space.Reset()
-                               p.state = inEscape
-                               m = n + 1 // drop tabwriter.Escape
+                               p.state = atEscape
                         default:
                                 _, err = p.output.Write(p.space.Bytes())
-                               p.space.Reset()
                                 p.state = inText
                                 m = n
                         }
+               case atEscape:
+                       // discard indentation if we have a multi-line raw string
+                       // (see printer.print for details)
+                       if b != esc2 || p.hasText {
+                               _, err = p.output.Write(p.space.Bytes())
+                       }
+                       p.state = inEscape
+                       m = n
+                       if b == esc2 {
+                               _, err = p.output.Write(backquote) // convert back
+                               m++
+                       }
                 case inEscape:
                         if b == tabwriter.Escape {
                                 _, err = p.output.Write(data[m:n])
                                 p.state = inSpace
+                               p.space.Reset()
+                               p.hasText = true
                         }
                 case inText:
                         switch b {
                         case '\t', ' ':
                                 _, err = p.output.Write(data[m:n])
                                 p.state = inSpace
+                               p.space.Reset()
                                 p.space.WriteByte(b) // WriteByte returns no errors
-                       case '\f':
-                               data[n] = '\n' // convert to newline
+                               p.hasText = true
+                       case '\n', '\f':
+                               _, err = p.output.Write(data[m:n])
+                               p.state = inSpace
+                               p.space.Reset()
+                               _, err = p.output.Write(newlines[0:1]) // write newline
+                               p.hasText = false
                         case tabwriter.Escape:
                                 _, err = p.output.Write(data[m:n])
-                               p.state = inEscape
-                               m = n + 1 // drop tabwriter.Escape
+                               p.state = atEscape
+                               p.space.Reset()
+                               p.hasText = true
                         }
                 }
                 if err != nil {
@@ -999,9 +1047,12 @@ func (p *trimmer) Write(data []byte) (n int, err os.Error) {
         }
         n = len(data)
  
-       if p.state != inSpace {
+       switch p.state {
+       case inEscape, inText:
                 _, err = p.output.Write(data[m:n])
                 p.state = inSpace
+               p.space.Reset()
+               p.hasText = true
         }
  
         return
diff --git a/src/pkg/go/printer/testdata/expressions.golden b/src/pkg/go/printer/testdata/expressions.golden

index 882c7624c017fc7c8a8c03e5f3667ea10fd34696..7f18f338a633ccf8fc45a4e434b1d61b10e55e44 100644 (file)
--- a/src/pkg/go/printer/testdata/expressions.golden
+++ b/src/pkg/go/printer/testdata/expressions.golden
@@ -247,6 +247,77 @@ they must not be removed`
  }
  
  
+func _() {
+       // smart handling of indentation for multi-line raw strings
+       var _ = ``
+       var _ = `foo`
+       var _ = `foo
+bar`
+
+       var _ = ``
+       var _ = `foo`
+       var _ =
+       // the next line should not be indented
+`foo
+bar`
+
+       var _ = // comment
+       ``
+       var _ = // comment
+       `foo`
+       var _ = // comment
+       // the next line should not be indented
+`foo
+bar`
+
+       var _ = /* comment */ ``
+       var _ = /* comment */ `foo`
+       var _ = /* comment */ `foo
+bar`
+
+       var _ = /* comment */
+       ``
+       var _ = /* comment */
+       `foo`
+       var _ = /* comment */
+       // the next line should not be indented
+`foo
+bar`
+
+       var board = []int(
+`...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`)
+
+       var state = S{
+               "foo",
+               // the next line should not be indented
+`...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`,
+               "bar",
+       }
+}
+
+
  func _() {
         // one-line function literals (body is on a single line)
         _ = func() {}
diff --git a/src/pkg/go/printer/testdata/expressions.input b/src/pkg/go/printer/testdata/expressions.input

index 647706b0923c668dfa162720c5fba983cc6fe234..6bcd9b5f89ebd37a2d368c342e9026e7db952886 100644 (file)
--- a/src/pkg/go/printer/testdata/expressions.input
+++ b/src/pkg/go/printer/testdata/expressions.input
@@ -243,6 +243,85 @@ they must not be removed`
  }
  
  
+func _() {
+       // smart handling of indentation for multi-line raw strings
+       var _ = ``
+       var _ = `foo`
+       var _ = `foo
+bar`
+
+
+var _ =
+       ``
+var _ =
+       `foo`
+var _ =
+       // the next line should not be indented
+       `foo
+bar`
+
+
+       var _ = // comment
+               ``
+       var _ = // comment
+               `foo`
+       var _ = // comment
+               // the next line should not be indented
+               `foo
+bar`
+
+
+var _ = /* comment */ ``
+var _ = /* comment */ `foo`
+var _ = /* comment */ `foo
+bar`
+
+
+       var _ = /* comment */
+               ``
+       var _ = /* comment */
+               `foo`
+       var _ = /* comment */
+               // the next line should not be indented
+               `foo
+bar`
+
+
+var board = []int(
+       `...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`)
+
+
+       var state = S{
+               "foo",
+               // the next line should not be indented
+               `...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`,
+               "bar",
+       }
+}
+
+
  func _() {
         // one-line function literals (body is on a single line)
         _ = func() {}
diff --git a/src/pkg/go/printer/testdata/expressions.raw b/src/pkg/go/printer/testdata/expressions.raw

index 62be00cc3015ca962d298fdc1c7132087f26d947..f1944c94bb4d9ebd3fbac8fc480cf9e204d433c1 100644 (file)
--- a/src/pkg/go/printer/testdata/expressions.raw
+++ b/src/pkg/go/printer/testdata/expressions.raw
@@ -243,7 +243,77 @@ func _() {
         _ = `foo
                 bar`
         _ = `three spaces before the end of the line starting here:   
-they must not be removed`
+they must not be removed`\f}
+
+
+func _() {
+       // smart handling of indentation for multi-line raw strings
+       var _ = ``
+       var _ = `foo`
+       var _ = `foo
+bar`
+
+       var _ = ``
+       var _ = `foo`
+       var _ =
+       // the next line should not be indented
+`foo
+bar`
+
+       var _ = // comment
+       ``
+       var _ = // comment
+       `foo`
+       var _ = // comment
+       // the next line should not be indented
+`foo
+bar`
+
+       var _ = /* comment */ ``
+       var _ = /* comment */ `foo`
+       var _ = /* comment */ `foo
+bar`
+
+       var _ = /* comment */
+       ``
+       var _ = /* comment */
+       `foo`
+       var _ = /* comment */
+       // the next line should not be indented
+`foo
+bar`
+
+       var board = []int(
+`...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`)
+
+       var state = S{
+               "foo",
+               // the next line should not be indented
+`...........
+...........
+....●●●....
+....●●●....
+..●●●●●●●..
+..●●●○●●●..
+..●●●●●●●..
+....●●●....
+....●●●....
+...........
+...........
+`,
+               "bar",
+       }
  }
author	Robert Griesemer <gri@golang.org>
	Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)
committer	Robert Griesemer <gri@golang.org>
	Sat, 5 Feb 2011 01:34:16 +0000 (17:34 -0800)
src/pkg/ebnf/ebnf_test.go		patch \| blob \| history
src/pkg/go/printer/printer.go		patch \| blob \| history
src/pkg/go/printer/testdata/expressions.golden		patch \| blob \| history
src/pkg/go/printer/testdata/expressions.input		patch \| blob \| history
src/pkg/go/printer/testdata/expressions.raw		patch \| blob \| history