1) Fix a problem with tabwriter.Flush: any pending text not yet

author Robert Griesemer <gri@golang.org>

Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)

committer Robert Griesemer <gri@golang.org>

Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)
author Robert Griesemer <gri@golang.org>
Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)
committer Robert Griesemer <gri@golang.org>
Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)
diff --git a/src/pkg/tabwriter/tabwriter.go b/src/pkg/tabwriter/tabwriter.go

index 6799f72d1089599f443bd996494e3d5d4d5fd403..aa942d482e06928a70f7a672d08df0838068e1dc 100644 (file)
--- a/src/pkg/tabwriter/tabwriter.go
+++ b/src/pkg/tabwriter/tabwriter.go
@@ -20,6 +20,16 @@ import (
  // ----------------------------------------------------------------------------
  // Filter implementation
  
+// A cell represents a segment of text delineated by tabs, form-feed,
+// or newline chars. The text itself is stored in a separate buffer;
+// cell only describes the segment's size in bytes and width in runes.
+//
+type cell struct {
+       size int;  // cell size in bytes
+       width int;  // cell width in runes
+}
+
+
  // A Writer is a filter that inserts padding around
  // tab-delimited columns in its input to align them
  // in the output.
@@ -37,8 +47,8 @@ import (
  // UTF-8 characters.
  //
  // If a Writer is configured to filter HTML, HTML tags and entities
-// are simply passed through and their widths are assumed to be zero
-// for formatting purposes.
+// are simply passed through. The widths of tags and entities are
+// assumed to be zero (tags) and one (entities) for formatting purposes.
  //
  // The form feed character ('\f') acts like a newline but it also
  // terminates all columns in the current line (effectively calling
@@ -59,22 +69,42 @@ type Writer struct {
         flags uint;
  
         // current state
-       html_char byte;  // terminating char of html tag/entity, or 0 ('>', ';', or 0)
         buf io.ByteBuffer;  // collected text w/o tabs, newlines, or form feed chars
-       size int;  // size of incomplete cell in bytes
-       width int;  // width of incomplete cell in runes up to buf[pos] w/o ignored sections
         pos int;  // buffer position up to which width of incomplete cell has been computed
-       lines_size vector.Vector;  // list of lines; each line is a list of cell sizes in bytes
-       lines_width vector.Vector;  // list of lines; each line is a list of cell widths in runes
+       cell cell;  // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
+       html_char byte;  // terminating char of html tag/entity, or 0 ('>', ';', or 0)
+       lines vector.Vector;  // list if lines; each line is a list of cells
         widths vector.IntVector;  // list of column widths in runes - re-used during formatting
  }
  
  
+func (b *Writer) addLine() {
+       b.lines.Push(vector.New(0));
+}
+
+
+func (b *Writer) line(i int) *vector.Vector {
+       return b.lines.At(i).(*vector.Vector);
+}
+
+
+// Reset the current state.
+func (b *Writer) reset() {
+       b.buf.Reset();
+       b.pos = 0;
+       b.cell = cell{};
+       b.html_char = 0;
+       b.lines.Init(0);
+       b.widths.Init(0);
+       b.addLine();
+}
+
+
  // Internal representation (current state):
  //
  // - all text written is appended to buf; form feed chars, tabs and newlines are stripped away
  // - at any given time there is a (possibly empty) incomplete cell at the end
-//   (the cell starts after a tab or newline)
+//   (the cell starts after a tab, form feed, or newline)
  // - size is the number of bytes belonging to the cell so far
  // - width is text width in runes of that cell from the start of the cell to
  //   position pos; html tags and entities are excluded from this width if html
@@ -94,12 +124,6 @@ type Writer struct {
  // buf                start of incomplete cell  pos
  
  
-func (b *Writer) addLine() {
-       b.lines_size.Push(vector.NewIntVector(0));
-       b.lines_width.Push(vector.NewIntVector(0));
-}
-
-
  // Formatting can be controlled with these flags.
  const (
         // Ignore html tags and treat entities (starting with '&'
@@ -144,22 +168,12 @@ func (b *Writer) Init(output io.Writer, cellwidth, padding int, padchar byte, fl
         }
         b.flags = flags;
  
-       b.lines_size.Init(0);
-       b.lines_width.Init(0);
-       b.widths.Init(0);
-       b.addLine();  // the very first line
+       b.reset();
  
         return b;
  }
  
  
-func (b *Writer) line(i int) (*vector.IntVector, *vector.IntVector) {
-       return
-               b.lines_size.At(i).(*vector.IntVector),
-               b.lines_width.At(i).(*vector.IntVector);
-}
-
-
  // debugging support (keep code around)
  /*
  func (b *Writer) dump() {
@@ -219,19 +233,19 @@ func (b *Writer) writePadding(textw, cellw int) os.Error {
  func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) {
         pos := pos0;
         for i := line0; i < line1; i++ {
-               line_size, line_width := b.line(i);
-               for j := 0; j < line_size.Len(); j++ {
-                       s, w := line_size.At(j), line_width.At(j);
+               line := b.line(i);
+               for j := 0; j < line.Len(); j++ {
+                       c := line.At(j).(cell);
  
                         switch {
                         default: // align left
  
-                               if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
+                               if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
                                         return pos, err;
                                 }
-                               pos += s;
+                               pos += c.size;
                                 if j < b.widths.Len() {
-                                       if err := b.writePadding(w, b.widths.At(j)); err != nil {
+                                       if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
                                                 return pos, err;
                                         }
                                 }
@@ -239,24 +253,24 @@ func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) {
                         case b.flags & AlignRight != 0:  // align right
  
                                 if j < b.widths.Len() {
-                                       if err := b.writePadding(w, b.widths.At(j)); err != nil {
+                                       if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
                                                 return pos, err;
                                         }
                                 }
-                               if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
+                               if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
                                         return pos, err;
                                 }
-                               pos += s;
+                               pos += c.size;
                         }
                 }
  
-               if i+1 == b.lines_size.Len() {
+               if i+1 == b.lines.Len() {
                         // last buffered line - we don't have a newline, so just write
                         // any outstanding buffered data
-                       if err := b.write0(b.buf.Data()[pos : pos + b.size]); err != nil {
+                       if err := b.write0(b.buf.Data()[pos : pos + b.cell.size]); err != nil {
                                 return pos, err;
                         }
-                       pos += b.size;
+                       pos += b.cell.size;
                 } else {
                         // not the last line - write newline
                         if err := b.write0(newline); err != nil {
@@ -273,9 +287,9 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
         column := b.widths.Len();
         last := line0;
         for this := line0; this < line1; this++ {
-               line_size, line_width := b.line(this);
+               line := b.line(this);
  
-               if column < line_size.Len() - 1 {
+               if column < line.Len() - 1 {
                         // cell exists in this column
                         // (note that the last cell per line is ignored)
  
@@ -289,10 +303,10 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
                         // column block begin
                         width := b.cellwidth;  // minimal width
                         for ; this < line1; this++ {
-                               line_size, line_width = b.line(this);
-                               if column < line_size.Len() - 1 {
+                               line = b.line(this);
+                               if column < line.Len() - 1 {
                                         // cell exists in this column => update width
-                                       w := line_width.At(column) + b.padding;
+                                       w := line.At(column).(cell).width + b.padding;
                                         if w > width {
                                                 width = w;
                                         }
@@ -316,40 +330,77 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
  }
  
  
-// Flush should be called after the last call to Write to ensure
-// that any data buffered in the Writer is written to output.
+// Append text to current cell. Only update the cell width if updateWidth
+// is set (the cell width can only be updated if we know that we cannot be
+// in the middle of a UTF-8 encoded Unicode character).
  //
-func (b *Writer) Flush() os.Error {
-       _, err := b.format(0, 0, b.lines_size.Len());
-       // reset (even in the presence of errors)
-       b.buf.Reset();
-       b.size, b.width = 0, 0;
-       b.pos = 0;
-       b.lines_size.Init(0);
-       b.lines_width.Init(0);
-       b.addLine();
-       return err;
+func (b *Writer) append(text []byte, updateWidth bool) {
+       b.buf.Write(text);
+       b.cell.size += len(text);
+       if updateWidth {
+               b.cell.width += utf8.RuneCount(b.buf.Data()[b.pos : b.buf.Len()]);
+               b.pos = b.buf.Len();
+       }
  }
  
  
-func unicodeLen(buf []byte) int {
-       l := 0;
-       for i := 0; i < len(buf); {
-               if buf[i] < utf8.RuneSelf {
-                       i++;
-               } else {
-                       rune, size := utf8.DecodeRune(buf[i : len(buf)]);
-                       i += size;
-               }
-               l++;
+// Start HTML-escape mode.
+func (b *Writer) startHTML(ch byte) {
+       if ch == '<' {
+               b.html_char = '>';
+       } else {
+               b.html_char = ';';
+       }
+}
+
+
+// Terminate HTML-escape mode. If the HTML text was an entity, its width
+// is assumed to be one for formatting purposes; otherwise it assumed to
+// be zero.
+//
+func (b *Writer) terminateHTML() {
+       if b.html_char == ';' {
+               // was entity, count as one rune
+               b.cell.width++;
         }
-       return l;
+       b.pos = b.buf.Len();
+       b.html_char = 0;
  }
  
  
-func (b *Writer) append(buf []byte) {
-       b.buf.Write(buf);
-       b.size += len(buf);
+// Terminate the current cell by adding it to the list of cells of the
+// current line. Returns the number of cells in that line.
+//
+func (b *Writer) terminateCell() int {
+       line := b.line(b.lines.Len() - 1);
+       line.Push(b.cell);
+       b.cell = cell{};
+       return line.Len();
+}
+
+
+// Flush should be called after the last call to Write to ensure
+// that any data buffered in the Writer is written to output. Any
+// incomplete HTML tag or entity at the end is simply considered
+// complete for formatting purposes.
+//
+func (b *Writer) Flush() os.Error {
+       // add current cell if not empty
+       if b.cell.size > 0 {
+               if b.html_char != 0 {
+                       // inside html tag/entity - terminate it even if incomplete
+                       b.terminateHTML();
+               }
+               b.terminateCell();
+       }
+
+       // format contents of buffer
+       _, err := b.format(0, 0, b.lines.Len());
+
+       // reset, even in the presence of errors
+       b.reset();
+
+       return err;
  }
  
  
@@ -358,31 +409,21 @@ func (b *Writer) append(buf []byte) {
  // while writing to the underlying output stream.
  //
  func (b *Writer) Write(buf []byte) (written int, err os.Error) {
-       i0, n := 0, len(buf);
-
         // split text into cells
-       for i := 0; i < n; i++ {
-               ch := buf[i];
-
+       i0 := 0;
+       for i, ch := range buf {
                 if b.html_char == 0 {
                         // outside html tag/entity
                         switch ch {
                         case '\t', '\n', '\f':
-                               b.append(buf[i0 : i]);
-                               i0 = i + 1;  // exclude ch from (next) cell
-                               b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
-                               b.pos = b.buf.Len();
-
-                               // terminate cell
-                               last_size, last_width := b.line(b.lines_size.Len() - 1);
-                               last_size.Push(b.size);
-                               last_width.Push(b.width);
-                               b.size, b.width = 0, 0;
-
+                               // end of cell
+                               b.append(buf[i0 : i], true);
+                               i0 = i+1;  // exclude ch from (next) cell
+                               ncells := b.terminateCell();
                                 if ch != '\t' {
                                         // terminate line
                                         b.addLine();
-                                       if ch == '\f' || last_size.Len() == 1 {
+                                       if ch == '\f' || ncells == 1 {
                                                 // A '\f' always forces a flush. Otherwise, if the previous
                                                 // line has only one cell which does not have an impact on
                                                 // the formatting of the following lines (the last cell per
@@ -395,37 +436,29 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
                                 }
  
                         case '<', '&':
+                               // possibly an html tag/entity 
                                 if b.flags & FilterHTML != 0 {
-                                       b.append(buf[i0 : i]);
+                                       // begin of tag/entity
+                                       b.append(buf[i0 : i], true);
                                         i0 = i;
-                                       b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
-                                       b.pos = -1;  // preventative - should not be used (will cause index out of bounds)
-                                       if ch == '<' {
-                                               b.html_char = '>';
-                                       } else {
-                                               b.html_char = ';';
-                                       }
+                                       b.startHTML(ch);
                                 }
                         }
  
                 } else {
                         // inside html tag/entity
                         if ch == b.html_char {
-                               // reached the end of tag/entity
-                               b.append(buf[i0 : i + 1]);
-                               i0 = i + 1;
-                               if b.html_char == ';' {
-                                       b.width++;  // count as one char
-                               }
-                               b.pos = b.buf.Len();
-                               b.html_char = 0;
+                               // end of tag/entity
+                               b.append(buf[i0 : i+1], false);
+                               i0 = i+1;  // exclude ch from (next) cell
+                               b.terminateHTML();
                         }
                 }
         }
  
         // append leftover text
-       b.append(buf[i0 : n]);
-       return n, nil;
+       b.append(buf[i0 : len(buf)], false);
+       return len(buf), nil;
  }
  
  
diff --git a/src/pkg/tabwriter/tabwriter_test.go b/src/pkg/tabwriter/tabwriter_test.go

index 7026446e62be3f92d36ccbe5a6662a9a3e821c1d..7967a467433417015ed61ca7c2398523aa583bef 100644 (file)
--- a/src/pkg/tabwriter/tabwriter_test.go
+++ b/src/pkg/tabwriter/tabwriter_test.go
@@ -47,31 +47,31 @@ func (b *buffer) String() string {
  }
  
  
-func write(t *testing.T, w *tabwriter.Writer, src string) {
+func write(t *testing.T, testname string, w *tabwriter.Writer, src string) {
         written, err := io.WriteString(w, src);
         if err != nil {
-               t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err);
+               t.Errorf("--- test: %s\n--- src:\n%s\n--- write error: %v\n", testname, src, err);
         }
         if written != len(src) {
-               t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src));
+               t.Errorf("--- test: %s\n--- src:\n%s\n--- written = %d, len(src) = %d\n", testname, src, written, len(src));
         }
  }
  
  
-func verify(t *testing.T, w *tabwriter.Writer, b *buffer, src, expected string) {
+func verify(t *testing.T, testname string, w *tabwriter.Writer, b *buffer, src, expected string) {
         err := w.Flush();
         if err != nil {
-               t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err);
+               t.Errorf("--- test: %s\n--- src:\n%s\n--- flush error: %v\n", testname, src, err);
         }
  
         res := b.String();
         if res != expected {
-               t.Errorf("--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", src, res, expected)
+               t.Errorf("--- test: %s\n--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", testname, src, res, expected)
         }
  }
  
  
-func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
+func check(t *testing.T, testname string, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
         var b buffer;
         b.init(1000);
  
@@ -80,30 +80,31 @@ func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, e
  
         // write all at once
         b.clear();
-       write(t, &w, src);
-       verify(t, &w, &b, src, expected);
+       write(t, testname, &w, src);
+       verify(t, testname, &w, &b, src, expected);
  
         // write byte-by-byte
         b.clear();
         for i := 0; i < len(src); i++ {
-               write(t, &w, src[i : i+1]);
+               write(t, testname, &w, src[i : i+1]);
         }
-       verify(t, &w, &b, src, expected);
+       verify(t, testname, &w, &b, src, expected);
  
         // write using Fibonacci slice sizes
         b.clear();
         for i, d := 0, 0; i < len(src); {
-               write(t, &w, src[i : i+d]);
+               write(t, testname, &w, src[i : i+d]);
                 i, d = i+d, d+1;
                 if i+d > len(src) {
                         d = len(src) - i;
                 }
         }
-       verify(t, &w, &b, src, expected);
+       verify(t, testname, &w, &b, src, expected);
  }
  
  
  type entry struct {
+       testname string;
         tabwidth, padding int;
         padchar byte;
         flags uint;
@@ -113,108 +114,133 @@ type entry struct {
  
  var tests = []entry {
         entry{
+               "1",
                 8, 1, '.', 0,
                 "",
                 ""
         },
  
         entry{
+               "2",
                 8, 1, '.', 0,
                 "\n\n\n",
                 "\n\n\n"
         },
  
         entry{
+               "3",
                 8, 1, '.', 0,
                 "a\nb\nc",
                 "a\nb\nc"
         },
  
         entry{
+               "4a",
                 8, 1, '.', 0,
                 "\t",  // '\t' terminates an empty cell on last line - nothing to print
                 ""
         },
  
         entry{
+               "4b",
                 8, 1, '.', tabwriter.AlignRight,
                 "\t",  // '\t' terminates an empty cell on last line - nothing to print
                 ""
         },
  
         entry{
+               "5",
                 8, 1, '.', 0,
                 "*\t*",
-               "**"
+               "*.......*"
         },
  
         entry{
+               "5b",
                 8, 1, '.', 0,
                 "*\t*\n",
                 "*.......*\n"
         },
  
         entry{
+               "5c",
                 8, 1, '.', 0,
                 "*\t*\t",
                 "*.......*"
         },
  
         entry{
+               "5d",
                 8, 1, '.', tabwriter.AlignRight,
                 "*\t*\t",
                 ".......**"
         },
  
         entry{
+               "6",
                 8, 1, '.', 0,
                 "\t\n",
                 "........\n"
         },
  
         entry{
+               "7a",
                 8, 1, '.', 0,
                 "a) foo",
                 "a) foo"
         },
  
         entry{
+               "7b",
                 8, 1, ' ', 0,
-               "b) foo\tbar",  // "bar" is not in any cell - not formatted, just flushed
-               "b) foobar"
+               "b) foo\tbar",
+               "b) foo  bar"
         },
  
         entry{
+               "7c",
                 8, 1, '.', 0,
                 "c) foo\tbar\t",
                 "c) foo..bar"
         },
  
         entry{
+               "7d",
                 8, 1, '.', 0,
                 "d) foo\tbar\n",
                 "d) foo..bar\n"
         },
  
         entry{
+               "7e",
                 8, 1, '.', 0,
                 "e) foo\tbar\t\n",
                 "e) foo..bar.....\n"
         },
  
         entry{
+               "7f",
                 8, 1, '.', tabwriter.FilterHTML,
                 "f) f&lt;o\t<b>bar</b>\t\n",
                 "f) f&lt;o..<b>bar</b>.....\n"
         },
  
         entry{
+               "7g",
+               8, 1, '.', tabwriter.FilterHTML,
+               "g) f&lt;o\t<b>bar</b>\t non-terminated entity &amp",
+               "g) f&lt;o..<b>bar</b>..... non-terminated entity &amp"
+       },
+
+       entry{
+               "8",
                 8, 1, '*', 0,
                 "Hello, world!\n",
                 "Hello, world!\n"
         },
  
         entry{
+               "9a",
                 0, 0, '.', 0,
                 "1\t2\t3\t4\n"
                 "11\t222\t3333\t44444\n",
@@ -224,6 +250,7 @@ var tests = []entry {
         },
  
         entry{
+               "9b",
                 0, 0, '.', tabwriter.FilterHTML,
                 "1\t2<!---\f--->\t3\t4\n"  // \f inside HTML is ignored
                 "11\t222\t3333\t44444\n",
@@ -233,6 +260,7 @@ var tests = []entry {
         },
  
         entry{
+               "9c",
                 0, 0, '.', 0,
                 "1\t2\t3\t4\f"  // \f causes a newline and flush
                 "11\t222\t3333\t44444\n",
@@ -242,18 +270,21 @@ var tests = []entry {
         },
  
         entry{
+               "10a",
                 5, 0, '.', 0,
                 "1\t2\t3\t4\n",
                 "1....2....3....4\n"
         },
  
         entry{
+               "10b",
                 5, 0, '.', 0,
                 "1\t2\t3\t4\t\n",
                 "1....2....3....4....\n"
         },
  
         entry{
+               "11",
                 8, 1, '.', 0,
                 "本\tb\tc\n"
                 "aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
@@ -265,6 +296,7 @@ var tests = []entry {
         },
  
         entry{
+               "12a",
                 8, 1, ' ', tabwriter.AlignRight,
                 "a\tè\tc\t\n"
                 "aa\tèèè\tcccc\tddddd\t\n"
@@ -276,6 +308,7 @@ var tests = []entry {
         },
  
         entry{
+               "12b",
                 2, 0, ' ', 0,
                 "a\tb\tc\n"
                 "aa\tbbb\tcccc\n"
@@ -287,6 +320,7 @@ var tests = []entry {
         },
  
         entry{
+               "12c",
                 8, 1, '_', 0,
                 "a\tb\tc\n"
                 "aa\tbbb\tcccc\n"
@@ -298,6 +332,7 @@ var tests = []entry {
         },
  
         entry{
+               "13a",
                 4, 1, '-', 0,
                 "4444\t日本語\t22\t1\t333\n"
                 "999999999\t22\n"
@@ -317,6 +352,7 @@ var tests = []entry {
         },
  
         entry{
+               "13b",
                 4, 3, '.', 0,
                 "4444\t333\t22\t1\t333\n"
                 "999999999\t22\n"
@@ -336,6 +372,7 @@ var tests = []entry {
         },
  
         entry{
+               "13c",
                 8, 1, '\t', tabwriter.FilterHTML,
                 "4444\t333\t22\t1\t333\n"
                 "999999999\t22\n"
@@ -355,6 +392,7 @@ var tests = []entry {
         },
  
         entry{
+               "14",
                 0, 2, ' ', tabwriter.AlignRight,
                 ".0\t.3\t2.4\t-5.1\t\n"
                 "23.0\t12345678.9\t2.4\t-989.4\t\n"
@@ -375,6 +413,6 @@ var tests = []entry {
  
  func Test(t *testing.T) {
         for _, e := range tests {
-               check(t, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
+               check(t, e.testname, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
         }
  }
author	Robert Griesemer <gri@golang.org>
	Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)
committer	Robert Griesemer <gri@golang.org>
	Fri, 19 Jun 2009 00:06:08 +0000 (17:06 -0700)
src/pkg/tabwriter/tabwriter.go		patch \| blob \| history
src/pkg/tabwriter/tabwriter_test.go		patch \| blob \| history