permit escaped text segments which pass through tabwriter

author Robert Griesemer <gri@golang.org>

Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)

committer Robert Griesemer <gri@golang.org>

Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)
author Robert Griesemer <gri@golang.org>
Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)
committer Robert Griesemer <gri@golang.org>
Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)
diff --git a/src/pkg/tabwriter/tabwriter.go b/src/pkg/tabwriter/tabwriter.go

index 372ebff8b27f5df513abfaa07067ccc973f0bc06..fe716e91ae069355715a9dbe215027199ddd8970 100644 (file)
--- a/src/pkg/tabwriter/tabwriter.go
+++ b/src/pkg/tabwriter/tabwriter.go
@@ -51,6 +51,11 @@ type cell struct {
  // terminated by horizontal (or "hard") tabs are not affected by
  // this flag.
  //
+// A segment of text may be escaped by bracketing it with Escape
+// characters. The tabwriter strips the Escape characters but otherwise
+// passes escaped text segments through unchanged. In particular, it
+// does not interpret any tabs or line breaks within the segment.
+//
  // The Writer assumes that all characters have the same width;
  // this may not be true in some fonts, especially with certain
  // UTF-8 characters.
@@ -62,8 +67,8 @@ type cell struct {
  // The formfeed character ('\f') acts like a newline but it also
  // terminates all columns in the current line (effectively calling
  // Flush). Cells in the next line start new columns. Unless found
-// inside an HTML tag, formfeed characters appear as newlines in
-// the output.
+// inside an HTML tag or inside an escaped text segment, formfeed
+// characters appear as newlines in the output.
  //
  // The Writer must buffer input internally, because proper spacing
  // of one line may depend on the cells in future lines. Clients must
@@ -81,7 +86,7 @@ type Writer struct {
         buf bytes.Buffer;  // collected text w/o tabs, newlines, or formfeed chars
         pos int;  // buffer position up to which width of incomplete cell has been computed
         cell cell;  // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
-       html_char byte;  // terminating char of html tag/entity, or 0 ('>', ';', or 0)
+       endChar byte;  // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
         lines vector.Vector;  // list if lines; each line is a list of cells
         widths vector.IntVector;  // list of column widths in runes - re-used during formatting
  }
@@ -102,7 +107,7 @@ func (b *Writer) reset() {
         b.buf.Reset();
         b.pos = 0;
         b.cell = cell{};
-       b.html_char = 0;
+       b.endChar = 0;
         b.lines.Init(0);
         b.widths.Init(0);
         b.addLine();
@@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
  }
  
  
-// Append text to current cell. Only update the cell width if updateWidth
-// is set (the cell width can only be updated if we know that we cannot be
-// in the middle of a UTF-8 encoded Unicode character).
-//
-func (b *Writer) append(text []byte, updateWidth bool) {
+// Append text to current cell.
+func (b *Writer) append(text []byte) {
         b.buf.Write(text);
         b.cell.size += len(text);
-       if updateWidth {
-               b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
-               b.pos = b.buf.Len();
-       }
  }
  
  
-// Start HTML-escape mode.
-func (b *Writer) startHTML(ch byte) {
-       if ch == '<' {
-               b.html_char = '>';
-       } else {
-               b.html_char = ';';
+// Update the cell width.
+func (b *Writer) updateWidth() {
+       b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
+       b.pos = b.buf.Len();
+}
+
+
+// To escape a text segment, bracket it with Escape characters.
+// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
+// does not terminate a cell and constitutes a single character of
+// width one for formatting purposes.
+//
+// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
+//
+const Escape ='\xff'
+
+
+// Start escaped mode.
+func (b *Writer) startEscape(ch byte) {
+       switch ch {
+       case Escape: b.endChar = Escape;
+       case '<': b.endChar = '>';
+       case '&': b.endChar = ';';
         }
  }
  
  
-// Terminate HTML-escape mode. If the HTML text was an entity, its width
-// is assumed to be one for formatting purposes; otherwise it assumed to
-// be zero.
+// Terminate escaped mode. If the escaped text was an HTML tag, its width
+// is assumed to be zero for formatting purposes; if it was an HTML entity,
+// its width is assumed to be one. In all other cases, the width is the
+// unicode width of the text.
  //
-func (b *Writer) terminateHTML() {
-       if b.html_char == ';' {
-               // was entity, count as one rune
-               b.cell.width++;
+func (b *Writer) endEscape() {
+       switch b.endChar {
+       case Escape: b.updateWidth();
+       case '>': // tag of zero width
+       case ';': b.cell.width++;  // entity, count as one rune
         }
         b.pos = b.buf.Len();
-       b.html_char = 0;
+       b.endChar = 0;
  }
  
  
@@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int {
  
  // Flush should be called after the last call to Write to ensure
  // that any data buffered in the Writer is written to output. Any
-// incomplete HTML tag or entity at the end is simply considered
+// incomplete escape sequence at the end is simply considered
  // complete for formatting purposes.
  //
  func (b *Writer) Flush() os.Error {
         // add current cell if not empty
         if b.cell.size > 0 {
-               if b.html_char != 0 {
-                       // inside html tag/entity - terminate it even if incomplete
-                       b.terminateHTML();
+               if b.endChar != 0 {
+                       // inside escape - terminate it even if incomplete
+                       b.endEscape();
                 }
                 b.terminateCell(false);
         }
@@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error {
  // The only errors returned are ones encountered
  // while writing to the underlying output stream.
  //
-func (b *Writer) Write(buf []byte) (written int, err os.Error) {
+func (b *Writer) Write(buf []byte) (n int, err os.Error) {
         // split text into cells
-       i0 := 0;
+       n = 0;
         for i, ch := range buf {
-               if b.html_char == 0 {
-                       // outside html tag/entity
+               if b.endChar == 0 {
+                       // outside escape
                         switch ch {
                         case '\t', '\v', '\n', '\f':
                                 // end of cell
-                               b.append(buf[i0 : i], true);
-                               i0 = i+1;  // exclude ch from (next) cell
+                               b.append(buf[n : i]);
+                               b.updateWidth();
+                               n = i+1;  // ch consumed
                                 ncells := b.terminateCell(ch == '\t');
                                 if ch == '\n' || ch == '\f' {
                                         // terminate line
@@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
                                                 // line is ignored by format()), thus we can flush the
                                                 // Writer contents.
                                                 if err = b.Flush(); err != nil {
-                                                       return i0, err;
+                                                       return;
                                                 }
                                         }
                                 }
  
+                       case Escape:
+                               // start of escaped sequence
+                               b.append(buf[n : i]);
+                               b.updateWidth();
+                               n = i+1;  // exclude Escape
+                               b.startEscape(Escape);
+
                         case '<', '&':
                                 // possibly an html tag/entity
                                 if b.flags & FilterHTML != 0 {
                                         // begin of tag/entity
-                                       b.append(buf[i0 : i], true);
-                                       i0 = i;
-                                       b.startHTML(ch);
+                                       b.append(buf[n : i]);
+                                       b.updateWidth();
+                                       n = i;
+                                       b.startEscape(ch);
                                 }
                         }
  
                 } else {
-                       // inside html tag/entity
-                       if ch == b.html_char {
+                       // inside escape
+                       if ch == b.endChar {
                                 // end of tag/entity
-                               b.append(buf[i0 : i+1], false);
-                               i0 = i+1;  // exclude ch from (next) cell
-                               b.terminateHTML();
+                               j := i+1;
+                               if ch == Escape {
+                                       j = i;  // exclude Escape
+                               }
+                               b.append(buf[n : j]);
+                               n = i+1;  // ch consumed
+                               b.endEscape();
                         }
                 }
         }
  
         // append leftover text
-       b.append(buf[i0 : len(buf)], false);
-       return len(buf), nil;
+       b.append(buf[n : len(buf)]);
+       n = len(buf);
+       return;
  }
  
  
diff --git a/src/pkg/tabwriter/tabwriter_test.go b/src/pkg/tabwriter/tabwriter_test.go

index 927ed5e21438241dacee9aa7cfbd46a4882e3267..bbca42b991a9a9de2b641b0ad8940734442d3bee 100644 (file)
--- a/src/pkg/tabwriter/tabwriter_test.go
+++ b/src/pkg/tabwriter/tabwriter_test.go
@@ -113,19 +113,47 @@ type entry struct {
  
  var tests = []entry {
         entry{
-               "1",
+               "1a",
                 8, 1, '.', 0,
                 "",
                 ""
         },
  
         entry{
-               "1 debug",
+               "1a debug",
                 8, 1, '.', Debug,
                 "",
                 ""
         },
  
+       entry{
+               "1b esc",
+               8, 1, '.', 0,
+               "\xff\xff",
+               ""
+       },
+
+       entry{
+               "1c esc",
+               8, 1, '.', 0,
+               "\xff\t\xff",
+               "\t"
+       },
+
+       entry{
+               "1d esc",
+               8, 1, '.', 0,
+               "\xff\"foo\t\n\tbar\"\xff",
+               "\"foo\t\n\tbar\"",
+       },
+
+       entry{
+               "1e esc",
+               8, 1, '.', 0,
+               "abc\xff\tdef",  // unterminated escape
+               "abc\tdef",
+       },
+
         entry{
                 "2",
                 8, 1, '.', 0,
author	Robert Griesemer <gri@golang.org>
	Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)
committer	Robert Griesemer <gri@golang.org>
	Thu, 15 Oct 2009 16:28:52 +0000 (09:28 -0700)
src/pkg/tabwriter/tabwriter.go		patch \| blob \| history
src/pkg/tabwriter/tabwriter_test.go		patch \| blob \| history