From: Robert Griesemer <gri@golang.org>
Date: Thu, 15 Oct 2009 16:28:52 +0000 (-0700)
Subject: permit escaped text segments which pass through tabwriter
X-Git-Tag: weekly.2009-11-06~302
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b55e6d1ba5a7db27bde0c8de81213cb33ec52e92;p=gostls13.git

permit escaped text segments which pass through tabwriter
undisturbed and uninterpreted

R=rsc
DELTA=141  (82 added, 23 deleted, 36 changed)
OCL=35747
CL=35769
---

diff --git a/src/pkg/tabwriter/tabwriter.go b/src/pkg/tabwriter/tabwriter.go
index 372ebff8b2..fe716e91ae 100644
--- a/src/pkg/tabwriter/tabwriter.go
+++ b/src/pkg/tabwriter/tabwriter.go
@@ -51,6 +51,11 @@ type cell struct {
 // terminated by horizontal (or "hard") tabs are not affected by
 // this flag.
 //
+// A segment of text may be escaped by bracketing it with Escape
+// characters. The tabwriter strips the Escape characters but otherwise
+// passes escaped text segments through unchanged. In particular, it
+// does not interpret any tabs or line breaks within the segment.
+//
 // The Writer assumes that all characters have the same width;
 // this may not be true in some fonts, especially with certain
 // UTF-8 characters.
@@ -62,8 +67,8 @@ type cell struct {
 // The formfeed character ('\f') acts like a newline but it also
 // terminates all columns in the current line (effectively calling
 // Flush). Cells in the next line start new columns. Unless found
-// inside an HTML tag, formfeed characters appear as newlines in
-// the output.
+// inside an HTML tag or inside an escaped text segment, formfeed
+// characters appear as newlines in the output.
 //
 // The Writer must buffer input internally, because proper spacing
 // of one line may depend on the cells in future lines. Clients must
@@ -81,7 +86,7 @@ type Writer struct {
 	buf bytes.Buffer;  // collected text w/o tabs, newlines, or formfeed chars
 	pos int;  // buffer position up to which width of incomplete cell has been computed
 	cell cell;  // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
-	html_char byte;  // terminating char of html tag/entity, or 0 ('>', ';', or 0)
+	endChar byte;  // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
 	lines vector.Vector;  // list if lines; each line is a list of cells
 	widths vector.IntVector;  // list of column widths in runes - re-used during formatting
 }
@@ -102,7 +107,7 @@ func (b *Writer) reset() {
 	b.buf.Reset();
 	b.pos = 0;
 	b.cell = cell{};
-	b.html_char = 0;
+	b.endChar = 0;
 	b.lines.Init(0);
 	b.widths.Init(0);
 	b.addLine();
@@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
 }
 
 
-// Append text to current cell. Only update the cell width if updateWidth
-// is set (the cell width can only be updated if we know that we cannot be
-// in the middle of a UTF-8 encoded Unicode character).
-//
-func (b *Writer) append(text []byte, updateWidth bool) {
+// Append text to current cell.
+func (b *Writer) append(text []byte) {
 	b.buf.Write(text);
 	b.cell.size += len(text);
-	if updateWidth {
-		b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
-		b.pos = b.buf.Len();
-	}
 }
 
 
-// Start HTML-escape mode.
-func (b *Writer) startHTML(ch byte) {
-	if ch == '<' {
-		b.html_char = '>';
-	} else {
-		b.html_char = ';';
+// Update the cell width.
+func (b *Writer) updateWidth() {
+	b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
+	b.pos = b.buf.Len();
+}
+
+
+// To escape a text segment, bracket it with Escape characters.
+// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
+// does not terminate a cell and constitutes a single character of
+// width one for formatting purposes.
+//
+// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
+//
+const Escape ='\xff'
+
+
+// Start escaped mode.
+func (b *Writer) startEscape(ch byte) {
+	switch ch {
+	case Escape: b.endChar = Escape;
+	case '<': b.endChar = '>';
+	case '&': b.endChar = ';';
 	}
 }
 
 
-// Terminate HTML-escape mode. If the HTML text was an entity, its width
-// is assumed to be one for formatting purposes; otherwise it assumed to
-// be zero.
+// Terminate escaped mode. If the escaped text was an HTML tag, its width
+// is assumed to be zero for formatting purposes; if it was an HTML entity,
+// its width is assumed to be one. In all other cases, the width is the
+// unicode width of the text.
 //
-func (b *Writer) terminateHTML() {
-	if b.html_char == ';' {
-		// was entity, count as one rune
-		b.cell.width++;
+func (b *Writer) endEscape() {
+	switch b.endChar {
+	case Escape: b.updateWidth();
+	case '>': // tag of zero width
+	case ';': b.cell.width++;  // entity, count as one rune
 	}
 	b.pos = b.buf.Len();
-	b.html_char = 0;
+	b.endChar = 0;
 }
 
 
@@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int {
 
 // Flush should be called after the last call to Write to ensure
 // that any data buffered in the Writer is written to output. Any
-// incomplete HTML tag or entity at the end is simply considered
+// incomplete escape sequence at the end is simply considered
 // complete for formatting purposes.
 //
 func (b *Writer) Flush() os.Error {
 	// add current cell if not empty
 	if b.cell.size > 0 {
-		if b.html_char != 0 {
-			// inside html tag/entity - terminate it even if incomplete
-			b.terminateHTML();
+		if b.endChar != 0 {
+			// inside escape - terminate it even if incomplete
+			b.endEscape();
 		}
 		b.terminateCell(false);
 	}
@@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error {
 // The only errors returned are ones encountered
 // while writing to the underlying output stream.
 //
-func (b *Writer) Write(buf []byte) (written int, err os.Error) {
+func (b *Writer) Write(buf []byte) (n int, err os.Error) {
 	// split text into cells
-	i0 := 0;
+	n = 0;
 	for i, ch := range buf {
-		if b.html_char == 0 {
-			// outside html tag/entity
+		if b.endChar == 0 {
+			// outside escape
 			switch ch {
 			case '\t', '\v', '\n', '\f':
 				// end of cell
-				b.append(buf[i0 : i], true);
-				i0 = i+1;  // exclude ch from (next) cell
+				b.append(buf[n : i]);
+				b.updateWidth();
+				n = i+1;  // ch consumed
 				ncells := b.terminateCell(ch == '\t');
 				if ch == '\n' || ch == '\f' {
 					// terminate line
@@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
 						// line is ignored by format()), thus we can flush the
 						// Writer contents.
 						if err = b.Flush(); err != nil {
-							return i0, err;
+							return;
 						}
 					}
 				}
 
+			case Escape:
+				// start of escaped sequence
+				b.append(buf[n : i]);
+				b.updateWidth();
+				n = i+1;  // exclude Escape
+				b.startEscape(Escape);
+
 			case '<', '&':
 				// possibly an html tag/entity
 				if b.flags & FilterHTML != 0 {
 					// begin of tag/entity
-					b.append(buf[i0 : i], true);
-					i0 = i;
-					b.startHTML(ch);
+					b.append(buf[n : i]);
+					b.updateWidth();
+					n = i;
+					b.startEscape(ch);
 				}
 			}
 
 		} else {
-			// inside html tag/entity
-			if ch == b.html_char {
+			// inside escape
+			if ch == b.endChar {
 				// end of tag/entity
-				b.append(buf[i0 : i+1], false);
-				i0 = i+1;  // exclude ch from (next) cell
-				b.terminateHTML();
+				j := i+1;
+				if ch == Escape {
+					j = i;  // exclude Escape
+				}
+				b.append(buf[n : j]);
+				n = i+1;  // ch consumed
+				b.endEscape();
 			}
 		}
 	}
 
 	// append leftover text
-	b.append(buf[i0 : len(buf)], false);
-	return len(buf), nil;
+	b.append(buf[n : len(buf)]);
+	n = len(buf);
+	return;
 }
 
 
diff --git a/src/pkg/tabwriter/tabwriter_test.go b/src/pkg/tabwriter/tabwriter_test.go
index 927ed5e214..bbca42b991 100644
--- a/src/pkg/tabwriter/tabwriter_test.go
+++ b/src/pkg/tabwriter/tabwriter_test.go
@@ -113,19 +113,47 @@ type entry struct {
 
 var tests = []entry {
 	entry{
-		"1",
+		"1a",
 		8, 1, '.', 0,
 		"",
 		""
 	},
 
 	entry{
-		"1 debug",
+		"1a debug",
 		8, 1, '.', Debug,
 		"",
 		""
 	},
 
+	entry{
+		"1b esc",
+		8, 1, '.', 0,
+		"\xff\xff",
+		""
+	},
+
+	entry{
+		"1c esc",
+		8, 1, '.', 0,
+		"\xff\t\xff",
+		"\t"
+	},
+
+	entry{
+		"1d esc",
+		8, 1, '.', 0,
+		"\xff\"foo\t\n\tbar\"\xff",
+		"\"foo\t\n\tbar\"",
+	},
+
+	entry{
+		"1e esc",
+		8, 1, '.', 0,
+		"abc\xff\tdef",  // unterminated escape
+		"abc\tdef",
+	},
+
 	entry{
 		"2",
 		8, 1, '.', 0,