From: Robert Griesemer Date: Thu, 15 Oct 2009 16:28:52 +0000 (-0700) Subject: permit escaped text segments which pass through tabwriter X-Git-Tag: weekly.2009-11-06~302 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=b55e6d1ba5a7db27bde0c8de81213cb33ec52e92;p=gostls13.git permit escaped text segments which pass through tabwriter undisturbed and uninterpreted R=rsc DELTA=141 (82 added, 23 deleted, 36 changed) OCL=35747 CL=35769 --- diff --git a/src/pkg/tabwriter/tabwriter.go b/src/pkg/tabwriter/tabwriter.go index 372ebff8b2..fe716e91ae 100644 --- a/src/pkg/tabwriter/tabwriter.go +++ b/src/pkg/tabwriter/tabwriter.go @@ -51,6 +51,11 @@ type cell struct { // terminated by horizontal (or "hard") tabs are not affected by // this flag. // +// A segment of text may be escaped by bracketing it with Escape +// characters. The tabwriter strips the Escape characters but otherwise +// passes escaped text segments through unchanged. In particular, it +// does not interpret any tabs or line breaks within the segment. +// // The Writer assumes that all characters have the same width; // this may not be true in some fonts, especially with certain // UTF-8 characters. @@ -62,8 +67,8 @@ type cell struct { // The formfeed character ('\f') acts like a newline but it also // terminates all columns in the current line (effectively calling // Flush). Cells in the next line start new columns. Unless found -// inside an HTML tag, formfeed characters appear as newlines in -// the output. +// inside an HTML tag or inside an escaped text segment, formfeed +// characters appear as newlines in the output. // // The Writer must buffer input internally, because proper spacing // of one line may depend on the cells in future lines. Clients must @@ -81,7 +86,7 @@ type Writer struct { buf bytes.Buffer; // collected text w/o tabs, newlines, or formfeed chars pos int; // buffer position up to which width of incomplete cell has been computed cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections - html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0) + endChar byte; // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0) lines vector.Vector; // list if lines; each line is a list of cells widths vector.IntVector; // list of column widths in runes - re-used during formatting } @@ -102,7 +107,7 @@ func (b *Writer) reset() { b.buf.Reset(); b.pos = 0; b.cell = cell{}; - b.html_char = 0; + b.endChar = 0; b.lines.Init(0); b.widths.Init(0); b.addLine(); @@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) { } -// Append text to current cell. Only update the cell width if updateWidth -// is set (the cell width can only be updated if we know that we cannot be -// in the middle of a UTF-8 encoded Unicode character). -// -func (b *Writer) append(text []byte, updateWidth bool) { +// Append text to current cell. +func (b *Writer) append(text []byte) { b.buf.Write(text); b.cell.size += len(text); - if updateWidth { - b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]); - b.pos = b.buf.Len(); - } } -// Start HTML-escape mode. -func (b *Writer) startHTML(ch byte) { - if ch == '<' { - b.html_char = '>'; - } else { - b.html_char = ';'; +// Update the cell width. +func (b *Writer) updateWidth() { + b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]); + b.pos = b.buf.Len(); +} + + +// To escape a text segment, bracket it with Escape characters. +// For instance, the tab in this string "Ignore this tab: \xff\t\xff" +// does not terminate a cell and constitutes a single character of +// width one for formatting purposes. +// +// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence. +// +const Escape ='\xff' + + +// Start escaped mode. +func (b *Writer) startEscape(ch byte) { + switch ch { + case Escape: b.endChar = Escape; + case '<': b.endChar = '>'; + case '&': b.endChar = ';'; } } -// Terminate HTML-escape mode. If the HTML text was an entity, its width -// is assumed to be one for formatting purposes; otherwise it assumed to -// be zero. +// Terminate escaped mode. If the escaped text was an HTML tag, its width +// is assumed to be zero for formatting purposes; if it was an HTML entity, +// its width is assumed to be one. In all other cases, the width is the +// unicode width of the text. // -func (b *Writer) terminateHTML() { - if b.html_char == ';' { - // was entity, count as one rune - b.cell.width++; +func (b *Writer) endEscape() { + switch b.endChar { + case Escape: b.updateWidth(); + case '>': // tag of zero width + case ';': b.cell.width++; // entity, count as one rune } b.pos = b.buf.Len(); - b.html_char = 0; + b.endChar = 0; } @@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int { // Flush should be called after the last call to Write to ensure // that any data buffered in the Writer is written to output. Any -// incomplete HTML tag or entity at the end is simply considered +// incomplete escape sequence at the end is simply considered // complete for formatting purposes. // func (b *Writer) Flush() os.Error { // add current cell if not empty if b.cell.size > 0 { - if b.html_char != 0 { - // inside html tag/entity - terminate it even if incomplete - b.terminateHTML(); + if b.endChar != 0 { + // inside escape - terminate it even if incomplete + b.endEscape(); } b.terminateCell(false); } @@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error { // The only errors returned are ones encountered // while writing to the underlying output stream. // -func (b *Writer) Write(buf []byte) (written int, err os.Error) { +func (b *Writer) Write(buf []byte) (n int, err os.Error) { // split text into cells - i0 := 0; + n = 0; for i, ch := range buf { - if b.html_char == 0 { - // outside html tag/entity + if b.endChar == 0 { + // outside escape switch ch { case '\t', '\v', '\n', '\f': // end of cell - b.append(buf[i0 : i], true); - i0 = i+1; // exclude ch from (next) cell + b.append(buf[n : i]); + b.updateWidth(); + n = i+1; // ch consumed ncells := b.terminateCell(ch == '\t'); if ch == '\n' || ch == '\f' { // terminate line @@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) { // line is ignored by format()), thus we can flush the // Writer contents. if err = b.Flush(); err != nil { - return i0, err; + return; } } } + case Escape: + // start of escaped sequence + b.append(buf[n : i]); + b.updateWidth(); + n = i+1; // exclude Escape + b.startEscape(Escape); + case '<', '&': // possibly an html tag/entity if b.flags & FilterHTML != 0 { // begin of tag/entity - b.append(buf[i0 : i], true); - i0 = i; - b.startHTML(ch); + b.append(buf[n : i]); + b.updateWidth(); + n = i; + b.startEscape(ch); } } } else { - // inside html tag/entity - if ch == b.html_char { + // inside escape + if ch == b.endChar { // end of tag/entity - b.append(buf[i0 : i+1], false); - i0 = i+1; // exclude ch from (next) cell - b.terminateHTML(); + j := i+1; + if ch == Escape { + j = i; // exclude Escape + } + b.append(buf[n : j]); + n = i+1; // ch consumed + b.endEscape(); } } } // append leftover text - b.append(buf[i0 : len(buf)], false); - return len(buf), nil; + b.append(buf[n : len(buf)]); + n = len(buf); + return; } diff --git a/src/pkg/tabwriter/tabwriter_test.go b/src/pkg/tabwriter/tabwriter_test.go index 927ed5e214..bbca42b991 100644 --- a/src/pkg/tabwriter/tabwriter_test.go +++ b/src/pkg/tabwriter/tabwriter_test.go @@ -113,19 +113,47 @@ type entry struct { var tests = []entry { entry{ - "1", + "1a", 8, 1, '.', 0, "", "" }, entry{ - "1 debug", + "1a debug", 8, 1, '.', Debug, "", "" }, + entry{ + "1b esc", + 8, 1, '.', 0, + "\xff\xff", + "" + }, + + entry{ + "1c esc", + 8, 1, '.', 0, + "\xff\t\xff", + "\t" + }, + + entry{ + "1d esc", + 8, 1, '.', 0, + "\xff\"foo\t\n\tbar\"\xff", + "\"foo\t\n\tbar\"", + }, + + entry{ + "1e esc", + 8, 1, '.', 0, + "abc\xff\tdef", // unterminated escape + "abc\tdef", + }, + entry{ "2", 8, 1, '.', 0,