// ----------------------------------------------------------------------------
// Filter implementation
+// A cell represents a segment of text delineated by tabs, form-feed,
+// or newline chars. The text itself is stored in a separate buffer;
+// cell only describes the segment's size in bytes and width in runes.
+//
+type cell struct {
+ size int; // cell size in bytes
+ width int; // cell width in runes
+}
+
+
// A Writer is a filter that inserts padding around
// tab-delimited columns in its input to align them
// in the output.
// UTF-8 characters.
//
// If a Writer is configured to filter HTML, HTML tags and entities
-// are simply passed through and their widths are assumed to be zero
-// for formatting purposes.
+// are simply passed through. The widths of tags and entities are
+// assumed to be zero (tags) and one (entities) for formatting purposes.
//
// The form feed character ('\f') acts like a newline but it also
// terminates all columns in the current line (effectively calling
flags uint;
// current state
- html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
buf io.ByteBuffer; // collected text w/o tabs, newlines, or form feed chars
- size int; // size of incomplete cell in bytes
- width int; // width of incomplete cell in runes up to buf[pos] w/o ignored sections
pos int; // buffer position up to which width of incomplete cell has been computed
- lines_size vector.Vector; // list of lines; each line is a list of cell sizes in bytes
- lines_width vector.Vector; // list of lines; each line is a list of cell widths in runes
+ cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
+ html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
+ lines vector.Vector; // list if lines; each line is a list of cells
widths vector.IntVector; // list of column widths in runes - re-used during formatting
}
+func (b *Writer) addLine() {
+ b.lines.Push(vector.New(0));
+}
+
+
+func (b *Writer) line(i int) *vector.Vector {
+ return b.lines.At(i).(*vector.Vector);
+}
+
+
+// Reset the current state.
+func (b *Writer) reset() {
+ b.buf.Reset();
+ b.pos = 0;
+ b.cell = cell{};
+ b.html_char = 0;
+ b.lines.Init(0);
+ b.widths.Init(0);
+ b.addLine();
+}
+
+
// Internal representation (current state):
//
// - all text written is appended to buf; form feed chars, tabs and newlines are stripped away
// - at any given time there is a (possibly empty) incomplete cell at the end
-// (the cell starts after a tab or newline)
+// (the cell starts after a tab, form feed, or newline)
// - size is the number of bytes belonging to the cell so far
// - width is text width in runes of that cell from the start of the cell to
// position pos; html tags and entities are excluded from this width if html
// buf start of incomplete cell pos
-func (b *Writer) addLine() {
- b.lines_size.Push(vector.NewIntVector(0));
- b.lines_width.Push(vector.NewIntVector(0));
-}
-
-
// Formatting can be controlled with these flags.
const (
// Ignore html tags and treat entities (starting with '&'
}
b.flags = flags;
- b.lines_size.Init(0);
- b.lines_width.Init(0);
- b.widths.Init(0);
- b.addLine(); // the very first line
+ b.reset();
return b;
}
-func (b *Writer) line(i int) (*vector.IntVector, *vector.IntVector) {
- return
- b.lines_size.At(i).(*vector.IntVector),
- b.lines_width.At(i).(*vector.IntVector);
-}
-
-
// debugging support (keep code around)
/*
func (b *Writer) dump() {
func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) {
pos := pos0;
for i := line0; i < line1; i++ {
- line_size, line_width := b.line(i);
- for j := 0; j < line_size.Len(); j++ {
- s, w := line_size.At(j), line_width.At(j);
+ line := b.line(i);
+ for j := 0; j < line.Len(); j++ {
+ c := line.At(j).(cell);
switch {
default: // align left
- if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
+ if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
return pos, err;
}
- pos += s;
+ pos += c.size;
if j < b.widths.Len() {
- if err := b.writePadding(w, b.widths.At(j)); err != nil {
+ if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
return pos, err;
}
}
case b.flags & AlignRight != 0: // align right
if j < b.widths.Len() {
- if err := b.writePadding(w, b.widths.At(j)); err != nil {
+ if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
return pos, err;
}
}
- if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
+ if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
return pos, err;
}
- pos += s;
+ pos += c.size;
}
}
- if i+1 == b.lines_size.Len() {
+ if i+1 == b.lines.Len() {
// last buffered line - we don't have a newline, so just write
// any outstanding buffered data
- if err := b.write0(b.buf.Data()[pos : pos + b.size]); err != nil {
+ if err := b.write0(b.buf.Data()[pos : pos + b.cell.size]); err != nil {
return pos, err;
}
- pos += b.size;
+ pos += b.cell.size;
} else {
// not the last line - write newline
if err := b.write0(newline); err != nil {
column := b.widths.Len();
last := line0;
for this := line0; this < line1; this++ {
- line_size, line_width := b.line(this);
+ line := b.line(this);
- if column < line_size.Len() - 1 {
+ if column < line.Len() - 1 {
// cell exists in this column
// (note that the last cell per line is ignored)
// column block begin
width := b.cellwidth; // minimal width
for ; this < line1; this++ {
- line_size, line_width = b.line(this);
- if column < line_size.Len() - 1 {
+ line = b.line(this);
+ if column < line.Len() - 1 {
// cell exists in this column => update width
- w := line_width.At(column) + b.padding;
+ w := line.At(column).(cell).width + b.padding;
if w > width {
width = w;
}
}
-// Flush should be called after the last call to Write to ensure
-// that any data buffered in the Writer is written to output.
+// Append text to current cell. Only update the cell width if updateWidth
+// is set (the cell width can only be updated if we know that we cannot be
+// in the middle of a UTF-8 encoded Unicode character).
//
-func (b *Writer) Flush() os.Error {
- _, err := b.format(0, 0, b.lines_size.Len());
- // reset (even in the presence of errors)
- b.buf.Reset();
- b.size, b.width = 0, 0;
- b.pos = 0;
- b.lines_size.Init(0);
- b.lines_width.Init(0);
- b.addLine();
- return err;
+func (b *Writer) append(text []byte, updateWidth bool) {
+ b.buf.Write(text);
+ b.cell.size += len(text);
+ if updateWidth {
+ b.cell.width += utf8.RuneCount(b.buf.Data()[b.pos : b.buf.Len()]);
+ b.pos = b.buf.Len();
+ }
}
-func unicodeLen(buf []byte) int {
- l := 0;
- for i := 0; i < len(buf); {
- if buf[i] < utf8.RuneSelf {
- i++;
- } else {
- rune, size := utf8.DecodeRune(buf[i : len(buf)]);
- i += size;
- }
- l++;
+// Start HTML-escape mode.
+func (b *Writer) startHTML(ch byte) {
+ if ch == '<' {
+ b.html_char = '>';
+ } else {
+ b.html_char = ';';
+ }
+}
+
+
+// Terminate HTML-escape mode. If the HTML text was an entity, its width
+// is assumed to be one for formatting purposes; otherwise it assumed to
+// be zero.
+//
+func (b *Writer) terminateHTML() {
+ if b.html_char == ';' {
+ // was entity, count as one rune
+ b.cell.width++;
}
- return l;
+ b.pos = b.buf.Len();
+ b.html_char = 0;
}
-func (b *Writer) append(buf []byte) {
- b.buf.Write(buf);
- b.size += len(buf);
+// Terminate the current cell by adding it to the list of cells of the
+// current line. Returns the number of cells in that line.
+//
+func (b *Writer) terminateCell() int {
+ line := b.line(b.lines.Len() - 1);
+ line.Push(b.cell);
+ b.cell = cell{};
+ return line.Len();
+}
+
+
+// Flush should be called after the last call to Write to ensure
+// that any data buffered in the Writer is written to output. Any
+// incomplete HTML tag or entity at the end is simply considered
+// complete for formatting purposes.
+//
+func (b *Writer) Flush() os.Error {
+ // add current cell if not empty
+ if b.cell.size > 0 {
+ if b.html_char != 0 {
+ // inside html tag/entity - terminate it even if incomplete
+ b.terminateHTML();
+ }
+ b.terminateCell();
+ }
+
+ // format contents of buffer
+ _, err := b.format(0, 0, b.lines.Len());
+
+ // reset, even in the presence of errors
+ b.reset();
+
+ return err;
}
// while writing to the underlying output stream.
//
func (b *Writer) Write(buf []byte) (written int, err os.Error) {
- i0, n := 0, len(buf);
-
// split text into cells
- for i := 0; i < n; i++ {
- ch := buf[i];
-
+ i0 := 0;
+ for i, ch := range buf {
if b.html_char == 0 {
// outside html tag/entity
switch ch {
case '\t', '\n', '\f':
- b.append(buf[i0 : i]);
- i0 = i + 1; // exclude ch from (next) cell
- b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
- b.pos = b.buf.Len();
-
- // terminate cell
- last_size, last_width := b.line(b.lines_size.Len() - 1);
- last_size.Push(b.size);
- last_width.Push(b.width);
- b.size, b.width = 0, 0;
-
+ // end of cell
+ b.append(buf[i0 : i], true);
+ i0 = i+1; // exclude ch from (next) cell
+ ncells := b.terminateCell();
if ch != '\t' {
// terminate line
b.addLine();
- if ch == '\f' || last_size.Len() == 1 {
+ if ch == '\f' || ncells == 1 {
// A '\f' always forces a flush. Otherwise, if the previous
// line has only one cell which does not have an impact on
// the formatting of the following lines (the last cell per
}
case '<', '&':
+ // possibly an html tag/entity
if b.flags & FilterHTML != 0 {
- b.append(buf[i0 : i]);
+ // begin of tag/entity
+ b.append(buf[i0 : i], true);
i0 = i;
- b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
- b.pos = -1; // preventative - should not be used (will cause index out of bounds)
- if ch == '<' {
- b.html_char = '>';
- } else {
- b.html_char = ';';
- }
+ b.startHTML(ch);
}
}
} else {
// inside html tag/entity
if ch == b.html_char {
- // reached the end of tag/entity
- b.append(buf[i0 : i + 1]);
- i0 = i + 1;
- if b.html_char == ';' {
- b.width++; // count as one char
- }
- b.pos = b.buf.Len();
- b.html_char = 0;
+ // end of tag/entity
+ b.append(buf[i0 : i+1], false);
+ i0 = i+1; // exclude ch from (next) cell
+ b.terminateHTML();
}
}
}
// append leftover text
- b.append(buf[i0 : n]);
- return n, nil;
+ b.append(buf[i0 : len(buf)], false);
+ return len(buf), nil;
}
}
-func write(t *testing.T, w *tabwriter.Writer, src string) {
+func write(t *testing.T, testname string, w *tabwriter.Writer, src string) {
written, err := io.WriteString(w, src);
if err != nil {
- t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err);
+ t.Errorf("--- test: %s\n--- src:\n%s\n--- write error: %v\n", testname, src, err);
}
if written != len(src) {
- t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src));
+ t.Errorf("--- test: %s\n--- src:\n%s\n--- written = %d, len(src) = %d\n", testname, src, written, len(src));
}
}
-func verify(t *testing.T, w *tabwriter.Writer, b *buffer, src, expected string) {
+func verify(t *testing.T, testname string, w *tabwriter.Writer, b *buffer, src, expected string) {
err := w.Flush();
if err != nil {
- t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err);
+ t.Errorf("--- test: %s\n--- src:\n%s\n--- flush error: %v\n", testname, src, err);
}
res := b.String();
if res != expected {
- t.Errorf("--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", src, res, expected)
+ t.Errorf("--- test: %s\n--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", testname, src, res, expected)
}
}
-func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
+func check(t *testing.T, testname string, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
var b buffer;
b.init(1000);
// write all at once
b.clear();
- write(t, &w, src);
- verify(t, &w, &b, src, expected);
+ write(t, testname, &w, src);
+ verify(t, testname, &w, &b, src, expected);
// write byte-by-byte
b.clear();
for i := 0; i < len(src); i++ {
- write(t, &w, src[i : i+1]);
+ write(t, testname, &w, src[i : i+1]);
}
- verify(t, &w, &b, src, expected);
+ verify(t, testname, &w, &b, src, expected);
// write using Fibonacci slice sizes
b.clear();
for i, d := 0, 0; i < len(src); {
- write(t, &w, src[i : i+d]);
+ write(t, testname, &w, src[i : i+d]);
i, d = i+d, d+1;
if i+d > len(src) {
d = len(src) - i;
}
}
- verify(t, &w, &b, src, expected);
+ verify(t, testname, &w, &b, src, expected);
}
type entry struct {
+ testname string;
tabwidth, padding int;
padchar byte;
flags uint;
var tests = []entry {
entry{
+ "1",
8, 1, '.', 0,
"",
""
},
entry{
+ "2",
8, 1, '.', 0,
"\n\n\n",
"\n\n\n"
},
entry{
+ "3",
8, 1, '.', 0,
"a\nb\nc",
"a\nb\nc"
},
entry{
+ "4a",
8, 1, '.', 0,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
},
entry{
+ "4b",
8, 1, '.', tabwriter.AlignRight,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
},
entry{
+ "5",
8, 1, '.', 0,
"*\t*",
- "**"
+ "*.......*"
},
entry{
+ "5b",
8, 1, '.', 0,
"*\t*\n",
"*.......*\n"
},
entry{
+ "5c",
8, 1, '.', 0,
"*\t*\t",
"*.......*"
},
entry{
+ "5d",
8, 1, '.', tabwriter.AlignRight,
"*\t*\t",
".......**"
},
entry{
+ "6",
8, 1, '.', 0,
"\t\n",
"........\n"
},
entry{
+ "7a",
8, 1, '.', 0,
"a) foo",
"a) foo"
},
entry{
+ "7b",
8, 1, ' ', 0,
- "b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed
- "b) foobar"
+ "b) foo\tbar",
+ "b) foo bar"
},
entry{
+ "7c",
8, 1, '.', 0,
"c) foo\tbar\t",
"c) foo..bar"
},
entry{
+ "7d",
8, 1, '.', 0,
"d) foo\tbar\n",
"d) foo..bar\n"
},
entry{
+ "7e",
8, 1, '.', 0,
"e) foo\tbar\t\n",
"e) foo..bar.....\n"
},
entry{
+ "7f",
8, 1, '.', tabwriter.FilterHTML,
"f) f<o\t<b>bar</b>\t\n",
"f) f<o..<b>bar</b>.....\n"
},
entry{
+ "7g",
+ 8, 1, '.', tabwriter.FilterHTML,
+ "g) f<o\t<b>bar</b>\t non-terminated entity &",
+ "g) f<o..<b>bar</b>..... non-terminated entity &"
+ },
+
+ entry{
+ "8",
8, 1, '*', 0,
"Hello, world!\n",
"Hello, world!\n"
},
entry{
+ "9a",
0, 0, '.', 0,
"1\t2\t3\t4\n"
"11\t222\t3333\t44444\n",
},
entry{
+ "9b",
0, 0, '.', tabwriter.FilterHTML,
"1\t2<!---\f--->\t3\t4\n" // \f inside HTML is ignored
"11\t222\t3333\t44444\n",
},
entry{
+ "9c",
0, 0, '.', 0,
"1\t2\t3\t4\f" // \f causes a newline and flush
"11\t222\t3333\t44444\n",
},
entry{
+ "10a",
5, 0, '.', 0,
"1\t2\t3\t4\n",
"1....2....3....4\n"
},
entry{
+ "10b",
5, 0, '.', 0,
"1\t2\t3\t4\t\n",
"1....2....3....4....\n"
},
entry{
+ "11",
8, 1, '.', 0,
"本\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
},
entry{
+ "12a",
8, 1, ' ', tabwriter.AlignRight,
"a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n"
},
entry{
+ "12b",
2, 0, ' ', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
},
entry{
+ "12c",
8, 1, '_', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
},
entry{
+ "13a",
4, 1, '-', 0,
"4444\t日本語\t22\t1\t333\n"
"999999999\t22\n"
},
entry{
+ "13b",
4, 3, '.', 0,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
},
entry{
+ "13c",
8, 1, '\t', tabwriter.FilterHTML,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
},
entry{
+ "14",
0, 2, ' ', tabwriter.AlignRight,
".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n"
func Test(t *testing.T) {
for _, e := range tests {
- check(t, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
+ check(t, e.testname, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
}
}