// ----------------------------------------------------------------------------
// Filter implementation
-// A cell represents a segment of text delineated by tabs, formfeed,
-// or newline chars. The text itself is stored in a separate buffer;
-// cell only describes the segment's size in bytes, its width in runes,
-// and whether it's an htab ('\t') or vtab ('\v') terminated call.
+// A cell represents a segment of text terminated by tabs or line breaks.
+// The text itself is stored in a separate buffer; cell only describes the
+// segment's size in bytes, its width in runes, and whether it's an htab
+// ('\t') terminated cell.
//
type cell struct {
size int; // cell size in bytes
// tab-delimited columns in its input to align them
// in the output.
//
-// The Writer treats incoming bytes as UTF-8 encoded text
-// consisting of tab-terminated cells. Cells in adjacent lines
-// constitute a column. The Writer inserts padding as needed
-// to make all cells in a column have the same width, effectively
-// aligning the columns. Note that cells are tab-terminated,
-// not tab-separated: trailing non-tab text at the end of a line
-// is not part of any cell.
+// The Writer treats incoming bytes as UTF-8 encoded text consisting
+// of cells terminated by (horizontal or vertical) tabs or line
+// breaks (newline or formfeed characters). Cells in adjacent lines
+// constitute a column. The Writer inserts padding as needed to
+// make all cells in a column have the same width, effectively
+// aligning the columns. It assumes that all characters have the
+// same width except for tabs for which a tabwidth must be specified.
+// Note that cells are tab-terminated, not tab-separated: trailing
+// non-tab text at the end of a line does not form a column cell.
//
-// Horizontal and vertical tabs may be used to terminate a cell.
// If DiscardEmptyColumns is set, empty columns that are terminated
// entirely by vertical (or "soft") tabs are discarded. Columns
// terminated by horizontal (or "hard") tabs are not affected by
type Writer struct {
// configuration
output io.Writer;
- cellwidth int;
+ minwidth int;
+ tabwidth int;
padding int;
padbytes [8]byte;
flags uint;
// current state
- buf bytes.Buffer; // collected text w/o tabs, newlines, or formfeed chars
- pos int; // buffer position up to which width of incomplete cell has been computed
- cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
+ buf bytes.Buffer; // collected text excluding tabs or line breaks
+ pos int; // buffer position up to which cell.width of incomplete cell has been computed
+ cell cell; // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
endChar byte; // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
- lines vector.Vector; // list if lines; each line is a list of cells
+ lines vector.Vector; // list of lines; each line is a list of cells
widths vector.IntVector; // list of column widths in runes - re-used during formatting
}
// Internal representation (current state):
//
-// - all text written is appended to buf; formfeed chars, tabs and newlines are stripped away
+// - all text written is appended to buf; tabs and line breaks are stripped away
// - at any given time there is a (possibly empty) incomplete cell at the end
-// (the cell starts after a tab, formfeed, or newline)
+// (the cell starts after a tab or line break)
// - cell.size is the number of bytes belonging to the cell so far
// - cell.width is text width in runes of that cell from the start of the cell to
// position pos; html tags and entities are excluded from this width if html
// the input in the first place.
DiscardEmptyColumns;
+ // Always use tabs for indentation columns (i.e., padding of
+ // leading empty cells on the left) independent of padchar.
+ TabIndent;
+
// Print a vertical bar ('|') between columns (after formatting).
// Discarded colums appear as zero-width columns ("||").
Debug;
// A Writer must be initialized with a call to Init. The first parameter (output)
// specifies the filter output. The remaining parameters control the formatting:
//
-// cellwidth minimal cell width
-// padding cell padding added to cell before computing its width
+// minwidth minimal cell width including any padding
+// tabwidth width of tab characters (equivalent number of spaces)
+// padding padding added to a cell before computing its width
// padchar ASCII char used for padding
// if padchar == '\t', the Writer will assume that the
-// width of a '\t' in the formatted output is cellwidth,
+// width of a '\t' in the formatted output is tabwidth,
// and cells are left-aligned independent of align_left
-// (for correct-looking results, cellwidth must correspond
+// (for correct-looking results, tabwidth must correspond
// to the tab width in the viewer displaying the result)
// flags formatting control
//
// To format in tab-separated columns with a tab stop of 8:
-// b.Init(w, 8, 1, '\t', 0);
+// b.Init(w, 8, 1, 8, '\t', 0);
//
// To format in space-separated columns with at least 4 spaces between columns:
-// b.Init(w, 1, 4, ' ', 0);
+// b.Init(w, 0, 4, 8, ' ', 0);
//
-func (b *Writer) Init(output io.Writer, cellwidth, padding int, padchar byte, flags uint) *Writer {
- if cellwidth < 0 {
- panic("negative cellwidth")
- }
- if padding < 0 {
- panic("negative padding")
+func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
+ if minwidth < 0 || tabwidth < 0 || padding < 0 {
+ panic("negative minwidth, tabwidth, or padding")
}
b.output = output;
- b.cellwidth = cellwidth;
+ b.minwidth = minwidth;
+ b.tabwidth = tabwidth;
b.padding = padding;
- for i := len(b.padbytes) - 1; i >= 0; i-- {
+ for i := range b.padbytes {
b.padbytes[i] = padchar
}
if padchar == '\t' {
- // tab enforces left-alignment
+ // tab padding enforces left-alignment
flags &^= AlignRight
}
b.flags = flags;
// debugging support (keep code around)
-/*
func (b *Writer) dump() {
pos := 0;
- for i := 0; i < b.lines_size.Len(); i++ {
- line_size, line_width := b.line(i);
+ for i := 0; i < b.lines.Len(); i++ {
+ line := b.line(i);
print("(", i, ") ");
- for j := 0; j < line_size.Len(); j++ {
- s := line_size.At(j);
- print("[", string(b.buf.slice(pos, pos + s)), "]");
- pos += s;
+ for j := 0; j < line.Len(); j++ {
+ c := line.At(j).(cell);
+ print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]");
+ pos += c.size;
}
print("\n");
}
print("\n");
}
-*/
func (b *Writer) write0(buf []byte) os.Error {
}
-var newline = []byte{'\n'}
-
-func (b *Writer) writePadding(textw, cellw int) os.Error {
- if b.cellwidth == 0 {
- return nil
+func (b *Writer) writeN(src []byte, n int) os.Error {
+ for n > len(src) {
+ if err := b.write0(src); err != nil {
+ return err
+ }
+ n -= len(src);
}
+ return b.write0(src[0:n]);
+}
- if b.padbytes[0] == '\t' {
- // make cell width a multiple of cellwidth
- cellw = ((cellw + b.cellwidth - 1) / b.cellwidth) * b.cellwidth
- }
- n := cellw - textw;
- if n < 0 {
- panic("internal error")
- }
+var (
+ newline = []byte{'\n'};
+ tabs = []byte{'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'};
+)
- if b.padbytes[0] == '\t' {
- n = (n + b.cellwidth - 1) / b.cellwidth
- }
- for n > len(b.padbytes) {
- if err := b.write0(&b.padbytes); err != nil {
- return err
+func (b *Writer) writePadding(textw, cellw int, useTabs bool) os.Error {
+ if b.padbytes[0] == '\t' || useTabs {
+ // padding is done with tabs
+ if b.tabwidth == 0 {
+ return nil // tabs have no width - can't do any padding
}
- n -= len(b.padbytes);
+ // make cellw the smallest multiple of b.tabwidth
+ cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth;
+ n := cellw - textw; // amount of padding
+ if n < 0 {
+ panic("internal error")
+ }
+ return b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth);
}
- return b.write0(b.padbytes[0:n]);
+ // padding is done with non-tab characters
+ return b.writeN(&b.padbytes, cellw-textw);
}
pos = pos0;
for i := line0; i < line1; i++ {
line := b.line(i);
+
+ // if TabIndent is set, use tabs to pad leading empty cells
+ useTabs := b.flags&TabIndent != 0;
+
for j := 0; j < line.Len(); j++ {
c := line.At(j).(cell);
return
}
}
- switch {
- default: // align left
- if err = b.write0(b.buf.Bytes()[pos : pos+c.size]); err != nil {
- return
- }
- pos += c.size;
+ if c.size == 0 {
+ // empty cell
if j < b.widths.Len() {
- if err = b.writePadding(c.width, b.widths.At(j)); err != nil {
+ if err = b.writePadding(c.width, b.widths.At(j), useTabs); err != nil {
return
}
}
-
- case b.flags&AlignRight != 0: // align right
-
- if j < b.widths.Len() {
- if err = b.writePadding(c.width, b.widths.At(j)); err != nil {
+ } else {
+ // non-empty cell
+ useTabs = false;
+ if b.flags&AlignRight == 0 { // align left
+ if err = b.write0(b.buf.Bytes()[pos : pos+c.size]); err != nil {
return
}
+ pos += c.size;
+ if j < b.widths.Len() {
+ if err = b.writePadding(c.width, b.widths.At(j), false); err != nil {
+ return
+ }
+ }
+ } else { // align right
+ if j < b.widths.Len() {
+ if err = b.writePadding(c.width, b.widths.At(j), false); err != nil {
+ return
+ }
+ }
+ if err = b.write0(b.buf.Bytes()[pos : pos+c.size]); err != nil {
+ return
+ }
+ pos += c.size;
}
- if err = b.write0(b.buf.Bytes()[pos : pos+c.size]); err != nil {
- return
- }
- pos += c.size;
}
}
line0 = this;
// column block begin
- width := b.cellwidth; // minimal column width
+ width := b.minwidth; // minimal column width
discardable := true; // true if all cells in this column are empty and "soft"
for ; this < line1; this++ {
line = b.line(this);
// NewWriter allocates and initializes a new tabwriter.Writer.
// The parameters are the same as for the the Init function.
//
-func NewWriter(output io.Writer, cellwidth, padding int, padchar byte, flags uint) *Writer {
- return new(Writer).Init(output, cellwidth, padding, padchar, flags)
+func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
+ return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
}
}
-func check(t *testing.T, testname string, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
+func check(t *testing.T, testname string, minwidth, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
var b buffer;
b.init(1000);
var w Writer;
- w.Init(&b, tabwidth, padding, padchar, flags);
+ w.Init(&b, minwidth, tabwidth, padding, padchar, flags);
// write all at once
b.clear();
type entry struct {
- testname string;
- tabwidth, padding int;
- padchar byte;
- flags uint;
- src, expected string;
+ testname string;
+ minwidth, tabwidth, padding int;
+ padchar byte;
+ flags uint;
+ src, expected string;
}
var tests = []entry{
entry{
"1a",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"",
"",
},
entry{
"1a debug",
- 8, 1, '.', Debug,
+ 8, 0, 1, '.', Debug,
"",
"",
},
entry{
"1b esc",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\xff\xff",
"",
},
entry{
"1c esc",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\xff\t\xff",
"\t",
},
entry{
"1d esc",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\xff\"foo\t\n\tbar\"\xff",
"\"foo\t\n\tbar\"",
},
entry{
"1e esc",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"abc\xff\tdef", // unterminated escape
"abc\tdef",
},
entry{
"2",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\n\n\n",
"\n\n\n",
},
entry{
"3",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"a\nb\nc",
"a\nb\nc",
},
entry{
"4a",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\t", // '\t' terminates an empty cell on last line - nothing to print
"",
},
entry{
"4b",
- 8, 1, '.', AlignRight,
+ 8, 0, 1, '.', AlignRight,
"\t", // '\t' terminates an empty cell on last line - nothing to print
"",
},
entry{
"5",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"*\t*",
"*.......*",
},
entry{
"5b",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"*\t*\n",
"*.......*\n",
},
entry{
"5c",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"*\t*\t",
"*.......*",
},
entry{
"5c debug",
- 8, 1, '.', Debug,
+ 8, 0, 1, '.', Debug,
"*\t*\t",
"*.......|*",
},
entry{
"5d",
- 8, 1, '.', AlignRight,
+ 8, 0, 1, '.', AlignRight,
"*\t*\t",
".......**",
},
entry{
"6",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"\t\n",
"........\n",
},
entry{
"7a",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"a) foo",
"a) foo",
},
entry{
"7b",
- 8, 1, ' ', 0,
+ 8, 0, 1, ' ', 0,
"b) foo\tbar",
"b) foo bar",
},
entry{
"7c",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"c) foo\tbar\t",
"c) foo..bar",
},
entry{
"7d",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"d) foo\tbar\n",
"d) foo..bar\n",
},
entry{
"7e",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"e) foo\tbar\t\n",
"e) foo..bar.....\n",
},
entry{
"7f",
- 8, 1, '.', FilterHTML,
+ 8, 0, 1, '.', FilterHTML,
"f) f<o\t<b>bar</b>\t\n",
"f) f<o..<b>bar</b>.....\n",
},
entry{
"7g",
- 8, 1, '.', FilterHTML,
+ 8, 0, 1, '.', FilterHTML,
"g) f<o\t<b>bar</b>\t non-terminated entity &",
"g) f<o..<b>bar</b>..... non-terminated entity &",
},
entry{
"7g debug",
- 8, 1, '.', FilterHTML | Debug,
+ 8, 0, 1, '.', FilterHTML | Debug,
"g) f<o\t<b>bar</b>\t non-terminated entity &",
"g) f<o..|<b>bar</b>.....| non-terminated entity &",
},
entry{
"8",
- 8, 1, '*', 0,
+ 8, 0, 1, '*', 0,
"Hello, world!\n",
"Hello, world!\n",
},
entry{
"9a",
- 1, 0, '.', 0,
+ 1, 0, 0, '.', 0,
"1\t2\t3\t4\n"
"11\t222\t3333\t44444\n",
entry{
"9b",
- 1, 0, '.', FilterHTML,
+ 1, 0, 0, '.', FilterHTML,
"1\t2<!---\f--->\t3\t4\n" // \f inside HTML is ignored
"11\t222\t3333\t44444\n",
entry{
"9c",
- 1, 0, '.', 0,
+ 1, 0, 0, '.', 0,
"1\t2\t3\t4\f" // \f causes a newline and flush
"11\t222\t3333\t44444\n",
entry{
"9c debug",
- 1, 0, '.', Debug,
+ 1, 0, 0, '.', Debug,
"1\t2\t3\t4\f" // \f causes a newline and flush
"11\t222\t3333\t44444\n",
entry{
"10a",
- 5, 0, '.', 0,
+ 5, 0, 0, '.', 0,
"1\t2\t3\t4\n",
"1....2....3....4\n",
},
entry{
"10b",
- 5, 0, '.', 0,
+ 5, 0, 0, '.', 0,
"1\t2\t3\t4\t\n",
"1....2....3....4....\n",
},
entry{
"11",
- 8, 1, '.', 0,
+ 8, 0, 1, '.', 0,
"本\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
"aaa\tbbbb\n",
entry{
"12a",
- 8, 1, ' ', AlignRight,
+ 8, 0, 1, ' ', AlignRight,
"a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n"
"aaa\tèèèè\t\n",
entry{
"12b",
- 2, 0, ' ', 0,
+ 2, 0, 0, ' ', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
"aaa\tbbbb\n",
entry{
"12c",
- 8, 1, '_', 0,
+ 8, 0, 1, '_', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
"aaa\tbbbb\n",
entry{
"13a",
- 4, 1, '-', 0,
+ 4, 0, 1, '-', 0,
"4444\t日本語\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
entry{
"13b",
- 4, 3, '.', 0,
+ 4, 0, 3, '.', 0,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
entry{
"13c",
- 8, 1, '\t', FilterHTML,
+ 8, 8, 1, '\t', FilterHTML,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
entry{
"14",
- 1, 2, ' ', AlignRight,
+ 1, 0, 2, ' ', AlignRight,
".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n"
"5.1\t12.0\t2.4\t-7.0\t\n"
entry{
"14 debug",
- 1, 2, ' ', AlignRight | Debug,
+ 1, 0, 2, ' ', AlignRight | Debug,
".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n"
"5.1\t12.0\t2.4\t-7.0\t\n"
entry{
"15a",
- 4, 0, '.', 0,
+ 4, 0, 0, '.', 0,
"a\t\tb",
"a.......b",
},
entry{
"15b",
- 4, 0, '.', DiscardEmptyColumns,
+ 4, 0, 0, '.', DiscardEmptyColumns,
"a\t\tb", // htabs - do not discard column
"a.......b",
},
entry{
"15c",
- 4, 0, '.', DiscardEmptyColumns,
+ 4, 0, 0, '.', DiscardEmptyColumns,
"a\v\vb",
"a...b",
},
entry{
"15d",
- 4, 0, '.', AlignRight | DiscardEmptyColumns,
+ 4, 0, 0, '.', AlignRight | DiscardEmptyColumns,
"a\v\vb",
"...ab",
},
entry{
"16a",
- 100, 0, '\t', 0,
+ 100, 100, 0, '\t', 0,
"a\tb\t\td\n"
"a\tb\t\td\te\n"
"a\n"
entry{
"16b",
- 100, 0, '\t', DiscardEmptyColumns,
+ 100, 100, 0, '\t', DiscardEmptyColumns,
"a\vb\v\vd\n"
"a\vb\v\vd\ve\n"
"a\n"
entry{
"16b debug",
- 100, 0, '\t', DiscardEmptyColumns | Debug,
+ 100, 100, 0, '\t', DiscardEmptyColumns | Debug,
"a\vb\v\vd\n"
"a\vb\v\vd\ve\n"
"a\n"
entry{
"16c",
- 100, 0, '\t', DiscardEmptyColumns,
+ 100, 100, 0, '\t', DiscardEmptyColumns,
"a\tb\t\td\n" // hard tabs - do not discard column
"a\tb\t\td\te\n"
"a\n"
entry{
"16c debug",
- 100, 0, '\t', DiscardEmptyColumns | Debug,
+ 100, 100, 0, '\t', DiscardEmptyColumns | Debug,
"a\tb\t\td\n" // hard tabs - do not discard column
"a\tb\t\td\te\n"
"a\n"
func Test(t *testing.T) {
for _, e := range tests {
- check(t, e.testname, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected)
+ check(t, e.testname, e.minwidth, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected)
}
}