// ----------------------------------------------------------------------------
-// ByteArray
+// Basic ByteArray support
type ByteArray struct {
a *[]byte;
}
+func (b *ByteArray) Len() int {
+ return len(b.a);
+}
+
+
func (b *ByteArray) Clear() {
b.a = b.a[0 : 0];
}
//
// Formatting can be controlled via parameters:
//
-// cellwidth minimal cell width
-// padding additional cell padding
-// padchar ASCII char used for padding
-// if padchar == '\t', the Writer will assume that the
-// width of a '\t' in the formatted output is tabwith,
-// and cells are left-aligned independent of align_left
-// (for correct-looking results, cellwidth must correspond
-// to the tabwidth in the editor used to look at the result)
-
+// cellwidth minimal cell width
+// padding additional cell padding
+// padchar ASCII char used for padding
+// if padchar == '\t', the Writer will assume that the
+// width of a '\t' in the formatted output is cellwidth,
+// and cells are left-aligned independent of align_left
+// (for correct-looking results, cellwidth must correspond
+// to the tabwidth in the viewer displaying the result)
+// filter_html ignores html tags and handles entities (starting with '&'
+// and ending in ';') as single characters (width = 1)
export type Writer struct {
// TODO should not export any of the fields
padding int;
padbytes [8]byte;
align_left bool;
+ filter_html bool;
// current state
+ html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
buf ByteArray; // collected text w/o tabs and newlines
- size int; // size of last incomplete cell in bytes
- width int; // width of last incomplete cell in runes
+ size int; // size of incomplete cell in bytes
+ width int; // width of incomplete cell in runes up to buf[pos] w/o ignored sections
+ pos int; // buffer position up to which width of incomplete cell has been computed
lines_size array.Array; // list of lines; each line is a list of cell sizes in bytes
lines_width array.Array; // list of lines; each line is a list of cell widths in runes
widths array.IntArray; // list of column widths in runes - re-used during formatting
}
+// Internal representation (current state):
+//
+// - all text written is appended to buf; tabs and newlines are stripped away
+// - at any given time there is a (possibly empty) incomplete cell at the end
+// (the cell starts after a tab or newline)
+// - size is the number of bytes belonging to the cell so far
+// - width is text width in runes of that cell from the start of the cell to
+// position pos; html tags and entities are excluded from this width if html
+// filtering is enabled
+// - the sizes and widths of processed text are kept in the lines_size and
+// lines_width arrays, which contain an array of sizes or widths for each line
+// - the widths array is a temporary array with current widths used during
+// formatting; it is kept in Writer because it's re-used
+//
+// |<---------- size ---------->|
+// | |
+// |<- width ->|<- ignored ->| |
+// | | | |
+// [---processed---tab------------<tag>...</tag>...]
+// ^ ^ ^
+// | | |
+// buf start of incomplete cell pos
+
func (b *Writer) AddLine() {
b.lines_size.Push(array.NewIntArray(0));
}
-func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, align_left bool) *Writer {
+func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, align_left, filter_html bool) *Writer {
if cellwidth < 0 {
panic("negative cellwidth");
}
b.padbytes[i] = padchar;
}
b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment
-
+ b.filter_html = filter_html;
+
b.buf.Init(1024);
b.lines_size.Init(0);
b.lines_width.Init(0);
s, w := line_size.At(j), line_width.At(j);
if b.align_left {
- err = b.Write0(b.buf.a[pos : pos + s]);
+ err = b.Write0(b.buf.Slice(pos, pos + s));
if err != nil {
goto exit;
}
goto exit;
}
}
- err = b.Write0(b.buf.a[pos : pos + s]);
+ err = b.Write0(b.buf.Slice(pos, pos + s));
if err != nil {
goto exit;
}
if i+1 == b.lines_size.Len() {
// last buffered line - we don't have a newline, so just write
// any outstanding buffered data
- err = b.Write0(b.buf.a[pos : pos + b.size]);
+ err = b.Write0(b.buf.Slice(pos, pos + b.size));
pos += b.size;
- b.size, b.width = 0, 0;
} else {
// not the last line - write newline
err = b.Write0(Newline);
}
+/* export */ func (b *Writer) Flush() *os.Error {
+ dummy, err := b.Format(0, 0, b.lines_size.Len());
+ // reset (even in the presence of errors)
+ b.buf.Clear();
+ b.size, b.width = 0, 0;
+ b.pos = 0;
+ b.lines_size.Init(0);
+ b.lines_width.Init(0);
+ b.AddLine();
+ return err;
+}
+
+
func UnicodeLen(buf *[]byte) int {
l := 0;
for i := 0; i < len(buf); {
func (b *Writer) Append(buf *[]byte) {
b.buf.Append(buf);
b.size += len(buf);
- b.width += UnicodeLen(buf);
-}
-
-
-/* export */ func (b *Writer) Flush() *os.Error {
- dummy, err := b.Format(0, 0, b.lines_size.Len());
- // reset (even in the presence of errors)
- b.buf.Clear();
- b.size, b.width = 0, 0;
- b.lines_size.Init(0);
- b.lines_width.Init(0);
- b.AddLine();
- return err;
}
/* export */ func (b *Writer) Write(buf *[]byte) (written int, err *os.Error) {
i0, n := 0, len(buf);
-
+
// split text into cells
for i := 0; i < n; i++ {
- if ch := buf[i]; ch == '\t' || ch == '\n' {
- b.Append(buf[i0 : i]);
- i0 = i + 1; // exclude ch from (next) cell
-
- // terminate cell
- last_size, last_width := b.Line(b.lines_size.Len() - 1);
- last_size.Push(b.size);
- last_width.Push(b.width);
- b.size, b.width = 0, 0;
-
- if ch == '\n' {
- b.AddLine();
- if last_size.Len() == 1 {
- // The previous line has only one cell which does not have
- // an impact on the formatting of the following lines (the
- // last cell per line is ignored by Format), thus we can
- // flush the Writer contents.
- err = b.Flush();
- if err != nil {
- return i0, err;
+ ch := buf[i];
+
+ if b.html_char == 0 {
+ // outside html tag/entity
+ switch ch {
+ case '\t', '\n':
+ b.Append(buf[i0 : i]);
+ i0 = i + 1; // exclude ch from (next) cell
+ b.width += UnicodeLen(b.buf.Slice(b.pos, b.buf.Len()));
+ b.pos = b.buf.Len();
+
+ // terminate cell
+ last_size, last_width := b.Line(b.lines_size.Len() - 1);
+ last_size.Push(b.size);
+ last_width.Push(b.width);
+ b.size, b.width = 0, 0;
+
+ if ch == '\n' {
+ b.AddLine();
+ if last_size.Len() == 1 {
+ // The previous line has only one cell which does not have
+ // an impact on the formatting of the following lines (the
+ // last cell per line is ignored by Format), thus we can
+ // flush the Writer contents.
+ err = b.Flush();
+ if err != nil {
+ return i0, err;
+ }
+ }
+ }
+
+ case '<', '&':
+ if b.filter_html {
+ b.Append(buf[i0 : i]);
+ i0 = i;
+ b.width += UnicodeLen(b.buf.Slice(b.pos, b.buf.Len()));
+ b.pos = -1; // preventative - should not be used (will cause index out of bounds)
+ if ch == '<' {
+ b.html_char = '>';
+ } else {
+ b.html_char = ';';
}
}
}
+
+ } else {
+ // inside html tag/entity
+ if ch == b.html_char {
+ // reached the end of tag/entity
+ b.Append(buf[i0 : i + 1]);
+ i0 = i + 1;
+ if b.html_char == ';' {
+ b.width++; // count as one char
+ }
+ b.pos = b.buf.Len();
+ b.html_char = 0;
+ }
}
}
}
-export func New(writer io.Write, cellwidth, padding int, padchar byte, align_left bool) *Writer {
- return new(Writer).Init(writer, cellwidth, padding, padchar, align_left)
+export func New(writer io.Write, cellwidth, padding int, padchar byte, align_left, filter_html bool) *Writer {
+ return new(Writer).Init(writer, cellwidth, padding, padchar, align_left, filter_html)
}
}
+func (b *Buffer) Clear() {
+ b.a = b.a[0 : 0];
+}
+
+
func (b *Buffer) Write(buf *[]byte) (written int, err *os.Error) {
n := len(b.a);
m := len(buf);
}
-func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left bool, src, expected string) {
- var b Buffer;
- b.Init(1000);
-
- var w tabwriter.Writer;
- w.Init(&b, tabwidth, padding, padchar, align_left);
-
- written, err := io.WriteString(&w, src);
+func Write(t *testing.T, w *tabwriter.Writer, src string) {
+ written, err := io.WriteString(w, src);
if err != nil {
t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err);
}
if written != len(src) {
t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src));
}
+}
+
- err = w.Flush();
+func Verify(t *testing.T, w *tabwriter.Writer, b *Buffer, src, expected string) {
+ err := w.Flush();
if err != nil {
t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err);
}
}
+func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left, filter_html bool, src, expected string) {
+ var b Buffer;
+ b.Init(1000);
+
+ var w tabwriter.Writer;
+ w.Init(&b, tabwidth, padding, padchar, align_left, filter_html);
+
+ // write all at once
+ b.Clear();
+ Write(t, &w, src);
+ Verify(t, &w, &b, src, expected);
+
+ // write byte-by-byte
+ b.Clear();
+ for i := 0; i < len(src); i++ {
+ Write(t, &w, src[i : i+1]);
+ }
+ Verify(t, &w, &b, src, expected);
+
+ // write using Fibonacci slice sizes
+ b.Clear();
+ for i, d := 0, 0; i < len(src); {
+ Write(t, &w, src[i : i+d]);
+ i, d = i+d, d+1;
+ if i+d > len(src) {
+ d = len(src) - i;
+ }
+ }
+ Verify(t, &w, &b, src, expected);
+}
+
+
export func Test(t *testing.T) {
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"",
""
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"\n\n\n",
"\n\n\n"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"a\nb\nc",
"a\nb\nc"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
);
Check(
- t, 8, 1, '.', false,
+ t, 8, 1, '.', false, false,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"*\t*",
"**"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"*\t*\n",
"*.......*\n"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"*\t*\t",
"*.......*"
);
Check(
- t, 8, 1, '.', false,
+ t, 8, 1, '.', false, false,
"*\t*\t",
".......**"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"\t\n",
"........\n"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"a) foo",
"a) foo"
);
Check(
- t, 8, 1, ' ', true,
+ t, 8, 1, ' ', true, false,
"b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed
"b) foobar"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"c) foo\tbar\t",
"c) foo..bar"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"d) foo\tbar\n",
"d) foo..bar\n"
);
Check(
- t, 8, 1, '.', true,
+ t, 8, 1, '.', true, false,
"e) foo\tbar\t\n",
"e) foo..bar.....\n"
);
Check(
- t, 8, 1, '*', true,
+ t, 8, 1, '.', true, true,
+ "e) f<o\t<b>bar</b>\t\n",
+ "e) f<o..<b>bar</b>.....\n"
+ );
+
+ Check(
+ t, 8, 1, '*', true, false,
"Hello, world!\n",
"Hello, world!\n"
);
Check(
- t, 0, 0, '.', true,
+ t, 0, 0, '.', true, false,
"1\t2\t3\t4\n"
"11\t222\t3333\t44444\n",
);
Check(
- t, 5, 0, '.', true,
+ t, 5, 0, '.', true, false,
"1\t2\t3\t4\n",
"1....2....3....4\n"
);
Check(
- t, 5, 0, '.', true,
+ t, 5, 0, '.', true, false,
"1\t2\t3\t4\t\n",
"1....2....3....4....\n"
);
Check(
- t, 8, 1, ' ', true,
+ t, 8, 1, '.', true, false,
"本\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
"aaa\tbbbb\n",
- "本 b c\n"
- "aa 本本本 cccc ddddd\n"
- "aaa bbbb\n"
+ "本.......b.......c\n"
+ "aa......本本本.....cccc....ddddd\n"
+ "aaa.....bbbb\n"
);
Check(
- t, 8, 1, ' ', false,
+ t, 8, 1, ' ', false, false,
"a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n"
"aaa\tèèèè\t\n",
);
Check(
- t, 2, 0, ' ', true,
+ t, 2, 0, ' ', true, false,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
"aaa\tbbbb\n",
);
Check(
- t, 8, 1, '_', true,
+ t, 8, 1, '_', true, false,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
"aaa\tbbbb\n",
);
Check(
- t, 4, 1, '-', true,
+ t, 4, 1, '-', true, false,
"4444\t日本語\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
);
Check(
- t, 4, 3, '.', true,
+ t, 4, 3, '.', true, false,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
);
Check(
- t, 8, 1, '\t', true,
+ t, 8, 1, '\t', true, true,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
"\t\t\t88888888\n"
"\n"
"666666\t666666\t666666\t4444\n"
- "1\t1\t999999999\t0000000000\n",
+ "1\t1\t<font color=red attr=日本語>999999999</font>\t0000000000\n",
"4444\t\t333\t22\t1\t333\n"
"999999999\t22\n"
"\t\t\t\t88888888\n"
"\n"
"666666\t666666\t666666\t\t4444\n"
- "1\t1\t999999999\t0000000000\n"
+ "1\t1\t<font color=red attr=日本語>999999999</font>\t0000000000\n"
);
Check(
- t, 0, 2, ' ', false,
+ t, 0, 2, ' ', false, false,
".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n"
"5.1\t12.0\t2.4\t-7.0\t\n"